/*
 * include/common/memory.h
 * Memory management definitions.
 *
 * Copyright (C) 2000-2014 Willy Tarreau - w@1wt.eu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _COMMON_MEMORY_H
#define _COMMON_MEMORY_H

#include <sys/mman.h>

#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#include <common/config.h>
#include <common/mini-clist.h>
#include <common/hathreads.h>

#ifndef DEBUG_DONT_SHARE_POOLS
#define MEM_F_SHARED	0x1
#else
#define MEM_F_SHARED	0
#endif
#define MEM_F_EXACT	0x2

/* reserve an extra void* at the end of a pool for linking */
#ifdef DEBUG_MEMORY_POOLS
#define POOL_EXTRA (sizeof(void *))
#define POOL_LINK(pool, item) (void **)(((char *)item) + (pool->size))
#else
#define POOL_EXTRA (0)
#define POOL_LINK(pool, item) ((void **)(item))
#endif
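
/* Illustrative layout only (assuming DEBUG_MEMORY_POOLS and pool->size == 32):
 *
 *   item --> [ 32 bytes of user data ][ void *link ]   <-- POOL_EXTRA appended
 *                                       ^-- POOL_LINK(pool, item)
 *
 * While the item sits in the free list, the link points to the next free item;
 * once allocated, the code below overwrites it with the owning pool so that
 * pool_free() can catch double-frees and frees to the wrong pool. Without
 * DEBUG_MEMORY_POOLS the link simply overlays the first bytes of the unused
 * area and nothing extra is appended.
 */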

#define MAX_BASE_POOLS 32

#ifdef CONFIG_HAP_LOCKLESS_POOLS
struct pool_free_list {
	void **free_list;
	uintptr_t seq;
};
#endif

struct pool_head {
	void **free_list;
#ifdef CONFIG_HAP_LOCKLESS_POOLS
	uintptr_t seq;
#else
	__decl_hathreads(HA_SPINLOCK_T lock); /* the spin lock */
#endif
	unsigned int used;	/* how many chunks are currently in use */
	unsigned int allocated;	/* how many chunks have been allocated */
	unsigned int limit;	/* hard limit on the number of chunks */
	unsigned int minavail;	/* how many chunks are expected to be used */
	unsigned int size;	/* chunk size */
	unsigned int flags;	/* MEM_F_* */
	unsigned int users;	/* number of pools sharing this zone */
	unsigned int failed;	/* failed allocations */
	struct list list;	/* list of all known pools */
	char name[12];		/* name of the pool */
} __attribute__((aligned(64)));


extern struct pool_head pool_base_start[MAX_BASE_POOLS];
extern unsigned int pool_base_count;

/* poison each newly allocated area with this byte if >= 0 */
extern int mem_poison_byte;

/* Allocates new entries for pool <pool> until there are at least <avail> + 1
 * available, then returns the last one for immediate use, so that at least
 * <avail> are left available in the pool upon return. NULL is returned if the
 * last entry could not be allocated. It's important to note that at least one
 * allocation is always performed even if there are enough entries in the pool.
 * A call to the garbage collector is performed at most once in case malloc()
 * returns an error, before returning NULL.
 */
void *__pool_refill_alloc(struct pool_head *pool, unsigned int avail);
void *pool_refill_alloc(struct pool_head *pool, unsigned int avail);
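
/* Usage sketch (illustrative names only; <pool_head_task> stands for any
 * previously created pool):
 *
 *   struct task *t = pool_refill_alloc(pool_head_task, 2);
 *   if (!t)
 *       return NULL;   // allocation failed even after the garbage collector ran
 *
 * On success, at least 2 spare entries remain available in the pool in
 * addition to the one returned.
 */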

/* Try to find an existing shared pool with the same characteristics and
 * return it, otherwise create a new one. NULL is returned if no memory
 * is available for a new creation.
 */
struct pool_head *create_pool(char *name, unsigned int size, unsigned int flags);
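
/* Typical setup (illustrative names): a pool is usually created once at
 * startup and kept in a global pointer, e.g.:
 *
 *   static struct pool_head *pool_head_conn;
 *
 *   pool_head_conn = create_pool("conn", sizeof(struct my_conn), MEM_F_SHARED);
 *   if (!pool_head_conn)
 *       exit(1);
 *
 * With MEM_F_SHARED, a pre-existing pool of a compatible size may be returned
 * with its <users> count bumped instead of allocating a new descriptor.
 */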

/* Dump statistics on pools usage. */
void dump_pools_to_trash(void);
void dump_pools(void);
int pool_total_failures(void);
unsigned long pool_total_allocated(void);
unsigned long pool_total_used(void);

/*
 * This function frees whatever can be freed in pool <pool>.
 */
void pool_flush(struct pool_head *pool);

/*
 * This function frees whatever can be freed in all pools, but respecting
 * the minimum thresholds imposed by owners.
 *
 * <pool_ctx> is used when pool_gc is called to release resources to allocate
 * an element in __pool_refill_alloc. It is important because <pool_ctx> is
 * already locked, so its lock must not be taken again here.
 */
void pool_gc(struct pool_head *pool_ctx);

/*
 * This function destroys a pool by freeing it completely.
 * This should be called only under extreme circumstances.
 */
void *pool_destroy(struct pool_head *pool);

/* returns the pool index for pool <pool>, or -1 if this pool has no index */
static inline ssize_t pool_get_index(const struct pool_head *pool)
{
	size_t idx;

	idx = pool - pool_base_start;
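	/* e.g. pool == &pool_base_start[2] yields 2. <idx> is unsigned, so a
	 * pool allocated outside pool_base_start[] (even one located before
	 * the array) wraps to a huge value and is correctly rejected below.
	 */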
	if (idx >= MAX_BASE_POOLS)
		return -1;
	return idx;
}

#ifdef CONFIG_HAP_LOCKLESS_POOLS
/*
 * Returns a pointer to an object taken from the pool <pool> if one is
 * available, otherwise returns NULL. No malloc() is attempted, and poisoning
 * is never performed. The purpose is to get the fastest possible allocation.
 */
static inline void *__pool_get_first(struct pool_head *pool)
{
	struct pool_free_list cmp, new;

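	/* Lock-free pop: snapshot the {free_list, seq} pair, compute the new
	 * head, and try to swing both words at once with a double-word CAS.
	 * The sequence number is bumped on every successful removal so that a
	 * head which was popped and pushed back in between (ABA) cannot be
	 * mistaken for an unchanged list.
	 */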
	cmp.seq = pool->seq;
	__ha_barrier_load();

	cmp.free_list = pool->free_list;
	do {
		if (cmp.free_list == NULL)
			return NULL;
		new.seq = cmp.seq + 1;
		__ha_barrier_load();
		new.free_list = *POOL_LINK(pool, cmp.free_list);
	} while (__ha_cas_dw((void *)&pool->free_list, (void *)&cmp, (void *)&new) == 0);

	HA_ATOMIC_ADD(&pool->used, 1);
#ifdef DEBUG_MEMORY_POOLS
	/* keep track of where the element was allocated from */
	*POOL_LINK(pool, cmp.free_list) = (void *)pool;
#endif
	return cmp.free_list;
}

static inline void *pool_get_first(struct pool_head *pool)
{
	void *ret;

	ret = __pool_get_first(pool);
	return ret;
}

/*
 * Returns a pointer to an object taken from the pool <pool> or dynamically
 * allocated. In the first case, the pool's free list is updated to point to
 * the next element. No memory poisoning is ever performed on the returned
 * area.
 */
static inline void *pool_alloc_dirty(struct pool_head *pool)
{
	void *p;

	if ((p = __pool_get_first(pool)) == NULL)
		p = __pool_refill_alloc(pool, 0);
	return p;
}

/*
 * Returns a pointer to an object taken from the pool <pool> or dynamically
 * allocated. In the first case, the pool's free list is updated to point to
 * the next element. Memory poisoning is performed if enabled.
 */
static inline void *pool_alloc(struct pool_head *pool)
{
	void *p;

	p = pool_alloc_dirty(pool);
#ifdef DEBUG_MEMORY_POOLS
	if (p) {
		/* keep track of where the element was allocated from */
		*POOL_LINK(pool, p) = (void *)pool;
	}
#endif
	if (p && mem_poison_byte >= 0) {
		memset(p, mem_poison_byte, pool->size);
	}

	return p;
}

/*
 * Puts a memory area back to the corresponding pool.
 * Items are chained directly through a pointer that
 * is written in the beginning of the memory area, so
 * there's no need for any carrier cell. This implies
 * that each memory area is at least as big as one
 * pointer. Just like with the libc's free(), nothing
 * is done if <ptr> is NULL.
 */
static inline void pool_free(struct pool_head *pool, void *ptr)
{
	if (likely(ptr != NULL)) {
		void *free_list;
#ifdef DEBUG_MEMORY_POOLS
		/* we'll get late corruption if we refill to the wrong pool or double-free */
		if (*POOL_LINK(pool, ptr) != (void *)pool)
			*(volatile int *)0 = 0;
#endif
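		/* Lock-free push: link the freed item to the current head, then
		 * try to swing free_list to it; if the CAS fails, <free_list>
		 * is reloaded with the new head and the loop retries.
		 */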
		free_list = pool->free_list;
		do {
			*POOL_LINK(pool, ptr) = (void *)free_list;
			__ha_barrier_store();
		} while (!HA_ATOMIC_CAS(&pool->free_list, (void *)&free_list, ptr));

		HA_ATOMIC_SUB(&pool->used, 1);
	}
}

#else /* CONFIG_HAP_LOCKLESS_POOLS */
/*
 * Returns a pointer to an object taken from the pool <pool> if one is
 * available, otherwise returns NULL. No malloc() is attempted, and poisoning
 * is never performed. The purpose is to get the fastest possible allocation.
 */
static inline void *__pool_get_first(struct pool_head *pool)
{
	void *p;

	if ((p = pool->free_list) != NULL) {
		pool->free_list = *POOL_LINK(pool, p);
		pool->used++;
#ifdef DEBUG_MEMORY_POOLS
		/* keep track of where the element was allocated from */
		*POOL_LINK(pool, p) = (void *)pool;
#endif
	}
	return p;
}

static inline void *pool_get_first(struct pool_head *pool)
{
	void *ret;

	HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
	ret = __pool_get_first(pool);
	HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
	return ret;
}

/*
 * Returns a pointer to an object taken from the pool <pool> or dynamically
 * allocated. In the first case, the pool's free list is updated to point to
 * the next element. No memory poisoning is ever performed on the returned
 * area.
 */
static inline void *pool_alloc_dirty(struct pool_head *pool)
{
	void *p;

	HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
	if ((p = __pool_get_first(pool)) == NULL)
		p = __pool_refill_alloc(pool, 0);
	HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
	return p;
}

#ifndef DEBUG_UAF /* normal allocator */

/* allocates an area of size <size> and returns it. The semantics are similar
 * to those of malloc().
 */
static inline void *pool_alloc_area(size_t size)
{
	return malloc(size);
}

/* frees an area <area> of size <size> allocated by pool_alloc_area(). The
 * semantics are identical to free() except that the size is specified and
 * may be ignored.
 */
static inline void pool_free_area(void *area, size_t __maybe_unused size)
{
	free(area);
}

#else /* use-after-free detector */

/* allocates an area of size <size> and returns it. The semantics are similar
 * to those of malloc(). However the allocation is rounded up to 4kB so that a
 * full page is allocated. This ensures the object can be freed alone so that
 * future dereferences are easily detected. The returned object is always
 * 16-byte aligned to avoid issues with unaligned structure objects. In case
 * some padding is added, the area's start address is copied at the end of the
 * padding to help detect underflows.
 */
static inline void *pool_alloc_area(size_t size)
{
	size_t pad = (4096 - size) & 0xFF0;
	void *ret;

	ret = mmap(NULL, (size + 4095) & -4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
	if (ret == MAP_FAILED)
		return NULL;
	if (pad >= sizeof(void *))
		*(void **)(ret + pad - sizeof(void *)) = ret + pad;
	return ret + pad;
}
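
/* Worked example (illustrative): for size == 100, pad = (4096 - 100) & 0xFF0
 * = 3984 and a single 4096-byte page is mapped. The caller receives ret + 3984,
 * which is 16-byte aligned and leaves only 12 bytes between the end of the
 * object and the end of the page, so most overflows, and any access after the
 * page is unmapped by pool_free_area(), fault immediately. With 8-byte
 * pointers, the start address (ret + 3984) is also stored at ret + 3976 so
 * that pool_free_area() can detect underflows.
 */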

/* frees an area <area> of size <size> allocated by pool_alloc_area(). The
 * semantics are identical to free() except that the size must absolutely match
 * the one passed to pool_alloc_area(). In case some padding is added, the
 * area's start address is compared to the one at the end of the padding, and
 * a segfault is triggered if they don't match, indicating an underflow.
 */
static inline void pool_free_area(void *area, size_t size)
{
	size_t pad = (4096 - size) & 0xFF0;

	if (pad >= sizeof(void *) && *(void **)(area - sizeof(void *)) != area)
		*(volatile int *)0 = 0;

	munmap(area - pad, (size + 4095) & -4096);
}

#endif /* DEBUG_UAF */

/*
 * Returns a pointer to an object taken from the pool <pool> or dynamically
 * allocated. In the first case, the pool's free list is updated to point to
 * the next element. Memory poisoning is performed if enabled.
 */
static inline void *pool_alloc(struct pool_head *pool)
{
	void *p;

	p = pool_alloc_dirty(pool);
#ifdef DEBUG_MEMORY_POOLS
	if (p) {
		HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
		/* keep track of where the element was allocated from */
		*POOL_LINK(pool, p) = (void *)pool;
		HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
	}
#endif
	if (p && mem_poison_byte >= 0) {
		memset(p, mem_poison_byte, pool->size);
	}

	return p;
}

/*
 * Puts a memory area back to the corresponding pool.
 * Items are chained directly through a pointer that
 * is written in the beginning of the memory area, so
 * there's no need for any carrier cell. This implies
 * that each memory area is at least as big as one
 * pointer. Just like with the libc's free(), nothing
 * is done if <ptr> is NULL.
 */
static inline void pool_free(struct pool_head *pool, void *ptr)
{
	if (likely(ptr != NULL)) {
		HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
#ifdef DEBUG_MEMORY_POOLS
		/* we'll get late corruption if we refill to the wrong pool or double-free */
		if (*POOL_LINK(pool, ptr) != (void *)pool)
			*(volatile int *)0 = 0;
#endif

#ifndef DEBUG_UAF /* normal pool behaviour */
		*POOL_LINK(pool, ptr) = (void *)pool->free_list;
		pool->free_list = (void *)ptr;
#else  /* release the entry for real to detect use after free */
		/* ensure we crash on double free or free of a const area */
		*(uint32_t *)ptr = 0xDEADADD4;
		pool_free_area(ptr, pool->size + POOL_EXTRA);
		pool->allocated--;
#endif /* DEBUG_UAF */
		pool->used--;
		HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
	}
}
#endif /* CONFIG_HAP_LOCKLESS_POOLS */
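
/* Putting it together (illustrative names only, not part of this API):
 * whichever locking model was selected above, callers follow the same pattern:
 *
 *   struct my_obj *obj = pool_alloc(pool_head_my_obj);
 *   if (!obj)
 *       return NULL;                       // pool empty and malloc() failed
 *   ...
 *   pool_free(pool_head_my_obj, obj);      // returned to the pool, not free()d
 *
 * When built with DEBUG_UAF (locked pools above), the freed area is really
 * unmapped, so a later dereference of <obj> crashes immediately instead of
 * silently corrupting the free list.
 */
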
#endif /* _COMMON_MEMORY_H */

/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */