/*
 * include/common/memory.h
 * Memory management definitions.
 *
 * Copyright (C) 2000-2014 Willy Tarreau - w@1wt.eu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _COMMON_MEMORY_H
#define _COMMON_MEMORY_H

#include <sys/mman.h>

#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#include <common/config.h>
#include <common/mini-clist.h>
#include <common/hathreads.h>

#ifndef DEBUG_DONT_SHARE_POOLS
#define MEM_F_SHARED 0x1
#else
#define MEM_F_SHARED 0
#endif
#define MEM_F_EXACT 0x2
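/* Reading of the flags above (informative, inferred from their use elsewhere
 * in the code base): MEM_F_SHARED lets create_pool() merge pools of the same
 * size into a single shared pool; building with DEBUG_DONT_SHARE_POOLS turns
 * the flag into 0 and effectively disables sharing. MEM_F_EXACT asks for the
 * requested size to be kept as-is instead of being rounded up.
 */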

/* reserve an extra void* at the end of a pool for linking */
#ifdef DEBUG_MEMORY_POOLS
#define POOL_EXTRA (sizeof(void *))
#define POOL_LINK(pool, item) (void **)(((char *)item) + (pool->size))
#else
#define POOL_EXTRA (0)
#define POOL_LINK(pool, item) ((void **)(item))
#endif

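/* Illustration (informative only): with DEBUG_MEMORY_POOLS, each object is
 * followed by one extra pointer and POOL_LINK() addresses that trailing word:
 *
 *         [ user data: pool->size bytes ][ void *link ]
 *                                          ^ POOL_LINK(pool, item)
 *
 * Without the option, POOL_LINK() simply aliases the first word of the object,
 * which is only written while the object sits on the free list. Either way an
 * object must be able to hold at least one pointer.
 */
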
#ifdef CONFIG_HAP_LOCKLESS_POOLS
struct pool_free_list {
        void **free_list;
        uintptr_t seq;
};
#endif

struct pool_head {
        void **free_list;
#ifdef CONFIG_HAP_LOCKLESS_POOLS
        uintptr_t seq;
#else
        __decl_hathreads(HA_SPINLOCK_T lock); /* the spin lock */
#endif
        unsigned int used;      /* how many chunks are currently in use */
        unsigned int allocated; /* how many chunks have been allocated */
        unsigned int limit;     /* hard limit on the number of chunks */
        unsigned int minavail;  /* how many chunks are expected to be used */
        unsigned int size;      /* chunk size */
        unsigned int flags;     /* MEM_F_* */
        unsigned int users;     /* number of pools sharing this zone */
        unsigned int failed;    /* failed allocations */
        struct list list;       /* list of all known pools */
        char name[12];          /* name of the pool */
} __attribute__((aligned(64)));
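/* (The 64-byte alignment above matches a typical cache line; keeping each
 * pool head in its own line(s) presumably limits false sharing between pools
 * used by different threads.)
 */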

/* poison each newly allocated area with this byte if >= 0 */
extern int mem_poison_byte;

/* Allocates new entries for pool <pool> until there are at least <avail> + 1
 * available, then returns the last one for immediate use, so that at least
 * <avail> are left available in the pool upon return. NULL is returned if the
 * last entry could not be allocated. It's important to note that at least one
 * allocation is always performed even if there are enough entries in the pool.
 * A call to the garbage collector is performed at most once in case malloc()
 * returns an error, before returning NULL.
 */
void *__pool_refill_alloc(struct pool_head *pool, unsigned int avail);
void *pool_refill_alloc(struct pool_head *pool, unsigned int avail);

/* Tries to find an existing shared pool with the same characteristics and
 * returns it, otherwise creates this one. NULL is returned if no memory
 * is available for a new creation.
 */
struct pool_head *create_pool(char *name, unsigned int size, unsigned int flags);

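/* Typical usage sketch (informative only; the pool and structure names below
 * are hypothetical, not declared in this header):
 *
 *         static struct pool_head *pool_head_conn;
 *
 *         pool_head_conn = create_pool("conn", sizeof(struct conn), MEM_F_SHARED);
 *         ...
 *         struct conn *c = pool_alloc(pool_head_conn);
 *         if (c) {
 *                 ...
 *                 pool_free(pool_head_conn, c);
 *         }
 */
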
/* Dump statistics on pools usage.
 */
void dump_pools_to_trash(void);
void dump_pools(void);
int pool_total_failures(void);
unsigned long pool_total_allocated(void);
unsigned long pool_total_used(void);

/*
 * This function frees whatever can be freed in pool <pool>.
 */
void pool_flush(struct pool_head *pool);

/*
 * This function frees whatever can be freed in all pools, while respecting
 * the minimum thresholds imposed by owners.
 *
 * <pool_ctx> is used when pool_gc is called to release resources to allocate
 * an element in __pool_refill_alloc. It is important because <pool_ctx> is
 * already locked, so we need to skip the lock here.
 */
void pool_gc(struct pool_head *pool_ctx);

/*
 * This function destroys a pool by freeing it completely.
 * This should be called only under extreme circumstances.
 */
void *pool_destroy(struct pool_head *pool);

#ifdef CONFIG_HAP_LOCKLESS_POOLS
/*
 * Returns a pointer to an entry taken from pool <pool> if one is available,
 * otherwise returns NULL. No malloc() is attempted, and poisoning is never
 * performed. The purpose is to get the fastest possible allocation.
 */
static inline void *__pool_get_first(struct pool_head *pool)
{
        struct pool_free_list cmp, new;

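        /* Take a snapshot of the list head and its sequence number, then try
         * to swing the head to the next element with a double-word CAS. The
         * sequence number is bumped on each successful removal, so if the
         * same head pointer reappears after concurrent pops/pushes (ABA) the
         * sequence will no longer match and the CAS will fail, forcing a
         * clean retry.
         */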
        cmp.seq = pool->seq;
        __ha_barrier_load();

        cmp.free_list = pool->free_list;
        do {
                if (cmp.free_list == NULL)
                        return NULL;
                new.seq = cmp.seq + 1;
                __ha_barrier_load();
                new.free_list = *POOL_LINK(pool, cmp.free_list);
        } while (__ha_cas_dw((void *)&pool->free_list, (void *)&cmp, (void *)&new) == 0);

        HA_ATOMIC_ADD(&pool->used, 1);
#ifdef DEBUG_MEMORY_POOLS
        /* keep track of where the element was allocated from */
        *POOL_LINK(pool, cmp.free_list) = (void *)pool;
#endif
        return cmp.free_list;
}

static inline void *pool_get_first(struct pool_head *pool)
{
        void *ret;

        ret = __pool_get_first(pool);
        return ret;
}
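/* (In the lockless model the wrapper above adds nothing over
 * __pool_get_first(); it only keeps the same entry point as the locking
 * variant defined further below.)
 */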
/*
 * Returns a pointer to an entry taken from pool <pool>, or dynamically
 * allocated when the pool is empty. In the first case, the pool's free list
 * is updated to point to the next element. No memory poisoning is ever
 * performed on the returned area.
 */
static inline void *pool_alloc_dirty(struct pool_head *pool)
{
        void *p;

        if ((p = __pool_get_first(pool)) == NULL)
                p = __pool_refill_alloc(pool, 0);
        return p;
}

/*
 * Returns a pointer to an entry taken from pool <pool>, or dynamically
 * allocated when the pool is empty. In the first case, the pool's free list
 * is updated to point to the next element. Memory poisoning is performed if
 * enabled.
 */
static inline void *pool_alloc(struct pool_head *pool)
{
        void *p;

        p = pool_alloc_dirty(pool);
#ifdef DEBUG_MEMORY_POOLS
        if (p) {
                /* keep track of where the element was allocated from */
                *POOL_LINK(pool, p) = (void *)pool;
        }
#endif
        if (p && mem_poison_byte >= 0) {
                memset(p, mem_poison_byte, pool->size);
        }

        return p;
}

/*
 * Puts a memory area back to the corresponding pool.
 * Items are chained directly through a pointer that
 * is written in the beginning of the memory area, so
 * there's no need for any carrier cell. This implies
 * that each memory area is at least as big as one
 * pointer. Just like with the libc's free(), nothing
 * is done if <ptr> is NULL.
 */
static inline void pool_free(struct pool_head *pool, void *ptr)
{
        if (likely(ptr != NULL)) {
                void *free_list;
#ifdef DEBUG_MEMORY_POOLS
                /* we'll get late corruption if we refill to the wrong pool or double-free */
                if (*POOL_LINK(pool, ptr) != (void *)pool)
                        *(volatile int *)0 = 0;
#endif
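                /* Push <ptr> back as the new list head: store the current
                 * head into the object's link word, then CAS the head from
                 * that value to <ptr>. On failure, <free_list> is refreshed
                 * with the current head and the loop retries.
                 */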
                free_list = pool->free_list;
                do {
                        *POOL_LINK(pool, ptr) = (void *)free_list;
                        __ha_barrier_store();
                } while (!HA_ATOMIC_CAS(&pool->free_list, (void *)&free_list, ptr));

                HA_ATOMIC_SUB(&pool->used, 1);
        }
}

#else /* CONFIG_HAP_LOCKLESS_POOLS */
/*
 * Returns a pointer to an entry taken from pool <pool> if one is available,
 * otherwise returns NULL. No malloc() is attempted, and poisoning is never
 * performed. The purpose is to get the fastest possible allocation.
 */
static inline void *__pool_get_first(struct pool_head *pool)
{
        void *p;

        if ((p = pool->free_list) != NULL) {
                pool->free_list = *POOL_LINK(pool, p);
                pool->used++;
#ifdef DEBUG_MEMORY_POOLS
                /* keep track of where the element was allocated from */
                *POOL_LINK(pool, p) = (void *)pool;
#endif
        }
        return p;
}

static inline void *pool_get_first(struct pool_head *pool)
{
        void *ret;

        HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
        ret = __pool_get_first(pool);
        HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
        return ret;
}
/*
 * Returns a pointer to an entry taken from pool <pool>, or dynamically
 * allocated when the pool is empty. In the first case, the pool's free list
 * is updated to point to the next element. No memory poisoning is ever
 * performed on the returned area.
 */
static inline void *pool_alloc_dirty(struct pool_head *pool)
{
        void *p;

        HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
        if ((p = __pool_get_first(pool)) == NULL)
                p = __pool_refill_alloc(pool, 0);
        HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
        return p;
}

#ifndef DEBUG_UAF /* normal allocator */

/* allocates an area of size <size> and returns it. The semantics are similar
 * to those of malloc().
 */
static inline void *pool_alloc_area(size_t size)
{
        return malloc(size);
}

/* frees an area <area> of size <size> allocated by pool_alloc_area(). The
 * semantics are identical to free() except that the size is specified and
 * may be ignored.
 */
static inline void pool_free_area(void *area, size_t __maybe_unused size)
{
        free(area);
}

#else /* use-after-free detector */

/* allocates an area of size <size> and returns it. The semantics are similar
 * to those of malloc(). However the allocation is rounded up to 4kB so that a
 * full page is allocated. This ensures the object can be freed alone so that
 * future dereferences are easily detected. The returned object is always
 * 16-byte aligned to avoid issues with unaligned structure objects. In case
 * some padding is added, the area's start address is copied at the end of the
 * padding to help detect underflows.
 */
static inline void *pool_alloc_area(size_t size)
{
        size_t pad = (4096 - size) & 0xFF0;
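        /* Worked example (informative): for size = 100, pad = (4096 - 100) &
         * 0xFF0 = 3984, so the mapping is a single 4096-byte page and the
         * returned pointer is page start + 3984. The 0xFF0 mask keeps the
         * result 16-byte aligned while pushing the object towards the end of
         * the mapping.
         */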
        void *ret;

        ret = mmap(NULL, (size + 4095) & -4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
        if (ret == MAP_FAILED)
                return NULL;
        if (pad >= sizeof(void *))
                *(void **)(ret + pad - sizeof(void *)) = ret + pad;
        return ret + pad;
}

/* frees an area <area> of size <size> allocated by pool_alloc_area(). The
 * semantics are identical to free() except that the size must absolutely match
 * the one passed to pool_alloc_area(). In case some padding is added, the
 * area's start address is compared to the one at the end of the padding, and
 * a segfault is triggered if they don't match, indicating an underflow.
 */
static inline void pool_free_area(void *area, size_t size)
{
        size_t pad = (4096 - size) & 0xFF0;

        if (pad >= sizeof(void *) && *(void **)(area - sizeof(void *)) != area)
                *(volatile int *)0 = 0;

        munmap(area - pad, (size + 4095) & -4096);
}

#endif /* DEBUG_UAF */

/*
 * Returns a pointer to an entry taken from pool <pool>, or dynamically
 * allocated when the pool is empty. In the first case, the pool's free list
 * is updated to point to the next element. Memory poisoning is performed if
 * enabled.
 */
static inline void *pool_alloc(struct pool_head *pool)
{
        void *p;

        p = pool_alloc_dirty(pool);
#ifdef DEBUG_MEMORY_POOLS
        if (p) {
                HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
                /* keep track of where the element was allocated from */
                *POOL_LINK(pool, p) = (void *)pool;
                HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
        }
#endif
        if (p && mem_poison_byte >= 0) {
                memset(p, mem_poison_byte, pool->size);
        }

        return p;
}

/*
 * Puts a memory area back to the corresponding pool.
 * Items are chained directly through a pointer that
 * is written in the beginning of the memory area, so
 * there's no need for any carrier cell. This implies
 * that each memory area is at least as big as one
 * pointer. Just like with the libc's free(), nothing
 * is done if <ptr> is NULL.
 */
static inline void pool_free(struct pool_head *pool, void *ptr)
{
        if (likely(ptr != NULL)) {
                HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
#ifdef DEBUG_MEMORY_POOLS
                /* we'll get late corruption if we refill to the wrong pool or double-free */
                if (*POOL_LINK(pool, ptr) != (void *)pool)
                        *(volatile int *)0 = 0;
#endif

#ifndef DEBUG_UAF /* normal pool behaviour */
                *POOL_LINK(pool, ptr) = (void *)pool->free_list;
                pool->free_list = (void *)ptr;
#else  /* release the entry for real to detect use after free */
                /* ensure we crash on double free or free of a const area */
                *(uint32_t *)ptr = 0xDEADADD4;
                pool_free_area(ptr, pool->size + POOL_EXTRA);
                pool->allocated--;
#endif /* DEBUG_UAF */
                pool->used--;
                HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
        }
}
#endif /* CONFIG_HAP_LOCKLESS_POOLS */
#endif /* _COMMON_MEMORY_H */

/*
 * Local variables:
 * c-indent-level: 8
 * c-basic-offset: 8
 * End:
 */