Willy Tarreau | c7e4238 | 2012-08-24 19:22:53 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Buffer management functions. |
| 3 | * |
| 4 | * Copyright 2000-2012 Willy Tarreau <w@1wt.eu> |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | */ |
| 12 | |
William Lallemand | be0efd8 | 2012-11-22 18:01:40 +0100 | [diff] [blame] | 13 | #include <ctype.h> |
Willy Tarreau | c7e4238 | 2012-08-24 19:22:53 +0200 | [diff] [blame] | 14 | #include <stdio.h> |
| 15 | #include <string.h> |
| 16 | |
| 17 | #include <common/config.h> |
| 18 | #include <common/buffer.h> |
Willy Tarreau | 9b28e03 | 2012-10-12 23:49:43 +0200 | [diff] [blame] | 19 | #include <common/memory.h> |
Willy Tarreau | c7e4238 | 2012-08-24 19:22:53 +0200 | [diff] [blame] | 20 | |
| 21 | #include <types/global.h> |
| 22 | |
Willy Tarreau | bafbe01 | 2017-11-24 17:34:44 +0100 | [diff] [blame] | 23 | struct pool_head *pool_head_buffer; |
Willy Tarreau | 9b28e03 | 2012-10-12 23:49:43 +0200 | [diff] [blame] | 24 | |
Willy Tarreau | f2f7d6b | 2014-11-24 11:55:08 +0100 | [diff] [blame] | 25 | /* These buffers are used to always have a valid pointer to an empty buffer in |
| 26 | * channels. The first buffer is set once a buffer is empty. The second one is |
| 27 | * set when a buffer is desired but no more are available. It helps to know |
| 28 | * which channel wants a buffer. They can reliably be exchanged; the split |
| 29 | * between the two is only an optimization. |
Willy Tarreau | 2a4b543 | 2014-11-24 11:39:34 +0100 | [diff] [blame] | 30 | */ |
| 31 | struct buffer buf_empty = { .p = buf_empty.data }; |
Willy Tarreau | f2f7d6b | 2014-11-24 11:55:08 +0100 | [diff] [blame] | 32 | struct buffer buf_wanted = { .p = buf_wanted.data }; |
Willy Tarreau | 9b28e03 | 2012-10-12 23:49:43 +0200 | [diff] [blame] | 33 | |
Christopher Faulet | a73e59b | 2016-12-09 17:30:18 +0100 | [diff] [blame] | 34 | /* list of objects waiting for at least one buffer */ |
| 35 | struct list buffer_wq = LIST_HEAD_INIT(buffer_wq); |
Christopher Faulet | 9dcf9b6 | 2017-11-13 10:34:01 +0100 | [diff] [blame] | 36 | __decl_hathreads(HA_SPINLOCK_T buffer_wq_lock); |
Christopher Faulet | a73e59b | 2016-12-09 17:30:18 +0100 | [diff] [blame] | 37 | |
Christopher Faulet | ad405f1 | 2017-08-29 15:30:11 +0200 | [diff] [blame] | 38 | /* this buffer is always the same size as standard buffers and is used for |
| 39 | * swapping data inside a buffer. |
| 40 | */ |
Christopher Faulet | ba39f23 | 2017-08-29 14:43:04 +0200 | [diff] [blame] | 41 | static THREAD_LOCAL char *swap_buffer = NULL; |
| 42 | |
| 43 | static int init_buffer_per_thread() |
| 44 | { |
| 45 | swap_buffer = calloc(1, global.tune.bufsize); |
| 46 | if (swap_buffer == NULL) |
| 47 | return 0; |
| 48 | return 1; |
| 49 | } |
| 50 | |
| 51 | static void deinit_buffer_per_thread() |
| 52 | { |
| 53 | free(swap_buffer); swap_buffer = NULL; |
| 54 | } |
Christopher Faulet | ad405f1 | 2017-08-29 15:30:11 +0200 | [diff] [blame] | 55 | |
Willy Tarreau | 9b28e03 | 2012-10-12 23:49:43 +0200 | [diff] [blame] | 56 | /* perform minimal intializations, report 0 in case of error, 1 if OK. */ |
| 57 | int init_buffer() |
| 58 | { |
Willy Tarreau | a24adf0 | 2014-11-27 01:11:56 +0100 | [diff] [blame] | 59 | void *buffer; |
| 60 | |
Willy Tarreau | bafbe01 | 2017-11-24 17:34:44 +0100 | [diff] [blame] | 61 | pool_head_buffer = create_pool("buffer", sizeof (struct buffer) + global.tune.bufsize, MEM_F_SHARED|MEM_F_EXACT); |
| 62 | if (!pool_head_buffer) |
Willy Tarreau | a24adf0 | 2014-11-27 01:11:56 +0100 | [diff] [blame] | 63 | return 0; |
| 64 | |
| 65 | /* The reserved buffer is what we leave behind us. Thus we always need |
| 66 | * at least one extra buffer in minavail otherwise we'll end up waking |
| 67 | * up tasks with no memory available, causing a lot of useless wakeups. |
| 68 | * That means that we always want to have at least 3 buffers available |
| 69 | * (2 for current session, one for next session that might be needed to |
| 70 | * release a server connection). |
| 71 | */ |
Willy Tarreau | bafbe01 | 2017-11-24 17:34:44 +0100 | [diff] [blame] | 72 | pool_head_buffer->minavail = MAX(global.tune.reserved_bufs, 3); |
Willy Tarreau | 33cb065 | 2014-12-23 22:52:37 +0100 | [diff] [blame] | 73 | if (global.tune.buf_limit) |
Willy Tarreau | bafbe01 | 2017-11-24 17:34:44 +0100 | [diff] [blame] | 74 | pool_head_buffer->limit = global.tune.buf_limit; |
Willy Tarreau | a24adf0 | 2014-11-27 01:11:56 +0100 | [diff] [blame] | 75 | |
Christopher Faulet | 2a944ee | 2017-11-07 10:42:54 +0100 | [diff] [blame] | 76 | HA_SPIN_INIT(&buffer_wq_lock); |
Emeric Brun | a1dd243 | 2017-06-21 15:42:52 +0200 | [diff] [blame] | 77 | |
Willy Tarreau | bafbe01 | 2017-11-24 17:34:44 +0100 | [diff] [blame] | 78 | buffer = pool_refill_alloc(pool_head_buffer, pool_head_buffer->minavail - 1); |
Willy Tarreau | a24adf0 | 2014-11-27 01:11:56 +0100 | [diff] [blame] | 79 | if (!buffer) |
| 80 | return 0; |
| 81 | |
Willy Tarreau | bafbe01 | 2017-11-24 17:34:44 +0100 | [diff] [blame] | 82 | pool_free(pool_head_buffer, buffer); |
Christopher Faulet | ad405f1 | 2017-08-29 15:30:11 +0200 | [diff] [blame] | 83 | |
Christopher Faulet | cd7879a | 2017-10-27 13:53:47 +0200 | [diff] [blame] | 84 | hap_register_per_thread_init(init_buffer_per_thread); |
| 85 | hap_register_per_thread_deinit(deinit_buffer_per_thread); |
Willy Tarreau | a24adf0 | 2014-11-27 01:11:56 +0100 | [diff] [blame] | 86 | return 1; |
Willy Tarreau | 9b28e03 | 2012-10-12 23:49:43 +0200 | [diff] [blame] | 87 | } |
| 88 | |
Christopher Faulet | ad405f1 | 2017-08-29 15:30:11 +0200 | [diff] [blame] | 89 | void deinit_buffer() |
| 90 | { |
Willy Tarreau | bafbe01 | 2017-11-24 17:34:44 +0100 | [diff] [blame] | 91 | pool_destroy(pool_head_buffer); |
Christopher Faulet | ad405f1 | 2017-08-29 15:30:11 +0200 | [diff] [blame] | 92 | } |
| 93 | |
Willy Tarreau | af81935 | 2012-08-27 22:08:00 +0200 | [diff] [blame] | 94 | /* This function writes the string <str> at position <pos> which must be in |
| 95 | * buffer <b>, and moves <end> just after the end of <str>. <b>'s parameters |
| 96 | * <l> and <r> are updated to be valid after the shift. The shift value |
| 97 | * (positive or negative) is returned. If there's no space left, the move is |
| 98 | * not done. The function does not adjust ->o because it does not make sense to |
| 99 | * use it on data scheduled to be sent. For the same reason, it does not make |
| 100 | * sense to call this function on unparsed data, so <orig> is not updated. The |
| 101 | * string length is taken from parameter <len>. If <len> is null, the <str> |
| 102 | * pointer is allowed to be null. |
| 103 | */ |
| 104 | int buffer_replace2(struct buffer *b, char *pos, char *end, const char *str, int len) |
| 105 | { |
| 106 | int delta; |
| 107 | |
| 108 | delta = len - (end - pos); |
| 109 | |
Thierry FOURNIER | fdda677 | 2015-03-10 01:55:01 +0100 | [diff] [blame] | 110 | if (bi_end(b) + delta > b->data + b->size) |
Willy Tarreau | af81935 | 2012-08-27 22:08:00 +0200 | [diff] [blame] | 111 | return 0; /* no space left */ |
| 112 | |
| 113 | if (buffer_not_empty(b) && |
| 114 | bi_end(b) + delta > bo_ptr(b) && |
| 115 | bo_ptr(b) >= bi_end(b)) |
| 116 | return 0; /* no space left before wrapping data */ |
| 117 | |
| 118 | /* first, protect the end of the buffer */ |
| 119 | memmove(end + delta, end, bi_end(b) - end); |
| 120 | |
| 121 | /* now, copy str over pos */ |
| 122 | if (len) |
| 123 | memcpy(pos, str, len); |
| 124 | |
| 125 | b->i += delta; |
| 126 | |
Willy Tarreau | 5fb3803 | 2012-12-16 19:39:09 +0100 | [diff] [blame] | 127 | if (buffer_empty(b)) |
Willy Tarreau | af81935 | 2012-08-27 22:08:00 +0200 | [diff] [blame] | 128 | b->p = b->data; |
| 129 | |
| 130 | return delta; |
| 131 | } |
| 132 | |
| 133 | /* |
| 134 | * Inserts <str> followed by "\r\n" at position <pos> in buffer <b>. The <len> |
| 135 | * argument informs about the length of string <str> so that we don't have to |
| 136 | * measure it. It does not include the "\r\n". If <str> is NULL, then the buffer |
| 137 | * is only opened for len+2 bytes but nothing is copied in. It may be useful in |
| 138 | * some circumstances. The send limit is *not* adjusted. Same comments as above |
| 139 | * for the valid use cases. |
| 140 | * |
| 141 | * The number of bytes added is returned on success. 0 is returned on failure. |
| 142 | */ |
| 143 | int buffer_insert_line2(struct buffer *b, char *pos, const char *str, int len) |
| 144 | { |
| 145 | int delta; |
| 146 | |
| 147 | delta = len + 2; |
| 148 | |
| 149 | if (bi_end(b) + delta >= b->data + b->size) |
| 150 | return 0; /* no space left */ |
| 151 | |
Godbach | a6547c1 | 2014-10-31 13:16:37 +0800 | [diff] [blame] | 152 | if (buffer_not_empty(b) && |
| 153 | bi_end(b) + delta > bo_ptr(b) && |
| 154 | bo_ptr(b) >= bi_end(b)) |
| 155 | return 0; /* no space left before wrapping data */ |
| 156 | |
Willy Tarreau | af81935 | 2012-08-27 22:08:00 +0200 | [diff] [blame] | 157 | /* first, protect the end of the buffer */ |
| 158 | memmove(pos + delta, pos, bi_end(b) - pos); |
| 159 | |
| 160 | /* now, copy str over pos */ |
| 161 | if (len && str) { |
| 162 | memcpy(pos, str, len); |
| 163 | pos[len] = '\r'; |
| 164 | pos[len + 1] = '\n'; |
| 165 | } |
| 166 | |
| 167 | b->i += delta; |
| 168 | return delta; |
| 169 | } |
| 170 | |
Willy Tarreau | 27187ab | 2015-07-02 12:50:23 +0200 | [diff] [blame] | 171 | /* This function realigns a possibly wrapping buffer so that the input part is |
| 172 | * contiguous and starts at the beginning of the buffer and the output part |
| 173 | * ends at the end of the buffer. This provides the best conditions since it |
| 174 | * allows the largest inputs to be processed at once and ensures that once the |
| 175 | * output data leaves, the whole buffer is available at once. |
Willy Tarreau | c7e4238 | 2012-08-24 19:22:53 +0200 | [diff] [blame] | 176 | */ |
| 177 | void buffer_slow_realign(struct buffer *buf) |
| 178 | { |
Willy Tarreau | 27187ab | 2015-07-02 12:50:23 +0200 | [diff] [blame] | 179 | int block1 = buf->o; |
| 180 | int block2 = 0; |
| 181 | |
| 182 | /* process output data in two steps to cover wrapping */ |
| 183 | if (block1 > buf->p - buf->data) { |
| 184 | block2 = buf->p - buf->data; |
| 185 | block1 -= block2; |
| 186 | } |
| 187 | memcpy(swap_buffer + buf->size - buf->o, bo_ptr(buf), block1); |
| 188 | memcpy(swap_buffer + buf->size - block2, buf->data, block2); |
| 189 | |
| 190 | /* process input data in two steps to cover wrapping */ |
| 191 | block1 = buf->i; |
| 192 | block2 = 0; |
| 193 | |
| 194 | if (block1 > buf->data + buf->size - buf->p) { |
| 195 | block1 = buf->data + buf->size - buf->p; |
| 196 | block2 = buf->i - block1; |
Willy Tarreau | c7e4238 | 2012-08-24 19:22:53 +0200 | [diff] [blame] | 197 | } |
Willy Tarreau | 27187ab | 2015-07-02 12:50:23 +0200 | [diff] [blame] | 198 | memcpy(swap_buffer, bi_ptr(buf), block1); |
| 199 | memcpy(swap_buffer + block1, buf->data, block2); |
| 200 | |
| 201 | /* reinject changes into the buffer */ |
| 202 | memcpy(buf->data, swap_buffer, buf->i); |
| 203 | memcpy(buf->data + buf->size - buf->o, swap_buffer + buf->size - buf->o, buf->o); |
Willy Tarreau | c7e4238 | 2012-08-24 19:22:53 +0200 | [diff] [blame] | 204 | |
| 205 | buf->p = buf->data; |
| 206 | } |
| 207 | |
Willy Tarreau | c7e4238 | 2012-08-24 19:22:53 +0200 | [diff] [blame] | 208 | /* |
| 209 | * Dumps part or all of a buffer. |
| 210 | */ |
| 211 | void buffer_dump(FILE *o, struct buffer *b, int from, int to) |
| 212 | { |
| 213 | fprintf(o, "Dumping buffer %p\n", b); |
William Lallemand | be0efd8 | 2012-11-22 18:01:40 +0100 | [diff] [blame] | 214 | fprintf(o, " data=%p o=%d i=%d p=%p\n" |
| 215 | " relative: p=0x%04x\n", |
| 216 | b->data, b->o, b->i, b->p, (unsigned int)(b->p - b->data)); |
Willy Tarreau | c7e4238 | 2012-08-24 19:22:53 +0200 | [diff] [blame] | 217 | |
| 218 | fprintf(o, "Dumping contents from byte %d to byte %d\n", from, to); |
William Lallemand | be0efd8 | 2012-11-22 18:01:40 +0100 | [diff] [blame] | 219 | fprintf(o, " 0 1 2 3 4 5 6 7 8 9 a b c d e f\n"); |
| 220 | /* dump hexa */ |
| 221 | while (from < to) { |
| 222 | int i; |
| 223 | |
| 224 | fprintf(o, " %04x: ", from); |
| 225 | for (i = 0; ((from + i) < to) && (i < 16) ; i++) { |
| 226 | fprintf(o, "%02x ", (unsigned char)b->data[from + i]); |
| 227 | if (((from + i) & 15) == 7) |
| 228 | fprintf(o, "- "); |
| 229 | } |
Godbach | c08057c | 2013-11-14 10:15:20 +0800 | [diff] [blame] | 230 | if (to - from < 16) { |
Godbach | c3916a7 | 2013-11-21 10:21:22 +0800 | [diff] [blame] | 231 | int j = 0; |
| 232 | |
Godbach | c08057c | 2013-11-14 10:15:20 +0800 | [diff] [blame] | 233 | for (j = 0; j < from + 16 - to; j++) |
| 234 | fprintf(o, " "); |
Godbach | c3916a7 | 2013-11-21 10:21:22 +0800 | [diff] [blame] | 235 | if (j > 8) |
| 236 | fprintf(o, " "); |
Godbach | c08057c | 2013-11-14 10:15:20 +0800 | [diff] [blame] | 237 | } |
William Lallemand | be0efd8 | 2012-11-22 18:01:40 +0100 | [diff] [blame] | 238 | fprintf(o, " "); |
| 239 | for (i = 0; (from + i < to) && (i < 16) ; i++) { |
Willy Tarreau | 95898ac | 2012-11-26 00:57:40 +0100 | [diff] [blame] | 240 | fprintf(o, "%c", isprint((int)b->data[from + i]) ? b->data[from + i] : '.') ; |
William Lallemand | be0efd8 | 2012-11-22 18:01:40 +0100 | [diff] [blame] | 241 | if ((((from + i) & 15) == 15) && ((from + i) != to-1)) |
| 242 | fprintf(o, "\n"); |
| 243 | } |
| 244 | from += i; |
Willy Tarreau | c7e4238 | 2012-08-24 19:22:53 +0200 | [diff] [blame] | 245 | } |
| 246 | fprintf(o, "\n--\n"); |
William Lallemand | be0efd8 | 2012-11-22 18:01:40 +0100 | [diff] [blame] | 247 | fflush(o); |
Willy Tarreau | c7e4238 | 2012-08-24 19:22:53 +0200 | [diff] [blame] | 248 | } |
| 249 | |
Christopher Faulet | a73e59b | 2016-12-09 17:30:18 +0100 | [diff] [blame] | 250 | void __offer_buffer(void *from, unsigned int threshold) |
| 251 | { |
| 252 | struct buffer_wait *wait, *bak; |
| 253 | int avail; |
| 254 | |
| 255 | /* For now, we consider that all objects need 1 buffer, so we can stop |
| 256 | * waking up them once we have enough of them to eat all the available |
| 257 | * buffers. Note that we don't really know if they are streams or just |
| 258 | * other tasks, but that's a rough estimate. Similarly, for each cached |
| 259 | * event we'll need 1 buffer. If no buffer is currently used, always |
| 260 | * wake up the number of tasks we can offer a buffer based on what is |
| 261 | * allocated, and in any case at least one task per two reserved |
| 262 | * buffers. |
| 263 | */ |
Willy Tarreau | bafbe01 | 2017-11-24 17:34:44 +0100 | [diff] [blame] | 264 | avail = pool_head_buffer->allocated - pool_head_buffer->used - global.tune.reserved_bufs / 2; |
Christopher Faulet | a73e59b | 2016-12-09 17:30:18 +0100 | [diff] [blame] | 265 | |
| 266 | list_for_each_entry_safe(wait, bak, &buffer_wq, list) { |
| 267 | if (avail <= threshold) |
| 268 | break; |
| 269 | |
| 270 | if (wait->target == from || !wait->wakeup_cb(wait->target)) |
| 271 | continue; |
| 272 | |
| 273 | LIST_DEL(&wait->list); |
| 274 | LIST_INIT(&wait->list); |
| 275 | |
| 276 | avail--; |
| 277 | } |
| 278 | } |
Willy Tarreau | c7e4238 | 2012-08-24 19:22:53 +0200 | [diff] [blame] | 279 | |
| 280 | /* |
| 281 | * Local variables: |
| 282 | * c-indent-level: 8 |
| 283 | * c-basic-offset: 8 |
| 284 | * End: |
| 285 | */ |