blob: 68b4319c5207ea9f639c93c2eb0f6293b7d2a130 [file] [log] [blame]
/*
 * include/types/buffers.h
 * Buffer management definitions, macros and inline functions.
 *
 * Copyright (C) 2000-2010 Willy Tarreau - w@1wt.eu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
Willy Tarreaubaaee002006-06-26 02:48:02 +020021
22#ifndef _TYPES_BUFFERS_H
23#define _TYPES_BUFFERS_H
24
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020025#include <common/config.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020026#include <common/memory.h>
Willy Tarreau0a5d5dd2008-11-23 19:31:35 +010027#include <types/stream_interface.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020028
/* The BF_* macros designate Buffer Flags, which may be ORed in the bit field
 * member 'flags' in struct channel. Here we have several types of flags :
 *
 *   - pure status flags, reported by the lower layer, which must be cleared
 *     before doing further I/O :
 *     BF_*_NULL, BF_*_PARTIAL
 *
 *   - pure status flags, reported by mid-layer, which must also be cleared
 *     before doing further I/O :
 *     BF_*_TIMEOUT, BF_*_ERROR
 *
 *   - read-only indicators reported by lower levels :
 *     BF_STREAMER, BF_STREAMER_FAST
 *
 *   - write-once status flags reported by the mid-level : BF_SHUTR, BF_SHUTW
 *
 *   - persistent control flags managed only by higher level :
 *     BF_SHUT*_NOW, BF_*_ENA, BF_HIJACK
 *
 * The flags have been arranged for readability, so that the read and write
 * bits have the same position in a byte (read being the lower byte and write
 * the second one). All flag names are relative to the buffer. For instance,
 * 'write' indicates the direction from the buffer to the stream interface.
 */
Willy Tarreaue393fe22008-08-16 22:18:07 +020053
/* Read-side flags (lowest byte). Each flag is a single distinct bit so that
 * they can be ORed together in the 'flags' bit field.
 */
#define BF_READ_NULL      0x000001  /* last read detected on producer side */
#define BF_READ_PARTIAL   0x000002  /* some data were read from producer */
#define BF_READ_TIMEOUT   0x000004  /* timeout while waiting for producer */
#define BF_READ_ERROR     0x000008  /* unrecoverable error on producer side */
/* note: BF_READ_TIMEOUT is not part of BF_READ_ACTIVITY */
#define BF_READ_ACTIVITY  (BF_READ_NULL|BF_READ_PARTIAL|BF_READ_ERROR)

#define BF_FULL           0x000010  /* buffer cannot accept any more data (l >= max len) */
#define BF_SHUTR          0x000020  /* producer has already shut down */
#define BF_SHUTR_NOW      0x000040  /* the producer must shut down for reads ASAP */
#define BF_READ_NOEXP     0x000080  /* producer should not expire */

/* Write-side flags (second byte), mirroring the read-side bit positions. */
#define BF_WRITE_NULL     0x000100  /* write(0) or connect() succeeded on consumer side */
#define BF_WRITE_PARTIAL  0x000200  /* some data were written to the consumer */
#define BF_WRITE_TIMEOUT  0x000400  /* timeout while waiting for consumer */
#define BF_WRITE_ERROR    0x000800  /* unrecoverable error on consumer side */
/* note: BF_WRITE_TIMEOUT is not part of BF_WRITE_ACTIVITY */
#define BF_WRITE_ACTIVITY (BF_WRITE_NULL|BF_WRITE_PARTIAL|BF_WRITE_ERROR)

#define BF_OUT_EMPTY      0x001000  /* out and pipe are empty. Set by last change. */
#define BF_SHUTW          0x002000  /* consumer has already shut down */
#define BF_SHUTW_NOW      0x004000  /* the consumer must shut down for writes ASAP */
#define BF_AUTO_CLOSE     0x008000  /* producer can forward shutdown to other side */

/* When either BF_SHUTR_NOW or BF_HIJACK is set, it is strictly forbidden for
 * the producer to alter the buffer contents. When BF_SHUTW_NOW is set, the
 * consumer is free to perform a shutw() when it has consumed the last contents,
 * otherwise the session processor will do it anyway.
 *
 * The SHUT* flags work like this :
 *
 *  SHUTR SHUTR_NOW   meaning
 *    0       0       normal case, connection still open and data is being read
 *    0       1       closing : the producer cannot feed data anymore but can close
 *    1       0       closed: the producer has closed its input channel.
 *    1       1       impossible
 *
 *  SHUTW SHUTW_NOW   meaning
 *    0       0       normal case, connection still open and data is being written
 *    0       1       closing: the consumer can send last data and may then close
 *    1       0       closed: the consumer has closed its output channel.
 *    1       1       impossible
 *
 * The SHUTW_NOW flag should be set by the session processor when SHUTR and AUTO_CLOSE
 * are both set. It may also be set by a hijacker at the end of data. And it may also
 * be set by the producer when it detects SHUTR while directly forwarding data to the
 * consumer.
 *
 * The SHUTR_NOW flag is mostly used to force the producer to abort when an error is
 * detected on the consumer side.
 */

#define BF_STREAMER       0x010000  /* the producer is identified as streaming data */
#define BF_STREAMER_FAST  0x020000  /* the consumer seems to eat the stream very fast */

#define BF_HIJACK         0x040000  /* the producer is temporarily replaced by ->hijacker */
#define BF_ANA_TIMEOUT    0x080000  /* the analyser timeout has expired */
#define BF_READ_ATTACHED  0x100000  /* the read side is attached for the first time */
#define BF_KERN_SPLICING  0x200000  /* kernel splicing desired for this buffer */
#define BF_READ_DONTWAIT  0x400000  /* wake the task up after every read (eg: HTTP request) */
#define BF_AUTO_CONNECT   0x800000  /* consumer may attempt to establish a new connection */

#define BF_DONT_READ      0x1000000 /* disable reading for now */
#define BF_EXPECT_MORE    0x2000000 /* more data expected to be sent very soon (one-shot) */
#define BF_SEND_DONTWAIT  0x4000000 /* don't wait for sending data (one-shot) */
#define BF_NEVER_WAIT     0x8000000 /* never wait for sending data (permanent) */

#define BF_WAKE_ONCE      0x10000000 /* pretend there is activity on this buffer (one-shot) */

/* Use these masks to clear the flags before going back to lower layers.
 * They are complemented values, meant to be ANDed with the flags field.
 */
#define BF_CLEAR_READ     (~(BF_READ_NULL|BF_READ_PARTIAL|BF_READ_ERROR|BF_READ_ATTACHED))
#define BF_CLEAR_WRITE    (~(BF_WRITE_NULL|BF_WRITE_PARTIAL|BF_WRITE_ERROR))
#define BF_CLEAR_TIMEOUT  (~(BF_READ_TIMEOUT|BF_WRITE_TIMEOUT|BF_ANA_TIMEOUT))

/* Masks which define input events for stream analysers */
#define BF_MASK_ANALYSER  (BF_READ_ATTACHED|BF_READ_ACTIVITY|BF_READ_TIMEOUT|BF_ANA_TIMEOUT|BF_WRITE_ACTIVITY|BF_WAKE_ONCE)

/* Mask for static flags which cause analysers to be woken up when they change */
#define BF_MASK_STATIC    (BF_OUT_EMPTY|BF_FULL|BF_SHUTR|BF_SHUTW|BF_SHUTR_NOW|BF_SHUTW_NOW)
Willy Tarreau3da77c52008-08-29 09:58:42 +0200131
Willy Tarreau2df28e82008-08-17 15:20:19 +0200132
/* Analysers (buffer->analysers).
 * Those bits indicate that there is some processing to do on the buffer
 * contents. It will probably evolve into a linked list later. Those
 * analysers could be compared to higher level processors.
 * The field is blanked by buffer_init() and only by analysers themselves
 * afterwards.
 */

/* request analysers (bits 0..13) */
#define AN_REQ_DECODE_PROXY     0x00000001  /* take the proxied address from a 'PROXY' line */
#define AN_REQ_INSPECT_FE       0x00000002  /* inspect request contents in the frontend */
#define AN_REQ_WAIT_HTTP        0x00000004  /* wait for an HTTP request */
#define AN_REQ_HTTP_PROCESS_FE  0x00000008  /* process the frontend's HTTP part */
#define AN_REQ_SWITCHING_RULES  0x00000010  /* apply the switching rules */
#define AN_REQ_INSPECT_BE       0x00000020  /* inspect request contents in the backend */
#define AN_REQ_HTTP_PROCESS_BE  0x00000040  /* process the backend's HTTP part */
#define AN_REQ_SRV_RULES        0x00000080  /* use-server rules */
#define AN_REQ_HTTP_INNER       0x00000100  /* inner processing of HTTP request */
#define AN_REQ_HTTP_TARPIT      0x00000200  /* wait for end of HTTP tarpit */
#define AN_REQ_HTTP_BODY        0x00000400  /* inspect HTTP request body */
#define AN_REQ_STICKING_RULES   0x00000800  /* table persistence matching */
#define AN_REQ_PRST_RDP_COOKIE  0x00001000  /* persistence on rdp cookie */
#define AN_REQ_HTTP_XFER_BODY   0x00002000  /* forward request body */

/* response analysers (bits 16 and above). Note that AN_RES_HTTP_PROCESS_BE
 * and AN_RES_HTTP_PROCESS_FE intentionally share the same bit for now.
 */
#define AN_RES_INSPECT          0x00010000  /* content inspection */
#define AN_RES_WAIT_HTTP        0x00020000  /* wait for HTTP response */
#define AN_RES_HTTP_PROCESS_BE  0x00040000  /* process backend's HTTP part */
#define AN_RES_HTTP_PROCESS_FE  0x00040000  /* process frontend's HTTP part (same for now) */
#define AN_RES_STORE_RULES      0x00080000  /* table persistence matching */
#define AN_RES_HTTP_XFER_BODY   0x00100000  /* forward response body */
Willy Tarreaub37c27e2009-10-18 22:53:08 +0200162
163
/* Magic value to forward infinite size (TCP, ...), used with ->to_forward.
 * NOTE(review): MAX_RANGE() is not defined in this file; it is presumably
 * provided by one of the headers included above - confirm.
 */
#define BUF_INFINITE_FORWARD    MAX_RANGE(int)
Willy Tarreau31971e52009-09-20 12:07:52 +0200166
/* describes a chunk of string : a pointer to the storage plus its allocated
 * size and the length currently in use.
 */
struct chunk {
	char *str;	/* beginning of the string itself. Might not be 0-terminated */
	int size;	/* total size of the buffer, 0 if the *str is read-only */
	int len;	/* current size of the string from first to last char. <0 = uninit. */
};
173
/* needed for a declaration below */
struct session;

/* A channel : the BF_* flags, the read/write timers, the data counters, the
 * analyser bit field, the attached producer and consumer stream interfaces,
 * and the storage area itself as the trailing <data> member.
 */
struct channel {
	unsigned int flags;		/* BF_* */
	int rex;			/* expiration date for a read, in ticks */
	int wex;			/* expiration date for a write or connect, in ticks */
	int rto;			/* read timeout, in ticks */
	int wto;			/* write timeout, in ticks */
	char *p;			/* buffer's start pointer, separates in and out data */
	unsigned int size;		/* buffer size in bytes */
	unsigned int i;			/* number of input bytes pending for analysis in the buffer */
	unsigned int o;			/* number of out bytes the sender can consume from this buffer */
	unsigned int to_forward;	/* number of bytes to forward after out without a wake-up */
	unsigned int analysers;		/* bit field indicating what to do on the buffer */
	int analyse_exp;		/* expiration date for current analysers (if set) */
	void (*hijacker)(struct session *, struct channel *); /* alternative content producer */
	unsigned char xfer_large;	/* number of consecutive large xfers */
	unsigned char xfer_small;	/* number of consecutive small xfers */
	unsigned long long total;	/* total data read */
	struct stream_interface *prod;	/* producer attached to this buffer */
	struct stream_interface *cons;	/* consumer attached to this buffer */
	struct pipe *pipe;		/* non-NULL only when data present */
	char data[0];			/* <size> bytes ; must remain the last member */
};
199
Willy Tarreaubaaee002006-06-26 02:48:02 +0200200
/* Note about the buffer structure

   NOTE(review): parts of this text still refer to ->l, ->w, ->r and ->lr,
   none of which is a member of struct channel as declared in this file
   (which exposes ->p, ->i and ->o). Those passages presumably describe an
   earlier layout and should be re-checked against the current fields.

   The buffer contains two length indicators, one to_forward counter and one
   ->o limit. First, it must be understood that the buffer is in fact
   split in two parts :
     - the visible data (->data, for ->l bytes)
     - the invisible data, typically in kernel buffers forwarded directly from
       the source stream sock to the destination stream sock (->pipe->data
       bytes). Those are used only during forward.

   In order not to mix data streams, the producer may only feed the invisible
   data with data to forward, and only when the visible buffer is empty. The
   producer may not always be able to feed the invisible buffer due to platform
   limitations (lack of kernel support).

   Conversely, the consumer must always take data from the invisible data first
   before ever considering visible data. There is no limit to the size of data
   to consume from the invisible buffer, as platform-specific implementations
   will rarely leave enough control on this. So any byte fed into the invisible
   buffer is expected to reach the destination file descriptor, by any means.
   However, it's the consumer's responsibility to ensure that the invisible
   data has been entirely consumed before consuming visible data. This must be
   reflected by ->pipe->data. This is very important as this and only this can
   ensure strict ordering of data between buffers.

   The producer is responsible for decreasing ->to_forward and increasing
   ->o. The ->to_forward parameter indicates how many bytes may be fed
   into either data buffer without waking the parent up. The special value
   BUF_INFINITE_FORWARD is never decreased nor increased. The ->o
   parameter says how many bytes may be consumed from the visible buffer. Thus
   it may never exceed ->l. This parameter is updated by any buffer_write() as
   well as any data forwarded through the visible buffer. Since the ->to_forward
   attribute applies to data after ->w+o, an analyser will not see a
   buffer which has a non-null to_forward with o < l. A producer is
   responsible for raising ->o by min(to_forward, l-o) when it
   injects data into the buffer.

   The consumer is responsible for decreasing ->o when it sends data
   from the visible buffer, and ->pipe->data when it sends data from the
   invisible buffer.

   A real-world example is an HTTP response waiting in a buffer to be
   forwarded. We know the header length (300) and the amount of
   data to forward (content-length=9000). The buffer already contains 1000
   bytes of data after the 300 bytes of headers. Thus the caller will set
   ->o to 300 indicating that it explicitly wants to send those data,
   and set ->to_forward to 9000 (content-length). This value must be normalised
   immediately after updating ->to_forward : since there are already 1300 bytes
   in the buffer, 300 of which are already counted in ->o, and that size
   is smaller than ->to_forward, we must update ->o to 1300 to flush the
   whole buffer, and reduce ->to_forward to 8000. After that, the producer may
   try to feed the additional data through the invisible buffer using a
   platform-specific method such as splice().

   The ->to_forward entry is also used to detect whether we can fill the buffer
   or not. The idea is that we need to save some space for data manipulation
   (mainly header rewriting in HTTP) so we don't want to have a full buffer on
   input before processing a request or response. Thus, we ensure that there is
   always global.maxrewrite bytes of free space. Since we don't want to forward
   chunks without filling the buffer, we rely on ->to_forward. When ->to_forward
   is null, we may have some processing to do so we don't want to fill the
   buffer. When ->to_forward is non-null, we know we don't care for at least as
   many bytes. In the end, we know that each of the ->to_forward bytes will
   eventually leave the buffer. So as long as ->to_forward is larger than
   global.maxrewrite, we can fill the buffer. If ->to_forward is smaller than
   global.maxrewrite, then we don't want to fill the buffer with more than
   ->size - global.maxrewrite + ->to_forward.

   Note that this also means that anyone touching ->to_forward must also take
   care of updating the BF_FULL flag. For this reason, it's really advised to
   use buffer_forward() only.

   A buffer may contain up to 5 areas :
     - the data waiting to be sent. These data are located between ->w and
       ->w+o ;
     - the data to process and possibly transform. These data start at
       ->w+o and may be up to r-w bytes long. Generally ->lr remains in
       this area ;
     - the data to preserve. They start at the end of the previous one and stop
       at ->r. The limit between the two solely depends on the protocol being
       analysed ; ->lr may be used as a marker.
     - the spare area : it is the remainder of the buffer, which can be used to
       store new incoming data. It starts at ->r and is up to ->size-l long. It
       may be limited by global.maxrewrite.
     - the reserved area : this is the area which must not be filled and is
       reserved for possible rewrites ; it is up to global.maxrewrite bytes
       long.
 */
289
Willy Tarreaubaaee002006-06-26 02:48:02 +0200290#endif /* _TYPES_BUFFERS_H */
291
/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */