blob: f9554b422469196162f508a107b954584e82b2c4 [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * HTTP protocol analyzer
3 *
Willy Tarreauf68a15a2011-01-06 16:53:21 +01004 * Copyright 2000-2011 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <ctype.h>
14#include <errno.h>
15#include <fcntl.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <syslog.h>
Willy Tarreau42250582007-04-01 01:30:43 +020020#include <time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020021
22#include <sys/socket.h>
23#include <sys/stat.h>
24#include <sys/types.h>
25
Willy Tarreaub05405a2012-01-23 15:35:52 +010026#include <netinet/tcp.h>
27
Willy Tarreau2dd0d472006-06-29 17:53:05 +020028#include <common/appsession.h>
Krzysztof Piotr Oledzkif9423ae2010-01-29 19:26:18 +010029#include <common/base64.h>
Willy Tarreauc7e42382012-08-24 19:22:53 +020030#include <common/chunk.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020031#include <common/compat.h>
32#include <common/config.h>
Willy Tarreaua4cd1f52006-12-16 19:57:26 +010033#include <common/debug.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020034#include <common/memory.h>
35#include <common/mini-clist.h>
36#include <common/standard.h>
Willy Tarreau0c303ee2008-07-07 00:09:58 +020037#include <common/ticks.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020038#include <common/time.h>
39#include <common/uri_auth.h>
40#include <common/version.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020041
42#include <types/capture.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020043#include <types/global.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020044
Willy Tarreau8797c062007-05-07 00:55:35 +020045#include <proto/acl.h>
Willy Tarreau61612d42012-04-19 18:42:05 +020046#include <proto/arg.h>
Krzysztof Piotr Oledzkif9423ae2010-01-29 19:26:18 +010047#include <proto/auth.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020048#include <proto/backend.h>
Willy Tarreauc7e42382012-08-24 19:22:53 +020049#include <proto/channel.h>
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +010050#include <proto/checks.h>
William Lallemand82fe75c2012-10-23 10:25:10 +020051#include <proto/compression.h>
Willy Tarreau91861262007-10-17 17:06:05 +020052#include <proto/dumpstats.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020053#include <proto/fd.h>
Willy Tarreau03fa5df2010-05-24 21:02:37 +020054#include <proto/frontend.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020055#include <proto/log.h>
Willy Tarreau58f10d72006-12-04 02:26:12 +010056#include <proto/hdr_idx.h>
Thierry FOURNIERed66c292013-11-28 11:05:19 +010057#include <proto/pattern.h>
Willy Tarreaub6866442008-07-14 23:54:42 +020058#include <proto/proto_tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020059#include <proto/proto_http.h>
Willy Tarreau7f062c42009-03-05 18:43:00 +010060#include <proto/proxy.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020061#include <proto/queue.h>
Willy Tarreaucd3b0942012-04-27 21:52:18 +020062#include <proto/sample.h>
Willy Tarreau7f062c42009-03-05 18:43:00 +010063#include <proto/server.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020064#include <proto/session.h>
Willy Tarreaucff64112008-11-03 06:26:53 +010065#include <proto/stream_interface.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020066#include <proto/task.h>
67
Willy Tarreau522d6c02009-12-06 18:49:18 +010068const char HTTP_100[] =
69 "HTTP/1.1 100 Continue\r\n\r\n";
70
71const struct chunk http_100_chunk = {
72 .str = (char *)&HTTP_100,
73 .len = sizeof(HTTP_100)-1
74};
75
/*
 * Redirect response prefixes. Each one deliberately stops right after
 * "Location: " so that the target URL can be appended to it.
 * Warning: none of them carries a "Connection" header.
 */

/* 301: permanent redirect. */
const char *HTTP_301 =
	"HTTP/1.1 301 Moved Permanently\r\n"
	"Content-length: 0\r\n"
	"Location: ";

/* 302: temporary redirect, marked non-cacheable. */
const char *HTTP_302 =
	"HTTP/1.1 302 Found\r\n"
	"Cache-Control: no-cache\r\n"
	"Content-length: 0\r\n"
	"Location: ";

/* 303: same as 302 except that the browser MUST retry with the GET method. */
const char *HTTP_303 =
	"HTTP/1.1 303 See Other\r\n"
	"Cache-Control: no-cache\r\n"
	"Content-length: 0\r\n"
	"Location: ";

/* 307: same as 302 except that the browser MUST retry with the same method. */
const char *HTTP_307 =
	"HTTP/1.1 307 Temporary Redirect\r\n"
	"Cache-Control: no-cache\r\n"
	"Content-length: 0\r\n"
	"Location: ";

/* 308: same as 301 except that the browser MUST retry with the same method. */
const char *HTTP_308 =
	"HTTP/1.1 308 Permanent Redirect\r\n"
	"Content-length: 0\r\n"
	"Location: ";
108
/*
 * Authentication challenges. Warning: both are sprintf()-style format
 * strings whose only argument is the <realm> string.
 */

/* 401: challenge issued on behalf of the origin (WWW-Authenticate). */
const char *HTTP_401_fmt =
	"HTTP/1.0 401 Unauthorized\r\n"
	"Cache-Control: no-cache\r\n"
	"Connection: close\r\n"
	"Content-Type: text/html\r\n"
	"WWW-Authenticate: Basic realm=\"%s\"\r\n"
	"\r\n"
	"<html><body><h1>401 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n";

/* 407: challenge issued on behalf of the proxy (Proxy-Authenticate). The
 * HTML body must announce the same status code as the status line.
 */
const char *HTTP_407_fmt =
	"HTTP/1.0 407 Unauthorized\r\n"
	"Cache-Control: no-cache\r\n"
	"Connection: close\r\n"
	"Content-Type: text/html\r\n"
	"Proxy-Authenticate: Basic realm=\"%s\"\r\n"
	"\r\n"
	"<html><body><h1>407 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n";
127
Willy Tarreau0f772532006-12-23 20:51:41 +0100128
129const int http_err_codes[HTTP_ERR_SIZE] = {
Willy Tarreauae94d4d2011-05-11 16:28:49 +0200130 [HTTP_ERR_200] = 200, /* used by "monitor-uri" */
Willy Tarreau0f772532006-12-23 20:51:41 +0100131 [HTTP_ERR_400] = 400,
132 [HTTP_ERR_403] = 403,
133 [HTTP_ERR_408] = 408,
134 [HTTP_ERR_500] = 500,
135 [HTTP_ERR_502] = 502,
136 [HTTP_ERR_503] = 503,
137 [HTTP_ERR_504] = 504,
138};
139
Willy Tarreau80587432006-12-24 17:47:20 +0100140static const char *http_err_msgs[HTTP_ERR_SIZE] = {
Willy Tarreauae94d4d2011-05-11 16:28:49 +0200141 [HTTP_ERR_200] =
142 "HTTP/1.0 200 OK\r\n"
143 "Cache-Control: no-cache\r\n"
144 "Connection: close\r\n"
145 "Content-Type: text/html\r\n"
146 "\r\n"
147 "<html><body><h1>200 OK</h1>\nService ready.\n</body></html>\n",
148
Willy Tarreau0f772532006-12-23 20:51:41 +0100149 [HTTP_ERR_400] =
Willy Tarreau80587432006-12-24 17:47:20 +0100150 "HTTP/1.0 400 Bad request\r\n"
Willy Tarreau0f772532006-12-23 20:51:41 +0100151 "Cache-Control: no-cache\r\n"
152 "Connection: close\r\n"
153 "Content-Type: text/html\r\n"
154 "\r\n"
155 "<html><body><h1>400 Bad request</h1>\nYour browser sent an invalid request.\n</body></html>\n",
156
157 [HTTP_ERR_403] =
158 "HTTP/1.0 403 Forbidden\r\n"
159 "Cache-Control: no-cache\r\n"
160 "Connection: close\r\n"
161 "Content-Type: text/html\r\n"
162 "\r\n"
163 "<html><body><h1>403 Forbidden</h1>\nRequest forbidden by administrative rules.\n</body></html>\n",
164
165 [HTTP_ERR_408] =
166 "HTTP/1.0 408 Request Time-out\r\n"
167 "Cache-Control: no-cache\r\n"
168 "Connection: close\r\n"
169 "Content-Type: text/html\r\n"
170 "\r\n"
171 "<html><body><h1>408 Request Time-out</h1>\nYour browser didn't send a complete request in time.\n</body></html>\n",
172
173 [HTTP_ERR_500] =
174 "HTTP/1.0 500 Server Error\r\n"
175 "Cache-Control: no-cache\r\n"
176 "Connection: close\r\n"
177 "Content-Type: text/html\r\n"
178 "\r\n"
179 "<html><body><h1>500 Server Error</h1>\nAn internal server error occured.\n</body></html>\n",
180
181 [HTTP_ERR_502] =
182 "HTTP/1.0 502 Bad Gateway\r\n"
183 "Cache-Control: no-cache\r\n"
184 "Connection: close\r\n"
185 "Content-Type: text/html\r\n"
186 "\r\n"
187 "<html><body><h1>502 Bad Gateway</h1>\nThe server returned an invalid or incomplete response.\n</body></html>\n",
188
189 [HTTP_ERR_503] =
190 "HTTP/1.0 503 Service Unavailable\r\n"
191 "Cache-Control: no-cache\r\n"
192 "Connection: close\r\n"
193 "Content-Type: text/html\r\n"
194 "\r\n"
195 "<html><body><h1>503 Service Unavailable</h1>\nNo server is available to handle this request.\n</body></html>\n",
196
197 [HTTP_ERR_504] =
198 "HTTP/1.0 504 Gateway Time-out\r\n"
199 "Cache-Control: no-cache\r\n"
200 "Connection: close\r\n"
201 "Content-Type: text/html\r\n"
202 "\r\n"
203 "<html><body><h1>504 Gateway Time-out</h1>\nThe server didn't respond in time.\n</body></html>\n",
204
205};
206
Cyril Bonté19979e12012-04-04 12:57:21 +0200207/* status codes available for the stats admin page (strictly 4 chars length) */
208const char *stat_status_codes[STAT_STATUS_SIZE] = {
209 [STAT_STATUS_DENY] = "DENY",
210 [STAT_STATUS_DONE] = "DONE",
211 [STAT_STATUS_ERRP] = "ERRP",
212 [STAT_STATUS_EXCD] = "EXCD",
213 [STAT_STATUS_NONE] = "NONE",
214 [STAT_STATUS_PART] = "PART",
215 [STAT_STATUS_UNKN] = "UNKN",
216};
217
218
Willy Tarreau80587432006-12-24 17:47:20 +0100219/* We must put the messages here since GCC cannot initialize consts depending
220 * on strlen().
221 */
222struct chunk http_err_chunks[HTTP_ERR_SIZE];
223
Willy Tarreaua890d072013-04-02 12:01:06 +0200224/* this struct is used between calls to smp_fetch_hdr() or smp_fetch_cookie() */
225static struct hdr_ctx static_hdr_ctx;
226
#define FD_SETS_ARE_BITFIELDS
#ifdef FD_SETS_ARE_BITFIELDS
/*
 * Character maps handled through the FD_* macros: each bit stands for one
 * ASCII code. FD_SET() marks a byte as "must be encoded", and a non-zero
 * FD_ISSET() means the byte has to be encoded. Only ever pass byte values
 * in the 0..255 range to these macros.
 *
 * The array length is chosen so that each map holds at least 256 bits,
 * whatever sizeof(fd_set) is.
 */
fd_set hdr_encode_map[(sizeof(fd_set) > (256/8)) ? 1 : ((256/8) / sizeof(fd_set))];
fd_set url_encode_map[(sizeof(fd_set) > (256/8)) ? 1 : ((256/8) / sizeof(fd_set))];

#else
#error "Check if your OS uses bitfields for fd_sets"
#endif
242
Willy Tarreau80587432006-12-24 17:47:20 +0100243void init_proto_http()
244{
Willy Tarreau42250582007-04-01 01:30:43 +0200245 int i;
246 char *tmp;
Willy Tarreau80587432006-12-24 17:47:20 +0100247 int msg;
Willy Tarreau42250582007-04-01 01:30:43 +0200248
Willy Tarreau80587432006-12-24 17:47:20 +0100249 for (msg = 0; msg < HTTP_ERR_SIZE; msg++) {
250 if (!http_err_msgs[msg]) {
251 Alert("Internal error: no message defined for HTTP return code %d. Aborting.\n", msg);
252 abort();
253 }
254
255 http_err_chunks[msg].str = (char *)http_err_msgs[msg];
256 http_err_chunks[msg].len = strlen(http_err_msgs[msg]);
257 }
Willy Tarreau42250582007-04-01 01:30:43 +0200258
259 /* initialize the log header encoding map : '{|}"#' should be encoded with
260 * '#' as prefix, as well as non-printable characters ( <32 or >= 127 ).
261 * URL encoding only requires '"', '#' to be encoded as well as non-
262 * printable characters above.
263 */
264 memset(hdr_encode_map, 0, sizeof(hdr_encode_map));
265 memset(url_encode_map, 0, sizeof(url_encode_map));
266 for (i = 0; i < 32; i++) {
267 FD_SET(i, hdr_encode_map);
268 FD_SET(i, url_encode_map);
269 }
270 for (i = 127; i < 256; i++) {
271 FD_SET(i, hdr_encode_map);
272 FD_SET(i, url_encode_map);
273 }
274
275 tmp = "\"#{|}";
276 while (*tmp) {
277 FD_SET(*tmp, hdr_encode_map);
278 tmp++;
279 }
280
281 tmp = "\"#";
282 while (*tmp) {
283 FD_SET(*tmp, url_encode_map);
284 tmp++;
285 }
Willy Tarreau332f8bf2007-05-13 21:36:56 +0200286
287 /* memory allocations */
288 pool2_requri = create_pool("requri", REQURI_LEN, MEM_F_SHARED);
William Lallemanda73203e2012-03-12 12:48:57 +0100289 pool2_uniqueid = create_pool("uniqueid", UNIQUEID_LEN, MEM_F_SHARED);
Willy Tarreau80587432006-12-24 17:47:20 +0100290}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200291
Willy Tarreau53b6c742006-12-17 13:37:46 +0100292/*
293 * We have 26 list of methods (1 per first letter), each of which can have
294 * up to 3 entries (2 valid, 1 null).
295 */
296struct http_method_desc {
Willy Tarreauc8987b32013-12-06 23:43:17 +0100297 enum http_meth_t meth;
Willy Tarreau53b6c742006-12-17 13:37:46 +0100298 int len;
299 const char text[8];
300};
301
Willy Tarreau8d5d7f22007-01-21 19:16:41 +0100302const struct http_method_desc http_methods[26][3] = {
Willy Tarreau53b6c742006-12-17 13:37:46 +0100303 ['C' - 'A'] = {
304 [0] = { .meth = HTTP_METH_CONNECT , .len=7, .text="CONNECT" },
305 },
306 ['D' - 'A'] = {
307 [0] = { .meth = HTTP_METH_DELETE , .len=6, .text="DELETE" },
308 },
309 ['G' - 'A'] = {
310 [0] = { .meth = HTTP_METH_GET , .len=3, .text="GET" },
311 },
312 ['H' - 'A'] = {
313 [0] = { .meth = HTTP_METH_HEAD , .len=4, .text="HEAD" },
314 },
315 ['P' - 'A'] = {
316 [0] = { .meth = HTTP_METH_POST , .len=4, .text="POST" },
317 [1] = { .meth = HTTP_METH_PUT , .len=3, .text="PUT" },
318 },
319 ['T' - 'A'] = {
320 [0] = { .meth = HTTP_METH_TRACE , .len=5, .text="TRACE" },
321 },
322 /* rest is empty like this :
323 * [1] = { .meth = HTTP_METH_NONE , .len=0, .text="" },
324 */
325};
326
/*
 * Character class tables, indexed by byte value. It is about twice as fast
 * on recent architectures to look a byte up in a table than to perform a
 * boolean AND or OR between two tests. Refer to RFC2616 for those chars.
 */

/* space or horizontal tab */
const char http_is_spht[256] = {
	['\t'] = 1,
	[' ']  = 1,
};

/* carriage return or line feed */
const char http_is_crlf[256] = {
	['\n'] = 1,
	['\r'] = 1,
};

/* linear white space: union of the two classes above */
const char http_is_lws[256] = {
	['\t'] = 1,
	['\n'] = 1,
	['\r'] = 1,
	[' ']  = 1,
};

/* separators */
const char http_is_sep[256] = {
	['\t'] = 1, [' ']  = 1, ['"']  = 1,
	['(']  = 1, [')']  = 1, [',']  = 1,
	['/']  = 1, [':']  = 1, [';']  = 1,
	['<']  = 1, ['=']  = 1, ['>']  = 1,
	['?']  = 1, ['@']  = 1, ['[']  = 1,
	['\\'] = 1, [']']  = 1, ['{']  = 1,
	['}']  = 1,
};

/* control characters: 0..31 and DEL */
const char http_is_ctl[256] = {
	[0 ... 31] = 1,
	[127] = 1,
};
357
/*
 * A token is any ASCII char that is neither a separator nor a CTL char.
 * Only the token characters are listed; every other entry stays at zero.
 * No index is assigned twice, since gcc-2.95 does not handle overwritten
 * values correctly.
 */
const char http_is_token[256] = {
	['!'] = 1,
	['#'] = 1, ['$'] = 1, ['%'] = 1, ['&'] = 1, ['\''] = 1,
	['*'] = 1, ['+'] = 1,
	['-'] = 1, ['.'] = 1,
	['0' ... '9'] = 1,
	['A' ... 'Z'] = 1,
	['^'] = 1, ['_'] = 1, ['`'] = 1,
	['a' ... 'z'] = 1,
	['|'] = 1,
	['~'] = 1,
};
389
390
/*
 * An http ver_token is any ASCII char which can be found in an HTTP version
 * string: 'H', 'T', 'P', '/', '.' and any digit.
 */
const char http_is_ver_token[256] = {
	['H'] = 1, ['T'] = 1, ['P'] = 1,
	['/'] = 1, ['.'] = 1,
	['0' ... '9'] = 1,
};
401
402
/*
 * Silent debugging aid, only built when DEBUG_FSM is defined. It formats one
 * line for the request side and one for the response side into the trash
 * chunk and write()s each of them to fd #-1, so the output is only visible
 * in strace. Trash is modified. Without DEBUG_FSM the call is a no-op.
 */
#if defined(DEBUG_FSM)
static void http_silent_debug(int line, struct session *s)
{
	/* request side */
	chunk_printf(&trash,
	             "[%04d] req: p=%d(%d) s=%d bf=%08x an=%08x data=%p size=%d l=%d w=%p r=%p o=%p sm=%d fw=%ld tf=%08x\n",
	             line,
	             s->si[0].state, s->si[0].fd, s->txn.req.msg_state,
	             s->req->flags, s->req->analysers,
	             s->req->buf->data, s->req->buf->size,
	             s->req->l, s->req->w, s->req->r,
	             s->req->buf->p, s->req->buf->o,
	             s->req->to_forward, s->txn.flags);
	write(-1, trash.str, trash.len);

	/* response side */
	chunk_printf(&trash,
	             " %04d rep: p=%d(%d) s=%d bf=%08x an=%08x data=%p size=%d l=%d w=%p r=%p o=%p sm=%d fw=%ld\n",
	             line,
	             s->si[1].state, s->si[1].fd, s->txn.rsp.msg_state,
	             s->rep->flags, s->rep->analysers,
	             s->rep->buf->data, s->rep->buf->size,
	             s->rep->l, s->rep->w, s->rep->r,
	             s->rep->buf->p, s->rep->buf->o,
	             s->rep->to_forward);
	write(-1, trash.str, trash.len);
}
#else
#define http_silent_debug(l,s) do { } while (0)
#endif
426
427/*
Willy Tarreau6acf7c92012-03-09 13:30:45 +0100428 * Adds a header and its CRLF at the tail of the message's buffer, just before
429 * the last CRLF. Text length is measured first, so it cannot be NULL.
Willy Tarreau4af6f3a2007-03-18 22:36:26 +0100430 * The header is also automatically added to the index <hdr_idx>, and the end
431 * of headers is automatically adjusted. The number of bytes added is returned
432 * on success, otherwise <0 is returned indicating an error.
433 */
Willy Tarreau6acf7c92012-03-09 13:30:45 +0100434int http_header_add_tail(struct http_msg *msg, struct hdr_idx *hdr_idx, const char *text)
Willy Tarreau4af6f3a2007-03-18 22:36:26 +0100435{
436 int bytes, len;
437
438 len = strlen(text);
Willy Tarreau9b28e032012-10-12 23:49:43 +0200439 bytes = buffer_insert_line2(msg->chn->buf, msg->chn->buf->p + msg->eoh, text, len);
Willy Tarreau4af6f3a2007-03-18 22:36:26 +0100440 if (!bytes)
441 return -1;
Willy Tarreaufa355d42009-11-29 18:12:29 +0100442 http_msg_move_end(msg, bytes);
Willy Tarreau4af6f3a2007-03-18 22:36:26 +0100443 return hdr_idx_add(len, 1, hdr_idx, hdr_idx->tail);
444}
445
446/*
Willy Tarreau6acf7c92012-03-09 13:30:45 +0100447 * Adds a header and its CRLF at the tail of the message's buffer, just before
448 * the last CRLF. <len> bytes are copied, not counting the CRLF. If <text> is NULL, then
Willy Tarreau4af6f3a2007-03-18 22:36:26 +0100449 * the buffer is only opened and the space reserved, but nothing is copied.
450 * The header is also automatically added to the index <hdr_idx>, and the end
451 * of headers is automatically adjusted. The number of bytes added is returned
452 * on success, otherwise <0 is returned indicating an error.
453 */
Willy Tarreau6acf7c92012-03-09 13:30:45 +0100454int http_header_add_tail2(struct http_msg *msg,
455 struct hdr_idx *hdr_idx, const char *text, int len)
Willy Tarreau4af6f3a2007-03-18 22:36:26 +0100456{
457 int bytes;
458
Willy Tarreau9b28e032012-10-12 23:49:43 +0200459 bytes = buffer_insert_line2(msg->chn->buf, msg->chn->buf->p + msg->eoh, text, len);
Willy Tarreau4af6f3a2007-03-18 22:36:26 +0100460 if (!bytes)
461 return -1;
Willy Tarreaufa355d42009-11-29 18:12:29 +0100462 http_msg_move_end(msg, bytes);
Willy Tarreau4af6f3a2007-03-18 22:36:26 +0100463 return hdr_idx_add(len, 1, hdr_idx, hdr_idx->tail);
464}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200465
/*
 * Checks whether the header starting at <hdr> and ending before <end> is
 * named exactly <name> (<len> chars, compared case-insensitively) and is
 * immediately followed by a colon.
 *
 * Returns 0 if the name does not match. Otherwise returns the offset,
 * relative to <hdr>, of the first character after the colon which is neither
 * a space nor a tab. When only spaces/tabs remain up to <end> and at least
 * one character follows the colon, <len>+2 is returned instead, so that a
 * replacement may start right after the first space. When nothing at all
 * follows the colon, the offset returned is <end>-<hdr>.
 */
int http_header_match2(const char *hdr, const char *end,
                       const char *name, int len)
{
	const char *cur;

	if (hdr + len >= end || hdr[len] != ':')
		return 0;
	if (strncasecmp(hdr, name, len) != 0)
		return 0;

	/* skip the blanks which follow the colon */
	for (cur = hdr + len + 1; cur < end && HTTP_IS_SPHT(*cur); cur++)
		;

	if (cur >= end && end - hdr >= len + 2)
		return len + 2; /* we may replace starting from second space */

	return cur - hdr;
}
491
Willy Tarreau04ff9f12013-06-10 18:39:42 +0200492/* Find the first or next occurrence of header <name> in message buffer <sol>
493 * using headers index <idx>, and return it in the <ctx> structure. This
494 * structure holds everything necessary to use the header and find next
495 * occurrence. If its <idx> member is 0, the header is searched from the
496 * beginning. Otherwise, the next occurrence is returned. The function returns
497 * 1 when it finds a value, and 0 when there is no more. It is very similar to
498 * http_find_header2() except that it is designed to work with full-line headers
499 * whose comma is not a delimiter but is part of the syntax. As a special case,
500 * if ctx->val is NULL when searching for a new values of a header, the current
501 * header is rescanned. This allows rescanning after a header deletion.
502 */
503int http_find_full_header2(const char *name, int len,
504 char *sol, struct hdr_idx *idx,
505 struct hdr_ctx *ctx)
506{
507 char *eol, *sov;
508 int cur_idx, old_idx;
509
510 cur_idx = ctx->idx;
511 if (cur_idx) {
512 /* We have previously returned a header, let's search another one */
513 sol = ctx->line;
514 eol = sol + idx->v[cur_idx].len;
515 goto next_hdr;
516 }
517
518 /* first request for this header */
519 sol += hdr_idx_first_pos(idx);
520 old_idx = 0;
521 cur_idx = hdr_idx_first_idx(idx);
522 while (cur_idx) {
523 eol = sol + idx->v[cur_idx].len;
524
525 if (len == 0) {
526 /* No argument was passed, we want any header.
527 * To achieve this, we simply build a fake request. */
528 while (sol + len < eol && sol[len] != ':')
529 len++;
530 name = sol;
531 }
532
533 if ((len < eol - sol) &&
534 (sol[len] == ':') &&
535 (strncasecmp(sol, name, len) == 0)) {
536 ctx->del = len;
537 sov = sol + len + 1;
538 while (sov < eol && http_is_lws[(unsigned char)*sov])
539 sov++;
540
541 ctx->line = sol;
542 ctx->prev = old_idx;
543 ctx->idx = cur_idx;
544 ctx->val = sov - sol;
545 ctx->tws = 0;
546 while (eol > sov && http_is_lws[(unsigned char)*(eol - 1)]) {
547 eol--;
548 ctx->tws++;
549 }
550 ctx->vlen = eol - sov;
551 return 1;
552 }
553 next_hdr:
554 sol = eol + idx->v[cur_idx].cr + 1;
555 old_idx = cur_idx;
556 cur_idx = idx->v[cur_idx].next;
557 }
558 return 0;
559}
560
/*
 * Finds the end of the header value contained between <s> and <e>. See
 * RFC2616, par 2.2 for more information. The scan stops on the first comma
 * which is not inside a quoted string; within quotes, a backslash protects
 * the character which follows it. Returns a pointer to that comma, or <e>
 * if none is found. It requires a valid header to return a valid result,
 * and works for headers defined as comma-separated lists.
 */
char *find_hdr_value_end(char *s, const char *e)
{
	enum { ST_PLAIN, ST_QUOTED, ST_ESCAPED } state = ST_PLAIN;

	while (s < e) {
		switch (state) {
		case ST_ESCAPED:
			/* character protected by a backslash: ignore it */
			state = ST_QUOTED;
			break;

		case ST_QUOTED:
			if (*s == '\\')
				state = ST_ESCAPED;
			else if (*s == '"')
				state = ST_PLAIN;
			break;

		default:
			if (*s == '"')
				state = ST_QUOTED;
			else if (*s == ',')
				return s;
			break;
		}
		s++;
	}
	return s;
}
581
582/* Find the first or next occurrence of header <name> in message buffer <sol>
583 * using headers index <idx>, and return it in the <ctx> structure. This
584 * structure holds everything necessary to use the header and find next
585 * occurrence. If its <idx> member is 0, the header is searched from the
586 * beginning. Otherwise, the next occurrence is returned. The function returns
Willy Tarreau68085d82010-01-18 14:54:04 +0100587 * 1 when it finds a value, and 0 when there is no more. It is designed to work
588 * with headers defined as comma-separated lists. As a special case, if ctx->val
589 * is NULL when searching for a new values of a header, the current header is
590 * rescanned. This allows rescanning after a header deletion.
Willy Tarreau33a7e692007-06-10 19:45:56 +0200591 */
592int http_find_header2(const char *name, int len,
Willy Tarreau68085d82010-01-18 14:54:04 +0100593 char *sol, struct hdr_idx *idx,
Willy Tarreau33a7e692007-06-10 19:45:56 +0200594 struct hdr_ctx *ctx)
595{
Willy Tarreau68085d82010-01-18 14:54:04 +0100596 char *eol, *sov;
597 int cur_idx, old_idx;
Willy Tarreau33a7e692007-06-10 19:45:56 +0200598
Willy Tarreau68085d82010-01-18 14:54:04 +0100599 cur_idx = ctx->idx;
600 if (cur_idx) {
Willy Tarreau33a7e692007-06-10 19:45:56 +0200601 /* We have previously returned a value, let's search
602 * another one on the same line.
603 */
Willy Tarreau33a7e692007-06-10 19:45:56 +0200604 sol = ctx->line;
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200605 ctx->del = ctx->val + ctx->vlen + ctx->tws;
Willy Tarreau68085d82010-01-18 14:54:04 +0100606 sov = sol + ctx->del;
Willy Tarreau33a7e692007-06-10 19:45:56 +0200607 eol = sol + idx->v[cur_idx].len;
608
609 if (sov >= eol)
610 /* no more values in this header */
611 goto next_hdr;
612
Willy Tarreau68085d82010-01-18 14:54:04 +0100613 /* values remaining for this header, skip the comma but save it
614 * for later use (eg: for header deletion).
615 */
Willy Tarreau33a7e692007-06-10 19:45:56 +0200616 sov++;
617 while (sov < eol && http_is_lws[(unsigned char)*sov])
618 sov++;
619
620 goto return_hdr;
621 }
622
623 /* first request for this header */
624 sol += hdr_idx_first_pos(idx);
Willy Tarreau68085d82010-01-18 14:54:04 +0100625 old_idx = 0;
Willy Tarreau33a7e692007-06-10 19:45:56 +0200626 cur_idx = hdr_idx_first_idx(idx);
Willy Tarreau33a7e692007-06-10 19:45:56 +0200627 while (cur_idx) {
628 eol = sol + idx->v[cur_idx].len;
629
Willy Tarreau1ad7c6d2007-06-10 21:42:55 +0200630 if (len == 0) {
631 /* No argument was passed, we want any header.
632 * To achieve this, we simply build a fake request. */
633 while (sol + len < eol && sol[len] != ':')
634 len++;
635 name = sol;
636 }
637
Willy Tarreau33a7e692007-06-10 19:45:56 +0200638 if ((len < eol - sol) &&
639 (sol[len] == ':') &&
640 (strncasecmp(sol, name, len) == 0)) {
Willy Tarreau68085d82010-01-18 14:54:04 +0100641 ctx->del = len;
Willy Tarreau33a7e692007-06-10 19:45:56 +0200642 sov = sol + len + 1;
643 while (sov < eol && http_is_lws[(unsigned char)*sov])
644 sov++;
Willy Tarreau68085d82010-01-18 14:54:04 +0100645
Willy Tarreau33a7e692007-06-10 19:45:56 +0200646 ctx->line = sol;
Willy Tarreau68085d82010-01-18 14:54:04 +0100647 ctx->prev = old_idx;
648 return_hdr:
Willy Tarreau33a7e692007-06-10 19:45:56 +0200649 ctx->idx = cur_idx;
650 ctx->val = sov - sol;
651
652 eol = find_hdr_value_end(sov, eol);
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200653 ctx->tws = 0;
Willy Tarreau275600b2011-09-16 08:11:26 +0200654 while (eol > sov && http_is_lws[(unsigned char)*(eol - 1)]) {
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200655 eol--;
656 ctx->tws++;
657 }
Willy Tarreau33a7e692007-06-10 19:45:56 +0200658 ctx->vlen = eol - sov;
659 return 1;
660 }
661 next_hdr:
662 sol = eol + idx->v[cur_idx].cr + 1;
Willy Tarreau68085d82010-01-18 14:54:04 +0100663 old_idx = cur_idx;
Willy Tarreau33a7e692007-06-10 19:45:56 +0200664 cur_idx = idx->v[cur_idx].next;
665 }
666 return 0;
667}
668
/* Same as http_find_header2(), for a NUL-terminated <name> whose length is
 * measured with strlen(); <name> must therefore not be NULL.
 */
int http_find_header(const char *name,
                     char *sol, struct hdr_idx *idx,
                     struct hdr_ctx *ctx)
{
	int name_len = strlen(name);

	return http_find_header2(name, name_len, sol, idx, ctx);
}
675
Willy Tarreau68085d82010-01-18 14:54:04 +0100676/* Remove one value of a header. This only works on a <ctx> returned by one of
677 * the http_find_header functions. The value is removed, as well as surrounding
678 * commas if any. If the removed value was alone, the whole header is removed.
Willy Tarreau6acf7c92012-03-09 13:30:45 +0100679 * The ctx is always updated accordingly, as well as the buffer and HTTP
Willy Tarreau68085d82010-01-18 14:54:04 +0100680 * message <msg>. The new index is returned. If it is zero, it means there is
681 * no more header, so any processing may stop. The ctx is always left in a form
682 * that can be handled by http_find_header2() to find next occurrence.
683 */
Willy Tarreau6acf7c92012-03-09 13:30:45 +0100684int http_remove_header2(struct http_msg *msg, struct hdr_idx *idx, struct hdr_ctx *ctx)
Willy Tarreau68085d82010-01-18 14:54:04 +0100685{
686 int cur_idx = ctx->idx;
687 char *sol = ctx->line;
688 struct hdr_idx_elem *hdr;
689 int delta, skip_comma;
690
691 if (!cur_idx)
692 return 0;
693
694 hdr = &idx->v[cur_idx];
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200695 if (sol[ctx->del] == ':' && ctx->val + ctx->vlen + ctx->tws == hdr->len) {
Willy Tarreau68085d82010-01-18 14:54:04 +0100696 /* This was the only value of the header, we must now remove it entirely. */
Willy Tarreau9b28e032012-10-12 23:49:43 +0200697 delta = buffer_replace2(msg->chn->buf, sol, sol + hdr->len + hdr->cr + 1, NULL, 0);
Willy Tarreau68085d82010-01-18 14:54:04 +0100698 http_msg_move_end(msg, delta);
699 idx->used--;
700 hdr->len = 0; /* unused entry */
701 idx->v[ctx->prev].next = idx->v[ctx->idx].next;
Willy Tarreau5c4784f2011-02-12 13:07:35 +0100702 if (idx->tail == ctx->idx)
703 idx->tail = ctx->prev;
Willy Tarreau68085d82010-01-18 14:54:04 +0100704 ctx->idx = ctx->prev; /* walk back to the end of previous header */
705 ctx->line -= idx->v[ctx->idx].len + idx->v[cur_idx].cr + 1;
706 ctx->val = idx->v[ctx->idx].len; /* point to end of previous header */
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200707 ctx->tws = ctx->vlen = 0;
Willy Tarreau68085d82010-01-18 14:54:04 +0100708 return ctx->idx;
709 }
710
711 /* This was not the only value of this header. We have to remove between
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200712 * ctx->del+1 and ctx->val+ctx->vlen+ctx->tws+1 included. If it is the
713 * last entry of the list, we remove the last separator.
Willy Tarreau68085d82010-01-18 14:54:04 +0100714 */
715
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200716 skip_comma = (ctx->val + ctx->vlen + ctx->tws == hdr->len) ? 0 : 1;
Willy Tarreau9b28e032012-10-12 23:49:43 +0200717 delta = buffer_replace2(msg->chn->buf, sol + ctx->del + skip_comma,
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200718 sol + ctx->val + ctx->vlen + ctx->tws + skip_comma,
Willy Tarreau68085d82010-01-18 14:54:04 +0100719 NULL, 0);
720 hdr->len += delta;
721 http_msg_move_end(msg, delta);
722 ctx->val = ctx->del;
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200723 ctx->tws = ctx->vlen = 0;
Willy Tarreau68085d82010-01-18 14:54:04 +0100724 return ctx->idx;
725}
726
Willy Tarreau2d3d94c2008-11-30 20:20:08 +0100727/* This function handles a server error at the stream interface level. The
728 * stream interface is assumed to be already in a closed state. An optional
729 * message is copied into the input buffer, and an HTTP status code stored.
730 * The error flags are set to the values in arguments. Any pending request
Willy Tarreau6f0aa472009-03-08 20:33:29 +0100731 * in this buffer will be lost.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200732 */
Willy Tarreau2d3d94c2008-11-30 20:20:08 +0100733static void http_server_error(struct session *t, struct stream_interface *si,
734 int err, int finst, int status, const struct chunk *msg)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200735{
Willy Tarreau8263d2b2012-08-28 00:06:31 +0200736 channel_auto_read(si->ob);
737 channel_abort(si->ob);
738 channel_auto_close(si->ob);
739 channel_erase(si->ob);
740 channel_auto_close(si->ib);
741 channel_auto_read(si->ib);
Willy Tarreau0f772532006-12-23 20:51:41 +0100742 if (status > 0 && msg) {
Willy Tarreau3bac9ff2007-03-18 17:31:28 +0100743 t->txn.status = status;
Willy Tarreau9dab5fc2012-05-07 11:56:55 +0200744 bo_inject(si->ib, msg->str, msg->len);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200745 }
746 if (!(t->flags & SN_ERR_MASK))
747 t->flags |= err;
748 if (!(t->flags & SN_FINST_MASK))
749 t->flags |= finst;
750}
751
Willy Tarreau80587432006-12-24 17:47:20 +0100752/* This function returns the appropriate error location for the given session
753 * and message.
754 */
755
Willy Tarreau783f2582012-09-04 12:19:04 +0200756struct chunk *http_error_message(struct session *s, int msgnum)
Willy Tarreau80587432006-12-24 17:47:20 +0100757{
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200758 if (s->be->errmsg[msgnum].str)
759 return &s->be->errmsg[msgnum];
Willy Tarreau80587432006-12-24 17:47:20 +0100760 else if (s->fe->errmsg[msgnum].str)
761 return &s->fe->errmsg[msgnum];
762 else
763 return &http_err_chunks[msgnum];
764}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200765
Willy Tarreau53b6c742006-12-17 13:37:46 +0100766/*
767 * returns HTTP_METH_NONE if there is nothing valid to read (empty or non-text
768 * string), HTTP_METH_OTHER for unknown methods, or the identified method.
769 */
Willy Tarreauc8987b32013-12-06 23:43:17 +0100770static enum http_meth_t find_http_meth(const char *str, const int len)
Willy Tarreau53b6c742006-12-17 13:37:46 +0100771{
772 unsigned char m;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +0100773 const struct http_method_desc *h;
Willy Tarreau53b6c742006-12-17 13:37:46 +0100774
775 m = ((unsigned)*str - 'A');
776
777 if (m < 26) {
Willy Tarreau8d5d7f22007-01-21 19:16:41 +0100778 for (h = http_methods[m]; h->len > 0; h++) {
779 if (unlikely(h->len != len))
Willy Tarreau53b6c742006-12-17 13:37:46 +0100780 continue;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +0100781 if (likely(memcmp(str, h->text, h->len) == 0))
Willy Tarreau53b6c742006-12-17 13:37:46 +0100782 return h->meth;
Willy Tarreau53b6c742006-12-17 13:37:46 +0100783 };
784 return HTTP_METH_OTHER;
785 }
786 return HTTP_METH_NONE;
787
788}
789
Willy Tarreau21d2af32008-02-14 20:25:24 +0100790/* Parse the URI from the given transaction (which is assumed to be in request
791 * phase) and look for the "/" beginning the PATH. If not found, return NULL.
792 * It is returned otherwise.
793 */
794static char *
795http_get_path(struct http_txn *txn)
796{
797 char *ptr, *end;
798
Willy Tarreau9b28e032012-10-12 23:49:43 +0200799 ptr = txn->req.chn->buf->p + txn->req.sl.rq.u;
Willy Tarreau21d2af32008-02-14 20:25:24 +0100800 end = ptr + txn->req.sl.rq.u_l;
801
802 if (ptr >= end)
803 return NULL;
804
805 /* RFC2616, par. 5.1.2 :
806 * Request-URI = "*" | absuri | abspath | authority
807 */
808
809 if (*ptr == '*')
810 return NULL;
811
812 if (isalpha((unsigned char)*ptr)) {
813 /* this is a scheme as described by RFC3986, par. 3.1 */
814 ptr++;
815 while (ptr < end &&
816 (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
817 ptr++;
818 /* skip '://' */
819 if (ptr == end || *ptr++ != ':')
820 return NULL;
821 if (ptr == end || *ptr++ != '/')
822 return NULL;
823 if (ptr == end || *ptr++ != '/')
824 return NULL;
825 }
826 /* skip [user[:passwd]@]host[:[port]] */
827
828 while (ptr < end && *ptr != '/')
829 ptr++;
830
831 if (ptr == end)
832 return NULL;
833
834 /* OK, we got the '/' ! */
835 return ptr;
836}
837
Willy Tarreau71241ab2012-12-27 11:30:54 +0100838/* Returns a 302 for a redirectable request that reaches a server working in
839 * in redirect mode. This may only be called just after the stream interface
840 * has moved to SI_ST_ASS. Unprocessable requests are left unchanged and will
841 * follow normal proxy processing. NOTE: this function is designed to support
842 * being called once data are scheduled for forwarding.
Willy Tarreauefb453c2008-10-26 20:49:47 +0100843 */
Willy Tarreau71241ab2012-12-27 11:30:54 +0100844void http_perform_server_redirect(struct session *s, struct stream_interface *si)
Willy Tarreauefb453c2008-10-26 20:49:47 +0100845{
846 struct http_txn *txn;
Willy Tarreau827aee92011-03-10 16:55:02 +0100847 struct server *srv;
Willy Tarreauefb453c2008-10-26 20:49:47 +0100848 char *path;
Willy Tarreaucde18fc2012-05-30 07:59:54 +0200849 int len, rewind;
Willy Tarreauefb453c2008-10-26 20:49:47 +0100850
851 /* 1: create the response header */
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100852 trash.len = strlen(HTTP_302);
853 memcpy(trash.str, HTTP_302, trash.len);
Willy Tarreauefb453c2008-10-26 20:49:47 +0100854
Willy Tarreau3fdb3662012-11-12 00:42:33 +0100855 srv = objt_server(s->target);
Willy Tarreau827aee92011-03-10 16:55:02 +0100856
Willy Tarreauefb453c2008-10-26 20:49:47 +0100857 /* 2: add the server's prefix */
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100858 if (trash.len + srv->rdr_len > trash.size)
Willy Tarreauefb453c2008-10-26 20:49:47 +0100859 return;
860
Willy Tarreaudcb75c42010-01-10 00:24:22 +0100861 /* special prefix "/" means don't change URL */
Willy Tarreau827aee92011-03-10 16:55:02 +0100862 if (srv->rdr_len != 1 || *srv->rdr_pfx != '/') {
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100863 memcpy(trash.str + trash.len, srv->rdr_pfx, srv->rdr_len);
864 trash.len += srv->rdr_len;
Willy Tarreaudcb75c42010-01-10 00:24:22 +0100865 }
Willy Tarreauefb453c2008-10-26 20:49:47 +0100866
Willy Tarreaucde18fc2012-05-30 07:59:54 +0200867 /* 3: add the request URI. Since it was already forwarded, we need
868 * to temporarily rewind the buffer.
869 */
Willy Tarreauefb453c2008-10-26 20:49:47 +0100870 txn = &s->txn;
Willy Tarreau9b28e032012-10-12 23:49:43 +0200871 b_rew(s->req->buf, rewind = s->req->buf->o);
Willy Tarreaucde18fc2012-05-30 07:59:54 +0200872
Willy Tarreauefb453c2008-10-26 20:49:47 +0100873 path = http_get_path(txn);
Willy Tarreau9b28e032012-10-12 23:49:43 +0200874 len = buffer_count(s->req->buf, path, b_ptr(s->req->buf, txn->req.sl.rq.u + txn->req.sl.rq.u_l));
Willy Tarreaucde18fc2012-05-30 07:59:54 +0200875
Willy Tarreau9b28e032012-10-12 23:49:43 +0200876 b_adv(s->req->buf, rewind);
Willy Tarreaucde18fc2012-05-30 07:59:54 +0200877
Willy Tarreauefb453c2008-10-26 20:49:47 +0100878 if (!path)
879 return;
880
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100881 if (trash.len + len > trash.size - 4) /* 4 for CRLF-CRLF */
Willy Tarreauefb453c2008-10-26 20:49:47 +0100882 return;
883
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100884 memcpy(trash.str + trash.len, path, len);
885 trash.len += len;
Willy Tarreau88d349d2010-01-25 12:15:43 +0100886
887 if (unlikely(txn->flags & TX_USE_PX_CONN)) {
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100888 memcpy(trash.str + trash.len, "\r\nProxy-Connection: close\r\n\r\n", 29);
889 trash.len += 29;
Willy Tarreau88d349d2010-01-25 12:15:43 +0100890 } else {
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100891 memcpy(trash.str + trash.len, "\r\nConnection: close\r\n\r\n", 23);
892 trash.len += 23;
Willy Tarreau88d349d2010-01-25 12:15:43 +0100893 }
Willy Tarreauefb453c2008-10-26 20:49:47 +0100894
895 /* prepare to return without error. */
Willy Tarreau73b013b2012-05-21 16:31:45 +0200896 si_shutr(si);
897 si_shutw(si);
Willy Tarreauefb453c2008-10-26 20:49:47 +0100898 si->err_type = SI_ET_NONE;
Willy Tarreauefb453c2008-10-26 20:49:47 +0100899 si->state = SI_ST_CLO;
900
901 /* send the message */
Willy Tarreau570f2212013-06-10 16:42:09 +0200902 http_server_error(s, si, SN_ERR_LOCAL, SN_FINST_C, 302, &trash);
Willy Tarreauefb453c2008-10-26 20:49:47 +0100903
904 /* FIXME: we should increase a counter of redirects per server and per backend. */
Willy Tarreau4521ba62013-01-24 01:25:25 +0100905 srv_inc_sess_ctr(srv);
Willy Tarreauefb453c2008-10-26 20:49:47 +0100906}
907
Willy Tarreau0cac36f2008-11-30 20:44:17 +0100908/* Return the error message corresponding to si->err_type. It is assumed
Willy Tarreauefb453c2008-10-26 20:49:47 +0100909 * that the server side is closed. Note that err_type is actually a
910 * bitmask, where almost only aborts may be cumulated with other
911 * values. We consider that aborted operations are more important
912 * than timeouts or errors due to the fact that nobody else in the
913 * logs might explain incomplete retries. All others should avoid
914 * being cumulated. It should normally not be possible to have multiple
915 * aborts at once, but just in case, the first one in sequence is reported.
916 */
Willy Tarreau0cac36f2008-11-30 20:44:17 +0100917void http_return_srv_error(struct session *s, struct stream_interface *si)
Willy Tarreauefb453c2008-10-26 20:49:47 +0100918{
Willy Tarreau0cac36f2008-11-30 20:44:17 +0100919 int err_type = si->err_type;
Willy Tarreauefb453c2008-10-26 20:49:47 +0100920
921 if (err_type & SI_ET_QUEUE_ABRT)
Willy Tarreau2d3d94c2008-11-30 20:20:08 +0100922 http_server_error(s, si, SN_ERR_CLICL, SN_FINST_Q,
Willy Tarreau783f2582012-09-04 12:19:04 +0200923 503, http_error_message(s, HTTP_ERR_503));
Willy Tarreauefb453c2008-10-26 20:49:47 +0100924 else if (err_type & SI_ET_CONN_ABRT)
Willy Tarreau2d3d94c2008-11-30 20:20:08 +0100925 http_server_error(s, si, SN_ERR_CLICL, SN_FINST_C,
Willy Tarreau783f2582012-09-04 12:19:04 +0200926 503, http_error_message(s, HTTP_ERR_503));
Willy Tarreauefb453c2008-10-26 20:49:47 +0100927 else if (err_type & SI_ET_QUEUE_TO)
Willy Tarreau2d3d94c2008-11-30 20:20:08 +0100928 http_server_error(s, si, SN_ERR_SRVTO, SN_FINST_Q,
Willy Tarreau783f2582012-09-04 12:19:04 +0200929 503, http_error_message(s, HTTP_ERR_503));
Willy Tarreauefb453c2008-10-26 20:49:47 +0100930 else if (err_type & SI_ET_QUEUE_ERR)
Willy Tarreau2d3d94c2008-11-30 20:20:08 +0100931 http_server_error(s, si, SN_ERR_SRVCL, SN_FINST_Q,
Willy Tarreau783f2582012-09-04 12:19:04 +0200932 503, http_error_message(s, HTTP_ERR_503));
Willy Tarreauefb453c2008-10-26 20:49:47 +0100933 else if (err_type & SI_ET_CONN_TO)
Willy Tarreau2d3d94c2008-11-30 20:20:08 +0100934 http_server_error(s, si, SN_ERR_SRVTO, SN_FINST_C,
Willy Tarreau783f2582012-09-04 12:19:04 +0200935 503, http_error_message(s, HTTP_ERR_503));
Willy Tarreauefb453c2008-10-26 20:49:47 +0100936 else if (err_type & SI_ET_CONN_ERR)
Willy Tarreau2d3d94c2008-11-30 20:20:08 +0100937 http_server_error(s, si, SN_ERR_SRVCL, SN_FINST_C,
Willy Tarreau783f2582012-09-04 12:19:04 +0200938 503, http_error_message(s, HTTP_ERR_503));
Willy Tarreau2d400bb2012-05-14 12:11:47 +0200939 else if (err_type & SI_ET_CONN_RES)
940 http_server_error(s, si, SN_ERR_RESOURCE, SN_FINST_C,
941 503, http_error_message(s, HTTP_ERR_503));
Willy Tarreauefb453c2008-10-26 20:49:47 +0100942 else /* SI_ET_CONN_OTHER and others */
Willy Tarreau2d3d94c2008-11-30 20:20:08 +0100943 http_server_error(s, si, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau783f2582012-09-04 12:19:04 +0200944 500, http_error_message(s, HTTP_ERR_500));
Willy Tarreauefb453c2008-10-26 20:49:47 +0100945}
946
/* Lookup tables declared here and defined outside of this excerpt. */
extern const char sess_term_cond[8];
extern const char sess_fin_state[8];
extern const char *monthname[12];

/* Memory pool handles defined by this file. Only pool2_capture carries an
 * explicit NULL initializer.
 * NOTE(review): judging by their names they back request URIs, unique IDs
 * and captures respectively -- confirm against the pool creation code.
 */
struct pool_head *pool2_requri;
struct pool_head *pool2_uniqueid;
struct pool_head *pool2_capture = NULL;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +0100953
Willy Tarreau117f59e2007-03-04 18:17:17 +0100954/*
955 * Capture headers from message starting at <som> according to header list
956 * <cap_hdr>, and fill the <idx> structure appropriately.
957 */
958void capture_headers(char *som, struct hdr_idx *idx,
959 char **cap, struct cap_hdr *cap_hdr)
960{
961 char *eol, *sol, *col, *sov;
962 int cur_idx;
963 struct cap_hdr *h;
964 int len;
965
966 sol = som + hdr_idx_first_pos(idx);
967 cur_idx = hdr_idx_first_idx(idx);
968
969 while (cur_idx) {
970 eol = sol + idx->v[cur_idx].len;
971
972 col = sol;
973 while (col < eol && *col != ':')
974 col++;
975
976 sov = col + 1;
977 while (sov < eol && http_is_lws[(unsigned char)*sov])
978 sov++;
979
980 for (h = cap_hdr; h; h = h->next) {
981 if ((h->namelen == col - sol) &&
982 (strncasecmp(sol, h->name, h->namelen) == 0)) {
983 if (cap[h->index] == NULL)
984 cap[h->index] =
Willy Tarreaucf7f3202007-05-13 22:46:04 +0200985 pool_alloc2(h->pool);
Willy Tarreau117f59e2007-03-04 18:17:17 +0100986
987 if (cap[h->index] == NULL) {
988 Alert("HTTP capture : out of memory.\n");
989 continue;
990 }
991
992 len = eol - sov;
993 if (len > h->len)
994 len = h->len;
995
996 memcpy(cap[h->index], sov, len);
997 cap[h->index][len]=0;
998 }
999 }
1000 sol = eol + idx->v[cur_idx].cr + 1;
1001 cur_idx = idx->v[cur_idx].next;
1002 }
1003}
1004
1005
/* Jumps to label "bad" unless the character at "ptr" is an LF. */
#define EXPECT_LF_HERE(ptr, bad)                \
	do {                                    \
		if (unlikely(*(ptr) != '\n'))   \
			goto bad;               \
	} while (0)

/* Consumes one character, then either continues at label "good" if input
 * remains, or records "st" as the state to resume from and leaves through
 * the local label http_msg_ood. The expansion site must provide variables
 * named ptr, end and state, and a label named http_msg_ood.
 */
#define EAT_AND_JUMP_OR_RETURN(good, st)        \
	do {                                    \
		if (likely(++ptr < end))        \
			goto good;              \
		state = (st);                   \
		goto http_msg_ood;              \
	} while (0)
1022
1023
Willy Tarreaubaaee002006-06-26 02:48:02 +02001024/*
Willy Tarreaua15645d2007-03-18 16:22:39 +01001025 * This function parses a status line between <ptr> and <end>, starting with
Willy Tarreau8973c702007-01-21 23:58:29 +01001026 * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
1027 * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
1028 * will give undefined results.
1029 * Note that it is upon the caller's responsibility to ensure that ptr < end,
1030 * and that msg->sol points to the beginning of the response.
1031 * If a complete line is found (which implies that at least one CR or LF is
1032 * found before <end>, the updated <ptr> is returned, otherwise NULL is
1033 * returned indicating an incomplete line (which does not mean that parts have
1034 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
1035 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
1036 * upon next call.
1037 *
Willy Tarreau9cdde232007-05-02 20:58:19 +02001038 * This function was intentionally designed to be called from
Willy Tarreau8973c702007-01-21 23:58:29 +01001039 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
1040 * within its state machine and use the same macros, hence the need for same
Willy Tarreau9cdde232007-05-02 20:58:19 +02001041 * labels and variable names. Note that msg->sol is left unchanged.
Willy Tarreau8973c702007-01-21 23:58:29 +01001042 */
Willy Tarreau69d8c5d2012-05-08 09:44:41 +02001043const char *http_parse_stsline(struct http_msg *msg,
Willy Tarreau3770f232013-12-07 00:01:53 +01001044 enum ht_state state, const char *ptr, const char *end,
1045 unsigned int *ret_ptr, enum ht_state *ret_state)
Willy Tarreau8973c702007-01-21 23:58:29 +01001046{
Willy Tarreau9b28e032012-10-12 23:49:43 +02001047 const char *msg_start = msg->chn->buf->p;
Willy Tarreau62f791e2012-03-09 11:32:30 +01001048
Willy Tarreau8973c702007-01-21 23:58:29 +01001049 switch (state) {
Willy Tarreau8973c702007-01-21 23:58:29 +01001050 case HTTP_MSG_RPVER:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001051 http_msg_rpver:
Willy Tarreau4b89ad42007-03-04 18:13:58 +01001052 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau8973c702007-01-21 23:58:29 +01001053 EAT_AND_JUMP_OR_RETURN(http_msg_rpver, HTTP_MSG_RPVER);
1054
1055 if (likely(HTTP_IS_SPHT(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001056 msg->sl.st.v_l = ptr - msg_start;
Willy Tarreau8973c702007-01-21 23:58:29 +01001057 EAT_AND_JUMP_OR_RETURN(http_msg_rpver_sp, HTTP_MSG_RPVER_SP);
1058 }
Willy Tarreau7552c032009-03-01 11:10:40 +01001059 state = HTTP_MSG_ERROR;
1060 break;
1061
Willy Tarreau8973c702007-01-21 23:58:29 +01001062 case HTTP_MSG_RPVER_SP:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001063 http_msg_rpver_sp:
Willy Tarreau8973c702007-01-21 23:58:29 +01001064 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001065 msg->sl.st.c = ptr - msg_start;
Willy Tarreau8973c702007-01-21 23:58:29 +01001066 goto http_msg_rpcode;
1067 }
1068 if (likely(HTTP_IS_SPHT(*ptr)))
1069 EAT_AND_JUMP_OR_RETURN(http_msg_rpver_sp, HTTP_MSG_RPVER_SP);
1070 /* so it's a CR/LF, this is invalid */
Willy Tarreau7552c032009-03-01 11:10:40 +01001071 state = HTTP_MSG_ERROR;
1072 break;
Willy Tarreau8973c702007-01-21 23:58:29 +01001073
Willy Tarreau8973c702007-01-21 23:58:29 +01001074 case HTTP_MSG_RPCODE:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001075 http_msg_rpcode:
Willy Tarreau8973c702007-01-21 23:58:29 +01001076 if (likely(!HTTP_IS_LWS(*ptr)))
1077 EAT_AND_JUMP_OR_RETURN(http_msg_rpcode, HTTP_MSG_RPCODE);
1078
1079 if (likely(HTTP_IS_SPHT(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001080 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
Willy Tarreau8973c702007-01-21 23:58:29 +01001081 EAT_AND_JUMP_OR_RETURN(http_msg_rpcode_sp, HTTP_MSG_RPCODE_SP);
1082 }
1083
1084 /* so it's a CR/LF, so there is no reason phrase */
Willy Tarreauea1175a2012-03-05 15:52:30 +01001085 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
Willy Tarreau8973c702007-01-21 23:58:29 +01001086 http_msg_rsp_reason:
1087 /* FIXME: should we support HTTP responses without any reason phrase ? */
Willy Tarreauea1175a2012-03-05 15:52:30 +01001088 msg->sl.st.r = ptr - msg_start;
Willy Tarreau8973c702007-01-21 23:58:29 +01001089 msg->sl.st.r_l = 0;
1090 goto http_msg_rpline_eol;
1091
Willy Tarreau8973c702007-01-21 23:58:29 +01001092 case HTTP_MSG_RPCODE_SP:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001093 http_msg_rpcode_sp:
Willy Tarreau8973c702007-01-21 23:58:29 +01001094 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001095 msg->sl.st.r = ptr - msg_start;
Willy Tarreau8973c702007-01-21 23:58:29 +01001096 goto http_msg_rpreason;
1097 }
1098 if (likely(HTTP_IS_SPHT(*ptr)))
1099 EAT_AND_JUMP_OR_RETURN(http_msg_rpcode_sp, HTTP_MSG_RPCODE_SP);
1100 /* so it's a CR/LF, so there is no reason phrase */
1101 goto http_msg_rsp_reason;
1102
Willy Tarreau8973c702007-01-21 23:58:29 +01001103 case HTTP_MSG_RPREASON:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001104 http_msg_rpreason:
Willy Tarreau8973c702007-01-21 23:58:29 +01001105 if (likely(!HTTP_IS_CRLF(*ptr)))
1106 EAT_AND_JUMP_OR_RETURN(http_msg_rpreason, HTTP_MSG_RPREASON);
Willy Tarreauea1175a2012-03-05 15:52:30 +01001107 msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
Willy Tarreau8973c702007-01-21 23:58:29 +01001108 http_msg_rpline_eol:
1109 /* We have seen the end of line. Note that we do not
1110 * necessarily have the \n yet, but at least we know that we
1111 * have EITHER \r OR \n, otherwise the response would not be
1112 * complete. We can then record the response length and return
1113 * to the caller which will be able to register it.
1114 */
Willy Tarreau3a215be2012-03-09 21:39:51 +01001115 msg->sl.st.l = ptr - msg_start - msg->sol;
Willy Tarreau8973c702007-01-21 23:58:29 +01001116 return ptr;
1117
Willy Tarreau8973c702007-01-21 23:58:29 +01001118 default:
Willy Tarreau3770f232013-12-07 00:01:53 +01001119#ifdef DEBUG_FULL
Willy Tarreau8973c702007-01-21 23:58:29 +01001120 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
1121 exit(1);
1122#endif
Willy Tarreau3770f232013-12-07 00:01:53 +01001123 ;
Willy Tarreau8973c702007-01-21 23:58:29 +01001124 }
1125
1126 http_msg_ood:
Willy Tarreau7552c032009-03-01 11:10:40 +01001127 /* out of valid data */
Willy Tarreau8973c702007-01-21 23:58:29 +01001128 if (ret_state)
1129 *ret_state = state;
1130 if (ret_ptr)
Willy Tarreaua458b672012-03-05 11:17:50 +01001131 *ret_ptr = ptr - msg_start;
Willy Tarreau8973c702007-01-21 23:58:29 +01001132 return NULL;
Willy Tarreau8973c702007-01-21 23:58:29 +01001133}
1134
Willy Tarreau8973c702007-01-21 23:58:29 +01001135/*
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001136 * This function parses a request line between <ptr> and <end>, starting with
1137 * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
1138 * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
1139 * will give undefined results.
1140 * Note that it is upon the caller's responsibility to ensure that ptr < end,
1141 * and that msg->sol points to the beginning of the request.
1142 * If a complete line is found (which implies that at least one CR or LF is
1143 * found before <end>, the updated <ptr> is returned, otherwise NULL is
1144 * returned indicating an incomplete line (which does not mean that parts have
1145 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
1146 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
1147 * upon next call.
1148 *
Willy Tarreau9cdde232007-05-02 20:58:19 +02001149 * This function was intentionally designed to be called from
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001150 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
1151 * within its state machine and use the same macros, hence the need for same
Willy Tarreau9cdde232007-05-02 20:58:19 +02001152 * labels and variable names. Note that msg->sol is left unchanged.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001153 */
Willy Tarreau69d8c5d2012-05-08 09:44:41 +02001154const char *http_parse_reqline(struct http_msg *msg,
Willy Tarreau3770f232013-12-07 00:01:53 +01001155 enum ht_state state, const char *ptr, const char *end,
1156 unsigned int *ret_ptr, enum ht_state *ret_state)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001157{
Willy Tarreau9b28e032012-10-12 23:49:43 +02001158 const char *msg_start = msg->chn->buf->p;
Willy Tarreau62f791e2012-03-09 11:32:30 +01001159
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001160 switch (state) {
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001161 case HTTP_MSG_RQMETH:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001162 http_msg_rqmeth:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001163 if (likely(HTTP_IS_TOKEN(*ptr)))
1164 EAT_AND_JUMP_OR_RETURN(http_msg_rqmeth, HTTP_MSG_RQMETH);
Willy Tarreau58f10d72006-12-04 02:26:12 +01001165
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001166 if (likely(HTTP_IS_SPHT(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001167 msg->sl.rq.m_l = ptr - msg_start;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001168 EAT_AND_JUMP_OR_RETURN(http_msg_rqmeth_sp, HTTP_MSG_RQMETH_SP);
1169 }
Willy Tarreau58f10d72006-12-04 02:26:12 +01001170
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001171 if (likely(HTTP_IS_CRLF(*ptr))) {
1172 /* HTTP 0.9 request */
Willy Tarreauea1175a2012-03-05 15:52:30 +01001173 msg->sl.rq.m_l = ptr - msg_start;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001174 http_msg_req09_uri:
Willy Tarreauea1175a2012-03-05 15:52:30 +01001175 msg->sl.rq.u = ptr - msg_start;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001176 http_msg_req09_uri_e:
Willy Tarreauea1175a2012-03-05 15:52:30 +01001177 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001178 http_msg_req09_ver:
Willy Tarreauea1175a2012-03-05 15:52:30 +01001179 msg->sl.rq.v = ptr - msg_start;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001180 msg->sl.rq.v_l = 0;
1181 goto http_msg_rqline_eol;
1182 }
Willy Tarreau7552c032009-03-01 11:10:40 +01001183 state = HTTP_MSG_ERROR;
1184 break;
1185
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001186 case HTTP_MSG_RQMETH_SP:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001187 http_msg_rqmeth_sp:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001188 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001189 msg->sl.rq.u = ptr - msg_start;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001190 goto http_msg_rquri;
1191 }
1192 if (likely(HTTP_IS_SPHT(*ptr)))
1193 EAT_AND_JUMP_OR_RETURN(http_msg_rqmeth_sp, HTTP_MSG_RQMETH_SP);
1194 /* so it's a CR/LF, meaning an HTTP 0.9 request */
1195 goto http_msg_req09_uri;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001196
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001197 case HTTP_MSG_RQURI:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001198 http_msg_rquri:
Willy Tarreau2e9506d2012-01-07 23:22:31 +01001199 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001200 EAT_AND_JUMP_OR_RETURN(http_msg_rquri, HTTP_MSG_RQURI);
Willy Tarreau58f10d72006-12-04 02:26:12 +01001201
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001202 if (likely(HTTP_IS_SPHT(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001203 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001204 EAT_AND_JUMP_OR_RETURN(http_msg_rquri_sp, HTTP_MSG_RQURI_SP);
1205 }
Willy Tarreau58f10d72006-12-04 02:26:12 +01001206
Willy Tarreau2e9506d2012-01-07 23:22:31 +01001207 if (likely((unsigned char)*ptr >= 128)) {
Willy Tarreau422246e2012-01-07 23:54:13 +01001208 /* non-ASCII chars are forbidden unless option
1209 * accept-invalid-http-request is enabled in the frontend.
1210 * In any case, we capture the faulty char.
Willy Tarreau2e9506d2012-01-07 23:22:31 +01001211 */
Willy Tarreau422246e2012-01-07 23:54:13 +01001212 if (msg->err_pos < -1)
1213 goto invalid_char;
1214 if (msg->err_pos == -1)
Willy Tarreau69d8c5d2012-05-08 09:44:41 +02001215 msg->err_pos = ptr - msg_start;
Willy Tarreau2e9506d2012-01-07 23:22:31 +01001216 EAT_AND_JUMP_OR_RETURN(http_msg_rquri, HTTP_MSG_RQURI);
1217 }
1218
1219 if (likely(HTTP_IS_CRLF(*ptr))) {
1220 /* so it's a CR/LF, meaning an HTTP 0.9 request */
1221 goto http_msg_req09_uri_e;
1222 }
1223
1224 /* OK forbidden chars, 0..31 or 127 */
Willy Tarreau422246e2012-01-07 23:54:13 +01001225 invalid_char:
Willy Tarreau69d8c5d2012-05-08 09:44:41 +02001226 msg->err_pos = ptr - msg_start;
Willy Tarreau2e9506d2012-01-07 23:22:31 +01001227 state = HTTP_MSG_ERROR;
1228 break;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001229
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001230 case HTTP_MSG_RQURI_SP:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001231 http_msg_rquri_sp:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001232 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001233 msg->sl.rq.v = ptr - msg_start;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001234 goto http_msg_rqver;
1235 }
1236 if (likely(HTTP_IS_SPHT(*ptr)))
1237 EAT_AND_JUMP_OR_RETURN(http_msg_rquri_sp, HTTP_MSG_RQURI_SP);
1238 /* so it's a CR/LF, meaning an HTTP 0.9 request */
1239 goto http_msg_req09_ver;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001240
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001241 case HTTP_MSG_RQVER:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001242 http_msg_rqver:
Willy Tarreau4b89ad42007-03-04 18:13:58 +01001243 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001244 EAT_AND_JUMP_OR_RETURN(http_msg_rqver, HTTP_MSG_RQVER);
Willy Tarreau4b89ad42007-03-04 18:13:58 +01001245
1246 if (likely(HTTP_IS_CRLF(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001247 msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
Willy Tarreau4b89ad42007-03-04 18:13:58 +01001248 http_msg_rqline_eol:
1249 /* We have seen the end of line. Note that we do not
1250 * necessarily have the \n yet, but at least we know that we
1251 * have EITHER \r OR \n, otherwise the request would not be
1252 * complete. We can then record the request length and return
1253 * to the caller which will be able to register it.
1254 */
Willy Tarreau3a215be2012-03-09 21:39:51 +01001255 msg->sl.rq.l = ptr - msg_start - msg->sol;
Willy Tarreau4b89ad42007-03-04 18:13:58 +01001256 return ptr;
1257 }
1258
1259 /* neither an HTTP_VER token nor a CRLF */
Willy Tarreau7552c032009-03-01 11:10:40 +01001260 state = HTTP_MSG_ERROR;
1261 break;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001262
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001263 default:
Willy Tarreau3770f232013-12-07 00:01:53 +01001264#ifdef DEBUG_FULL
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001265 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
1266 exit(1);
1267#endif
Willy Tarreau3770f232013-12-07 00:01:53 +01001268 ;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001269 }
Willy Tarreau58f10d72006-12-04 02:26:12 +01001270
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001271 http_msg_ood:
Willy Tarreau7552c032009-03-01 11:10:40 +01001272 /* out of valid data */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001273 if (ret_state)
1274 *ret_state = state;
1275 if (ret_ptr)
Willy Tarreaua458b672012-03-05 11:17:50 +01001276 *ret_ptr = ptr - msg_start;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001277 return NULL;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001278}
Willy Tarreau58f10d72006-12-04 02:26:12 +01001279
Krzysztof Piotr Oledzkif9423ae2010-01-29 19:26:18 +01001280/*
1281 * Returns the data from Authorization header. Function may be called more
1282 * than once so data is stored in txn->auth_data. When no header is found
1283 * or auth method is unknown auth_method is set to HTTP_AUTH_WRONG to avoid
1284 * searching again for something we are unable to find anyway.
1285 */
1286
Willy Tarreau7e2c6472012-10-29 20:44:36 +01001287char *get_http_auth_buff;
Krzysztof Piotr Oledzkif9423ae2010-01-29 19:26:18 +01001288
1289int
1290get_http_auth(struct session *s)
1291{
1292
1293 struct http_txn *txn = &s->txn;
1294 struct chunk auth_method;
1295 struct hdr_ctx ctx;
1296 char *h, *p;
1297 int len;
1298
1299#ifdef DEBUG_AUTH
1300 printf("Auth for session %p: %d\n", s, txn->auth.method);
1301#endif
1302
1303 if (txn->auth.method == HTTP_AUTH_WRONG)
1304 return 0;
1305
1306 if (txn->auth.method)
1307 return 1;
1308
1309 txn->auth.method = HTTP_AUTH_WRONG;
1310
1311 ctx.idx = 0;
Willy Tarreau844a7e72010-01-31 21:46:18 +01001312
1313 if (txn->flags & TX_USE_PX_CONN) {
1314 h = "Proxy-Authorization";
1315 len = strlen(h);
1316 } else {
1317 h = "Authorization";
1318 len = strlen(h);
1319 }
1320
Willy Tarreau9b28e032012-10-12 23:49:43 +02001321 if (!http_find_header2(h, len, s->req->buf->p, &txn->hdr_idx, &ctx))
Krzysztof Piotr Oledzkif9423ae2010-01-29 19:26:18 +01001322 return 0;
1323
1324 h = ctx.line + ctx.val;
1325
1326 p = memchr(h, ' ', ctx.vlen);
1327 if (!p || p == h)
1328 return 0;
1329
1330 chunk_initlen(&auth_method, h, 0, p-h);
1331 chunk_initlen(&txn->auth.method_data, p+1, 0, ctx.vlen-(p-h)-1);
1332
1333 if (!strncasecmp("Basic", auth_method.str, auth_method.len)) {
1334
1335 len = base64dec(txn->auth.method_data.str, txn->auth.method_data.len,
Willy Tarreau7e2c6472012-10-29 20:44:36 +01001336 get_http_auth_buff, global.tune.bufsize - 1);
Krzysztof Piotr Oledzkif9423ae2010-01-29 19:26:18 +01001337
1338 if (len < 0)
1339 return 0;
1340
1341
1342 get_http_auth_buff[len] = '\0';
1343
1344 p = strchr(get_http_auth_buff, ':');
1345
1346 if (!p)
1347 return 0;
1348
1349 txn->auth.user = get_http_auth_buff;
1350 *p = '\0';
1351 txn->auth.pass = p+1;
1352
1353 txn->auth.method = HTTP_AUTH_BASIC;
1354 return 1;
1355 }
1356
1357 return 0;
1358}
1359
Willy Tarreau58f10d72006-12-04 02:26:12 +01001360
Willy Tarreau8973c702007-01-21 23:58:29 +01001361/*
1362 * This function parses an HTTP message, either a request or a response,
Willy Tarreau8b1323e2012-03-09 14:46:19 +01001363 * depending on the initial msg->msg_state. The caller is responsible for
1364 * ensuring that the message does not wrap. The function can be preempted
1365 * everywhere when data are missing and recalled at the exact same location
1366 * with no information loss. The message may even be realigned between two
1367 * calls. The header index is re-initialized when switching from
Willy Tarreau9cdde232007-05-02 20:58:19 +02001368 * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other
Willy Tarreau26927362012-05-18 23:22:52 +02001369 * fields. Note that msg->sol will be initialized after completing the first
1370 * state, so that none of the msg pointers has to be initialized prior to the
1371 * first call.
Willy Tarreau8973c702007-01-21 23:58:29 +01001372 */
Willy Tarreaua560c212012-03-09 13:50:57 +01001373void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001374{
Willy Tarreau3770f232013-12-07 00:01:53 +01001375 enum ht_state state; /* updated only when leaving the FSM */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001376 register char *ptr, *end; /* request pointers, to avoid dereferences */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001377 struct buffer *buf;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001378
Willy Tarreaub326fcc2007-03-03 13:54:32 +01001379 state = msg->msg_state;
Willy Tarreau9b28e032012-10-12 23:49:43 +02001380 buf = msg->chn->buf;
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001381 ptr = buf->p + msg->next;
1382 end = buf->p + buf->i;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001383
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001384 if (unlikely(ptr >= end))
1385 goto http_msg_ood;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001386
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001387 switch (state) {
Willy Tarreau8973c702007-01-21 23:58:29 +01001388 /*
1389 * First, states that are specific to the response only.
1390 * We check them first so that request and headers are
1391 * closer to each other (accessed more often).
1392 */
Willy Tarreau8973c702007-01-21 23:58:29 +01001393 case HTTP_MSG_RPBEFORE:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001394 http_msg_rpbefore:
Willy Tarreau8973c702007-01-21 23:58:29 +01001395 if (likely(HTTP_IS_TOKEN(*ptr))) {
Willy Tarreau15de77e2010-01-02 21:59:16 +01001396 /* we have a start of message, but we have to check
1397 * first if we need to remove some CRLF. We can only
Willy Tarreau2e046c62012-03-01 16:08:30 +01001398 * do this when o=0.
Willy Tarreau15de77e2010-01-02 21:59:16 +01001399 */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001400 if (unlikely(ptr != buf->p)) {
1401 if (buf->o)
Willy Tarreau15de77e2010-01-02 21:59:16 +01001402 goto http_msg_ood;
Willy Tarreau1d3bcce2009-12-27 15:50:06 +01001403 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001404 bi_fast_delete(buf, ptr - buf->p);
Willy Tarreau8973c702007-01-21 23:58:29 +01001405 }
Willy Tarreau26927362012-05-18 23:22:52 +02001406 msg->sol = 0;
Willy Tarreaue92693a2012-09-24 21:13:39 +02001407 msg->sl.st.l = 0; /* used in debug mode */
Willy Tarreau8973c702007-01-21 23:58:29 +01001408 hdr_idx_init(idx);
1409 state = HTTP_MSG_RPVER;
1410 goto http_msg_rpver;
1411 }
1412
1413 if (unlikely(!HTTP_IS_CRLF(*ptr)))
1414 goto http_msg_invalid;
1415
1416 if (unlikely(*ptr == '\n'))
1417 EAT_AND_JUMP_OR_RETURN(http_msg_rpbefore, HTTP_MSG_RPBEFORE);
1418 EAT_AND_JUMP_OR_RETURN(http_msg_rpbefore_cr, HTTP_MSG_RPBEFORE_CR);
1419 /* stop here */
1420
Willy Tarreau8973c702007-01-21 23:58:29 +01001421 case HTTP_MSG_RPBEFORE_CR:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001422 http_msg_rpbefore_cr:
Willy Tarreau8973c702007-01-21 23:58:29 +01001423 EXPECT_LF_HERE(ptr, http_msg_invalid);
1424 EAT_AND_JUMP_OR_RETURN(http_msg_rpbefore, HTTP_MSG_RPBEFORE);
1425 /* stop here */
1426
Willy Tarreau8973c702007-01-21 23:58:29 +01001427 case HTTP_MSG_RPVER:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001428 http_msg_rpver:
Willy Tarreau8973c702007-01-21 23:58:29 +01001429 case HTTP_MSG_RPVER_SP:
1430 case HTTP_MSG_RPCODE:
1431 case HTTP_MSG_RPCODE_SP:
1432 case HTTP_MSG_RPREASON:
Willy Tarreau69d8c5d2012-05-08 09:44:41 +02001433 ptr = (char *)http_parse_stsline(msg,
Willy Tarreaua458b672012-03-05 11:17:50 +01001434 state, ptr, end,
1435 &msg->next, &msg->msg_state);
Willy Tarreau8973c702007-01-21 23:58:29 +01001436 if (unlikely(!ptr))
1437 return;
1438
1439 /* we have a full response and we know that we have either a CR
1440 * or an LF at <ptr>.
1441 */
Willy Tarreau8973c702007-01-21 23:58:29 +01001442 hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
1443
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001444 msg->sol = ptr - buf->p;
Willy Tarreau8973c702007-01-21 23:58:29 +01001445 if (likely(*ptr == '\r'))
1446 EAT_AND_JUMP_OR_RETURN(http_msg_rpline_end, HTTP_MSG_RPLINE_END);
1447 goto http_msg_rpline_end;
1448
Willy Tarreau8973c702007-01-21 23:58:29 +01001449 case HTTP_MSG_RPLINE_END:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001450 http_msg_rpline_end:
Willy Tarreau8973c702007-01-21 23:58:29 +01001451 /* msg->sol must point to the first of CR or LF. */
1452 EXPECT_LF_HERE(ptr, http_msg_invalid);
1453 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_first, HTTP_MSG_HDR_FIRST);
1454 /* stop here */
1455
1456 /*
1457 * Second, states that are specific to the request only
1458 */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001459 case HTTP_MSG_RQBEFORE:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001460 http_msg_rqbefore:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001461 if (likely(HTTP_IS_TOKEN(*ptr))) {
Willy Tarreau15de77e2010-01-02 21:59:16 +01001462 /* we have a start of message, but we have to check
1463 * first if we need to remove some CRLF. We can only
Willy Tarreau2e046c62012-03-01 16:08:30 +01001464 * do this when o=0.
Willy Tarreau15de77e2010-01-02 21:59:16 +01001465 */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001466 if (likely(ptr != buf->p)) {
1467 if (buf->o)
Willy Tarreau15de77e2010-01-02 21:59:16 +01001468 goto http_msg_ood;
Willy Tarreau1d3bcce2009-12-27 15:50:06 +01001469 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001470 bi_fast_delete(buf, ptr - buf->p);
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001471 }
Willy Tarreau26927362012-05-18 23:22:52 +02001472 msg->sol = 0;
Willy Tarreaue92693a2012-09-24 21:13:39 +02001473 msg->sl.rq.l = 0; /* used in debug mode */
Willy Tarreau8973c702007-01-21 23:58:29 +01001474 state = HTTP_MSG_RQMETH;
1475 goto http_msg_rqmeth;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001476 }
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001477
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001478 if (unlikely(!HTTP_IS_CRLF(*ptr)))
1479 goto http_msg_invalid;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001480
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001481 if (unlikely(*ptr == '\n'))
1482 EAT_AND_JUMP_OR_RETURN(http_msg_rqbefore, HTTP_MSG_RQBEFORE);
1483 EAT_AND_JUMP_OR_RETURN(http_msg_rqbefore_cr, HTTP_MSG_RQBEFORE_CR);
Willy Tarreau8973c702007-01-21 23:58:29 +01001484 /* stop here */
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001485
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001486 case HTTP_MSG_RQBEFORE_CR:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001487 http_msg_rqbefore_cr:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001488 EXPECT_LF_HERE(ptr, http_msg_invalid);
1489 EAT_AND_JUMP_OR_RETURN(http_msg_rqbefore, HTTP_MSG_RQBEFORE);
Willy Tarreau8973c702007-01-21 23:58:29 +01001490 /* stop here */
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001491
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001492 case HTTP_MSG_RQMETH:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001493 http_msg_rqmeth:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001494 case HTTP_MSG_RQMETH_SP:
1495 case HTTP_MSG_RQURI:
1496 case HTTP_MSG_RQURI_SP:
1497 case HTTP_MSG_RQVER:
Willy Tarreau69d8c5d2012-05-08 09:44:41 +02001498 ptr = (char *)http_parse_reqline(msg,
Willy Tarreaua458b672012-03-05 11:17:50 +01001499 state, ptr, end,
1500 &msg->next, &msg->msg_state);
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001501 if (unlikely(!ptr))
1502 return;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001503
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001504 /* we have a full request and we know that we have either a CR
1505 * or an LF at <ptr>.
1506 */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001507 hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001508
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001509 msg->sol = ptr - buf->p;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001510 if (likely(*ptr == '\r'))
1511 EAT_AND_JUMP_OR_RETURN(http_msg_rqline_end, HTTP_MSG_RQLINE_END);
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001512 goto http_msg_rqline_end;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001513
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001514 case HTTP_MSG_RQLINE_END:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001515 http_msg_rqline_end:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001516 /* check for HTTP/0.9 request : no version information available.
1517 * msg->sol must point to the first of CR or LF.
1518 */
1519 if (unlikely(msg->sl.rq.v_l == 0))
1520 goto http_msg_last_lf;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001521
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001522 EXPECT_LF_HERE(ptr, http_msg_invalid);
1523 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_first, HTTP_MSG_HDR_FIRST);
Willy Tarreau8973c702007-01-21 23:58:29 +01001524 /* stop here */
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001525
Willy Tarreau8973c702007-01-21 23:58:29 +01001526 /*
1527 * Common states below
1528 */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001529 case HTTP_MSG_HDR_FIRST:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001530 http_msg_hdr_first:
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001531 msg->sol = ptr - buf->p;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001532 if (likely(!HTTP_IS_CRLF(*ptr))) {
1533 goto http_msg_hdr_name;
1534 }
1535
1536 if (likely(*ptr == '\r'))
1537 EAT_AND_JUMP_OR_RETURN(http_msg_last_lf, HTTP_MSG_LAST_LF);
1538 goto http_msg_last_lf;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001539
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001540 case HTTP_MSG_HDR_NAME:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001541 http_msg_hdr_name:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001542 /* assumes msg->sol points to the first char */
1543 if (likely(HTTP_IS_TOKEN(*ptr)))
1544 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_name, HTTP_MSG_HDR_NAME);
Willy Tarreau58f10d72006-12-04 02:26:12 +01001545
Willy Tarreaufa4a03c2012-03-09 21:28:54 +01001546 if (likely(*ptr == ':'))
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001547 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_l1_sp, HTTP_MSG_HDR_L1_SP);
Willy Tarreau58f10d72006-12-04 02:26:12 +01001548
Willy Tarreau32a4ec02009-04-02 11:35:18 +02001549 if (likely(msg->err_pos < -1) || *ptr == '\n')
1550 goto http_msg_invalid;
1551
1552 if (msg->err_pos == -1) /* capture error pointer */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001553 msg->err_pos = ptr - buf->p; /* >= 0 now */
Willy Tarreau32a4ec02009-04-02 11:35:18 +02001554
1555 /* and we still accept this non-token character */
1556 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_name, HTTP_MSG_HDR_NAME);
Willy Tarreau230fd0b2006-12-17 12:05:00 +01001557
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001558 case HTTP_MSG_HDR_L1_SP:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001559 http_msg_hdr_l1_sp:
Willy Tarreaufa4a03c2012-03-09 21:28:54 +01001560 /* assumes msg->sol points to the first char */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001561 if (likely(HTTP_IS_SPHT(*ptr)))
1562 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_l1_sp, HTTP_MSG_HDR_L1_SP);
Willy Tarreau230fd0b2006-12-17 12:05:00 +01001563
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001564 /* header value can be basically anything except CR/LF */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001565 msg->sov = ptr - buf->p;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001566
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001567 if (likely(!HTTP_IS_CRLF(*ptr))) {
1568 goto http_msg_hdr_val;
1569 }
1570
1571 if (likely(*ptr == '\r'))
1572 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_l1_lf, HTTP_MSG_HDR_L1_LF);
1573 goto http_msg_hdr_l1_lf;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001574
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001575 case HTTP_MSG_HDR_L1_LF:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001576 http_msg_hdr_l1_lf:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001577 EXPECT_LF_HERE(ptr, http_msg_invalid);
1578 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_l1_lws, HTTP_MSG_HDR_L1_LWS);
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001579
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001580 case HTTP_MSG_HDR_L1_LWS:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001581 http_msg_hdr_l1_lws:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001582 if (likely(HTTP_IS_SPHT(*ptr))) {
1583 /* replace HT,CR,LF with spaces */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001584 for (; buf->p + msg->sov < ptr; msg->sov++)
1585 buf->p[msg->sov] = ' ';
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001586 goto http_msg_hdr_l1_sp;
1587 }
Willy Tarreauaa9dce32007-03-18 23:50:16 +01001588 /* we had a header consisting only in spaces ! */
Willy Tarreau12e48b32012-03-05 16:57:34 +01001589 msg->eol = msg->sov;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001590 goto http_msg_complete_header;
1591
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001592 case HTTP_MSG_HDR_VAL:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001593 http_msg_hdr_val:
Willy Tarreaufa4a03c2012-03-09 21:28:54 +01001594 /* assumes msg->sol points to the first char, and msg->sov
1595 * points to the first character of the value.
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001596 */
1597 if (likely(!HTTP_IS_CRLF(*ptr)))
1598 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_val, HTTP_MSG_HDR_VAL);
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001599
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001600 msg->eol = ptr - buf->p;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001601 /* Note: we could also copy eol into ->eoh so that we have the
1602 * real header end in case it ends with lots of LWS, but is this
1603 * really needed ?
1604 */
1605 if (likely(*ptr == '\r'))
1606 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_l2_lf, HTTP_MSG_HDR_L2_LF);
1607 goto http_msg_hdr_l2_lf;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001608
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001609 case HTTP_MSG_HDR_L2_LF:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001610 http_msg_hdr_l2_lf:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001611 EXPECT_LF_HERE(ptr, http_msg_invalid);
1612 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_l2_lws, HTTP_MSG_HDR_L2_LWS);
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001613
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001614 case HTTP_MSG_HDR_L2_LWS:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001615 http_msg_hdr_l2_lws:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001616 if (unlikely(HTTP_IS_SPHT(*ptr))) {
1617 /* LWS: replace HT,CR,LF with spaces */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001618 for (; buf->p + msg->eol < ptr; msg->eol++)
1619 buf->p[msg->eol] = ' ';
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001620 goto http_msg_hdr_val;
1621 }
1622 http_msg_complete_header:
1623 /*
1624 * It was a new header, so the last one is finished.
Willy Tarreaufa4a03c2012-03-09 21:28:54 +01001625 * Assumes msg->sol points to the first char, msg->sov points
1626 * to the first character of the value and msg->eol to the
1627 * first CR or LF so we know how the line ends. We insert last
1628 * header into the index.
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001629 */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001630 if (unlikely(hdr_idx_add(msg->eol - msg->sol, buf->p[msg->eol] == '\r',
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001631 idx, idx->tail) < 0))
1632 goto http_msg_invalid;
Willy Tarreau230fd0b2006-12-17 12:05:00 +01001633
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001634 msg->sol = ptr - buf->p;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001635 if (likely(!HTTP_IS_CRLF(*ptr))) {
1636 goto http_msg_hdr_name;
1637 }
1638
1639 if (likely(*ptr == '\r'))
1640 EAT_AND_JUMP_OR_RETURN(http_msg_last_lf, HTTP_MSG_LAST_LF);
1641 goto http_msg_last_lf;
Willy Tarreau230fd0b2006-12-17 12:05:00 +01001642
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001643 case HTTP_MSG_LAST_LF:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001644 http_msg_last_lf:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001645 /* Assumes msg->sol points to the first of either CR or LF */
1646 EXPECT_LF_HERE(ptr, http_msg_invalid);
1647 ptr++;
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001648 msg->sov = msg->next = ptr - buf->p;
Willy Tarreau3a215be2012-03-09 21:39:51 +01001649 msg->eoh = msg->sol;
1650 msg->sol = 0;
Willy Tarreaub326fcc2007-03-03 13:54:32 +01001651 msg->msg_state = HTTP_MSG_BODY;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001652 return;
Willy Tarreaub56928a2012-04-16 14:51:55 +02001653
1654 case HTTP_MSG_ERROR:
1655 /* this may only happen if we call http_msg_analyser() twice with an error */
1656 break;
1657
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001658 default:
Willy Tarreau3770f232013-12-07 00:01:53 +01001659#ifdef DEBUG_FULL
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001660 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
1661 exit(1);
Willy Tarreau230fd0b2006-12-17 12:05:00 +01001662#endif
Willy Tarreau3770f232013-12-07 00:01:53 +01001663 ;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001664 }
1665 http_msg_ood:
1666 /* out of data */
Willy Tarreaub326fcc2007-03-03 13:54:32 +01001667 msg->msg_state = state;
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001668 msg->next = ptr - buf->p;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001669 return;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001670
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001671 http_msg_invalid:
1672 /* invalid message */
Willy Tarreaub326fcc2007-03-03 13:54:32 +01001673 msg->msg_state = HTTP_MSG_ERROR;
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001674 msg->next = ptr - buf->p;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001675 return;
1676}
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001677
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001678/* convert an HTTP/0.9 request into an HTTP/1.0 request. Returns 1 if the
1679 * conversion succeeded, 0 in case of error. If the request was already 1.X,
1680 * nothing is done and 1 is returned.
1681 */
Willy Tarreau418bfcc2012-03-09 13:56:20 +01001682static int http_upgrade_v09_to_v10(struct http_txn *txn)
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001683{
1684 int delta;
1685 char *cur_end;
Willy Tarreau418bfcc2012-03-09 13:56:20 +01001686 struct http_msg *msg = &txn->req;
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001687
1688 if (msg->sl.rq.v_l != 0)
1689 return 1;
1690
Willy Tarreau9b28e032012-10-12 23:49:43 +02001691 cur_end = msg->chn->buf->p + msg->sl.rq.l;
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001692 delta = 0;
1693
1694 if (msg->sl.rq.u_l == 0) {
1695 /* if no URI was set, add "/" */
Willy Tarreau9b28e032012-10-12 23:49:43 +02001696 delta = buffer_replace2(msg->chn->buf, cur_end, cur_end, " /", 2);
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001697 cur_end += delta;
Willy Tarreaufa355d42009-11-29 18:12:29 +01001698 http_msg_move_end(msg, delta);
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001699 }
1700 /* add HTTP version */
Willy Tarreau9b28e032012-10-12 23:49:43 +02001701 delta = buffer_replace2(msg->chn->buf, cur_end, cur_end, " HTTP/1.0\r\n", 11);
Willy Tarreaufa355d42009-11-29 18:12:29 +01001702 http_msg_move_end(msg, delta);
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001703 cur_end += delta;
Willy Tarreau69d8c5d2012-05-08 09:44:41 +02001704 cur_end = (char *)http_parse_reqline(msg,
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001705 HTTP_MSG_RQMETH,
Willy Tarreau9b28e032012-10-12 23:49:43 +02001706 msg->chn->buf->p, cur_end + 1,
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001707 NULL, NULL);
1708 if (unlikely(!cur_end))
1709 return 0;
1710
1711 /* we have a full HTTP/1.0 request now and we know that
1712 * we have either a CR or an LF at <ptr>.
1713 */
1714 hdr_idx_set_start(&txn->hdr_idx, msg->sl.rq.l, *cur_end == '\r');
1715 return 1;
1716}
1717
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001718/* Parse the Connection: header of an HTTP request, looking for both "close"
Willy Tarreau6acf7c92012-03-09 13:30:45 +01001719 * and "keep-alive" values. If we already know that some headers may safely
1720 * be removed, we remove them now. The <to_del> flags are used for that :
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001721 * - bit 0 means remove "close" headers (in HTTP/1.0 requests/responses)
1722 * - bit 1 means remove "keep-alive" headers (in HTTP/1.1 reqs/resp to 1.1).
Willy Tarreau50fc7772012-11-11 22:19:57 +01001723 * Presence of the "Upgrade" token is also checked and reported.
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001724 * The TX_HDR_CONN_* flags are adjusted in txn->flags depending on what was
1725 * found, and TX_CON_*_SET is adjusted depending on what is left so only
1726 * harmless combinations may be removed. Do not call that after changes have
Willy Tarreau6acf7c92012-03-09 13:30:45 +01001727 * been processed.
Willy Tarreau5b154472009-12-21 20:11:07 +01001728 */
Willy Tarreau6acf7c92012-03-09 13:30:45 +01001729void http_parse_connection_header(struct http_txn *txn, struct http_msg *msg, int to_del)
Willy Tarreau5b154472009-12-21 20:11:07 +01001730{
Willy Tarreau5b154472009-12-21 20:11:07 +01001731 struct hdr_ctx ctx;
Willy Tarreau88d349d2010-01-25 12:15:43 +01001732 const char *hdr_val = "Connection";
1733 int hdr_len = 10;
Willy Tarreau5b154472009-12-21 20:11:07 +01001734
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001735 if (txn->flags & TX_HDR_CONN_PRS)
Willy Tarreau5b154472009-12-21 20:11:07 +01001736 return;
1737
Willy Tarreau88d349d2010-01-25 12:15:43 +01001738 if (unlikely(txn->flags & TX_USE_PX_CONN)) {
1739 hdr_val = "Proxy-Connection";
1740 hdr_len = 16;
1741 }
1742
Willy Tarreau5b154472009-12-21 20:11:07 +01001743 ctx.idx = 0;
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001744 txn->flags &= ~(TX_CON_KAL_SET|TX_CON_CLO_SET);
Willy Tarreau9b28e032012-10-12 23:49:43 +02001745 while (http_find_header2(hdr_val, hdr_len, msg->chn->buf->p, &txn->hdr_idx, &ctx)) {
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001746 if (ctx.vlen >= 10 && word_match(ctx.line + ctx.val, ctx.vlen, "keep-alive", 10)) {
1747 txn->flags |= TX_HDR_CONN_KAL;
Willy Tarreau6acf7c92012-03-09 13:30:45 +01001748 if (to_del & 2)
1749 http_remove_header2(msg, &txn->hdr_idx, &ctx);
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001750 else
1751 txn->flags |= TX_CON_KAL_SET;
1752 }
1753 else if (ctx.vlen >= 5 && word_match(ctx.line + ctx.val, ctx.vlen, "close", 5)) {
1754 txn->flags |= TX_HDR_CONN_CLO;
Willy Tarreau6acf7c92012-03-09 13:30:45 +01001755 if (to_del & 1)
1756 http_remove_header2(msg, &txn->hdr_idx, &ctx);
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001757 else
1758 txn->flags |= TX_CON_CLO_SET;
1759 }
Willy Tarreau50fc7772012-11-11 22:19:57 +01001760 else if (ctx.vlen >= 7 && word_match(ctx.line + ctx.val, ctx.vlen, "upgrade", 7)) {
1761 txn->flags |= TX_HDR_CONN_UPG;
1762 }
Willy Tarreau5b154472009-12-21 20:11:07 +01001763 }
1764
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001765 txn->flags |= TX_HDR_CONN_PRS;
1766 return;
1767}
Willy Tarreau5b154472009-12-21 20:11:07 +01001768
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001769/* Apply desired changes on the Connection: header. Values may be removed and/or
1770 * added depending on the <wanted> flags, which are exclusively composed of
1771 * TX_CON_CLO_SET and TX_CON_KAL_SET, depending on what flags are desired. The
1772 * TX_CON_*_SET flags are adjusted in txn->flags depending on what is left.
1773 */
Willy Tarreau6acf7c92012-03-09 13:30:45 +01001774void http_change_connection_header(struct http_txn *txn, struct http_msg *msg, int wanted)
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001775{
1776 struct hdr_ctx ctx;
Willy Tarreau88d349d2010-01-25 12:15:43 +01001777 const char *hdr_val = "Connection";
1778 int hdr_len = 10;
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001779
1780 ctx.idx = 0;
1781
Willy Tarreau88d349d2010-01-25 12:15:43 +01001782
1783 if (unlikely(txn->flags & TX_USE_PX_CONN)) {
1784 hdr_val = "Proxy-Connection";
1785 hdr_len = 16;
1786 }
1787
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001788 txn->flags &= ~(TX_CON_CLO_SET | TX_CON_KAL_SET);
Willy Tarreau9b28e032012-10-12 23:49:43 +02001789 while (http_find_header2(hdr_val, hdr_len, msg->chn->buf->p, &txn->hdr_idx, &ctx)) {
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001790 if (ctx.vlen >= 10 && word_match(ctx.line + ctx.val, ctx.vlen, "keep-alive", 10)) {
1791 if (wanted & TX_CON_KAL_SET)
1792 txn->flags |= TX_CON_KAL_SET;
1793 else
Willy Tarreau6acf7c92012-03-09 13:30:45 +01001794 http_remove_header2(msg, &txn->hdr_idx, &ctx);
Willy Tarreau5b154472009-12-21 20:11:07 +01001795 }
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001796 else if (ctx.vlen >= 5 && word_match(ctx.line + ctx.val, ctx.vlen, "close", 5)) {
1797 if (wanted & TX_CON_CLO_SET)
1798 txn->flags |= TX_CON_CLO_SET;
1799 else
Willy Tarreau6acf7c92012-03-09 13:30:45 +01001800 http_remove_header2(msg, &txn->hdr_idx, &ctx);
Willy Tarreau0dfdf192010-01-05 11:33:11 +01001801 }
Willy Tarreau5b154472009-12-21 20:11:07 +01001802 }
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001803
1804 if (wanted == (txn->flags & (TX_CON_CLO_SET|TX_CON_KAL_SET)))
1805 return;
1806
1807 if ((wanted & TX_CON_CLO_SET) && !(txn->flags & TX_CON_CLO_SET)) {
1808 txn->flags |= TX_CON_CLO_SET;
Willy Tarreau88d349d2010-01-25 12:15:43 +01001809 hdr_val = "Connection: close";
1810 hdr_len = 17;
1811 if (unlikely(txn->flags & TX_USE_PX_CONN)) {
1812 hdr_val = "Proxy-Connection: close";
1813 hdr_len = 23;
1814 }
Willy Tarreau6acf7