blob: 878951f0977db5164970f9bb8a52a86b9d39da38 [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * HTTP protocol analyzer
3 *
Willy Tarreauf68a15a2011-01-06 16:53:21 +01004 * Copyright 2000-2011 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <ctype.h>
14#include <errno.h>
15#include <fcntl.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <syslog.h>
Willy Tarreau42250582007-04-01 01:30:43 +020020#include <time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020021
22#include <sys/socket.h>
23#include <sys/stat.h>
24#include <sys/types.h>
25
Willy Tarreaub05405a2012-01-23 15:35:52 +010026#include <netinet/tcp.h>
27
Willy Tarreau2dd0d472006-06-29 17:53:05 +020028#include <common/appsession.h>
Krzysztof Piotr Oledzkif9423ae2010-01-29 19:26:18 +010029#include <common/base64.h>
Willy Tarreauc7e42382012-08-24 19:22:53 +020030#include <common/chunk.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020031#include <common/compat.h>
32#include <common/config.h>
Willy Tarreaua4cd1f52006-12-16 19:57:26 +010033#include <common/debug.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020034#include <common/memory.h>
35#include <common/mini-clist.h>
36#include <common/standard.h>
Willy Tarreau0c303ee2008-07-07 00:09:58 +020037#include <common/ticks.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020038#include <common/time.h>
39#include <common/uri_auth.h>
40#include <common/version.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020041
42#include <types/capture.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020043#include <types/global.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020044
Willy Tarreau8797c062007-05-07 00:55:35 +020045#include <proto/acl.h>
Willy Tarreau61612d42012-04-19 18:42:05 +020046#include <proto/arg.h>
Krzysztof Piotr Oledzkif9423ae2010-01-29 19:26:18 +010047#include <proto/auth.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020048#include <proto/backend.h>
Willy Tarreauc7e42382012-08-24 19:22:53 +020049#include <proto/channel.h>
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +010050#include <proto/checks.h>
William Lallemand82fe75c2012-10-23 10:25:10 +020051#include <proto/compression.h>
Willy Tarreau91861262007-10-17 17:06:05 +020052#include <proto/dumpstats.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020053#include <proto/fd.h>
Willy Tarreau03fa5df2010-05-24 21:02:37 +020054#include <proto/frontend.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020055#include <proto/log.h>
Willy Tarreau58f10d72006-12-04 02:26:12 +010056#include <proto/hdr_idx.h>
Thierry FOURNIERed66c292013-11-28 11:05:19 +010057#include <proto/pattern.h>
Willy Tarreaub6866442008-07-14 23:54:42 +020058#include <proto/proto_tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020059#include <proto/proto_http.h>
Willy Tarreau7f062c42009-03-05 18:43:00 +010060#include <proto/proxy.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020061#include <proto/queue.h>
Willy Tarreaucd3b0942012-04-27 21:52:18 +020062#include <proto/sample.h>
Willy Tarreau7f062c42009-03-05 18:43:00 +010063#include <proto/server.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020064#include <proto/session.h>
Willy Tarreaucff64112008-11-03 06:26:53 +010065#include <proto/stream_interface.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020066#include <proto/task.h>
Baptiste Assmannfabcbe02014-04-24 22:16:59 +020067#include <proto/pattern.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020068
Willy Tarreau522d6c02009-12-06 18:49:18 +010069const char HTTP_100[] =
70 "HTTP/1.1 100 Continue\r\n\r\n";
71
72const struct chunk http_100_chunk = {
73 .str = (char *)&HTTP_100,
74 .len = sizeof(HTTP_100)-1
75};
76
/*
 * Redirect response prefixes. Each one deliberately stops right after
 * "Location: " because the target URL is appended by the caller.
 * Warning: none of them carries a "Connection" header.
 */

/* 301: permanent redirect */
const char *HTTP_301 =
	"HTTP/1.1 301 Moved Permanently\r\n"
	"Content-length: 0\r\n"
	"Location: ";

/* 302: temporary redirect */
const char *HTTP_302 =
	"HTTP/1.1 302 Found\r\n"
	"Cache-Control: no-cache\r\n"
	"Content-length: 0\r\n"
	"Location: ";

/* 303: like 302, but the browser MUST retry with the GET method */
const char *HTTP_303 =
	"HTTP/1.1 303 See Other\r\n"
	"Cache-Control: no-cache\r\n"
	"Content-length: 0\r\n"
	"Location: ";

/* 307: like 302, but the browser MUST retry with the same method */
const char *HTTP_307 =
	"HTTP/1.1 307 Temporary Redirect\r\n"
	"Cache-Control: no-cache\r\n"
	"Content-length: 0\r\n"
	"Location: ";

/* 308: like 301, but the browser MUST retry with the same method */
const char *HTTP_308 =
	"HTTP/1.1 308 Permanent Redirect\r\n"
	"Content-length: 0\r\n"
	"Location: ";
109
/* 401 response template. Warning: this is an sprintf() format string whose
 * only argument is the <realm> (a string).
 */
const char *HTTP_401_fmt =
	"HTTP/1.0 401 Unauthorized\r\n"
	"Cache-Control: no-cache\r\n"
	"Connection: close\r\n"
	"Content-Type: text/html\r\n"
	"WWW-Authenticate: Basic realm=\"%s\"\r\n"
	"\r\n"
	"<html><body><h1>401 Unauthorized</h1>\n"
	"You need a valid user and password to access this content.\n"
	"</body></html>\n";
119
/* 407 response template (proxy authentication). Like HTTP_401_fmt, this is an
 * sprintf() format string whose only argument is the <realm> (a string).
 * The HTML body announces 407 so that it agrees with the status line; it used
 * to say "401" due to a copy/paste from the 401 template.
 */
const char *HTTP_407_fmt =
	"HTTP/1.0 407 Unauthorized\r\n"
	"Cache-Control: no-cache\r\n"
	"Connection: close\r\n"
	"Content-Type: text/html\r\n"
	"Proxy-Authenticate: Basic realm=\"%s\"\r\n"
	"\r\n"
	"<html><body><h1>407 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n";
128
Willy Tarreau0f772532006-12-23 20:51:41 +0100129
130const int http_err_codes[HTTP_ERR_SIZE] = {
Willy Tarreauae94d4d2011-05-11 16:28:49 +0200131 [HTTP_ERR_200] = 200, /* used by "monitor-uri" */
Willy Tarreau0f772532006-12-23 20:51:41 +0100132 [HTTP_ERR_400] = 400,
133 [HTTP_ERR_403] = 403,
134 [HTTP_ERR_408] = 408,
135 [HTTP_ERR_500] = 500,
136 [HTTP_ERR_502] = 502,
137 [HTTP_ERR_503] = 503,
138 [HTTP_ERR_504] = 504,
139};
140
Willy Tarreau80587432006-12-24 17:47:20 +0100141static const char *http_err_msgs[HTTP_ERR_SIZE] = {
Willy Tarreauae94d4d2011-05-11 16:28:49 +0200142 [HTTP_ERR_200] =
143 "HTTP/1.0 200 OK\r\n"
144 "Cache-Control: no-cache\r\n"
145 "Connection: close\r\n"
146 "Content-Type: text/html\r\n"
147 "\r\n"
148 "<html><body><h1>200 OK</h1>\nService ready.\n</body></html>\n",
149
Willy Tarreau0f772532006-12-23 20:51:41 +0100150 [HTTP_ERR_400] =
Willy Tarreau80587432006-12-24 17:47:20 +0100151 "HTTP/1.0 400 Bad request\r\n"
Willy Tarreau0f772532006-12-23 20:51:41 +0100152 "Cache-Control: no-cache\r\n"
153 "Connection: close\r\n"
154 "Content-Type: text/html\r\n"
155 "\r\n"
156 "<html><body><h1>400 Bad request</h1>\nYour browser sent an invalid request.\n</body></html>\n",
157
158 [HTTP_ERR_403] =
159 "HTTP/1.0 403 Forbidden\r\n"
160 "Cache-Control: no-cache\r\n"
161 "Connection: close\r\n"
162 "Content-Type: text/html\r\n"
163 "\r\n"
164 "<html><body><h1>403 Forbidden</h1>\nRequest forbidden by administrative rules.\n</body></html>\n",
165
166 [HTTP_ERR_408] =
167 "HTTP/1.0 408 Request Time-out\r\n"
168 "Cache-Control: no-cache\r\n"
169 "Connection: close\r\n"
170 "Content-Type: text/html\r\n"
171 "\r\n"
172 "<html><body><h1>408 Request Time-out</h1>\nYour browser didn't send a complete request in time.\n</body></html>\n",
173
174 [HTTP_ERR_500] =
175 "HTTP/1.0 500 Server Error\r\n"
176 "Cache-Control: no-cache\r\n"
177 "Connection: close\r\n"
178 "Content-Type: text/html\r\n"
179 "\r\n"
180 "<html><body><h1>500 Server Error</h1>\nAn internal server error occured.\n</body></html>\n",
181
182 [HTTP_ERR_502] =
183 "HTTP/1.0 502 Bad Gateway\r\n"
184 "Cache-Control: no-cache\r\n"
185 "Connection: close\r\n"
186 "Content-Type: text/html\r\n"
187 "\r\n"
188 "<html><body><h1>502 Bad Gateway</h1>\nThe server returned an invalid or incomplete response.\n</body></html>\n",
189
190 [HTTP_ERR_503] =
191 "HTTP/1.0 503 Service Unavailable\r\n"
192 "Cache-Control: no-cache\r\n"
193 "Connection: close\r\n"
194 "Content-Type: text/html\r\n"
195 "\r\n"
196 "<html><body><h1>503 Service Unavailable</h1>\nNo server is available to handle this request.\n</body></html>\n",
197
198 [HTTP_ERR_504] =
199 "HTTP/1.0 504 Gateway Time-out\r\n"
200 "Cache-Control: no-cache\r\n"
201 "Connection: close\r\n"
202 "Content-Type: text/html\r\n"
203 "\r\n"
204 "<html><body><h1>504 Gateway Time-out</h1>\nThe server didn't respond in time.\n</body></html>\n",
205
206};
207
Cyril Bonté19979e12012-04-04 12:57:21 +0200208/* status codes available for the stats admin page (strictly 4 chars length) */
209const char *stat_status_codes[STAT_STATUS_SIZE] = {
210 [STAT_STATUS_DENY] = "DENY",
211 [STAT_STATUS_DONE] = "DONE",
212 [STAT_STATUS_ERRP] = "ERRP",
213 [STAT_STATUS_EXCD] = "EXCD",
214 [STAT_STATUS_NONE] = "NONE",
215 [STAT_STATUS_PART] = "PART",
216 [STAT_STATUS_UNKN] = "UNKN",
217};
218
219
William Lallemand73025dd2014-04-24 14:38:37 +0200220/* List head of all known action keywords for "http-request" */
221struct http_req_action_kw_list http_req_keywords = {
222 .list = LIST_HEAD_INIT(http_req_keywords.list)
223};
224
225/* List head of all known action keywords for "http-response" */
226struct http_res_action_kw_list http_res_keywords = {
227 .list = LIST_HEAD_INIT(http_res_keywords.list)
228};
229
Willy Tarreau80587432006-12-24 17:47:20 +0100230/* We must put the messages here since GCC cannot initialize consts depending
231 * on strlen().
232 */
233struct chunk http_err_chunks[HTTP_ERR_SIZE];
234
Willy Tarreaua890d072013-04-02 12:01:06 +0200235/* this struct is used between calls to smp_fetch_hdr() or smp_fetch_cookie() */
236static struct hdr_ctx static_hdr_ctx;
237
#define FD_SETS_ARE_BITFIELDS
#ifdef FD_SETS_ARE_BITFIELDS
/*
 * Character-class bitmaps built on top of fd_set: one bit per byte value,
 * i.e. one bit per ASCII code. The maps are manipulated with the regular FD_*
 * macros: FD_SET() marks a byte value as "must be encoded", and a non-zero
 * FD_ISSET() means that the byte has to be encoded. Only ever pass values in
 * the 0..255 range to these macros.
 *
 * Each map is sized to hold 256 bits: a single fd_set when it is larger than
 * 32 bytes, otherwise as many fd_sets as fit in 32 bytes.
 */
fd_set hdr_encode_map[(sizeof(fd_set) > (256/8)) ? 1 : ((256/8) / sizeof(fd_set))];
fd_set url_encode_map[(sizeof(fd_set) > (256/8)) ? 1 : ((256/8) / sizeof(fd_set))];
fd_set http_encode_map[(sizeof(fd_set) > (256/8)) ? 1 : ((256/8) / sizeof(fd_set))];

#else
#error "Check if your OS uses bitfields for fd_sets"
#endif
254
Willy Tarreau0b748332014-04-29 00:13:29 +0200255static int http_apply_redirect_rule(struct redirect_rule *rule, struct session *s, struct http_txn *txn);
256
Willy Tarreau80587432006-12-24 17:47:20 +0100257void init_proto_http()
258{
Willy Tarreau42250582007-04-01 01:30:43 +0200259 int i;
260 char *tmp;
Willy Tarreau80587432006-12-24 17:47:20 +0100261 int msg;
Willy Tarreau42250582007-04-01 01:30:43 +0200262
Willy Tarreau80587432006-12-24 17:47:20 +0100263 for (msg = 0; msg < HTTP_ERR_SIZE; msg++) {
264 if (!http_err_msgs[msg]) {
265 Alert("Internal error: no message defined for HTTP return code %d. Aborting.\n", msg);
266 abort();
267 }
268
269 http_err_chunks[msg].str = (char *)http_err_msgs[msg];
270 http_err_chunks[msg].len = strlen(http_err_msgs[msg]);
271 }
Willy Tarreau42250582007-04-01 01:30:43 +0200272
273 /* initialize the log header encoding map : '{|}"#' should be encoded with
274 * '#' as prefix, as well as non-printable characters ( <32 or >= 127 ).
275 * URL encoding only requires '"', '#' to be encoded as well as non-
276 * printable characters above.
277 */
278 memset(hdr_encode_map, 0, sizeof(hdr_encode_map));
279 memset(url_encode_map, 0, sizeof(url_encode_map));
Thierry FOURNIERd048d8b2014-03-13 16:46:18 +0100280 memset(http_encode_map, 0, sizeof(url_encode_map));
Willy Tarreau42250582007-04-01 01:30:43 +0200281 for (i = 0; i < 32; i++) {
282 FD_SET(i, hdr_encode_map);
283 FD_SET(i, url_encode_map);
284 }
285 for (i = 127; i < 256; i++) {
286 FD_SET(i, hdr_encode_map);
287 FD_SET(i, url_encode_map);
288 }
289
290 tmp = "\"#{|}";
291 while (*tmp) {
292 FD_SET(*tmp, hdr_encode_map);
293 tmp++;
294 }
295
296 tmp = "\"#";
297 while (*tmp) {
298 FD_SET(*tmp, url_encode_map);
299 tmp++;
300 }
Willy Tarreau332f8bf2007-05-13 21:36:56 +0200301
Thierry FOURNIERd048d8b2014-03-13 16:46:18 +0100302 /* initialize the http header encoding map. The draft httpbis define the
303 * header content as:
304 *
305 * HTTP-message = start-line
306 * *( header-field CRLF )
307 * CRLF
308 * [ message-body ]
309 * header-field = field-name ":" OWS field-value OWS
310 * field-value = *( field-content / obs-fold )
311 * field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
312 * obs-fold = CRLF 1*( SP / HTAB )
313 * field-vchar = VCHAR / obs-text
314 * VCHAR = %x21-7E
315 * obs-text = %x80-FF
316 *
317 * All the chars are encoded except "VCHAR", "obs-text", SP and HTAB.
318 * The encoded chars are form 0x00 to 0x08, 0x0a to 0x1f and 0x7f. The
319 * "obs-fold" is volontary forgotten because haproxy remove this.
320 */
321 memset(http_encode_map, 0, sizeof(http_encode_map));
322 for (i = 0x00; i <= 0x08; i++)
323 FD_SET(i, http_encode_map);
324 for (i = 0x0a; i <= 0x1f; i++)
325 FD_SET(i, http_encode_map);
326 FD_SET(0x7f, http_encode_map);
327
Willy Tarreau332f8bf2007-05-13 21:36:56 +0200328 /* memory allocations */
329 pool2_requri = create_pool("requri", REQURI_LEN, MEM_F_SHARED);
William Lallemanda73203e2012-03-12 12:48:57 +0100330 pool2_uniqueid = create_pool("uniqueid", UNIQUEID_LEN, MEM_F_SHARED);
Willy Tarreau80587432006-12-24 17:47:20 +0100331}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200332
Willy Tarreau53b6c742006-12-17 13:37:46 +0100333/*
334 * We have 26 list of methods (1 per first letter), each of which can have
335 * up to 3 entries (2 valid, 1 null).
336 */
337struct http_method_desc {
Willy Tarreauc8987b32013-12-06 23:43:17 +0100338 enum http_meth_t meth;
Willy Tarreau53b6c742006-12-17 13:37:46 +0100339 int len;
340 const char text[8];
341};
342
Willy Tarreau8d5d7f22007-01-21 19:16:41 +0100343const struct http_method_desc http_methods[26][3] = {
Willy Tarreau53b6c742006-12-17 13:37:46 +0100344 ['C' - 'A'] = {
345 [0] = { .meth = HTTP_METH_CONNECT , .len=7, .text="CONNECT" },
346 },
347 ['D' - 'A'] = {
348 [0] = { .meth = HTTP_METH_DELETE , .len=6, .text="DELETE" },
349 },
350 ['G' - 'A'] = {
351 [0] = { .meth = HTTP_METH_GET , .len=3, .text="GET" },
352 },
353 ['H' - 'A'] = {
354 [0] = { .meth = HTTP_METH_HEAD , .len=4, .text="HEAD" },
355 },
356 ['P' - 'A'] = {
357 [0] = { .meth = HTTP_METH_POST , .len=4, .text="POST" },
358 [1] = { .meth = HTTP_METH_PUT , .len=3, .text="PUT" },
359 },
360 ['T' - 'A'] = {
361 [0] = { .meth = HTTP_METH_TRACE , .len=5, .text="TRACE" },
362 },
363 /* rest is empty like this :
364 * [1] = { .meth = HTTP_METH_NONE , .len=0, .text="" },
365 */
366};
367
Thierry FOURNIERd4373142013-12-17 01:10:10 +0100368const struct http_method_name http_known_methods[HTTP_METH_OTHER] = {
369 [HTTP_METH_NONE] = { "", 0 },
370 [HTTP_METH_OPTIONS] = { "OPTIONS", 7 },
371 [HTTP_METH_GET] = { "GET", 3 },
372 [HTTP_METH_HEAD] = { "HEAD", 4 },
373 [HTTP_METH_POST] = { "POST", 4 },
374 [HTTP_METH_PUT] = { "PUT", 3 },
375 [HTTP_METH_DELETE] = { "DELETE", 6 },
376 [HTTP_METH_TRACE] = { "TRACE", 5 },
377 [HTTP_METH_CONNECT] = { "CONNECT", 7 },
378};
379
/* It is about twice as fast on recent architectures to lookup a byte in a
 * table than to perform a boolean AND or OR between two tests. Refer to
 * RFC2616 for those chars.
 */

/* non-zero for a space or a horizontal tab */
const char http_is_spht[256] = {
	['\t'] = 1,
	[' ']  = 1,
};
388
/* non-zero for a carriage return or a line feed */
const char http_is_crlf[256] = {
	['\n'] = 1,
	['\r'] = 1,
};
392
/* non-zero for linear white space: space, tab, CR or LF */
const char http_is_lws[256] = {
	['\t'] = 1, ['\n'] = 1,
	['\r'] = 1, [' ']  = 1,
};
397
/* non-zero for the separator characters (listed in ASCII order) */
const char http_is_sep[256] = {
	['\t'] = 1, [' '] = 1, ['"'] = 1,
	['('] = 1, [')'] = 1, [','] = 1, ['/'] = 1,
	[':'] = 1, [';'] = 1, ['<'] = 1, ['='] = 1,
	['>'] = 1, ['?'] = 1, ['@'] = 1,
	['['] = 1, ['\\'] = 1, [']'] = 1,
	['{'] = 1, ['}'] = 1,
};
405
/* non-zero for control characters: 0x00..0x1f and 0x7f (DEL) */
const char http_is_ctl[256] = {
	[0x00 ... 0x1f] = 1,
	[0x7f] = 1,
};
410
/*
 * A token is any ASCII char that is neither a separator nor a CTL char.
 * Only the token characters are listed; every other slot is left to the
 * implicit zero initialization. No index is assigned twice (the ranges do not
 * overlap anything), since gcc-2.95 does not handle overwritten values
 * correctly.
 */
const char http_is_token[256] = {
	['!'] = 1, ['#'] = 1, ['$'] = 1, ['%'] = 1, ['&'] = 1, ['\''] = 1,
	['*'] = 1, ['+'] = 1, ['-'] = 1, ['.'] = 1,
	['0' ... '9'] = 1,
	['A' ... 'Z'] = 1,
	['^'] = 1, ['_'] = 1, ['`'] = 1,
	['a' ... 'z'] = 1,
	['|'] = 1, ['~'] = 1,
};
442
443
/*
 * An http ver_token is any ASCII char which can be found in an HTTP version,
 * which includes 'H', 'T', 'P', '/', '.' and any digit.
 */
const char http_is_ver_token[256] = {
	['H'] = 1, ['T'] = 1, ['P'] = 1,
	['/'] = 1, ['.'] = 1,
	['0' ... '9'] = 1,
};
454
455
/*
 * Appends the NUL-terminated header <text> and its CRLF at the tail of the
 * message's headers, just before the last CRLF. The text length is measured
 * with strlen() first, so <text> cannot be NULL. The work is delegated to
 * http_header_add_tail2(): the header is also added to the index <hdr_idx>
 * and the end of headers is adjusted. Returns -1 if nothing could be inserted
 * into the buffer, otherwise the value returned by hdr_idx_add().
 */
int http_header_add_tail(struct http_msg *msg, struct hdr_idx *hdr_idx, const char *text)
{
	int text_len = strlen(text);

	return http_header_add_tail2(msg, hdr_idx, text, text_len);
}
474
475/*
Willy Tarreau6acf7c92012-03-09 13:30:45 +0100476 * Adds a header and its CRLF at the tail of the message's buffer, just before
477 * the last CRLF. <len> bytes are copied, not counting the CRLF. If <text> is NULL, then
Willy Tarreau4af6f3a2007-03-18 22:36:26 +0100478 * the buffer is only opened and the space reserved, but nothing is copied.
479 * The header is also automatically added to the index <hdr_idx>, and the end
480 * of headers is automatically adjusted. The number of bytes added is returned
481 * on success, otherwise <0 is returned indicating an error.
482 */
Willy Tarreau6acf7c92012-03-09 13:30:45 +0100483int http_header_add_tail2(struct http_msg *msg,
484 struct hdr_idx *hdr_idx, const char *text, int len)
Willy Tarreau4af6f3a2007-03-18 22:36:26 +0100485{
486 int bytes;
487
Willy Tarreau9b28e032012-10-12 23:49:43 +0200488 bytes = buffer_insert_line2(msg->chn->buf, msg->chn->buf->p + msg->eoh, text, len);
Willy Tarreau4af6f3a2007-03-18 22:36:26 +0100489 if (!bytes)
490 return -1;
Willy Tarreaufa355d42009-11-29 18:12:29 +0100491 http_msg_move_end(msg, bytes);
Willy Tarreau4af6f3a2007-03-18 22:36:26 +0100492 return hdr_idx_add(len, 1, hdr_idx, hdr_idx->tail);
493}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200494
/*
 * Checks whether the header line starting at <hdr> and ending before <end>
 * is named exactly <name> (<len> chars, case-insensitive) and is immediately
 * followed by a colon. Returns 0 when it is not. Otherwise returns the offset
 * from <hdr> of the first character after the colon which is neither a space
 * nor a tab. When the value is empty (only spaces/tabs up to <end>), <len>+2
 * is returned if that position exists within the line, so that a caller may
 * replace starting from the second space; otherwise <end>-<hdr> is returned.
 */
int http_header_match2(const char *hdr, const char *end,
		       const char *name, int len)
{
	const char *p;

	/* the colon must lie inside the line and the name must match */
	if (hdr + len >= end || hdr[len] != ':' || strncasecmp(hdr, name, len) != 0)
		return 0;

	/* skip spaces and tabs after the colon */
	for (p = hdr + len + 1; p < end && HTTP_IS_SPHT(*p); p++)
		;

	if (p >= end && end - hdr >= len + 2)
		return len + 2; /* we may replace starting from second space */

	return p - hdr;
}
520
Willy Tarreau04ff9f12013-06-10 18:39:42 +0200521/* Find the first or next occurrence of header <name> in message buffer <sol>
522 * using headers index <idx>, and return it in the <ctx> structure. This
523 * structure holds everything necessary to use the header and find next
524 * occurrence. If its <idx> member is 0, the header is searched from the
525 * beginning. Otherwise, the next occurrence is returned. The function returns
526 * 1 when it finds a value, and 0 when there is no more. It is very similar to
527 * http_find_header2() except that it is designed to work with full-line headers
528 * whose comma is not a delimiter but is part of the syntax. As a special case,
529 * if ctx->val is NULL when searching for a new values of a header, the current
530 * header is rescanned. This allows rescanning after a header deletion.
531 */
532int http_find_full_header2(const char *name, int len,
533 char *sol, struct hdr_idx *idx,
534 struct hdr_ctx *ctx)
535{
536 char *eol, *sov;
537 int cur_idx, old_idx;
538
539 cur_idx = ctx->idx;
540 if (cur_idx) {
541 /* We have previously returned a header, let's search another one */
542 sol = ctx->line;
543 eol = sol + idx->v[cur_idx].len;
544 goto next_hdr;
545 }
546
547 /* first request for this header */
548 sol += hdr_idx_first_pos(idx);
549 old_idx = 0;
550 cur_idx = hdr_idx_first_idx(idx);
551 while (cur_idx) {
552 eol = sol + idx->v[cur_idx].len;
553
554 if (len == 0) {
555 /* No argument was passed, we want any header.
556 * To achieve this, we simply build a fake request. */
557 while (sol + len < eol && sol[len] != ':')
558 len++;
559 name = sol;
560 }
561
562 if ((len < eol - sol) &&
563 (sol[len] == ':') &&
564 (strncasecmp(sol, name, len) == 0)) {
565 ctx->del = len;
566 sov = sol + len + 1;
567 while (sov < eol && http_is_lws[(unsigned char)*sov])
568 sov++;
569
570 ctx->line = sol;
571 ctx->prev = old_idx;
572 ctx->idx = cur_idx;
573 ctx->val = sov - sol;
574 ctx->tws = 0;
575 while (eol > sov && http_is_lws[(unsigned char)*(eol - 1)]) {
576 eol--;
577 ctx->tws++;
578 }
579 ctx->vlen = eol - sov;
580 return 1;
581 }
582 next_hdr:
583 sol = eol + idx->v[cur_idx].cr + 1;
584 old_idx = cur_idx;
585 cur_idx = idx->v[cur_idx].next;
586 }
587 return 0;
588}
589
/* Returns a pointer to the end of the header value starting at <s> and bounded
 * by <e>: either the first comma found outside of a quoted string, or <e> if
 * there is none. Inside a quoted string, a backslash escapes the following
 * character. See RFC2616, par 2.2 for more information. Note that it requires
 * a valid header to return a valid result. This works for headers defined as
 * comma-separated lists.
 */
char *find_hdr_value_end(char *s, const char *e)
{
	enum { PLAIN, QUOTED, ESCAPED } state = PLAIN;

	for (; s < e; s++) {
		switch (state) {
		case ESCAPED:
			/* this char was escaped, we are still inside quotes */
			state = QUOTED;
			break;
		case QUOTED:
			if (*s == '\\')
				state = ESCAPED;
			else if (*s == '"')
				state = PLAIN;
			break;
		default:
			if (*s == '"')
				state = QUOTED;
			else if (*s == ',')
				return s;
			break;
		}
	}
	return s;
}
610
611/* Find the first or next occurrence of header <name> in message buffer <sol>
612 * using headers index <idx>, and return it in the <ctx> structure. This
613 * structure holds everything necessary to use the header and find next
614 * occurrence. If its <idx> member is 0, the header is searched from the
615 * beginning. Otherwise, the next occurrence is returned. The function returns
Willy Tarreau68085d82010-01-18 14:54:04 +0100616 * 1 when it finds a value, and 0 when there is no more. It is designed to work
617 * with headers defined as comma-separated lists. As a special case, if ctx->val
618 * is NULL when searching for a new values of a header, the current header is
619 * rescanned. This allows rescanning after a header deletion.
Willy Tarreau33a7e692007-06-10 19:45:56 +0200620 */
621int http_find_header2(const char *name, int len,
Willy Tarreau68085d82010-01-18 14:54:04 +0100622 char *sol, struct hdr_idx *idx,
Willy Tarreau33a7e692007-06-10 19:45:56 +0200623 struct hdr_ctx *ctx)
624{
Willy Tarreau68085d82010-01-18 14:54:04 +0100625 char *eol, *sov;
626 int cur_idx, old_idx;
Willy Tarreau33a7e692007-06-10 19:45:56 +0200627
Willy Tarreau68085d82010-01-18 14:54:04 +0100628 cur_idx = ctx->idx;
629 if (cur_idx) {
Willy Tarreau33a7e692007-06-10 19:45:56 +0200630 /* We have previously returned a value, let's search
631 * another one on the same line.
632 */
Willy Tarreau33a7e692007-06-10 19:45:56 +0200633 sol = ctx->line;
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200634 ctx->del = ctx->val + ctx->vlen + ctx->tws;
Willy Tarreau68085d82010-01-18 14:54:04 +0100635 sov = sol + ctx->del;
Willy Tarreau33a7e692007-06-10 19:45:56 +0200636 eol = sol + idx->v[cur_idx].len;
637
638 if (sov >= eol)
639 /* no more values in this header */
640 goto next_hdr;
641
Willy Tarreau68085d82010-01-18 14:54:04 +0100642 /* values remaining for this header, skip the comma but save it
643 * for later use (eg: for header deletion).
644 */
Willy Tarreau33a7e692007-06-10 19:45:56 +0200645 sov++;
646 while (sov < eol && http_is_lws[(unsigned char)*sov])
647 sov++;
648
649 goto return_hdr;
650 }
651
652 /* first request for this header */
653 sol += hdr_idx_first_pos(idx);
Willy Tarreau68085d82010-01-18 14:54:04 +0100654 old_idx = 0;
Willy Tarreau33a7e692007-06-10 19:45:56 +0200655 cur_idx = hdr_idx_first_idx(idx);
Willy Tarreau33a7e692007-06-10 19:45:56 +0200656 while (cur_idx) {
657 eol = sol + idx->v[cur_idx].len;
658
Willy Tarreau1ad7c6d2007-06-10 21:42:55 +0200659 if (len == 0) {
660 /* No argument was passed, we want any header.
661 * To achieve this, we simply build a fake request. */
662 while (sol + len < eol && sol[len] != ':')
663 len++;
664 name = sol;
665 }
666
Willy Tarreau33a7e692007-06-10 19:45:56 +0200667 if ((len < eol - sol) &&
668 (sol[len] == ':') &&
669 (strncasecmp(sol, name, len) == 0)) {
Willy Tarreau68085d82010-01-18 14:54:04 +0100670 ctx->del = len;
Willy Tarreau33a7e692007-06-10 19:45:56 +0200671 sov = sol + len + 1;
672 while (sov < eol && http_is_lws[(unsigned char)*sov])
673 sov++;
Willy Tarreau68085d82010-01-18 14:54:04 +0100674
Willy Tarreau33a7e692007-06-10 19:45:56 +0200675 ctx->line = sol;
Willy Tarreau68085d82010-01-18 14:54:04 +0100676 ctx->prev = old_idx;
677 return_hdr:
Willy Tarreau33a7e692007-06-10 19:45:56 +0200678 ctx->idx = cur_idx;
679 ctx->val = sov - sol;
680
681 eol = find_hdr_value_end(sov, eol);
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200682 ctx->tws = 0;
Willy Tarreau275600b2011-09-16 08:11:26 +0200683 while (eol > sov && http_is_lws[(unsigned char)*(eol - 1)]) {
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200684 eol--;
685 ctx->tws++;
686 }
Willy Tarreau33a7e692007-06-10 19:45:56 +0200687 ctx->vlen = eol - sov;
688 return 1;
689 }
690 next_hdr:
691 sol = eol + idx->v[cur_idx].cr + 1;
Willy Tarreau68085d82010-01-18 14:54:04 +0100692 old_idx = cur_idx;
Willy Tarreau33a7e692007-06-10 19:45:56 +0200693 cur_idx = idx->v[cur_idx].next;
694 }
695 return 0;
696}
697
/* Same as http_find_header2(), for a NUL-terminated <name> whose length is
 * measured here with strlen(), so <name> cannot be NULL.
 */
int http_find_header(const char *name,
		     char *sol, struct hdr_idx *idx,
		     struct hdr_ctx *ctx)
{
	int name_len = strlen(name);

	return http_find_header2(name, name_len, sol, idx, ctx);
}
704
Willy Tarreau68085d82010-01-18 14:54:04 +0100705/* Remove one value of a header. This only works on a <ctx> returned by one of
706 * the http_find_header functions. The value is removed, as well as surrounding
707 * commas if any. If the removed value was alone, the whole header is removed.
Willy Tarreau6acf7c92012-03-09 13:30:45 +0100708 * The ctx is always updated accordingly, as well as the buffer and HTTP
Willy Tarreau68085d82010-01-18 14:54:04 +0100709 * message <msg>. The new index is returned. If it is zero, it means there is
710 * no more header, so any processing may stop. The ctx is always left in a form
711 * that can be handled by http_find_header2() to find next occurrence.
712 */
Willy Tarreau6acf7c92012-03-09 13:30:45 +0100713int http_remove_header2(struct http_msg *msg, struct hdr_idx *idx, struct hdr_ctx *ctx)
Willy Tarreau68085d82010-01-18 14:54:04 +0100714{
715 int cur_idx = ctx->idx;
716 char *sol = ctx->line;
717 struct hdr_idx_elem *hdr;
718 int delta, skip_comma;
719
720 if (!cur_idx)
721 return 0;
722
723 hdr = &idx->v[cur_idx];
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200724 if (sol[ctx->del] == ':' && ctx->val + ctx->vlen + ctx->tws == hdr->len) {
Willy Tarreau68085d82010-01-18 14:54:04 +0100725 /* This was the only value of the header, we must now remove it entirely. */
Willy Tarreau9b28e032012-10-12 23:49:43 +0200726 delta = buffer_replace2(msg->chn->buf, sol, sol + hdr->len + hdr->cr + 1, NULL, 0);
Willy Tarreau68085d82010-01-18 14:54:04 +0100727 http_msg_move_end(msg, delta);
728 idx->used--;
729 hdr->len = 0; /* unused entry */
730 idx->v[ctx->prev].next = idx->v[ctx->idx].next;
Willy Tarreau5c4784f2011-02-12 13:07:35 +0100731 if (idx->tail == ctx->idx)
732 idx->tail = ctx->prev;
Willy Tarreau68085d82010-01-18 14:54:04 +0100733 ctx->idx = ctx->prev; /* walk back to the end of previous header */
734 ctx->line -= idx->v[ctx->idx].len + idx->v[cur_idx].cr + 1;
735 ctx->val = idx->v[ctx->idx].len; /* point to end of previous header */
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200736 ctx->tws = ctx->vlen = 0;
Willy Tarreau68085d82010-01-18 14:54:04 +0100737 return ctx->idx;
738 }
739
740 /* This was not the only value of this header. We have to remove between
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200741 * ctx->del+1 and ctx->val+ctx->vlen+ctx->tws+1 included. If it is the
742 * last entry of the list, we remove the last separator.
Willy Tarreau68085d82010-01-18 14:54:04 +0100743 */
744
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200745 skip_comma = (ctx->val + ctx->vlen + ctx->tws == hdr->len) ? 0 : 1;
Willy Tarreau9b28e032012-10-12 23:49:43 +0200746 delta = buffer_replace2(msg->chn->buf, sol + ctx->del + skip_comma,
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200747 sol + ctx->val + ctx->vlen + ctx->tws + skip_comma,
Willy Tarreau68085d82010-01-18 14:54:04 +0100748 NULL, 0);
749 hdr->len += delta;
750 http_msg_move_end(msg, delta);
751 ctx->val = ctx->del;
Willy Tarreau588bd4f2011-09-01 22:22:28 +0200752 ctx->tws = ctx->vlen = 0;
Willy Tarreau68085d82010-01-18 14:54:04 +0100753 return ctx->idx;
754}
755
Willy Tarreau2d3d94c2008-11-30 20:20:08 +0100756/* This function handles a server error at the stream interface level. The
757 * stream interface is assumed to be already in a closed state. An optional
758 * message is copied into the input buffer, and an HTTP status code stored.
759 * The error flags are set to the values in arguments. Any pending request
Willy Tarreau6f0aa472009-03-08 20:33:29 +0100760 * in this buffer will be lost.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200761 */
Willy Tarreauf1fd9dc2014-04-24 20:47:57 +0200762static void http_server_error(struct session *s, struct stream_interface *si,
Willy Tarreau2d3d94c2008-11-30 20:20:08 +0100763 int err, int finst, int status, const struct chunk *msg)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200764{
Willy Tarreau8263d2b2012-08-28 00:06:31 +0200765 channel_auto_read(si->ob);
766 channel_abort(si->ob);
767 channel_auto_close(si->ob);
768 channel_erase(si->ob);
769 channel_auto_close(si->ib);
770 channel_auto_read(si->ib);
Willy Tarreau0f772532006-12-23 20:51:41 +0100771 if (status > 0 && msg) {
Willy Tarreauf1fd9dc2014-04-24 20:47:57 +0200772 s->txn.status = status;
Willy Tarreau9dab5fc2012-05-07 11:56:55 +0200773 bo_inject(si->ib, msg->str, msg->len);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200774 }
Willy Tarreauf1fd9dc2014-04-24 20:47:57 +0200775 if (!(s->flags & SN_ERR_MASK))
776 s->flags |= err;
777 if (!(s->flags & SN_FINST_MASK))
778 s->flags |= finst;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200779}
780
Willy Tarreau80587432006-12-24 17:47:20 +0100781/* This function returns the appropriate error location for the given session
782 * and message.
783 */
784
Willy Tarreau783f2582012-09-04 12:19:04 +0200785struct chunk *http_error_message(struct session *s, int msgnum)
Willy Tarreau80587432006-12-24 17:47:20 +0100786{
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200787 if (s->be->errmsg[msgnum].str)
788 return &s->be->errmsg[msgnum];
Willy Tarreau80587432006-12-24 17:47:20 +0100789 else if (s->fe->errmsg[msgnum].str)
790 return &s->fe->errmsg[msgnum];
791 else
792 return &http_err_chunks[msgnum];
793}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200794
Willy Tarreau53b6c742006-12-17 13:37:46 +0100795/*
796 * returns HTTP_METH_NONE if there is nothing valid to read (empty or non-text
797 * string), HTTP_METH_OTHER for unknown methods, or the identified method.
798 */
Thierry FOURNIERd4373142013-12-17 01:10:10 +0100799enum http_meth_t find_http_meth(const char *str, const int len)
Willy Tarreau53b6c742006-12-17 13:37:46 +0100800{
801 unsigned char m;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +0100802 const struct http_method_desc *h;
Willy Tarreau53b6c742006-12-17 13:37:46 +0100803
804 m = ((unsigned)*str - 'A');
805
806 if (m < 26) {
Willy Tarreau8d5d7f22007-01-21 19:16:41 +0100807 for (h = http_methods[m]; h->len > 0; h++) {
808 if (unlikely(h->len != len))
Willy Tarreau53b6c742006-12-17 13:37:46 +0100809 continue;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +0100810 if (likely(memcmp(str, h->text, h->len) == 0))
Willy Tarreau53b6c742006-12-17 13:37:46 +0100811 return h->meth;
Willy Tarreau53b6c742006-12-17 13:37:46 +0100812 };
813 return HTTP_METH_OTHER;
814 }
815 return HTTP_METH_NONE;
816
817}
818
Willy Tarreau21d2af32008-02-14 20:25:24 +0100819/* Parse the URI from the given transaction (which is assumed to be in request
820 * phase) and look for the "/" beginning the PATH. If not found, return NULL.
821 * It is returned otherwise.
822 */
823static char *
824http_get_path(struct http_txn *txn)
825{
826 char *ptr, *end;
827
Willy Tarreau9b28e032012-10-12 23:49:43 +0200828 ptr = txn->req.chn->buf->p + txn->req.sl.rq.u;
Willy Tarreau21d2af32008-02-14 20:25:24 +0100829 end = ptr + txn->req.sl.rq.u_l;
830
831 if (ptr >= end)
832 return NULL;
833
834 /* RFC2616, par. 5.1.2 :
835 * Request-URI = "*" | absuri | abspath | authority
836 */
837
838 if (*ptr == '*')
839 return NULL;
840
841 if (isalpha((unsigned char)*ptr)) {
842 /* this is a scheme as described by RFC3986, par. 3.1 */
843 ptr++;
844 while (ptr < end &&
845 (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
846 ptr++;
847 /* skip '://' */
848 if (ptr == end || *ptr++ != ':')
849 return NULL;
850 if (ptr == end || *ptr++ != '/')
851 return NULL;
852 if (ptr == end || *ptr++ != '/')
853 return NULL;
854 }
855 /* skip [user[:passwd]@]host[:[port]] */
856
857 while (ptr < end && *ptr != '/')
858 ptr++;
859
860 if (ptr == end)
861 return NULL;
862
863 /* OK, we got the '/' ! */
864 return ptr;
865}
866
/* Parse the URI from the NUL-terminated string <str> and look for the "/"
 * beginning the PATH. A pointer to this "/" is returned when it is found
 * before the first space or the end of the string. NULL is returned when the
 * string starts with "*", when it starts with a letter but the scheme is not
 * followed by "://", or when no "/" is found.
 */
static char *
http_get_path_from_string(char *str)
{
	char *cur = str;

	/* RFC2616, par. 5.1.2 :
	 * Request-URI    = "*" | absuri | abspath | authority
	 */
	if (*cur == '*')
		return NULL;

	if (isalpha((unsigned char)*cur)) {
		/* this is a scheme as described by RFC3986, par. 3.1 */
		do {
			cur++;
		} while (isalnum((unsigned char)*cur) ||
			 *cur == '+' || *cur == '-' || *cur == '.');

		/* The scheme must be followed by "://". The tests stop on the
		 * first mismatch, so nothing is read past the trailing zero.
		 */
		if (cur[0] != ':' || cur[1] != '/' || cur[2] != '/')
			return NULL;
		cur += 3;
	}

	/* skip [user[:passwd]@]host[:[port]] up to the first space or '/' */
	cur += strcspn(cur, " /");

	/* only a '/' marks the beginning of a path */
	return (*cur == '/') ? cur : NULL;
}
906
Willy Tarreau71241ab2012-12-27 11:30:54 +0100907/* Returns a 302 for a redirectable request that reaches a server working in
908 * in redirect mode. This may only be called just after the stream interface
909 * has moved to SI_ST_ASS. Unprocessable requests are left unchanged and will
910 * follow normal proxy processing. NOTE: this function is designed to support
911 * being called once data are scheduled for forwarding.
Willy Tarreauefb453c2008-10-26 20:49:47 +0100912 */
Willy Tarreau71241ab2012-12-27 11:30:54 +0100913void http_perform_server_redirect(struct session *s, struct stream_interface *si)
Willy Tarreauefb453c2008-10-26 20:49:47 +0100914{
915 struct http_txn *txn;
Willy Tarreau827aee92011-03-10 16:55:02 +0100916 struct server *srv;
Willy Tarreauefb453c2008-10-26 20:49:47 +0100917 char *path;
Willy Tarreaucde18fc2012-05-30 07:59:54 +0200918 int len, rewind;
Willy Tarreauefb453c2008-10-26 20:49:47 +0100919
920 /* 1: create the response header */
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100921 trash.len = strlen(HTTP_302);
922 memcpy(trash.str, HTTP_302, trash.len);
Willy Tarreauefb453c2008-10-26 20:49:47 +0100923
Willy Tarreau3fdb3662012-11-12 00:42:33 +0100924 srv = objt_server(s->target);
Willy Tarreau827aee92011-03-10 16:55:02 +0100925
Willy Tarreauefb453c2008-10-26 20:49:47 +0100926 /* 2: add the server's prefix */
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100927 if (trash.len + srv->rdr_len > trash.size)
Willy Tarreauefb453c2008-10-26 20:49:47 +0100928 return;
929
Willy Tarreaudcb75c42010-01-10 00:24:22 +0100930 /* special prefix "/" means don't change URL */
Willy Tarreau827aee92011-03-10 16:55:02 +0100931 if (srv->rdr_len != 1 || *srv->rdr_pfx != '/') {
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100932 memcpy(trash.str + trash.len, srv->rdr_pfx, srv->rdr_len);
933 trash.len += srv->rdr_len;
Willy Tarreaudcb75c42010-01-10 00:24:22 +0100934 }
Willy Tarreauefb453c2008-10-26 20:49:47 +0100935
Willy Tarreaucde18fc2012-05-30 07:59:54 +0200936 /* 3: add the request URI. Since it was already forwarded, we need
937 * to temporarily rewind the buffer.
938 */
Willy Tarreauefb453c2008-10-26 20:49:47 +0100939 txn = &s->txn;
Willy Tarreau211cdec2014-04-17 20:18:08 +0200940 b_rew(s->req->buf, rewind = http_hdr_rewind(&txn->req));
Willy Tarreaucde18fc2012-05-30 07:59:54 +0200941
Willy Tarreauefb453c2008-10-26 20:49:47 +0100942 path = http_get_path(txn);
Willy Tarreau9b28e032012-10-12 23:49:43 +0200943 len = buffer_count(s->req->buf, path, b_ptr(s->req->buf, txn->req.sl.rq.u + txn->req.sl.rq.u_l));
Willy Tarreaucde18fc2012-05-30 07:59:54 +0200944
Willy Tarreau9b28e032012-10-12 23:49:43 +0200945 b_adv(s->req->buf, rewind);
Willy Tarreaucde18fc2012-05-30 07:59:54 +0200946
Willy Tarreauefb453c2008-10-26 20:49:47 +0100947 if (!path)
948 return;
949
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100950 if (trash.len + len > trash.size - 4) /* 4 for CRLF-CRLF */
Willy Tarreauefb453c2008-10-26 20:49:47 +0100951 return;
952
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100953 memcpy(trash.str + trash.len, path, len);
954 trash.len += len;
Willy Tarreau88d349d2010-01-25 12:15:43 +0100955
956 if (unlikely(txn->flags & TX_USE_PX_CONN)) {
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100957 memcpy(trash.str + trash.len, "\r\nProxy-Connection: close\r\n\r\n", 29);
958 trash.len += 29;
Willy Tarreau88d349d2010-01-25 12:15:43 +0100959 } else {
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100960 memcpy(trash.str + trash.len, "\r\nConnection: close\r\n\r\n", 23);
961 trash.len += 23;
Willy Tarreau88d349d2010-01-25 12:15:43 +0100962 }
Willy Tarreauefb453c2008-10-26 20:49:47 +0100963
964 /* prepare to return without error. */
Willy Tarreau73b013b2012-05-21 16:31:45 +0200965 si_shutr(si);
966 si_shutw(si);
Willy Tarreauefb453c2008-10-26 20:49:47 +0100967 si->err_type = SI_ET_NONE;
Willy Tarreauefb453c2008-10-26 20:49:47 +0100968 si->state = SI_ST_CLO;
969
970 /* send the message */
Willy Tarreau570f2212013-06-10 16:42:09 +0200971 http_server_error(s, si, SN_ERR_LOCAL, SN_FINST_C, 302, &trash);
Willy Tarreauefb453c2008-10-26 20:49:47 +0100972
973 /* FIXME: we should increase a counter of redirects per server and per backend. */
Willy Tarreau4521ba62013-01-24 01:25:25 +0100974 srv_inc_sess_ctr(srv);
Bhaskar Maddalaa20cb852014-02-03 16:26:46 -0500975 srv_set_sess_last(srv);
Willy Tarreauefb453c2008-10-26 20:49:47 +0100976}
977
Willy Tarreau0cac36f2008-11-30 20:44:17 +0100978/* Return the error message corresponding to si->err_type. It is assumed
Willy Tarreauefb453c2008-10-26 20:49:47 +0100979 * that the server side is closed. Note that err_type is actually a
980 * bitmask, where almost only aborts may be cumulated with other
981 * values. We consider that aborted operations are more important
982 * than timeouts or errors due to the fact that nobody else in the
983 * logs might explain incomplete retries. All others should avoid
984 * being cumulated. It should normally not be possible to have multiple
985 * aborts at once, but just in case, the first one in sequence is reported.
Willy Tarreau6b726ad2013-12-15 19:31:37 +0100986 * Note that connection errors appearing on the second request of a keep-alive
987 * connection are not reported since this allows the client to retry.
Willy Tarreauefb453c2008-10-26 20:49:47 +0100988 */
Willy Tarreau0cac36f2008-11-30 20:44:17 +0100989void http_return_srv_error(struct session *s, struct stream_interface *si)
Willy Tarreauefb453c2008-10-26 20:49:47 +0100990{
Willy Tarreau0cac36f2008-11-30 20:44:17 +0100991 int err_type = si->err_type;
Willy Tarreauefb453c2008-10-26 20:49:47 +0100992
993 if (err_type & SI_ET_QUEUE_ABRT)
Willy Tarreau2d3d94c2008-11-30 20:20:08 +0100994 http_server_error(s, si, SN_ERR_CLICL, SN_FINST_Q,
Willy Tarreau783f2582012-09-04 12:19:04 +0200995 503, http_error_message(s, HTTP_ERR_503));
Willy Tarreauefb453c2008-10-26 20:49:47 +0100996 else if (err_type & SI_ET_CONN_ABRT)
Willy Tarreau2d3d94c2008-11-30 20:20:08 +0100997 http_server_error(s, si, SN_ERR_CLICL, SN_FINST_C,
Willy Tarreau6b726ad2013-12-15 19:31:37 +0100998 503, (s->txn.flags & TX_NOT_FIRST) ? NULL :
999 http_error_message(s, HTTP_ERR_503));
Willy Tarreauefb453c2008-10-26 20:49:47 +01001000 else if (err_type & SI_ET_QUEUE_TO)
Willy Tarreau2d3d94c2008-11-30 20:20:08 +01001001 http_server_error(s, si, SN_ERR_SRVTO, SN_FINST_Q,
Willy Tarreau783f2582012-09-04 12:19:04 +02001002 503, http_error_message(s, HTTP_ERR_503));
Willy Tarreauefb453c2008-10-26 20:49:47 +01001003 else if (err_type & SI_ET_QUEUE_ERR)
Willy Tarreau2d3d94c2008-11-30 20:20:08 +01001004 http_server_error(s, si, SN_ERR_SRVCL, SN_FINST_Q,
Willy Tarreau783f2582012-09-04 12:19:04 +02001005 503, http_error_message(s, HTTP_ERR_503));
Willy Tarreauefb453c2008-10-26 20:49:47 +01001006 else if (err_type & SI_ET_CONN_TO)
Willy Tarreau2d3d94c2008-11-30 20:20:08 +01001007 http_server_error(s, si, SN_ERR_SRVTO, SN_FINST_C,
Willy Tarreau6b726ad2013-12-15 19:31:37 +01001008 503, (s->txn.flags & TX_NOT_FIRST) ? NULL :
1009 http_error_message(s, HTTP_ERR_503));
Willy Tarreauefb453c2008-10-26 20:49:47 +01001010 else if (err_type & SI_ET_CONN_ERR)
Willy Tarreau2d3d94c2008-11-30 20:20:08 +01001011 http_server_error(s, si, SN_ERR_SRVCL, SN_FINST_C,
Willy Tarreau36346242014-02-24 18:26:30 +01001012 503, (s->flags & SN_SRV_REUSED) ? NULL :
Willy Tarreau6b726ad2013-12-15 19:31:37 +01001013 http_error_message(s, HTTP_ERR_503));
Willy Tarreau2d400bb2012-05-14 12:11:47 +02001014 else if (err_type & SI_ET_CONN_RES)
1015 http_server_error(s, si, SN_ERR_RESOURCE, SN_FINST_C,
Willy Tarreau6b726ad2013-12-15 19:31:37 +01001016 503, (s->txn.flags & TX_NOT_FIRST) ? NULL :
1017 http_error_message(s, HTTP_ERR_503));
Willy Tarreauefb453c2008-10-26 20:49:47 +01001018 else /* SI_ET_CONN_OTHER and others */
Willy Tarreau2d3d94c2008-11-30 20:20:08 +01001019 http_server_error(s, si, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau783f2582012-09-04 12:19:04 +02001020 500, http_error_message(s, HTTP_ERR_500));
Willy Tarreauefb453c2008-10-26 20:49:47 +01001021}
1022
/* Tables which are only declared here: being "extern", they are defined in
 * another file.
 */
Willy Tarreau42250582007-04-01 01:30:43 +02001023extern const char sess_term_cond[8];
1024extern const char sess_fin_state[8];
1025extern const char *monthname[12];
/* Pool heads defined by this file. Only pool2_capture is explicitly set to
 * NULL here. NOTE(review): judging by their names they presumably serve the
 * request URIs, the captures and the unique IDs -- confirm where the pools
 * are created.
 */
Willy Tarreau332f8bf2007-05-13 21:36:56 +02001026struct pool_head *pool2_requri;
Willy Tarreau193b8c62012-11-22 00:17:38 +01001027struct pool_head *pool2_capture = NULL;
William Lallemanda73203e2012-03-12 12:48:57 +01001028struct pool_head *pool2_uniqueid;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001029
Willy Tarreau117f59e2007-03-04 18:17:17 +01001030/*
1031 * Capture headers from message starting at <som> according to header list
Willy Tarreau54da8db2014-06-13 16:11:48 +02001032 * <cap_hdr>, and fill the <cap> pointers appropriately.
Willy Tarreau117f59e2007-03-04 18:17:17 +01001033 */
1034void capture_headers(char *som, struct hdr_idx *idx,
1035 char **cap, struct cap_hdr *cap_hdr)
1036{
1037 char *eol, *sol, *col, *sov;
1038 int cur_idx;
1039 struct cap_hdr *h;
1040 int len;
1041
1042 sol = som + hdr_idx_first_pos(idx);
1043 cur_idx = hdr_idx_first_idx(idx);
1044
1045 while (cur_idx) {
1046 eol = sol + idx->v[cur_idx].len;
1047
1048 col = sol;
1049 while (col < eol && *col != ':')
1050 col++;
1051
1052 sov = col + 1;
1053 while (sov < eol && http_is_lws[(unsigned char)*sov])
1054 sov++;
1055
1056 for (h = cap_hdr; h; h = h->next) {
Willy Tarreau54da8db2014-06-13 16:11:48 +02001057 if (h->namelen && (h->namelen == col - sol) &&
Willy Tarreau117f59e2007-03-04 18:17:17 +01001058 (strncasecmp(sol, h->name, h->namelen) == 0)) {
1059 if (cap[h->index] == NULL)
1060 cap[h->index] =
Willy Tarreaucf7f3202007-05-13 22:46:04 +02001061 pool_alloc2(h->pool);
Willy Tarreau117f59e2007-03-04 18:17:17 +01001062
1063 if (cap[h->index] == NULL) {
1064 Alert("HTTP capture : out of memory.\n");
1065 continue;
1066 }
1067
1068 len = eol - sov;
1069 if (len > h->len)
1070 len = h->len;
1071
1072 memcpy(cap[h->index], sov, len);
1073 cap[h->index][len]=0;
1074 }
1075 }
1076 sol = eol + idx->v[cur_idx].cr + 1;
1077 cur_idx = idx->v[cur_idx].next;
1078 }
1079}
1080
1081
/* Jumps to label <bad> unless the character found at <ptr> is an LF. */
#define EXPECT_LF_HERE(ptr, bad) do {			\
		if (unlikely(*(ptr) != '\n'))		\
			goto bad;			\
	} while (0)
1085
/* Plays with the caller's variables <ptr>, <end> and <state>: skips one
 * character, then jumps to <good> if some data remain before <end>, otherwise
 * sets <state> to <st> and jumps to the caller's <http_msg_ood> label.
 */
#define EAT_AND_JUMP_OR_RETURN(good, st) do {		\
		if (likely(++ptr < end))		\
			goto good;			\
		state = (st);				\
		goto http_msg_ood;			\
	} while (0)
1098
1099
Willy Tarreaubaaee002006-06-26 02:48:02 +02001100/*
Willy Tarreaua15645d2007-03-18 16:22:39 +01001101 * This function parses a status line between <ptr> and <end>, starting with
Willy Tarreau8973c702007-01-21 23:58:29 +01001102 * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
1103 * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
1104 * will give undefined results.
1105 * Note that it is upon the caller's responsibility to ensure that ptr < end,
1106 * and that msg->sol points to the beginning of the response.
1107 * If a complete line is found (which implies that at least one CR or LF is
1108 * found before <end>, the updated <ptr> is returned, otherwise NULL is
1109 * returned indicating an incomplete line (which does not mean that parts have
1110 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
1111 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
1112 * upon next call.
1113 *
Willy Tarreau9cdde232007-05-02 20:58:19 +02001114 * This function was intentionally designed to be called from
Willy Tarreau8973c702007-01-21 23:58:29 +01001115 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
1116 * within its state machine and use the same macros, hence the need for same
Willy Tarreau9cdde232007-05-02 20:58:19 +02001117 * labels and variable names. Note that msg->sol is left unchanged.
Willy Tarreau8973c702007-01-21 23:58:29 +01001118 */
Willy Tarreau69d8c5d2012-05-08 09:44:41 +02001119const char *http_parse_stsline(struct http_msg *msg,
Willy Tarreau3770f232013-12-07 00:01:53 +01001120 enum ht_state state, const char *ptr, const char *end,
1121 unsigned int *ret_ptr, enum ht_state *ret_state)
Willy Tarreau8973c702007-01-21 23:58:29 +01001122{
Willy Tarreau9b28e032012-10-12 23:49:43 +02001123 const char *msg_start = msg->chn->buf->p;
Willy Tarreau62f791e2012-03-09 11:32:30 +01001124
Willy Tarreau8973c702007-01-21 23:58:29 +01001125 switch (state) {
Willy Tarreau8973c702007-01-21 23:58:29 +01001126 case HTTP_MSG_RPVER:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001127 http_msg_rpver:
Willy Tarreau4b89ad42007-03-04 18:13:58 +01001128 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau8973c702007-01-21 23:58:29 +01001129 EAT_AND_JUMP_OR_RETURN(http_msg_rpver, HTTP_MSG_RPVER);
1130
1131 if (likely(HTTP_IS_SPHT(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001132 msg->sl.st.v_l = ptr - msg_start;
Willy Tarreau8973c702007-01-21 23:58:29 +01001133 EAT_AND_JUMP_OR_RETURN(http_msg_rpver_sp, HTTP_MSG_RPVER_SP);
1134 }
Willy Tarreau7552c032009-03-01 11:10:40 +01001135 state = HTTP_MSG_ERROR;
1136 break;
1137
Willy Tarreau8973c702007-01-21 23:58:29 +01001138 case HTTP_MSG_RPVER_SP:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001139 http_msg_rpver_sp:
Willy Tarreau8973c702007-01-21 23:58:29 +01001140 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001141 msg->sl.st.c = ptr - msg_start;
Willy Tarreau8973c702007-01-21 23:58:29 +01001142 goto http_msg_rpcode;
1143 }
1144 if (likely(HTTP_IS_SPHT(*ptr)))
1145 EAT_AND_JUMP_OR_RETURN(http_msg_rpver_sp, HTTP_MSG_RPVER_SP);
1146 /* so it's a CR/LF, this is invalid */
Willy Tarreau7552c032009-03-01 11:10:40 +01001147 state = HTTP_MSG_ERROR;
1148 break;
Willy Tarreau8973c702007-01-21 23:58:29 +01001149
Willy Tarreau8973c702007-01-21 23:58:29 +01001150 case HTTP_MSG_RPCODE:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001151 http_msg_rpcode:
Willy Tarreau8973c702007-01-21 23:58:29 +01001152 if (likely(!HTTP_IS_LWS(*ptr)))
1153 EAT_AND_JUMP_OR_RETURN(http_msg_rpcode, HTTP_MSG_RPCODE);
1154
1155 if (likely(HTTP_IS_SPHT(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001156 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
Willy Tarreau8973c702007-01-21 23:58:29 +01001157 EAT_AND_JUMP_OR_RETURN(http_msg_rpcode_sp, HTTP_MSG_RPCODE_SP);
1158 }
1159
1160 /* so it's a CR/LF, so there is no reason phrase */
Willy Tarreauea1175a2012-03-05 15:52:30 +01001161 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
Willy Tarreau8973c702007-01-21 23:58:29 +01001162 http_msg_rsp_reason:
1163 /* FIXME: should we support HTTP responses without any reason phrase ? */
Willy Tarreauea1175a2012-03-05 15:52:30 +01001164 msg->sl.st.r = ptr - msg_start;
Willy Tarreau8973c702007-01-21 23:58:29 +01001165 msg->sl.st.r_l = 0;
1166 goto http_msg_rpline_eol;
1167
Willy Tarreau8973c702007-01-21 23:58:29 +01001168 case HTTP_MSG_RPCODE_SP:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001169 http_msg_rpcode_sp:
Willy Tarreau8973c702007-01-21 23:58:29 +01001170 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001171 msg->sl.st.r = ptr - msg_start;
Willy Tarreau8973c702007-01-21 23:58:29 +01001172 goto http_msg_rpreason;
1173 }
1174 if (likely(HTTP_IS_SPHT(*ptr)))
1175 EAT_AND_JUMP_OR_RETURN(http_msg_rpcode_sp, HTTP_MSG_RPCODE_SP);
1176 /* so it's a CR/LF, so there is no reason phrase */
1177 goto http_msg_rsp_reason;
1178
Willy Tarreau8973c702007-01-21 23:58:29 +01001179 case HTTP_MSG_RPREASON:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001180 http_msg_rpreason:
Willy Tarreau8973c702007-01-21 23:58:29 +01001181 if (likely(!HTTP_IS_CRLF(*ptr)))
1182 EAT_AND_JUMP_OR_RETURN(http_msg_rpreason, HTTP_MSG_RPREASON);
Willy Tarreauea1175a2012-03-05 15:52:30 +01001183 msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
Willy Tarreau8973c702007-01-21 23:58:29 +01001184 http_msg_rpline_eol:
1185 /* We have seen the end of line. Note that we do not
1186 * necessarily have the \n yet, but at least we know that we
1187 * have EITHER \r OR \n, otherwise the response would not be
1188 * complete. We can then record the response length and return
1189 * to the caller which will be able to register it.
1190 */
Willy Tarreau3a215be2012-03-09 21:39:51 +01001191 msg->sl.st.l = ptr - msg_start - msg->sol;
Willy Tarreau8973c702007-01-21 23:58:29 +01001192 return ptr;
1193
Willy Tarreau8973c702007-01-21 23:58:29 +01001194 default:
Willy Tarreau3770f232013-12-07 00:01:53 +01001195#ifdef DEBUG_FULL
Willy Tarreau8973c702007-01-21 23:58:29 +01001196 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
1197 exit(1);
1198#endif
Willy Tarreau3770f232013-12-07 00:01:53 +01001199 ;
Willy Tarreau8973c702007-01-21 23:58:29 +01001200 }
1201
1202 http_msg_ood:
Willy Tarreau7552c032009-03-01 11:10:40 +01001203 /* out of valid data */
Willy Tarreau8973c702007-01-21 23:58:29 +01001204 if (ret_state)
1205 *ret_state = state;
1206 if (ret_ptr)
Willy Tarreaua458b672012-03-05 11:17:50 +01001207 *ret_ptr = ptr - msg_start;
Willy Tarreau8973c702007-01-21 23:58:29 +01001208 return NULL;
Willy Tarreau8973c702007-01-21 23:58:29 +01001209}
1210
Willy Tarreau8973c702007-01-21 23:58:29 +01001211/*
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001212 * This function parses a request line between <ptr> and <end>, starting with
1213 * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
1214 * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
1215 * will give undefined results.
1216 * Note that it is upon the caller's responsibility to ensure that ptr < end,
1217 * and that msg->sol points to the beginning of the request.
1218 * If a complete line is found (which implies that at least one CR or LF is
1219 * found before <end>, the updated <ptr> is returned, otherwise NULL is
1220 * returned indicating an incomplete line (which does not mean that parts have
1221 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
1222 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
1223 * upon next call.
1224 *
Willy Tarreau9cdde232007-05-02 20:58:19 +02001225 * This function was intentionally designed to be called from
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001226 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
1227 * within its state machine and use the same macros, hence the need for same
Willy Tarreau9cdde232007-05-02 20:58:19 +02001228 * labels and variable names. Note that msg->sol is left unchanged.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001229 */
Willy Tarreau69d8c5d2012-05-08 09:44:41 +02001230const char *http_parse_reqline(struct http_msg *msg,
Willy Tarreau3770f232013-12-07 00:01:53 +01001231 enum ht_state state, const char *ptr, const char *end,
1232 unsigned int *ret_ptr, enum ht_state *ret_state)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001233{
Willy Tarreau9b28e032012-10-12 23:49:43 +02001234 const char *msg_start = msg->chn->buf->p;
Willy Tarreau62f791e2012-03-09 11:32:30 +01001235
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001236 switch (state) {
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001237 case HTTP_MSG_RQMETH:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001238 http_msg_rqmeth:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001239 if (likely(HTTP_IS_TOKEN(*ptr)))
1240 EAT_AND_JUMP_OR_RETURN(http_msg_rqmeth, HTTP_MSG_RQMETH);
Willy Tarreau58f10d72006-12-04 02:26:12 +01001241
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001242 if (likely(HTTP_IS_SPHT(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001243 msg->sl.rq.m_l = ptr - msg_start;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001244 EAT_AND_JUMP_OR_RETURN(http_msg_rqmeth_sp, HTTP_MSG_RQMETH_SP);
1245 }
Willy Tarreau58f10d72006-12-04 02:26:12 +01001246
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001247 if (likely(HTTP_IS_CRLF(*ptr))) {
1248 /* HTTP 0.9 request */
Willy Tarreauea1175a2012-03-05 15:52:30 +01001249 msg->sl.rq.m_l = ptr - msg_start;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001250 http_msg_req09_uri:
Willy Tarreauea1175a2012-03-05 15:52:30 +01001251 msg->sl.rq.u = ptr - msg_start;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001252 http_msg_req09_uri_e:
Willy Tarreauea1175a2012-03-05 15:52:30 +01001253 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001254 http_msg_req09_ver:
Willy Tarreauea1175a2012-03-05 15:52:30 +01001255 msg->sl.rq.v = ptr - msg_start;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001256 msg->sl.rq.v_l = 0;
1257 goto http_msg_rqline_eol;
1258 }
Willy Tarreau7552c032009-03-01 11:10:40 +01001259 state = HTTP_MSG_ERROR;
1260 break;
1261
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001262 case HTTP_MSG_RQMETH_SP:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001263 http_msg_rqmeth_sp:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001264 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001265 msg->sl.rq.u = ptr - msg_start;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001266 goto http_msg_rquri;
1267 }
1268 if (likely(HTTP_IS_SPHT(*ptr)))
1269 EAT_AND_JUMP_OR_RETURN(http_msg_rqmeth_sp, HTTP_MSG_RQMETH_SP);
1270 /* so it's a CR/LF, meaning an HTTP 0.9 request */
1271 goto http_msg_req09_uri;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001272
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001273 case HTTP_MSG_RQURI:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001274 http_msg_rquri:
Willy Tarreau2e9506d2012-01-07 23:22:31 +01001275 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001276 EAT_AND_JUMP_OR_RETURN(http_msg_rquri, HTTP_MSG_RQURI);
Willy Tarreau58f10d72006-12-04 02:26:12 +01001277
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001278 if (likely(HTTP_IS_SPHT(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001279 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001280 EAT_AND_JUMP_OR_RETURN(http_msg_rquri_sp, HTTP_MSG_RQURI_SP);
1281 }
Willy Tarreau58f10d72006-12-04 02:26:12 +01001282
Willy Tarreau2e9506d2012-01-07 23:22:31 +01001283 if (likely((unsigned char)*ptr >= 128)) {
Willy Tarreau422246e2012-01-07 23:54:13 +01001284 /* non-ASCII chars are forbidden unless option
1285 * accept-invalid-http-request is enabled in the frontend.
1286 * In any case, we capture the faulty char.
Willy Tarreau2e9506d2012-01-07 23:22:31 +01001287 */
Willy Tarreau422246e2012-01-07 23:54:13 +01001288 if (msg->err_pos < -1)
1289 goto invalid_char;
1290 if (msg->err_pos == -1)
Willy Tarreau69d8c5d2012-05-08 09:44:41 +02001291 msg->err_pos = ptr - msg_start;
Willy Tarreau2e9506d2012-01-07 23:22:31 +01001292 EAT_AND_JUMP_OR_RETURN(http_msg_rquri, HTTP_MSG_RQURI);
1293 }
1294
1295 if (likely(HTTP_IS_CRLF(*ptr))) {
1296 /* so it's a CR/LF, meaning an HTTP 0.9 request */
1297 goto http_msg_req09_uri_e;
1298 }
1299
1300 /* OK forbidden chars, 0..31 or 127 */
Willy Tarreau422246e2012-01-07 23:54:13 +01001301 invalid_char:
Willy Tarreau69d8c5d2012-05-08 09:44:41 +02001302 msg->err_pos = ptr - msg_start;
Willy Tarreau2e9506d2012-01-07 23:22:31 +01001303 state = HTTP_MSG_ERROR;
1304 break;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001305
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001306 case HTTP_MSG_RQURI_SP:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001307 http_msg_rquri_sp:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001308 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001309 msg->sl.rq.v = ptr - msg_start;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001310 goto http_msg_rqver;
1311 }
1312 if (likely(HTTP_IS_SPHT(*ptr)))
1313 EAT_AND_JUMP_OR_RETURN(http_msg_rquri_sp, HTTP_MSG_RQURI_SP);
1314 /* so it's a CR/LF, meaning an HTTP 0.9 request */
1315 goto http_msg_req09_ver;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001316
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001317 case HTTP_MSG_RQVER:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001318 http_msg_rqver:
Willy Tarreau4b89ad42007-03-04 18:13:58 +01001319 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001320 EAT_AND_JUMP_OR_RETURN(http_msg_rqver, HTTP_MSG_RQVER);
Willy Tarreau4b89ad42007-03-04 18:13:58 +01001321
1322 if (likely(HTTP_IS_CRLF(*ptr))) {
Willy Tarreauea1175a2012-03-05 15:52:30 +01001323 msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
Willy Tarreau4b89ad42007-03-04 18:13:58 +01001324 http_msg_rqline_eol:
1325 /* We have seen the end of line. Note that we do not
1326 * necessarily have the \n yet, but at least we know that we
1327 * have EITHER \r OR \n, otherwise the request would not be
1328 * complete. We can then record the request length and return
1329 * to the caller which will be able to register it.
1330 */
Willy Tarreau3a215be2012-03-09 21:39:51 +01001331 msg->sl.rq.l = ptr - msg_start - msg->sol;
Willy Tarreau4b89ad42007-03-04 18:13:58 +01001332 return ptr;
1333 }
1334
1335 /* neither an HTTP_VER token nor a CRLF */
Willy Tarreau7552c032009-03-01 11:10:40 +01001336 state = HTTP_MSG_ERROR;
1337 break;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001338
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001339 default:
Willy Tarreau3770f232013-12-07 00:01:53 +01001340#ifdef DEBUG_FULL
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001341 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
1342 exit(1);
1343#endif
Willy Tarreau3770f232013-12-07 00:01:53 +01001344 ;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001345 }
Willy Tarreau58f10d72006-12-04 02:26:12 +01001346
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001347 http_msg_ood:
Willy Tarreau7552c032009-03-01 11:10:40 +01001348 /* out of valid data */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001349 if (ret_state)
1350 *ret_state = state;
1351 if (ret_ptr)
Willy Tarreaua458b672012-03-05 11:17:50 +01001352 *ret_ptr = ptr - msg_start;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001353 return NULL;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001354}
Willy Tarreau58f10d72006-12-04 02:26:12 +01001355
Krzysztof Piotr Oledzkif9423ae2010-01-29 19:26:18 +01001356/*
1357 * Returns the data from Authorization header. Function may be called more
1358 * than once so data is stored in txn->auth_data. When no header is found
1359 * or auth method is unknown auth_method is set to HTTP_AUTH_WRONG to avoid
Thierry FOURNIER98d96952014-01-23 12:13:02 +01001360 * searching again for something we are unable to find anyway. However, if
1361 * the result if valid, the cache is not reused because we would risk to
1362 * have the credentials overwritten by another session in parallel.
Krzysztof Piotr Oledzkif9423ae2010-01-29 19:26:18 +01001363 */
1364
Thierry FOURNIER9eec0a62014-01-22 18:38:02 +01001365/* This bufffer is initialized in the file 'src/haproxy.c'. This length is
1366 * set according to global.tune.bufsize.
1367 */
Willy Tarreau7e2c6472012-10-29 20:44:36 +01001368char *get_http_auth_buff;
Krzysztof Piotr Oledzkif9423ae2010-01-29 19:26:18 +01001369
1370int
1371get_http_auth(struct session *s)
1372{
1373
1374 struct http_txn *txn = &s->txn;
1375 struct chunk auth_method;
1376 struct hdr_ctx ctx;
1377 char *h, *p;
1378 int len;
1379
1380#ifdef DEBUG_AUTH
1381 printf("Auth for session %p: %d\n", s, txn->auth.method);
1382#endif
1383
1384 if (txn->auth.method == HTTP_AUTH_WRONG)
1385 return 0;
1386
Krzysztof Piotr Oledzkif9423ae2010-01-29 19:26:18 +01001387 txn->auth.method = HTTP_AUTH_WRONG;
1388
1389 ctx.idx = 0;
Willy Tarreau844a7e72010-01-31 21:46:18 +01001390
1391 if (txn->flags & TX_USE_PX_CONN) {
1392 h = "Proxy-Authorization";
1393 len = strlen(h);
1394 } else {
1395 h = "Authorization";
1396 len = strlen(h);
1397 }
1398
Willy Tarreau9b28e032012-10-12 23:49:43 +02001399 if (!http_find_header2(h, len, s->req->buf->p, &txn->hdr_idx, &ctx))
Krzysztof Piotr Oledzkif9423ae2010-01-29 19:26:18 +01001400 return 0;
1401
1402 h = ctx.line + ctx.val;
1403
1404 p = memchr(h, ' ', ctx.vlen);
1405 if (!p || p == h)
1406 return 0;
1407
1408 chunk_initlen(&auth_method, h, 0, p-h);
1409 chunk_initlen(&txn->auth.method_data, p+1, 0, ctx.vlen-(p-h)-1);
1410
1411 if (!strncasecmp("Basic", auth_method.str, auth_method.len)) {
1412
1413 len = base64dec(txn->auth.method_data.str, txn->auth.method_data.len,
Willy Tarreau7e2c6472012-10-29 20:44:36 +01001414 get_http_auth_buff, global.tune.bufsize - 1);
Krzysztof Piotr Oledzkif9423ae2010-01-29 19:26:18 +01001415
1416 if (len < 0)
1417 return 0;
1418
1419
1420 get_http_auth_buff[len] = '\0';
1421
1422 p = strchr(get_http_auth_buff, ':');
1423
1424 if (!p)
1425 return 0;
1426
1427 txn->auth.user = get_http_auth_buff;
1428 *p = '\0';
1429 txn->auth.pass = p+1;
1430
1431 txn->auth.method = HTTP_AUTH_BASIC;
1432 return 1;
1433 }
1434
1435 return 0;
1436}
1437
Willy Tarreau58f10d72006-12-04 02:26:12 +01001438
Willy Tarreau8973c702007-01-21 23:58:29 +01001439/*
1440 * This function parses an HTTP message, either a request or a response,
Willy Tarreau8b1323e2012-03-09 14:46:19 +01001441 * depending on the initial msg->msg_state. The caller is responsible for
1442 * ensuring that the message does not wrap. The function can be preempted
1443 * everywhere when data are missing and recalled at the exact same location
1444 * with no information loss. The message may even be realigned between two
1445 * calls. The header index is re-initialized when switching from
Willy Tarreau9cdde232007-05-02 20:58:19 +02001446 * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other
Willy Tarreau26927362012-05-18 23:22:52 +02001447 * fields. Note that msg->sol will be initialized after completing the first
1448 * state, so that none of the msg pointers has to be initialized prior to the
1449 * first call.
 *
 * Parsing resumes at buf->p + msg->next and never goes past buf->p + buf->i,
 * where buf is msg->chn->buf. Nothing is returned; the outcome is left in
 * <msg> :
 *   - out of data  : msg->msg_state is the state to resume from and
 *                    msg->next the offset of the next byte to parse ;
 *   - parse error  : msg->msg_state is HTTP_MSG_ERROR and msg->next the
 *                    offset at which parsing stopped ;
 *   - headers done : msg->msg_state is HTTP_MSG_BODY, msg->sov and
 *                    msg->next are the offset just past the final LF,
 *                    msg->eoh is the offset of the first CR or LF of the
 *                    empty line, msg->eol is sov - eoh and msg->sol is
 *                    reset to 0.
 * Each completed header line is registered in <idx> using hdr_idx_add().
 *
 * NOTE(review): in the code below hdr_idx_init() is only called on the
 * RPBEFORE -> RPVER transition. The RQBEFORE -> RQMETH transition does not
 * call it, so for requests the index is presumably initialized by the
 * caller - to be confirmed.
Willy Tarreau8973c702007-01-21 23:58:29 +01001450 */
Willy Tarreaua560c212012-03-09 13:50:57 +01001451void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001452{
Willy Tarreau3770f232013-12-07 00:01:53 +01001453 enum ht_state state; /* updated only when leaving the FSM */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001454 register char *ptr, *end; /* request pointers, to avoid dereferences */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001455 struct buffer *buf;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001456
	/* Resume where the previous call stopped: <ptr> is the next byte to
	 * parse and <end> is the limit that <ptr> must not reach.
	 */
Willy Tarreaub326fcc2007-03-03 13:54:32 +01001457 state = msg->msg_state;
Willy Tarreau9b28e032012-10-12 23:49:43 +02001458 buf = msg->chn->buf;
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001459 ptr = buf->p + msg->next;
1460 end = buf->p + buf->i;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001461
	/* nothing to parse yet: state and position are stored back unchanged */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001462 if (unlikely(ptr >= end))
1463 goto http_msg_ood;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001464
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001465 switch (state) {
Willy Tarreau8973c702007-01-21 23:58:29 +01001466 /*
1467 * First, states that are specific to the response only.
1468 * We check them first so that request and headers are
1469 * closer to each other (accessed more often).
1470 */
Willy Tarreau8973c702007-01-21 23:58:29 +01001471 case HTTP_MSG_RPBEFORE:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001472 http_msg_rpbefore:
Willy Tarreau8973c702007-01-21 23:58:29 +01001473 if (likely(HTTP_IS_TOKEN(*ptr))) {
Willy Tarreau15de77e2010-01-02 21:59:16 +01001474 /* we have a start of message, but we have to check
1475 * first if we need to remove some CRLF. We can only
Willy Tarreau2e046c62012-03-01 16:08:30 +01001476 * do this when o=0.
Willy Tarreau15de77e2010-01-02 21:59:16 +01001477 */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001478 if (unlikely(ptr != buf->p)) {
		/* when buf->o is not zero, we leave with the state unchanged
		 * and msg->next on this byte so that the same check is done
		 * again on the next call.
		 */
1479 if (buf->o)
Willy Tarreau15de77e2010-01-02 21:59:16 +01001480 goto http_msg_ood;
Willy Tarreau1d3bcce2009-12-27 15:50:06 +01001481 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001482 bi_fast_delete(buf, ptr - buf->p);
Willy Tarreau8973c702007-01-21 23:58:29 +01001483 }
Willy Tarreau26927362012-05-18 23:22:52 +02001484 msg->sol = 0;
Willy Tarreaue92693a2012-09-24 21:13:39 +02001485 msg->sl.st.l = 0; /* used in debug mode */
Willy Tarreau8973c702007-01-21 23:58:29 +01001486 hdr_idx_init(idx);
1487 state = HTTP_MSG_RPVER;
1488 goto http_msg_rpver;
1489 }
1490
	/* not a token: only CR and LF are tolerated before the message */
1491 if (unlikely(!HTTP_IS_CRLF(*ptr)))
1492 goto http_msg_invalid;
1493
1494 if (unlikely(*ptr == '\n'))
1495 EAT_AND_JUMP_OR_RETURN(http_msg_rpbefore, HTTP_MSG_RPBEFORE);
1496 EAT_AND_JUMP_OR_RETURN(http_msg_rpbefore_cr, HTTP_MSG_RPBEFORE_CR);
	/* NOTE(review): the "stop here" markers suggest that
	 * EAT_AND_JUMP_OR_RETURN always leaves the current point, so that the
	 * next case is never entered by fall-through - to be confirmed against
	 * the macro definition, which is not part of this function.
	 */
1497 /* stop here */
1498
Willy Tarreau8973c702007-01-21 23:58:29 +01001499 case HTTP_MSG_RPBEFORE_CR:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001500 http_msg_rpbefore_cr:
Willy Tarreau8973c702007-01-21 23:58:29 +01001501 EXPECT_LF_HERE(ptr, http_msg_invalid);
1502 EAT_AND_JUMP_OR_RETURN(http_msg_rpbefore, HTTP_MSG_RPBEFORE);
1503 /* stop here */
1504
	/* All the states of the status line are delegated to
	 * http_parse_stsline(), which is given &msg->next and &msg->msg_state.
	 * When it returns NULL we return at once without writing the state or
	 * the position here (presumably the parser stored them through these
	 * two pointers - to be confirmed).
	 */
Willy Tarreau8973c702007-01-21 23:58:29 +01001505 case HTTP_MSG_RPVER:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001506 http_msg_rpver:
Willy Tarreau8973c702007-01-21 23:58:29 +01001507 case HTTP_MSG_RPVER_SP:
1508 case HTTP_MSG_RPCODE:
1509 case HTTP_MSG_RPCODE_SP:
1510 case HTTP_MSG_RPREASON:
Willy Tarreau69d8c5d2012-05-08 09:44:41 +02001511 ptr = (char *)http_parse_stsline(msg,
Willy Tarreaua458b672012-03-05 11:17:50 +01001512 state, ptr, end,
1513 &msg->next, &msg->msg_state);
Willy Tarreau8973c702007-01-21 23:58:29 +01001514 if (unlikely(!ptr))
1515 return;
1516
1517 /* we have a full response and we know that we have either a CR
1518 * or an LF at <ptr>.
1519 */
Willy Tarreau8973c702007-01-21 23:58:29 +01001520 hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
1521
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001522 msg->sol = ptr - buf->p;
Willy Tarreau8973c702007-01-21 23:58:29 +01001523 if (likely(*ptr == '\r'))
1524 EAT_AND_JUMP_OR_RETURN(http_msg_rpline_end, HTTP_MSG_RPLINE_END);
1525 goto http_msg_rpline_end;
1526
Willy Tarreau8973c702007-01-21 23:58:29 +01001527 case HTTP_MSG_RPLINE_END:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001528 http_msg_rpline_end:
Willy Tarreau8973c702007-01-21 23:58:29 +01001529 /* msg->sol must point to the first of CR or LF. */
1530 EXPECT_LF_HERE(ptr, http_msg_invalid);
1531 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_first, HTTP_MSG_HDR_FIRST);
1532 /* stop here */
1533
1534 /*
1535 * Second, states that are specific to the request only
1536 */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001537 case HTTP_MSG_RQBEFORE:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001538 http_msg_rqbefore:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001539 if (likely(HTTP_IS_TOKEN(*ptr))) {
Willy Tarreau15de77e2010-01-02 21:59:16 +01001540 /* we have a start of message, but we have to check
1541 * first if we need to remove some CRLF. We can only
Willy Tarreau2e046c62012-03-01 16:08:30 +01001542 * do this when o=0.
Willy Tarreau15de77e2010-01-02 21:59:16 +01001543 */
		/* NOTE(review): this test is hinted likely() here while the
		 * same test on the response side is hinted unlikely().
		 */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001544 if (likely(ptr != buf->p)) {
1545 if (buf->o)
Willy Tarreau15de77e2010-01-02 21:59:16 +01001546 goto http_msg_ood;
Willy Tarreau1d3bcce2009-12-27 15:50:06 +01001547 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001548 bi_fast_delete(buf, ptr - buf->p);
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001549 }
Willy Tarreau26927362012-05-18 23:22:52 +02001550 msg->sol = 0;
Willy Tarreaue92693a2012-09-24 21:13:39 +02001551 msg->sl.rq.l = 0; /* used in debug mode */
Willy Tarreau8973c702007-01-21 23:58:29 +01001552 state = HTTP_MSG_RQMETH;
1553 goto http_msg_rqmeth;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001554 }
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001555
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001556 if (unlikely(!HTTP_IS_CRLF(*ptr)))
1557 goto http_msg_invalid;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001558
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001559 if (unlikely(*ptr == '\n'))
1560 EAT_AND_JUMP_OR_RETURN(http_msg_rqbefore, HTTP_MSG_RQBEFORE);
1561 EAT_AND_JUMP_OR_RETURN(http_msg_rqbefore_cr, HTTP_MSG_RQBEFORE_CR);
Willy Tarreau8973c702007-01-21 23:58:29 +01001562 /* stop here */
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001563
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001564 case HTTP_MSG_RQBEFORE_CR:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001565 http_msg_rqbefore_cr:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001566 EXPECT_LF_HERE(ptr, http_msg_invalid);
1567 EAT_AND_JUMP_OR_RETURN(http_msg_rqbefore, HTTP_MSG_RQBEFORE);
Willy Tarreau8973c702007-01-21 23:58:29 +01001568 /* stop here */
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001569
	/* All the states of the request line are delegated to
	 * http_parse_reqline(), which is given &msg->next and &msg->msg_state.
	 * A NULL return makes us return at once without writing the state or
	 * the position here.
	 */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001570 case HTTP_MSG_RQMETH:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001571 http_msg_rqmeth:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001572 case HTTP_MSG_RQMETH_SP:
1573 case HTTP_MSG_RQURI:
1574 case HTTP_MSG_RQURI_SP:
1575 case HTTP_MSG_RQVER:
Willy Tarreau69d8c5d2012-05-08 09:44:41 +02001576 ptr = (char *)http_parse_reqline(msg,
Willy Tarreaua458b672012-03-05 11:17:50 +01001577 state, ptr, end,
1578 &msg->next, &msg->msg_state);
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001579 if (unlikely(!ptr))
1580 return;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001581
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001582 /* we have a full request and we know that we have either a CR
1583 * or an LF at <ptr>.
1584 */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001585 hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001586
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001587 msg->sol = ptr - buf->p;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001588 if (likely(*ptr == '\r'))
1589 EAT_AND_JUMP_OR_RETURN(http_msg_rqline_end, HTTP_MSG_RQLINE_END);
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001590 goto http_msg_rqline_end;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001591
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001592 case HTTP_MSG_RQLINE_END:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001593 http_msg_rqline_end:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001594 /* check for HTTP/0.9 request : no version information available.
1595 * msg->sol must point to the first of CR or LF.
1596 */
	/* an empty version skips header parsing: go straight to the final LF */
1597 if (unlikely(msg->sl.rq.v_l == 0))
1598 goto http_msg_last_lf;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001599
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001600 EXPECT_LF_HERE(ptr, http_msg_invalid);
1601 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_first, HTTP_MSG_HDR_FIRST);
Willy Tarreau8973c702007-01-21 23:58:29 +01001602 /* stop here */
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001603
Willy Tarreau8973c702007-01-21 23:58:29 +01001604 /*
1605 * Common states below
1606 */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001607 case HTTP_MSG_HDR_FIRST:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001608 http_msg_hdr_first:
	/* msg->sol now is the offset of the first byte after the start line */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001609 msg->sol = ptr - buf->p;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001610 if (likely(!HTTP_IS_CRLF(*ptr))) {
1611 goto http_msg_hdr_name;
1612 }
1613
1614 if (likely(*ptr == '\r'))
1615 EAT_AND_JUMP_OR_RETURN(http_msg_last_lf, HTTP_MSG_LAST_LF);
1616 goto http_msg_last_lf;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001617
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001618 case HTTP_MSG_HDR_NAME:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001619 http_msg_hdr_name:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001620 /* assumes msg->sol points to the first char */
1621 if (likely(HTTP_IS_TOKEN(*ptr)))
1622 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_name, HTTP_MSG_HDR_NAME);
Willy Tarreau58f10d72006-12-04 02:26:12 +01001623
Willy Tarreaufa4a03c2012-03-09 21:28:54 +01001624 if (likely(*ptr == ':'))
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001625 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_l1_sp, HTTP_MSG_HDR_L1_SP);
Willy Tarreau58f10d72006-12-04 02:26:12 +01001626
	/* Non-token character in a header name: it is rejected when
	 * msg->err_pos is below -1 or when the character is an LF. Otherwise
	 * the offset of the first such character is recorded in msg->err_pos
	 * (only while it still is -1) and the character is accepted as part
	 * of the name.
	 */
Willy Tarreau32a4ec02009-04-02 11:35:18 +02001627 if (likely(msg->err_pos < -1) || *ptr == '\n')
1628 goto http_msg_invalid;
1629
1630 if (msg->err_pos == -1) /* capture error pointer */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001631 msg->err_pos = ptr - buf->p; /* >= 0 now */
Willy Tarreau32a4ec02009-04-02 11:35:18 +02001632
1633 /* and we still accept this non-token character */
1634 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_name, HTTP_MSG_HDR_NAME);
Willy Tarreau230fd0b2006-12-17 12:05:00 +01001635
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001636 case HTTP_MSG_HDR_L1_SP:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001637 http_msg_hdr_l1_sp:
Willy Tarreaufa4a03c2012-03-09 21:28:54 +01001638 /* assumes msg->sol points to the first char */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001639 if (likely(HTTP_IS_SPHT(*ptr)))
1640 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_l1_sp, HTTP_MSG_HDR_L1_SP);
Willy Tarreau230fd0b2006-12-17 12:05:00 +01001641
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001642 /* header value can be basically anything except CR/LF */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001643 msg->sov = ptr - buf->p;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001644
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001645 if (likely(!HTTP_IS_CRLF(*ptr))) {
1646 goto http_msg_hdr_val;
1647 }
1648
1649 if (likely(*ptr == '\r'))
1650 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_l1_lf, HTTP_MSG_HDR_L1_LF);
1651 goto http_msg_hdr_l1_lf;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001652
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001653 case HTTP_MSG_HDR_L1_LF:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001654 http_msg_hdr_l1_lf:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001655 EXPECT_LF_HERE(ptr, http_msg_invalid);
1656 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_l1_lws, HTTP_MSG_HDR_L1_LWS);
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001657
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001658 case HTTP_MSG_HDR_L1_LWS:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001659 http_msg_hdr_l1_lws:
	/* A space or tab right after the line break means the value goes on
	 * on this line: every byte from msg->sov up to <ptr> is overwritten
	 * with a space and leading spaces are skipped again.
	 */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001660 if (likely(HTTP_IS_SPHT(*ptr))) {
1661 /* replace HT,CR,LF with spaces */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001662 for (; buf->p + msg->sov < ptr; msg->sov++)
1663 buf->p[msg->sov] = ' ';
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001664 goto http_msg_hdr_l1_sp;
1665 }
Willy Tarreauaa9dce32007-03-18 23:50:16 +01001666 /* we had a header consisting only in spaces ! */
Willy Tarreau12e48b32012-03-05 16:57:34 +01001667 msg->eol = msg->sov;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001668 goto http_msg_complete_header;
1669
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001670 case HTTP_MSG_HDR_VAL:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001671 http_msg_hdr_val:
Willy Tarreaufa4a03c2012-03-09 21:28:54 +01001672 /* assumes msg->sol points to the first char, and msg->sov
1673 * points to the first character of the value.
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001674 */
1675 if (likely(!HTTP_IS_CRLF(*ptr)))
1676 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_val, HTTP_MSG_HDR_VAL);
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001677
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001678 msg->eol = ptr - buf->p;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001679 /* Note: we could also copy eol into ->eoh so that we have the
1680 * real header end in case it ends with lots of LWS, but is this
1681 * really needed ?
1682 */
1683 if (likely(*ptr == '\r'))
1684 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_l2_lf, HTTP_MSG_HDR_L2_LF);
1685 goto http_msg_hdr_l2_lf;
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001686
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001687 case HTTP_MSG_HDR_L2_LF:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001688 http_msg_hdr_l2_lf:
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001689 EXPECT_LF_HERE(ptr, http_msg_invalid);
1690 EAT_AND_JUMP_OR_RETURN(http_msg_hdr_l2_lws, HTTP_MSG_HDR_L2_LWS);
Willy Tarreau976f1ee2006-12-17 10:06:03 +01001691
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001692 case HTTP_MSG_HDR_L2_LWS:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001693 http_msg_hdr_l2_lws:
	/* Same as above but after a non-empty value: the bytes from msg->eol
	 * up to <ptr> are overwritten with spaces and the value goes on.
	 */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001694 if (unlikely(HTTP_IS_SPHT(*ptr))) {
1695 /* LWS: replace HT,CR,LF with spaces */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001696 for (; buf->p + msg->eol < ptr; msg->eol++)
1697 buf->p[msg->eol] = ' ';
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001698 goto http_msg_hdr_val;
1699 }
1700 http_msg_complete_header:
1701 /*
1702 * It was a new header, so the last one is finished.
Willy Tarreaufa4a03c2012-03-09 21:28:54 +01001703 * Assumes msg->sol points to the first char, msg->sov points
1704 * to the first character of the value and msg->eol to the
1705 * first CR or LF so we know how the line ends. We insert last
1706 * header into the index.
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001707 */
	/* a negative return from hdr_idx_add() makes the message invalid */
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001708 if (unlikely(hdr_idx_add(msg->eol - msg->sol, buf->p[msg->eol] == '\r',
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001709 idx, idx->tail) < 0))
1710 goto http_msg_invalid;
Willy Tarreau230fd0b2006-12-17 12:05:00 +01001711
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001712 msg->sol = ptr - buf->p;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001713 if (likely(!HTTP_IS_CRLF(*ptr))) {
1714 goto http_msg_hdr_name;
1715 }
1716
1717 if (likely(*ptr == '\r'))
1718 EAT_AND_JUMP_OR_RETURN(http_msg_last_lf, HTTP_MSG_LAST_LF);
1719 goto http_msg_last_lf;
Willy Tarreau230fd0b2006-12-17 12:05:00 +01001720
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001721 case HTTP_MSG_LAST_LF:
Willy Tarreaue3f284a2010-09-28 19:42:42 +02001722 http_msg_last_lf:
Willy Tarreau0558a022014-03-13 15:48:45 +01001723 /* Assumes msg->sol points to the first of either CR or LF.
1724 * Sets ->sov and ->next to the total header length, ->eoh to
1725 * the last CRLF, and ->eol to the last CRLF length (1 or 2).
1726 */
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001727 EXPECT_LF_HERE(ptr, http_msg_invalid);
1728 ptr++;
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001729 msg->sov = msg->next = ptr - buf->p;
Willy Tarreau3a215be2012-03-09 21:39:51 +01001730 msg->eoh = msg->sol;
1731 msg->sol = 0;
Willy Tarreau0558a022014-03-13 15:48:45 +01001732 msg->eol = msg->sov - msg->eoh;
Willy Tarreaub326fcc2007-03-03 13:54:32 +01001733 msg->msg_state = HTTP_MSG_BODY;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001734 return;
Willy Tarreaub56928a2012-04-16 14:51:55 +02001735
1736 case HTTP_MSG_ERROR:
1737 /* this may only happen if we call http_msg_analyzer() twice with an error */
1738 break;
1739
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001740 default:
Willy Tarreau3770f232013-12-07 00:01:53 +01001741#ifdef DEBUG_FULL
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001742 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
1743 exit(1);
Willy Tarreau230fd0b2006-12-17 12:05:00 +01001744#endif
Willy Tarreau3770f232013-12-07 00:01:53 +01001745 ;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001746 }
	/* Also reached after the switch (error and unknown states): the
	 * current state and position are simply stored back into <msg>.
	 */
1747 http_msg_ood:
1748 /* out of data */
Willy Tarreaub326fcc2007-03-03 13:54:32 +01001749 msg->msg_state = state;
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001750 msg->next = ptr - buf->p;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001751 return;
Willy Tarreau58f10d72006-12-04 02:26:12 +01001752
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001753 http_msg_invalid:
1754 /* invalid message */
Willy Tarreaub326fcc2007-03-03 13:54:32 +01001755 msg->msg_state = HTTP_MSG_ERROR;
Willy Tarreaucdbdd522012-10-12 22:51:15 +02001756 msg->next = ptr - buf->p;
Willy Tarreau8d5d7f22007-01-21 19:16:41 +01001757 return;
1758}
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001759
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001760/* convert an HTTP/0.9 request into an HTTP/1.0 request. Returns 1 if the
1761 * conversion succeeded, 0 in case of error. If the request was already 1.X,
1762 * nothing is done and 1 is returned.
1763 */
Willy Tarreau418bfcc2012-03-09 13:56:20 +01001764static int http_upgrade_v09_to_v10(struct http_txn *txn)
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001765{
1766 int delta;
1767 char *cur_end;
Willy Tarreau418bfcc2012-03-09 13:56:20 +01001768 struct http_msg *msg = &txn->req;
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001769
1770 if (msg->sl.rq.v_l != 0)
1771 return 1;
1772
Apollon Oikonomopoulos25a15222014-04-06 02:46:00 +03001773 /* RFC 1945 allows only GET for HTTP/0.9 requests */
1774 if (txn->meth != HTTP_METH_GET)
1775 return 0;
1776
Willy Tarreau9b28e032012-10-12 23:49:43 +02001777 cur_end = msg->chn->buf->p + msg->sl.rq.l;
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001778 delta = 0;
1779
1780 if (msg->sl.rq.u_l == 0) {
Apollon Oikonomopoulos25a15222014-04-06 02:46:00 +03001781 /* HTTP/0.9 requests *must* have a request URI, per RFC 1945 */
1782 return 0;
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001783 }
1784 /* add HTTP version */
Willy Tarreau9b28e032012-10-12 23:49:43 +02001785 delta = buffer_replace2(msg->chn->buf, cur_end, cur_end, " HTTP/1.0\r\n", 11);
Willy Tarreaufa355d42009-11-29 18:12:29 +01001786 http_msg_move_end(msg, delta);
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001787 cur_end += delta;
Willy Tarreau69d8c5d2012-05-08 09:44:41 +02001788 cur_end = (char *)http_parse_reqline(msg,
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001789 HTTP_MSG_RQMETH,
Willy Tarreau9b28e032012-10-12 23:49:43 +02001790 msg->chn->buf->p, cur_end + 1,
Willy Tarreau2492d5b2009-07-11 00:06:00 +02001791 NULL, NULL);
1792 if (unlikely(!cur_end))
1793 return 0;
1794
1795 /* we have a full HTTP/1.0 request now and we know that
1796 * we have either a CR or an LF at <ptr>.
1797 */
1798 hdr_idx_set_start(&txn->hdr_idx, msg->sl.rq.l, *cur_end == '\r');
1799 return 1;
1800}
1801
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001802/* Parse the Connection: header of an HTTP request, looking for both "close"
Willy Tarreau6acf7c92012-03-09 13:30:45 +01001803 * and "keep-alive" values. If we already know that some headers may safely
1804 * be removed, we remove them now. The <to_del> flags are used for that :
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001805 * - bit 0 means remove "close" headers (in HTTP/1.0 requests/responses)
1806 * - bit 1 means remove "keep-alive" headers (in HTTP/1.1 reqs/resp to 1.1).
Willy Tarreau50fc7772012-11-11 22:19:57 +01001807 * Presence of the "Upgrade" token is also checked and reported.
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001808 * The TX_HDR_CONN_* flags are adjusted in txn->flags depending on what was
1809 * found, and TX_CON_*_SET is adjusted depending on what is left so only
1810 * harmless combinations may be removed. Do not call that after changes have
Willy Tarreau6acf7c92012-03-09 13:30:45 +01001811 * been processed.
Willy Tarreau5b154472009-12-21 20:11:07 +01001812 */
Willy Tarreau6acf7c92012-03-09 13:30:45 +01001813void http_parse_connection_header(struct http_txn *txn, struct http_msg *msg, int to_del)
Willy Tarreau5b154472009-12-21 20:11:07 +01001814{
Willy Tarreau5b154472009-12-21 20:11:07 +01001815 struct hdr_ctx ctx;
Willy Tarreau88d349d2010-01-25 12:15:43 +01001816 const char *hdr_val = "Connection";
1817 int hdr_len = 10;
Willy Tarreau5b154472009-12-21 20:11:07 +01001818
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001819 if (txn->flags & TX_HDR_CONN_PRS)
Willy Tarreau5b154472009-12-21 20:11:07 +01001820 return;
1821
Willy Tarreau88d349d2010-01-25 12:15:43 +01001822 if (unlikely(txn->flags & TX_USE_PX_CONN)) {
1823 hdr_val = "Proxy-Connection";
1824 hdr_len = 16;
1825 }
1826
Willy Tarreau5b154472009-12-21 20:11:07 +01001827 ctx.idx = 0;
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001828 txn->flags &= ~(TX_CON_KAL_SET|TX_CON_CLO_SET);
Willy Tarreau9b28e032012-10-12 23:49:43 +02001829 while (http_find_header2(hdr_val, hdr_len, msg->chn->buf->p, &txn->hdr_idx, &ctx)) {
Willy Tarreaubbf0b372010-01-18 16:54:40 +01001830 if (ctx.vlen >= 10 && word_match(ctx.line + ctx.val, ctx.vlen, "keep-alive", 10)) {
1831 txn->flags |= TX_HDR_CONN_KAL;
Willy Tarreau