blob: 66a91dba776c7b2d2bfd26a99546baf448883abe [file] [log] [blame]
Willy Tarreau35b51c62018-09-10 15:38:55 +02001/*
2 * HTTP semantics
3 *
4 * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <ctype.h>
14#include <common/config.h>
15#include <common/http.h>
16
17/* It is about twice as fast on recent architectures to lookup a byte in a
18 * table than to perform a boolean AND or OR between two tests. Refer to
19 * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
20 * neither a separator nor a CTL char. An http ver_token is any ASCII which can
21 * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
22 * digit. Note: please do not overwrite values in assignment since gcc-2.95
23 * will not handle them correctly. It's worth noting that chars 128..255 are
24 * nothing, not even control chars.
25 */
26const unsigned char http_char_classes[256] = {
27 [ 0] = HTTP_FLG_CTL,
28 [ 1] = HTTP_FLG_CTL,
29 [ 2] = HTTP_FLG_CTL,
30 [ 3] = HTTP_FLG_CTL,
31 [ 4] = HTTP_FLG_CTL,
32 [ 5] = HTTP_FLG_CTL,
33 [ 6] = HTTP_FLG_CTL,
34 [ 7] = HTTP_FLG_CTL,
35 [ 8] = HTTP_FLG_CTL,
36 [ 9] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP | HTTP_FLG_CTL,
37 [ 10] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
38 [ 11] = HTTP_FLG_CTL,
39 [ 12] = HTTP_FLG_CTL,
40 [ 13] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
41 [ 14] = HTTP_FLG_CTL,
42 [ 15] = HTTP_FLG_CTL,
43 [ 16] = HTTP_FLG_CTL,
44 [ 17] = HTTP_FLG_CTL,
45 [ 18] = HTTP_FLG_CTL,
46 [ 19] = HTTP_FLG_CTL,
47 [ 20] = HTTP_FLG_CTL,
48 [ 21] = HTTP_FLG_CTL,
49 [ 22] = HTTP_FLG_CTL,
50 [ 23] = HTTP_FLG_CTL,
51 [ 24] = HTTP_FLG_CTL,
52 [ 25] = HTTP_FLG_CTL,
53 [ 26] = HTTP_FLG_CTL,
54 [ 27] = HTTP_FLG_CTL,
55 [ 28] = HTTP_FLG_CTL,
56 [ 29] = HTTP_FLG_CTL,
57 [ 30] = HTTP_FLG_CTL,
58 [ 31] = HTTP_FLG_CTL,
59 [' '] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP,
60 ['!'] = HTTP_FLG_TOK,
61 ['"'] = HTTP_FLG_SEP,
62 ['#'] = HTTP_FLG_TOK,
63 ['$'] = HTTP_FLG_TOK,
64 ['%'] = HTTP_FLG_TOK,
65 ['&'] = HTTP_FLG_TOK,
66 [ 39] = HTTP_FLG_TOK,
67 ['('] = HTTP_FLG_SEP,
68 [')'] = HTTP_FLG_SEP,
69 ['*'] = HTTP_FLG_TOK,
70 ['+'] = HTTP_FLG_TOK,
71 [','] = HTTP_FLG_SEP,
72 ['-'] = HTTP_FLG_TOK,
73 ['.'] = HTTP_FLG_TOK | HTTP_FLG_VER,
74 ['/'] = HTTP_FLG_SEP | HTTP_FLG_VER,
75 ['0'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
76 ['1'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
77 ['2'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
78 ['3'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
79 ['4'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
80 ['5'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
81 ['6'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
82 ['7'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
83 ['8'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
84 ['9'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
85 [':'] = HTTP_FLG_SEP,
86 [';'] = HTTP_FLG_SEP,
87 ['<'] = HTTP_FLG_SEP,
88 ['='] = HTTP_FLG_SEP,
89 ['>'] = HTTP_FLG_SEP,
90 ['?'] = HTTP_FLG_SEP,
91 ['@'] = HTTP_FLG_SEP,
92 ['A'] = HTTP_FLG_TOK,
93 ['B'] = HTTP_FLG_TOK,
94 ['C'] = HTTP_FLG_TOK,
95 ['D'] = HTTP_FLG_TOK,
96 ['E'] = HTTP_FLG_TOK,
97 ['F'] = HTTP_FLG_TOK,
98 ['G'] = HTTP_FLG_TOK,
99 ['H'] = HTTP_FLG_TOK | HTTP_FLG_VER,
100 ['I'] = HTTP_FLG_TOK,
101 ['J'] = HTTP_FLG_TOK,
102 ['K'] = HTTP_FLG_TOK,
103 ['L'] = HTTP_FLG_TOK,
104 ['M'] = HTTP_FLG_TOK,
105 ['N'] = HTTP_FLG_TOK,
106 ['O'] = HTTP_FLG_TOK,
107 ['P'] = HTTP_FLG_TOK | HTTP_FLG_VER,
108 ['Q'] = HTTP_FLG_TOK,
109 ['R'] = HTTP_FLG_TOK | HTTP_FLG_VER,
110 ['S'] = HTTP_FLG_TOK | HTTP_FLG_VER,
111 ['T'] = HTTP_FLG_TOK | HTTP_FLG_VER,
112 ['U'] = HTTP_FLG_TOK,
113 ['V'] = HTTP_FLG_TOK,
114 ['W'] = HTTP_FLG_TOK,
115 ['X'] = HTTP_FLG_TOK,
116 ['Y'] = HTTP_FLG_TOK,
117 ['Z'] = HTTP_FLG_TOK,
118 ['['] = HTTP_FLG_SEP,
119 [ 92] = HTTP_FLG_SEP,
120 [']'] = HTTP_FLG_SEP,
121 ['^'] = HTTP_FLG_TOK,
122 ['_'] = HTTP_FLG_TOK,
123 ['`'] = HTTP_FLG_TOK,
124 ['a'] = HTTP_FLG_TOK,
125 ['b'] = HTTP_FLG_TOK,
126 ['c'] = HTTP_FLG_TOK,
127 ['d'] = HTTP_FLG_TOK,
128 ['e'] = HTTP_FLG_TOK,
129 ['f'] = HTTP_FLG_TOK,
130 ['g'] = HTTP_FLG_TOK,
131 ['h'] = HTTP_FLG_TOK,
132 ['i'] = HTTP_FLG_TOK,
133 ['j'] = HTTP_FLG_TOK,
134 ['k'] = HTTP_FLG_TOK,
135 ['l'] = HTTP_FLG_TOK,
136 ['m'] = HTTP_FLG_TOK,
137 ['n'] = HTTP_FLG_TOK,
138 ['o'] = HTTP_FLG_TOK,
139 ['p'] = HTTP_FLG_TOK,
140 ['q'] = HTTP_FLG_TOK,
141 ['r'] = HTTP_FLG_TOK,
142 ['s'] = HTTP_FLG_TOK,
143 ['t'] = HTTP_FLG_TOK,
144 ['u'] = HTTP_FLG_TOK,
145 ['v'] = HTTP_FLG_TOK,
146 ['w'] = HTTP_FLG_TOK,
147 ['x'] = HTTP_FLG_TOK,
148 ['y'] = HTTP_FLG_TOK,
149 ['z'] = HTTP_FLG_TOK,
150 ['{'] = HTTP_FLG_SEP,
151 ['|'] = HTTP_FLG_TOK,
152 ['}'] = HTTP_FLG_SEP,
153 ['~'] = HTTP_FLG_TOK,
154 [127] = HTTP_FLG_CTL,
155};
156
157const struct ist http_known_methods[HTTP_METH_OTHER] = {
158 [HTTP_METH_OPTIONS] = IST("OPTIONS"),
159 [HTTP_METH_GET] = IST("GET"),
160 [HTTP_METH_HEAD] = IST("HEAD"),
161 [HTTP_METH_POST] = IST("POST"),
162 [HTTP_METH_PUT] = IST("PUT"),
163 [HTTP_METH_DELETE] = IST("DELETE"),
164 [HTTP_METH_TRACE] = IST("TRACE"),
165 [HTTP_METH_CONNECT] = IST("CONNECT"),
166};
167
168/*
169 * returns a known method among HTTP_METH_* or HTTP_METH_OTHER for all unknown
170 * ones.
171 */
172enum http_meth_t find_http_meth(const char *str, const int len)
173{
174 const struct ist m = ist2(str, len);
175
176 if (isteq(m, ist("GET"))) return HTTP_METH_GET;
177 else if (isteq(m, ist("HEAD"))) return HTTP_METH_HEAD;
178 else if (isteq(m, ist("POST"))) return HTTP_METH_POST;
179 else if (isteq(m, ist("CONNECT"))) return HTTP_METH_CONNECT;
180 else if (isteq(m, ist("PUT"))) return HTTP_METH_PUT;
181 else if (isteq(m, ist("OPTIONS"))) return HTTP_METH_OPTIONS;
182 else if (isteq(m, ist("DELETE"))) return HTTP_METH_DELETE;
183 else if (isteq(m, ist("TRACE"))) return HTTP_METH_TRACE;
184 else return HTTP_METH_OTHER;
185}
Willy Tarreau6b952c82018-09-10 17:45:34 +0200186
187/* Parse the URI from the given transaction (which is assumed to be in request
188 * phase) and look for the "/" beginning the PATH. If not found, ist2(0,0) is
189 * returned. Otherwise the pointer and length are returned.
190 */
191struct ist http_get_path(const struct ist uri)
192{
193 const char *ptr, *end;
194
195 if (!uri.len)
196 goto not_found;
197
198 ptr = uri.ptr;
199 end = ptr + uri.len;
200
201 /* RFC7230, par. 2.7 :
202 * Request-URI = "*" | absuri | abspath | authority
203 */
204
205 if (*ptr == '*')
206 goto not_found;
207
208 if (isalpha((unsigned char)*ptr)) {
209 /* this is a scheme as described by RFC3986, par. 3.1 */
210 ptr++;
211 while (ptr < end &&
212 (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
213 ptr++;
214 /* skip '://' */
215 if (ptr == end || *ptr++ != ':')
216 goto not_found;
217 if (ptr == end || *ptr++ != '/')
218 goto not_found;
219 if (ptr == end || *ptr++ != '/')
220 goto not_found;
221 }
222 /* skip [user[:passwd]@]host[:[port]] */
223
224 while (ptr < end && *ptr != '/')
225 ptr++;
226
227 if (ptr == end)
228 goto not_found;
229
230 /* OK, we got the '/' ! */
231 return ist2(ptr, end - ptr);
232
233 not_found:
234 return ist2(NULL, 0);
235}