blob: 7a380c2d044a65936d4c6a3a842c6bf588da1f03 [file] [log] [blame]
/*
 * HTTP/1 protocol analyzer
 *
 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <common/config.h>
14
15#include <proto/h1.h>
16
17/* It is about twice as fast on recent architectures to lookup a byte in a
18 * table than to perform a boolean AND or OR between two tests. Refer to
19 * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
20 * neither a separator nor a CTL char. An http ver_token is any ASCII which can
21 * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
22 * digit. Note: please do not overwrite values in assignment since gcc-2.95
23 * will not handle them correctly. It's worth noting that chars 128..255 are
24 * nothing, not even control chars.
25 */
26const unsigned char h1_char_classes[256] = {
27 [ 0] = H1_FLG_CTL,
28 [ 1] = H1_FLG_CTL,
29 [ 2] = H1_FLG_CTL,
30 [ 3] = H1_FLG_CTL,
31 [ 4] = H1_FLG_CTL,
32 [ 5] = H1_FLG_CTL,
33 [ 6] = H1_FLG_CTL,
34 [ 7] = H1_FLG_CTL,
35 [ 8] = H1_FLG_CTL,
36 [ 9] = H1_FLG_SPHT | H1_FLG_LWS | H1_FLG_SEP | H1_FLG_CTL,
37 [ 10] = H1_FLG_CRLF | H1_FLG_LWS | H1_FLG_CTL,
38 [ 11] = H1_FLG_CTL,
39 [ 12] = H1_FLG_CTL,
40 [ 13] = H1_FLG_CRLF | H1_FLG_LWS | H1_FLG_CTL,
41 [ 14] = H1_FLG_CTL,
42 [ 15] = H1_FLG_CTL,
43 [ 16] = H1_FLG_CTL,
44 [ 17] = H1_FLG_CTL,
45 [ 18] = H1_FLG_CTL,
46 [ 19] = H1_FLG_CTL,
47 [ 20] = H1_FLG_CTL,
48 [ 21] = H1_FLG_CTL,
49 [ 22] = H1_FLG_CTL,
50 [ 23] = H1_FLG_CTL,
51 [ 24] = H1_FLG_CTL,
52 [ 25] = H1_FLG_CTL,
53 [ 26] = H1_FLG_CTL,
54 [ 27] = H1_FLG_CTL,
55 [ 28] = H1_FLG_CTL,
56 [ 29] = H1_FLG_CTL,
57 [ 30] = H1_FLG_CTL,
58 [ 31] = H1_FLG_CTL,
59 [' '] = H1_FLG_SPHT | H1_FLG_LWS | H1_FLG_SEP,
60 ['!'] = H1_FLG_TOK,
61 ['"'] = H1_FLG_SEP,
62 ['#'] = H1_FLG_TOK,
63 ['$'] = H1_FLG_TOK,
64 ['%'] = H1_FLG_TOK,
65 ['&'] = H1_FLG_TOK,
66 [ 39] = H1_FLG_TOK,
67 ['('] = H1_FLG_SEP,
68 [')'] = H1_FLG_SEP,
69 ['*'] = H1_FLG_TOK,
70 ['+'] = H1_FLG_TOK,
71 [','] = H1_FLG_SEP,
72 ['-'] = H1_FLG_TOK,
73 ['.'] = H1_FLG_TOK | H1_FLG_VER,
74 ['/'] = H1_FLG_SEP | H1_FLG_VER,
75 ['0'] = H1_FLG_TOK | H1_FLG_VER,
76 ['1'] = H1_FLG_TOK | H1_FLG_VER,
77 ['2'] = H1_FLG_TOK | H1_FLG_VER,
78 ['3'] = H1_FLG_TOK | H1_FLG_VER,
79 ['4'] = H1_FLG_TOK | H1_FLG_VER,
80 ['5'] = H1_FLG_TOK | H1_FLG_VER,
81 ['6'] = H1_FLG_TOK | H1_FLG_VER,
82 ['7'] = H1_FLG_TOK | H1_FLG_VER,
83 ['8'] = H1_FLG_TOK | H1_FLG_VER,
84 ['9'] = H1_FLG_TOK | H1_FLG_VER,
85 [':'] = H1_FLG_SEP,
86 [';'] = H1_FLG_SEP,
87 ['<'] = H1_FLG_SEP,
88 ['='] = H1_FLG_SEP,
89 ['>'] = H1_FLG_SEP,
90 ['?'] = H1_FLG_SEP,
91 ['@'] = H1_FLG_SEP,
92 ['A'] = H1_FLG_TOK,
93 ['B'] = H1_FLG_TOK,
94 ['C'] = H1_FLG_TOK,
95 ['D'] = H1_FLG_TOK,
96 ['E'] = H1_FLG_TOK,
97 ['F'] = H1_FLG_TOK,
98 ['G'] = H1_FLG_TOK,
99 ['H'] = H1_FLG_TOK | H1_FLG_VER,
100 ['I'] = H1_FLG_TOK,
101 ['J'] = H1_FLG_TOK,
102 ['K'] = H1_FLG_TOK,
103 ['L'] = H1_FLG_TOK,
104 ['M'] = H1_FLG_TOK,
105 ['N'] = H1_FLG_TOK,
106 ['O'] = H1_FLG_TOK,
107 ['P'] = H1_FLG_TOK | H1_FLG_VER,
108 ['Q'] = H1_FLG_TOK,
109 ['R'] = H1_FLG_TOK | H1_FLG_VER,
110 ['S'] = H1_FLG_TOK | H1_FLG_VER,
111 ['T'] = H1_FLG_TOK | H1_FLG_VER,
112 ['U'] = H1_FLG_TOK,
113 ['V'] = H1_FLG_TOK,
114 ['W'] = H1_FLG_TOK,
115 ['X'] = H1_FLG_TOK,
116 ['Y'] = H1_FLG_TOK,
117 ['Z'] = H1_FLG_TOK,
118 ['['] = H1_FLG_SEP,
119 [ 92] = H1_FLG_SEP,
120 [']'] = H1_FLG_SEP,
121 ['^'] = H1_FLG_TOK,
122 ['_'] = H1_FLG_TOK,
123 ['`'] = H1_FLG_TOK,
124 ['a'] = H1_FLG_TOK,
125 ['b'] = H1_FLG_TOK,
126 ['c'] = H1_FLG_TOK,
127 ['d'] = H1_FLG_TOK,
128 ['e'] = H1_FLG_TOK,
129 ['f'] = H1_FLG_TOK,
130 ['g'] = H1_FLG_TOK,
131 ['h'] = H1_FLG_TOK,
132 ['i'] = H1_FLG_TOK,
133 ['j'] = H1_FLG_TOK,
134 ['k'] = H1_FLG_TOK,
135 ['l'] = H1_FLG_TOK,
136 ['m'] = H1_FLG_TOK,
137 ['n'] = H1_FLG_TOK,
138 ['o'] = H1_FLG_TOK,
139 ['p'] = H1_FLG_TOK,
140 ['q'] = H1_FLG_TOK,
141 ['r'] = H1_FLG_TOK,
142 ['s'] = H1_FLG_TOK,
143 ['t'] = H1_FLG_TOK,
144 ['u'] = H1_FLG_TOK,
145 ['v'] = H1_FLG_TOK,
146 ['w'] = H1_FLG_TOK,
147 ['x'] = H1_FLG_TOK,
148 ['y'] = H1_FLG_TOK,
149 ['z'] = H1_FLG_TOK,
150 ['{'] = H1_FLG_SEP,
151 ['|'] = H1_FLG_TOK,
152 ['}'] = H1_FLG_SEP,
153 ['~'] = H1_FLG_TOK,
154 [127] = H1_FLG_CTL,
155};
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200156
157
158/* This function skips trailers in the buffer associated with HTTP message
159 * <msg>. The first visited position is msg->next. If the end of the trailers is
160 * found, the function returns >0. So, the caller can automatically schedul it
161 * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
162 * data are available, the function does not change anything except maybe
163 * msg->sol if it could parse some lines, and returns zero. If a parse error
164 * is encountered, the function returns < 0 and does not change anything except
165 * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
166 * state before calling this function, which implies that all non-trailers data
167 * have already been scheduled for forwarding, and that msg->next exactly
168 * matches the length of trailers already parsed and not forwarded. It is also
169 * important to note that this function is designed to be able to parse wrapped
170 * headers at end of buffer.
171 */
172int http_forward_trailers(struct http_msg *msg)
173{
174 const struct buffer *buf = msg->chn->buf;
175
176 /* we have msg->next which points to next line. Look for CRLF. But
177 * first, we reset msg->sol */
178 msg->sol = 0;
179 while (1) {
180 const char *p1 = NULL, *p2 = NULL;
181 const char *start = b_ptr(buf, msg->next + msg->sol);
182 const char *stop = bi_end(buf);
183 const char *ptr = start;
184 int bytes = 0;
185
186 /* scan current line and stop at LF or CRLF */
187 while (1) {
188 if (ptr == stop)
189 return 0;
190
191 if (*ptr == '\n') {
192 if (!p1)
193 p1 = ptr;
194 p2 = ptr;
195 break;
196 }
197
198 if (*ptr == '\r') {
199 if (p1) {
200 msg->err_pos = buffer_count(buf, buf->p, ptr);
201 return -1;
202 }
203 p1 = ptr;
204 }
205
206 ptr++;
207 if (ptr >= buf->data + buf->size)
208 ptr = buf->data;
209 }
210
211 /* after LF; point to beginning of next line */
212 p2++;
213 if (p2 >= buf->data + buf->size)
214 p2 = buf->data;
215
216 bytes = p2 - start;
217 if (bytes < 0)
218 bytes += buf->size;
219 msg->sol += bytes;
220
221 /* LF/CRLF at beginning of line => end of trailers at p2.
222 * Everything was scheduled for forwarding, there's nothing left
223 * from this message. */
224 if (p1 == start)
225 return 1;
226
227 /* OK, next line then */
228 }
229}