/*
 * include/proto/h1.h
 * This file contains HTTP/1 protocol definitions.
 *
 * Copyright (C) 2000-2017 Willy Tarreau - w@1wt.eu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

22#ifndef _PROTO_H1_H
23#define _PROTO_H1_H
24
Willy Tarreaudb4893d2017-09-21 08:40:02 +020025#include <common/buffer.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020026#include <common/compiler.h>
27#include <common/config.h>
Willy Tarreau794f9af2017-07-26 09:07:47 +020028#include <common/http-hdr.h>
Willy Tarreaudb4893d2017-09-21 08:40:02 +020029#include <common/standard.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020030#include <types/h1.h>
Willy Tarreaudb4893d2017-09-21 08:40:02 +020031#include <types/proto_http.h>
Willy Tarreau8740c8b2017-09-21 10:22:25 +020032#include <proto/hdr_idx.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020033
/* Character class lookup table, indexed by byte value. Each entry is tested
 * against the H1_FLG_* bits by the HTTP_IS_*() macros of this file.
 */
extern const uint8_t h1_char_classes[256];

/* HTTP/1 parser entry points. They are only declared here; their definitions
 * live outside of this header.
 */
const char *http_parse_reqline(struct http_msg *msg,
                               enum h1_state state, const char *ptr, const char *end,
                               unsigned int *ret_ptr, enum h1_state *ret_state);
const char *http_parse_stsline(struct http_msg *msg,
                               enum h1_state state, const char *ptr, const char *end,
                               unsigned int *ret_ptr, enum h1_state *ret_state);
void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx);
int http_forward_trailers(struct http_msg *msg);
int h1_headers_to_hdr_list(char *start, const char *stop,
                           struct http_hdr *hdr, unsigned int hdr_num,
                           struct h1m *h1m);
int h1_measure_trailers(const struct buffer *buf, unsigned int max);

/* Bit flags stored in h1_char_classes[]. Each flag occupies its own bit so
 * that a single table entry can belong to several classes at once; the
 * HTTP_IS_*() macros test one flag each.
 */
#define H1_FLG_CTL  0x01
#define H1_FLG_SEP  0x02
#define H1_FLG_LWS  0x04
#define H1_FLG_SPHT 0x08
#define H1_FLG_CRLF 0x10
#define H1_FLG_TOK  0x20
#define H1_FLG_VER  0x40
#define H1_FLG_DIG  0x80

/* Character class tests. Each macro looks <x> up in h1_char_classes[] and
 * evaluates to a non-zero value when the corresponding H1_FLG_* bit is set.
 * <x> is cast to uint8_t first so that a (possibly signed) char never yields
 * a negative index. Note that the result is the masked bit, not 0/1.
 */
#define HTTP_IS_CTL(x)       (h1_char_classes[(uint8_t)(x)] & H1_FLG_CTL)
#define HTTP_IS_SEP(x)       (h1_char_classes[(uint8_t)(x)] & H1_FLG_SEP)
#define HTTP_IS_LWS(x)       (h1_char_classes[(uint8_t)(x)] & H1_FLG_LWS)
#define HTTP_IS_SPHT(x)      (h1_char_classes[(uint8_t)(x)] & H1_FLG_SPHT)
#define HTTP_IS_CRLF(x)      (h1_char_classes[(uint8_t)(x)] & H1_FLG_CRLF)
#define HTTP_IS_TOKEN(x)     (h1_char_classes[(uint8_t)(x)] & H1_FLG_TOK)
#define HTTP_IS_VER_TOKEN(x) (h1_char_classes[(uint8_t)(x)] & H1_FLG_VER)
#define HTTP_IS_DIGIT(x)     (h1_char_classes[(uint8_t)(x)] & H1_FLG_DIG)


/* Macros used in the HTTP/1 parser, to check for the expected presence of
 * certain bytes (e.g. LF) or to skip to the next byte and yield in case of
 * failure.
 */

/* Expects to find an LF at <ptr>. If not, sets <state> to <where> and jumps to
 * label <bad>. When the LF is present, nothing is modified and execution
 * simply continues after the macro. <ptr> is dereferenced without any bounds
 * check, so the caller must ensure that it points to a readable byte.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)                  \
	do {                                                    \
		if (unlikely(*(ptr) != '\n')) {                 \
			state = (where);                        \
			goto bad;                               \
		}                                               \
	} while (0)

/* Increments pointer <ptr>, then continues to label <more> if it is still
 * below pointer <end>, or sets <state> to <where> and goes to label <stop> if
 * the end of the buffer was reached. <ptr> is always advanced by exactly one
 * byte, whichever branch is taken.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)      \
	do {                                                            \
		if (likely(++(ptr) < (end)))                            \
			goto more;                                      \
		else {                                                  \
			state = (where);                                \
			goto stop;                                      \
		}                                                       \
	} while (0)

97/* for debugging, reports the HTTP/1 message state name */
98static inline const char *h1_msg_state_str(enum h1_state msg_state)
99{
100 switch (msg_state) {
101 case HTTP_MSG_RQBEFORE: return "MSG_RQBEFORE";
102 case HTTP_MSG_RQBEFORE_CR: return "MSG_RQBEFORE_CR";
103 case HTTP_MSG_RQMETH: return "MSG_RQMETH";
104 case HTTP_MSG_RQMETH_SP: return "MSG_RQMETH_SP";
105 case HTTP_MSG_RQURI: return "MSG_RQURI";
106 case HTTP_MSG_RQURI_SP: return "MSG_RQURI_SP";
107 case HTTP_MSG_RQVER: return "MSG_RQVER";
108 case HTTP_MSG_RQLINE_END: return "MSG_RQLINE_END";
109 case HTTP_MSG_RPBEFORE: return "MSG_RPBEFORE";
110 case HTTP_MSG_RPBEFORE_CR: return "MSG_RPBEFORE_CR";
111 case HTTP_MSG_RPVER: return "MSG_RPVER";
112 case HTTP_MSG_RPVER_SP: return "MSG_RPVER_SP";
113 case HTTP_MSG_RPCODE: return "MSG_RPCODE";
114 case HTTP_MSG_RPCODE_SP: return "MSG_RPCODE_SP";
115 case HTTP_MSG_RPREASON: return "MSG_RPREASON";
116 case HTTP_MSG_RPLINE_END: return "MSG_RPLINE_END";
117 case HTTP_MSG_HDR_FIRST: return "MSG_HDR_FIRST";
118 case HTTP_MSG_HDR_NAME: return "MSG_HDR_NAME";
119 case HTTP_MSG_HDR_COL: return "MSG_HDR_COL";
120 case HTTP_MSG_HDR_L1_SP: return "MSG_HDR_L1_SP";
121 case HTTP_MSG_HDR_L1_LF: return "MSG_HDR_L1_LF";
122 case HTTP_MSG_HDR_L1_LWS: return "MSG_HDR_L1_LWS";
123 case HTTP_MSG_HDR_VAL: return "MSG_HDR_VAL";
124 case HTTP_MSG_HDR_L2_LF: return "MSG_HDR_L2_LF";
125 case HTTP_MSG_HDR_L2_LWS: return "MSG_HDR_L2_LWS";
126 case HTTP_MSG_LAST_LF: return "MSG_LAST_LF";
127 case HTTP_MSG_ERROR: return "MSG_ERROR";
128 case HTTP_MSG_BODY: return "MSG_BODY";
129 case HTTP_MSG_100_SENT: return "MSG_100_SENT";
130 case HTTP_MSG_CHUNK_SIZE: return "MSG_CHUNK_SIZE";
131 case HTTP_MSG_DATA: return "MSG_DATA";
132 case HTTP_MSG_CHUNK_CRLF: return "MSG_CHUNK_CRLF";
133 case HTTP_MSG_TRAILERS: return "MSG_TRAILERS";
134 case HTTP_MSG_ENDING: return "MSG_ENDING";
135 case HTTP_MSG_DONE: return "MSG_DONE";
136 case HTTP_MSG_CLOSING: return "MSG_CLOSING";
137 case HTTP_MSG_CLOSED: return "MSG_CLOSED";
138 case HTTP_MSG_TUNNEL: return "MSG_TUNNEL";
139 default: return "MSG_??????";
140 }
141}
142
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200143/* This function may be called only in HTTP_MSG_CHUNK_CRLF. It reads the CRLF or
144 * a possible LF alone at the end of a chunk. The caller should adjust msg->next
145 * in order to include this part into the next forwarding phase. Note that the
Willy Tarreauc0973c62018-06-14 15:53:21 +0200146 * caller must ensure that head+start points to the first byte to parse. It
147 * returns the number of bytes parsed on success, so the caller can set msg_state
148 * to HTTP_MSG_CHUNK_SIZE. If not enough data are available, the function does not
Willy Tarreaub2892562017-09-21 11:33:54 +0200149 * change anything and returns zero. Otherwise it returns a negative value
150 * indicating the error positionn relative to <stop>. Note: this function is
151 * designed to parse wrapped CRLF at the end of the buffer.
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200152 */
Willy Tarreaub2892562017-09-21 11:33:54 +0200153static inline int h1_skip_chunk_crlf(const struct buffer *buf, int start, int stop)
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200154{
Willy Tarreauc0973c62018-06-14 15:53:21 +0200155 const char *ptr = b_peek(buf, start);
Willy Tarreaub2892562017-09-21 11:33:54 +0200156 int bytes = 1;
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200157
158 /* NB: we'll check data availabilty at the end. It's not a
159 * problem because whatever we match first will be checked
160 * against the correct length.
161 */
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200162 if (*ptr == '\r') {
163 bytes++;
164 ptr++;
Willy Tarreauc0973c62018-06-14 15:53:21 +0200165 if (ptr >= b_wrap(buf))
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200166 ptr = buf->data;
167 }
168
Willy Tarreaub2892562017-09-21 11:33:54 +0200169 if (bytes > stop - start)
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200170 return 0;
171
Willy Tarreauc0973c62018-06-14 15:53:21 +0200172 if (*ptr != '\n') // negative position to stop
173 return ptr - __b_peek(buf, stop);
Willy Tarreaub2892562017-09-21 11:33:54 +0200174
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200175 return bytes;
176}
177
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200178/* Parse the chunk size start at buf->p + start and stops before buf->p + stop.
179 * It returns the chunk size in <res> and the amount of bytes read this way :
180 * < 0 : error at this position relative to <stop>
181 * = 0 : not enough bytes to read a complete chunk size
182 * > 0 : number of bytes successfully read that the caller can skip
183 * On success, the caller should adjust its msg->next to point to the first
184 * byte of data after the chunk size, so that we know we can forward exactly
185 * msg->next bytes, and msg->sol to contain the exact number of bytes forming
186 * the chunk size. That way it is always possible to differentiate between the
187 * start of the body and the start of the data. Note: this function is designed
188 * to parse wrapped CRLF at the end of the buffer.
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200189 */
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200190static inline int h1_parse_chunk_size(const struct buffer *buf, int start, int stop, unsigned int *res)
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200191{
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200192 const char *ptr = b_ptr(buf, start);
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200193 const char *ptr_old = ptr;
194 const char *end = buf->data + buf->size;
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200195 unsigned int chunk = 0;
196
Willy Tarreaub15e3fe2017-11-10 11:17:08 +0100197 stop -= start; // bytes left
198 start = stop; // bytes to transfer
199
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200200 /* The chunk size is in the following form, though we are only
201 * interested in the size and CRLF :
202 * 1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
203 */
204 while (1) {
205 int c;
Willy Tarreaub15e3fe2017-11-10 11:17:08 +0100206 if (!stop)
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200207 return 0;
208 c = hex2i(*ptr);
209 if (c < 0) /* not a hex digit anymore */
210 break;
211 if (unlikely(++ptr >= end))
212 ptr = buf->data;
213 if (unlikely(chunk & 0xF8000000)) /* integer overflow will occur if result >= 2GB */
214 goto error;
215 chunk = (chunk << 4) + c;
Willy Tarreaub15e3fe2017-11-10 11:17:08 +0100216 stop--;
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200217 }
218
219 /* empty size not allowed */
220 if (unlikely(ptr == ptr_old))
221 goto error;
222
223 while (HTTP_IS_SPHT(*ptr)) {
224 if (++ptr >= end)
225 ptr = buf->data;
Willy Tarreaub15e3fe2017-11-10 11:17:08 +0100226 if (--stop == 0)
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200227 return 0;
228 }
229
230 /* Up to there, we know that at least one byte is present at *ptr. Check
231 * for the end of chunk size.
232 */
233 while (1) {
234 if (likely(HTTP_IS_CRLF(*ptr))) {
235 /* we now have a CR or an LF at ptr */
236 if (likely(*ptr == '\r')) {
237 if (++ptr >= end)
238 ptr = buf->data;
Willy Tarreaub15e3fe2017-11-10 11:17:08 +0100239 if (--stop == 0)
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200240 return 0;
241 }
242
Willy Tarreaub15e3fe2017-11-10 11:17:08 +0100243 if (*ptr != '\n')
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200244 goto error;
245 if (++ptr >= end)
246 ptr = buf->data;
Willy Tarreaub15e3fe2017-11-10 11:17:08 +0100247 --stop;
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200248 /* done */
249 break;
250 }
251 else if (likely(*ptr == ';')) {
252 /* chunk extension, ends at next CRLF */
253 if (++ptr >= end)
254 ptr = buf->data;
Willy Tarreaub15e3fe2017-11-10 11:17:08 +0100255 if (--stop == 0)
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200256 return 0;
257
258 while (!HTTP_IS_CRLF(*ptr)) {
259 if (++ptr >= end)
260 ptr = buf->data;
Willy Tarreaub15e3fe2017-11-10 11:17:08 +0100261 if (--stop == 0)
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200262 return 0;
263 }
264 /* we have a CRLF now, loop above */
265 continue;
266 }
267 else
268 goto error;
269 }
270
271 /* OK we found our CRLF and now <ptr> points to the next byte, which may
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200272 * or may not be present. Let's return the number of bytes parsed.
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200273 */
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200274 *res = chunk;
Willy Tarreaub15e3fe2017-11-10 11:17:08 +0100275 return start - stop;
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200276 error:
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200277 *res = 0; // just to stop gcc's -Wuninitialized warning :-(
Willy Tarreaub15e3fe2017-11-10 11:17:08 +0100278 return -stop;
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200279}
280
Willy Tarreau4093a4d2017-09-21 11:46:43 +0200281/* initializes an H1 message */
282static inline struct h1m *h1m_init(struct h1m *h1m)
283{
284 h1m->state = HTTP_MSG_RQBEFORE;
Willy Tarreaud22e83a2017-10-31 08:02:24 +0100285 h1m->status = 0;
Willy Tarreau4093a4d2017-09-21 11:46:43 +0200286 h1m->flags = 0;
287 h1m->curr_len = 0;
288 h1m->body_len = 0;
289 h1m->err_pos = 0;
290 h1m->err_state = 0;
291 return h1m;
292}
Willy Tarreau0da5b3b2017-09-21 09:30:46 +0200293
294#endif /* _PROTO_H1_H */