blob: b6a41454514bac898caba6d7de10ca59a1767f0e [file] [log] [blame]
Willy Tarreau0da5b3b2017-09-21 09:30:46 +02001/*
2 * include/proto/h1.h
3 * This file contains HTTP/1 protocol definitions.
4 *
5 * Copyright (C) 2000-2017 Willy Tarreau - w@1wt.eu
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation, version 2.1
10 * exclusively.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22#ifndef _PROTO_H1_H
23#define _PROTO_H1_H
24
Willy Tarreaudb4893d2017-09-21 08:40:02 +020025#include <common/buffer.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020026#include <common/compiler.h>
27#include <common/config.h>
Willy Tarreaudb4893d2017-09-21 08:40:02 +020028#include <common/standard.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020029#include <types/h1.h>
Willy Tarreaudb4893d2017-09-21 08:40:02 +020030#include <types/proto_http.h>
Willy Tarreau8740c8b2017-09-21 10:22:25 +020031#include <proto/hdr_idx.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020032
/* Lookup table with one entry per possible byte value. Each entry is a bitmask
 * of the H1_FLG_* flags and is queried through the HTTP_IS_*() macros below.
 * The table itself is defined outside of this header.
 */
extern const uint8_t h1_char_classes[256];

/* HTTP/1 parser entry points implemented outside of this header.
 * NOTE(review): judging by their names and signatures, the two *line parsers
 * resume parsing of a request/status line at <ptr> in state <state>, stop
 * before <end>, and report progress through <ret_ptr>/<ret_state>; confirm
 * the exact contract against their definitions.
 */
const char *http_parse_reqline(struct http_msg *msg,
                               enum h1_state state, const char *ptr, const char *end,
                               unsigned int *ret_ptr, enum h1_state *ret_state);
const char *http_parse_stsline(struct http_msg *msg,
                               enum h1_state state, const char *ptr, const char *end,
                               unsigned int *ret_ptr, enum h1_state *ret_state);
void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx);
int http_forward_trailers(struct http_msg *msg);
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020042
/* Bit flags stored in h1_char_classes[]. A byte may carry several of them at
 * once. The names mirror the HTTP/1 grammar classes (CTL, separators, LWS,
 * SP/HT, CR/LF, token, version token); the actual membership of each class is
 * whatever the table defines, which is not visible from this header.
 */
#define H1_FLG_CTL  0x01
#define H1_FLG_SEP  0x02
#define H1_FLG_LWS  0x04
#define H1_FLG_SPHT 0x08
#define H1_FLG_CRLF 0x10
#define H1_FLG_TOK  0x20
#define H1_FLG_VER  0x40

/* Character class tests. Each macro evaluates to the raw masked flag, i.e. a
 * non-zero value (the flag itself, not 1) when byte <x> belongs to the class
 * and zero otherwise. <x> is cast to uint8_t before indexing so that a signed
 * char with its high bit set cannot produce a negative index.
 */
#define HTTP_IS_CTL(x)       (h1_char_classes[(uint8_t)(x)] & H1_FLG_CTL)
#define HTTP_IS_SEP(x)       (h1_char_classes[(uint8_t)(x)] & H1_FLG_SEP)
#define HTTP_IS_LWS(x)       (h1_char_classes[(uint8_t)(x)] & H1_FLG_LWS)
#define HTTP_IS_SPHT(x)      (h1_char_classes[(uint8_t)(x)] & H1_FLG_SPHT)
#define HTTP_IS_CRLF(x)      (h1_char_classes[(uint8_t)(x)] & H1_FLG_CRLF)
#define HTTP_IS_TOKEN(x)     (h1_char_classes[(uint8_t)(x)] & H1_FLG_TOK)
#define HTTP_IS_VER_TOKEN(x) (h1_char_classes[(uint8_t)(x)] & H1_FLG_VER)
58
59
/* Macros used in the HTTP/1 parser, to check for the expected presence of
 * certain bytes (e.g. LF) or to skip to the next byte and yield in case of
 * failure.
 */
63
64
/* Expects to find an LF at <ptr>. If the byte is anything else, <state> is
 * assigned <where> and control jumps to label <bad>. When the LF is present
 * nothing is modified and execution simply continues after the macro; <ptr>
 * is never advanced. <ptr> is dereferenced unconditionally, so the caller
 * must make sure that one byte is readable there.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where) \
	do { \
		if (unlikely(*(ptr) != '\n')) { \
			state = (where); \
			goto bad; \
		} \
	} while (0)
75
/* Advances pointer <ptr> by one byte. If it is still strictly below <end>,
 * jumps to label <more> without touching <state>. Otherwise the end of the
 * buffer was reached: <state> is assigned <where> and control jumps to label
 * <stop>. Either way control leaves through a goto, so nothing placed right
 * after the macro is reached by falling through. <ptr> is modified in place
 * and must therefore be an lvalue.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where) \
	do { \
		if (likely(++(ptr) < (end))) \
			goto more; \
		else { \
			state = (where); \
			goto stop; \
		} \
	} while (0)
89
90/* for debugging, reports the HTTP/1 message state name */
91static inline const char *h1_msg_state_str(enum h1_state msg_state)
92{
93 switch (msg_state) {
94 case HTTP_MSG_RQBEFORE: return "MSG_RQBEFORE";
95 case HTTP_MSG_RQBEFORE_CR: return "MSG_RQBEFORE_CR";
96 case HTTP_MSG_RQMETH: return "MSG_RQMETH";
97 case HTTP_MSG_RQMETH_SP: return "MSG_RQMETH_SP";
98 case HTTP_MSG_RQURI: return "MSG_RQURI";
99 case HTTP_MSG_RQURI_SP: return "MSG_RQURI_SP";
100 case HTTP_MSG_RQVER: return "MSG_RQVER";
101 case HTTP_MSG_RQLINE_END: return "MSG_RQLINE_END";
102 case HTTP_MSG_RPBEFORE: return "MSG_RPBEFORE";
103 case HTTP_MSG_RPBEFORE_CR: return "MSG_RPBEFORE_CR";
104 case HTTP_MSG_RPVER: return "MSG_RPVER";
105 case HTTP_MSG_RPVER_SP: return "MSG_RPVER_SP";
106 case HTTP_MSG_RPCODE: return "MSG_RPCODE";
107 case HTTP_MSG_RPCODE_SP: return "MSG_RPCODE_SP";
108 case HTTP_MSG_RPREASON: return "MSG_RPREASON";
109 case HTTP_MSG_RPLINE_END: return "MSG_RPLINE_END";
110 case HTTP_MSG_HDR_FIRST: return "MSG_HDR_FIRST";
111 case HTTP_MSG_HDR_NAME: return "MSG_HDR_NAME";
112 case HTTP_MSG_HDR_COL: return "MSG_HDR_COL";
113 case HTTP_MSG_HDR_L1_SP: return "MSG_HDR_L1_SP";
114 case HTTP_MSG_HDR_L1_LF: return "MSG_HDR_L1_LF";
115 case HTTP_MSG_HDR_L1_LWS: return "MSG_HDR_L1_LWS";
116 case HTTP_MSG_HDR_VAL: return "MSG_HDR_VAL";
117 case HTTP_MSG_HDR_L2_LF: return "MSG_HDR_L2_LF";
118 case HTTP_MSG_HDR_L2_LWS: return "MSG_HDR_L2_LWS";
119 case HTTP_MSG_LAST_LF: return "MSG_LAST_LF";
120 case HTTP_MSG_ERROR: return "MSG_ERROR";
121 case HTTP_MSG_BODY: return "MSG_BODY";
122 case HTTP_MSG_100_SENT: return "MSG_100_SENT";
123 case HTTP_MSG_CHUNK_SIZE: return "MSG_CHUNK_SIZE";
124 case HTTP_MSG_DATA: return "MSG_DATA";
125 case HTTP_MSG_CHUNK_CRLF: return "MSG_CHUNK_CRLF";
126 case HTTP_MSG_TRAILERS: return "MSG_TRAILERS";
127 case HTTP_MSG_ENDING: return "MSG_ENDING";
128 case HTTP_MSG_DONE: return "MSG_DONE";
129 case HTTP_MSG_CLOSING: return "MSG_CLOSING";
130 case HTTP_MSG_CLOSED: return "MSG_CLOSED";
131 case HTTP_MSG_TUNNEL: return "MSG_TUNNEL";
132 default: return "MSG_??????";
133 }
134}
135
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200136/* This function may be called only in HTTP_MSG_CHUNK_CRLF. It reads the CRLF or
137 * a possible LF alone at the end of a chunk. The caller should adjust msg->next
138 * in order to include this part into the next forwarding phase. Note that the
139 * caller must ensure that ->p points to the first byte to parse. It returns
140 * the number of bytes parsed on success, so the caller can set msg_state to
141 * HTTP_MSG_CHUNK_SIZE. If not enough data are available, the function does not
142 * change anything and returns zero. If a parse error is encountered, the
143 * function returns < 0. Note: this function is designed to parse wrapped CRLF
144 * at the end of the buffer.
145 */
146static inline int http_skip_chunk_crlf(struct http_msg *msg)
147{
148 const struct buffer *buf = msg->chn->buf;
149 const char *ptr;
150 int bytes;
151
152 /* NB: we'll check data availabilty at the end. It's not a
153 * problem because whatever we match first will be checked
154 * against the correct length.
155 */
156 bytes = 1;
157 ptr = b_ptr(buf, msg->next);
158 if (*ptr == '\r') {
159 bytes++;
160 ptr++;
161 if (ptr >= buf->data + buf->size)
162 ptr = buf->data;
163 }
164
165 if (msg->next + bytes > buf->i)
166 return 0;
167
168 if (*ptr != '\n') {
169 msg->err_pos = buffer_count(buf, buf->p, ptr);
170 return -1;
171 }
172 return bytes;
173}
174
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200175/* Parse the chunk size start at buf->p + start and stops before buf->p + stop.
176 * It returns the chunk size in <res> and the amount of bytes read this way :
177 * < 0 : error at this position relative to <stop>
178 * = 0 : not enough bytes to read a complete chunk size
179 * > 0 : number of bytes successfully read that the caller can skip
180 * On success, the caller should adjust its msg->next to point to the first
181 * byte of data after the chunk size, so that we know we can forward exactly
182 * msg->next bytes, and msg->sol to contain the exact number of bytes forming
183 * the chunk size. That way it is always possible to differentiate between the
184 * start of the body and the start of the data. Note: this function is designed
185 * to parse wrapped CRLF at the end of the buffer.
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200186 */
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200187static inline int h1_parse_chunk_size(const struct buffer *buf, int start, int stop, unsigned int *res)
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200188{
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200189 const char *ptr = b_ptr(buf, start);
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200190 const char *ptr_old = ptr;
191 const char *end = buf->data + buf->size;
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200192 const char *ptr_stop = b_ptr(buf, stop);
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200193 unsigned int chunk = 0;
194
195 /* The chunk size is in the following form, though we are only
196 * interested in the size and CRLF :
197 * 1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
198 */
199 while (1) {
200 int c;
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200201 if (ptr == ptr_stop)
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200202 return 0;
203 c = hex2i(*ptr);
204 if (c < 0) /* not a hex digit anymore */
205 break;
206 if (unlikely(++ptr >= end))
207 ptr = buf->data;
208 if (unlikely(chunk & 0xF8000000)) /* integer overflow will occur if result >= 2GB */
209 goto error;
210 chunk = (chunk << 4) + c;
211 }
212
213 /* empty size not allowed */
214 if (unlikely(ptr == ptr_old))
215 goto error;
216
217 while (HTTP_IS_SPHT(*ptr)) {
218 if (++ptr >= end)
219 ptr = buf->data;
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200220 if (unlikely(ptr == ptr_stop))
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200221 return 0;
222 }
223
224 /* Up to there, we know that at least one byte is present at *ptr. Check
225 * for the end of chunk size.
226 */
227 while (1) {
228 if (likely(HTTP_IS_CRLF(*ptr))) {
229 /* we now have a CR or an LF at ptr */
230 if (likely(*ptr == '\r')) {
231 if (++ptr >= end)
232 ptr = buf->data;
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200233 if (ptr == ptr_stop)
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200234 return 0;
235 }
236
237 if (unlikely(*ptr != '\n'))
238 goto error;
239 if (++ptr >= end)
240 ptr = buf->data;
241 /* done */
242 break;
243 }
244 else if (likely(*ptr == ';')) {
245 /* chunk extension, ends at next CRLF */
246 if (++ptr >= end)
247 ptr = buf->data;
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200248 if (ptr == ptr_stop)
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200249 return 0;
250
251 while (!HTTP_IS_CRLF(*ptr)) {
252 if (++ptr >= end)
253 ptr = buf->data;
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200254 if (ptr == ptr_stop)
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200255 return 0;
256 }
257 /* we have a CRLF now, loop above */
258 continue;
259 }
260 else
261 goto error;
262 }
263
264 /* OK we found our CRLF and now <ptr> points to the next byte, which may
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200265 * or may not be present. Let's return the number of bytes parsed.
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200266 */
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200267 *res = chunk;
268 return (ptr - ptr_old) >= 0 ? (ptr - ptr_old) : (ptr - ptr_old + buf->size);
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200269 error:
Willy Tarreaue56cdd32017-09-21 08:36:33 +0200270 *res = 0; // just to stop gcc's -Wuninitialized warning :-(
271 return -buffer_count(buf, ptr, ptr_stop);
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200272}
273
Willy Tarreau0da5b3b2017-09-21 09:30:46 +0200274
275#endif /* _PROTO_H1_H */