/*
 * include/common/h1.h
 * This file contains HTTP/1 protocol definitions.
 *
 * Copyright (C) 2000-2017 Willy Tarreau - w@1wt.eu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
| 21 | |
| 22 | #ifndef _COMMON_H1_H |
| 23 | #define _COMMON_H1_H |
| 24 | |
Willy Tarreau | 4c7e4b7 | 2020-05-27 12:58:42 +0200 | [diff] [blame] | 25 | #include <haproxy/api.h> |
Willy Tarreau | afba57a | 2018-12-11 13:44:24 +0100 | [diff] [blame] | 26 | #include <common/buffer.h> |
Willy Tarreau | afba57a | 2018-12-11 13:44:24 +0100 | [diff] [blame] | 27 | #include <common/http.h> |
| 28 | #include <common/http-hdr.h> |
Willy Tarreau | eb6f701 | 2020-05-27 16:21:26 +0200 | [diff] [blame] | 29 | #include <import/ist.h> |
Willy Tarreau | afba57a | 2018-12-11 13:44:24 +0100 | [diff] [blame] | 30 | #include <common/standard.h> |
| 31 | |
| 32 | |
/* Possible states while parsing HTTP/1 messages (request|response).
 *
 * The numeric values are spelled out explicitly so that a state number seen
 * in a dump can be mapped back to its name at a glance. The enum is packed so
 * that it uses the smallest integer type able to hold its values.
 */
enum h1m_state {
	H1_MSG_RQBEFORE     =  0, // request: leading LF, before start line
	H1_MSG_RQBEFORE_CR  =  1, // request: leading CRLF, before start line

	/* these ones define a request start line */
	H1_MSG_RQMETH       =  2, // parsing the Method
	H1_MSG_RQMETH_SP    =  3, // space(s) after the Method
	H1_MSG_RQURI        =  4, // parsing the Request URI
	H1_MSG_RQURI_SP     =  5, // space(s) after the Request URI
	H1_MSG_RQVER        =  6, // parsing the Request Version
	H1_MSG_RQLINE_END   =  7, // end of request line (CR or LF)

	H1_MSG_RPBEFORE     =  8, // response: leading LF, before start line
	H1_MSG_RPBEFORE_CR  =  9, // response: leading CRLF, before start line

	/* these ones define a response start line */
	H1_MSG_RPVER        = 10, // parsing the Response Version
	H1_MSG_RPVER_SP     = 11, // space(s) after the Response Version
	H1_MSG_RPCODE       = 12, // response code
	H1_MSG_RPCODE_SP    = 13, // space(s) after the response code
	H1_MSG_RPREASON     = 14, // response reason
	H1_MSG_RPLINE_END   = 15, // end of response line (CR or LF)

	/* common header processing */
	H1_MSG_HDR_FIRST    = 16, // waiting for first header or last CRLF (no LWS possible)
	H1_MSG_HDR_NAME     = 17, // parsing header name
	H1_MSG_HDR_COL      = 18, // parsing header colon
	H1_MSG_HDR_L1_SP    = 19, // parsing header LWS (SP|HT) before value
	H1_MSG_HDR_L1_LF    = 20, // parsing header LWS (LF) before value
	H1_MSG_HDR_L1_LWS   = 21, // checking whether it's a new header or an LWS
	H1_MSG_HDR_VAL      = 22, // parsing header value
	H1_MSG_HDR_L2_LF    = 23, // parsing header LWS (LF) inside/after value
	H1_MSG_HDR_L2_LWS   = 24, // checking whether it's a new header or an LWS

	H1_MSG_LAST_LF      = 25, // parsing last LF, last state for headers

	/* Body processing. */
	H1_MSG_CHUNK_SIZE   = 26, // parsing the chunk size (RFC7230 #4.1)
	H1_MSG_DATA         = 27, // skipping data chunk / content-length data
	H1_MSG_CHUNK_CRLF   = 28, // skipping CRLF after data chunk
	H1_MSG_TRAILERS     = 29, // trailers (post-data entity headers)

	/* we enter this state when we've received the end of the current message */
	H1_MSG_DONE         = 30, // message end received, waiting for resync or close
	H1_MSG_TUNNEL       = 31, // tunneled data after DONE
} __attribute__((packed));
| 79 | |
| 80 | |
/* HTTP/1 message flags (32 bit), for use in h1m->flags only. Each flag is a
 * distinct single bit so that they can be freely OR'ed together.
 */
#define H1_MF_NONE              0x00000000
#define H1_MF_CLEN              0x00000001 // content-length present
#define H1_MF_CHNK              0x00000002 // chunk present, exclusive with c-l
#define H1_MF_RESP              0x00000004 // this message is the response message
#define H1_MF_TOLOWER           0x00000008 // turn the header names to lower case
#define H1_MF_VER_11            0x00000010 // message indicates version 1.1 or above
#define H1_MF_CONN_CLO          0x00000020 // message contains "connection: close"
#define H1_MF_CONN_KAL          0x00000040 // message contains "connection: keep-alive"
#define H1_MF_CONN_UPG          0x00000080 // message contains "connection: upgrade"
#define H1_MF_XFER_LEN          0x00000100 // message xfer size can be determined
#define H1_MF_XFER_ENC          0x00000200 // transfer-encoding is present
#define H1_MF_NO_PHDR           0x00000400 // don't add pseudo-headers in the header list
#define H1_MF_HDRS_ONLY         0x00000800 // parse headers only
#define H1_MF_CLEAN_CONN_HDR    0x00001000 // skip close/keep-alive values of connection headers during parsing
#define H1_MF_METH_CONNECT      0x00002000 // Set for a response to a CONNECT request
#define H1_MF_METH_HEAD         0x00004000 // Set for a response to a HEAD request

/* Note: for a connection to be persistent, we need this for the request :
 *   - one of CLEN or CHNK
 *   - version 1.0 and KAL and not CLO
 *   - or version 1.1 and not CLO
 * For the response it's the same except that UPG must not appear either.
 * So in short, for a request it's (CLEN|CHNK) > 0 && !CLO && (VER_11 || KAL)
 * and for a response it's (CLEN|CHNK) > 0 && !(CLO|UPG) && (VER_11 || KAL)
 */
| 107 | |
| 108 | |
| 109 | /* basic HTTP/1 message state for use in parsers. The err_pos field is special, |
| 110 | * it is pre-set to a negative value (-1 or -2), and once non-negative it contains |
| 111 | * the relative position in the message of the first parse error. -2 is used to tell |
| 112 | * the parser that we want to block the invalid message. -1 is used to only perform |
| 113 | * a silent capture. |
| 114 | */ |
| 115 | struct h1m { |
| 116 | enum h1m_state state; // H1 message state (H1_MSG_*) |
| 117 | /* 24 bits available here */ |
| 118 | uint32_t flags; // H1 message flags (H1_MF_*) |
| 119 | uint64_t curr_len; // content-length or last chunk length |
| 120 | uint64_t body_len; // total known size of the body length |
| 121 | uint32_t next; // next byte to parse, relative to buffer's head |
| 122 | int err_pos; // position in the byte stream of the first error (H1 or H2) |
| 123 | int err_state; // state where the first error was met (H1 or H2) |
| 124 | }; |
| 125 | |
| 126 | /* basic H1 start line, describes either the request and the response */ |
| 127 | union h1_sl { /* useful start line pointers, relative to ->sol */ |
| 128 | struct { |
| 129 | struct ist m; /* METHOD */ |
| 130 | struct ist u; /* URI */ |
| 131 | struct ist v; /* VERSION */ |
| 132 | enum http_meth_t meth; /* method */ |
| 133 | } rq; /* request line : field, length */ |
| 134 | struct { |
| 135 | struct ist v; /* VERSION */ |
| 136 | struct ist c; /* CODE */ |
| 137 | struct ist r; /* REASON */ |
| 138 | uint16_t status; /* status code */ |
| 139 | } st; /* status line : field, length */ |
| 140 | }; |
| 141 | |
/* HTTP/1 parsing helpers. They are only declared here; their definitions are
 * not part of this header, so refer to the implementation for the exact
 * contract of each of them.
 */
int h1_headers_to_hdr_list(char *start, const char *stop,
                           struct http_hdr *hdr, unsigned int hdr_num,
                           struct h1m *h1m, union h1_sl *slp);
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max);

/* NOTE(review): the content-length and connection variants take a pointer to
 * the value while the transfer-encoding one takes it by value; presumably the
 * former may adjust the value in place -- confirm against the implementation.
 */
int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value);
void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value);
void h1_parse_connection_header(struct h1m *h1m, struct ist *value);
Willy Tarreau | afba57a | 2018-12-11 13:44:24 +0100 | [diff] [blame] | 150 | |
| 151 | /* for debugging, reports the HTTP/1 message state name */ |
| 152 | static inline const char *h1m_state_str(enum h1m_state msg_state) |
| 153 | { |
| 154 | switch (msg_state) { |
| 155 | case H1_MSG_RQBEFORE: return "MSG_RQBEFORE"; |
| 156 | case H1_MSG_RQBEFORE_CR: return "MSG_RQBEFORE_CR"; |
| 157 | case H1_MSG_RQMETH: return "MSG_RQMETH"; |
| 158 | case H1_MSG_RQMETH_SP: return "MSG_RQMETH_SP"; |
| 159 | case H1_MSG_RQURI: return "MSG_RQURI"; |
| 160 | case H1_MSG_RQURI_SP: return "MSG_RQURI_SP"; |
| 161 | case H1_MSG_RQVER: return "MSG_RQVER"; |
| 162 | case H1_MSG_RQLINE_END: return "MSG_RQLINE_END"; |
| 163 | case H1_MSG_RPBEFORE: return "MSG_RPBEFORE"; |
| 164 | case H1_MSG_RPBEFORE_CR: return "MSG_RPBEFORE_CR"; |
| 165 | case H1_MSG_RPVER: return "MSG_RPVER"; |
| 166 | case H1_MSG_RPVER_SP: return "MSG_RPVER_SP"; |
| 167 | case H1_MSG_RPCODE: return "MSG_RPCODE"; |
| 168 | case H1_MSG_RPCODE_SP: return "MSG_RPCODE_SP"; |
| 169 | case H1_MSG_RPREASON: return "MSG_RPREASON"; |
| 170 | case H1_MSG_RPLINE_END: return "MSG_RPLINE_END"; |
| 171 | case H1_MSG_HDR_FIRST: return "MSG_HDR_FIRST"; |
| 172 | case H1_MSG_HDR_NAME: return "MSG_HDR_NAME"; |
| 173 | case H1_MSG_HDR_COL: return "MSG_HDR_COL"; |
| 174 | case H1_MSG_HDR_L1_SP: return "MSG_HDR_L1_SP"; |
| 175 | case H1_MSG_HDR_L1_LF: return "MSG_HDR_L1_LF"; |
| 176 | case H1_MSG_HDR_L1_LWS: return "MSG_HDR_L1_LWS"; |
| 177 | case H1_MSG_HDR_VAL: return "MSG_HDR_VAL"; |
| 178 | case H1_MSG_HDR_L2_LF: return "MSG_HDR_L2_LF"; |
| 179 | case H1_MSG_HDR_L2_LWS: return "MSG_HDR_L2_LWS"; |
| 180 | case H1_MSG_LAST_LF: return "MSG_LAST_LF"; |
| 181 | case H1_MSG_CHUNK_SIZE: return "MSG_CHUNK_SIZE"; |
| 182 | case H1_MSG_DATA: return "MSG_DATA"; |
| 183 | case H1_MSG_CHUNK_CRLF: return "MSG_CHUNK_CRLF"; |
| 184 | case H1_MSG_TRAILERS: return "MSG_TRAILERS"; |
| 185 | case H1_MSG_DONE: return "MSG_DONE"; |
| 186 | case H1_MSG_TUNNEL: return "MSG_TUNNEL"; |
| 187 | default: return "MSG_??????"; |
| 188 | } |
| 189 | } |
| 190 | |
/* This function may be called only in H1_MSG_CHUNK_CRLF. It reads the CRLF or
 * a possible LF alone at the end of a chunk. <start> and <stop> are offsets
 * relative to the buffer's head; the caller must ensure that head+start points
 * to the first byte to parse and that <stop> designates the end of available
 * data.
 *
 * Return values :
 *   > 0 : number of bytes parsed (1 for LF, 2 for CRLF). The caller should
 *         include them into its next forwarding phase and may then switch to
 *         H1_MSG_CHUNK_SIZE.
 *   = 0 : not enough data available, nothing was changed.
 *   < 0 : parse error, the value is the position of the offending byte
 *         relative to <stop>.
 *
 * Note: this function is designed to parse wrapped CRLF at the end of the
 * buffer.
 */
static inline int h1_skip_chunk_crlf(const struct buffer *buf, int start, int stop)
{
	const char *ptr;
	int bytes = 1;

	/* nothing to look at, don't even peek into the buffer */
	if (stop <= start)
		return 0;

	ptr = b_peek(buf, start);

	/* NB: we'll check data availability at the end. It's not a
	 * problem because whatever we match first will be checked
	 * against the correct length.
	 */
	if (*ptr == '\r') {
		bytes++;
		ptr++;
		if (ptr >= b_wrap(buf))
			ptr = b_orig(buf);
	}

	if (bytes > stop - start)
		return 0;

	/* The error position is computed from offsets and not from pointers:
	 * the buffer may wrap between the offending byte and <stop>, in which
	 * case a pointer difference would be positive and would wrongly be
	 * taken for a success. The offending byte sits at <start + bytes - 1>,
	 * which is always strictly before <stop> here.
	 */
	if (*ptr != '\n')
		return (start + bytes - 1) - stop;

	return bytes;
}
| 228 | |
/* Parse the chunk size starting at buf + start and stopping before buf + stop.
 * The positions are relative to the buffer's head.
 * It returns the chunk size in <res> and the amount of bytes read this way :
 *   < 0 : error at this position relative to <stop>, <res> is set to zero
 *   = 0 : not enough bytes to read a complete chunk size, <res> is left
 *         untouched
 *   > 0 : number of bytes successfully read that the caller can skip
 * On success, the caller should adjust its msg->next to point to the first
 * byte of data after the chunk size, so that we know we can forward exactly
 * msg->next bytes, and msg->sol to contain the exact number of bytes forming
 * the chunk size. That way it is always possible to differentiate between the
 * start of the body and the start of the data. Note: this function is designed
 * to parse wrapped CRLF at the end of the buffer.
 */
static inline int h1_parse_chunk_size(const struct buffer *buf, int start, int stop, unsigned int *res)
{
	const char *ptr = b_peek(buf, start);
	const char *ptr_old = ptr;
	const char *end = b_wrap(buf);
	unsigned int chunk = 0;

	/* From now on <stop> counts the bytes left to parse and <start>
	 * remembers how many were available on entry, so that their difference
	 * gives the number of bytes consumed.
	 */
	stop -= start;
	start = stop;

	/* The chunk size is in the following form, though we are only
	 * interested in the size and CRLF :
	 *    1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
	 */
	while (1) {
		int c;

		if (!stop)
			return 0;
		c = hex2i(*ptr);
		if (c < 0) /* not a hex digit anymore */
			break;
		if (unlikely(++ptr >= end))
			ptr = b_orig(buf);
		if (unlikely(chunk & 0xF8000000)) /* integer overflow will occur if result >= 2GB */
			goto error;
		chunk = (chunk << 4) + c;
		stop--;
	}

	/* empty size not allowed */
	if (unlikely(ptr == ptr_old))
		goto error;

	/* skip optional spaces/tabs after the size */
	while (HTTP_IS_SPHT(*ptr)) {
		if (++ptr >= end)
			ptr = b_orig(buf);
		if (--stop == 0)
			return 0;
	}

	/* Up to there, we know that at least one byte is present at *ptr. Check
	 * for the end of chunk size.
	 */
	while (1) {
		if (likely(HTTP_IS_CRLF(*ptr))) {
			/* we now have a CR or an LF at ptr */
			if (likely(*ptr == '\r')) {
				if (++ptr >= end)
					ptr = b_orig(buf);
				if (--stop == 0)
					return 0;
			}

			if (*ptr != '\n')
				goto error;
			if (++ptr >= end)
				ptr = b_orig(buf);
			--stop;
			/* done */
			break;
		}
		else if (likely(*ptr == ';')) {
			/* chunk extension, ends at next CRLF */
			if (++ptr >= end)
				ptr = b_orig(buf);
			if (--stop == 0)
				return 0;

			while (!HTTP_IS_CRLF(*ptr)) {
				if (++ptr >= end)
					ptr = b_orig(buf);
				if (--stop == 0)
					return 0;
			}
			/* we have a CRLF now, loop above */
			continue;
		}
		else
			goto error;
	}

	/* OK we found our CRLF and now <ptr> points to the next byte, which may
	 * or may not be present. Let's return the number of bytes parsed.
	 */
	*res = chunk;
	return start - stop;
 error:
	*res = 0; // just to stop gcc's -Wuninitialized warning :-(
	return -stop;
}
| 332 | |
| 333 | /* initializes an H1 message for a request */ |
| 334 | static inline struct h1m *h1m_init_req(struct h1m *h1m) |
| 335 | { |
| 336 | h1m->state = H1_MSG_RQBEFORE; |
| 337 | h1m->next = 0; |
| 338 | h1m->flags = H1_MF_NONE; |
| 339 | h1m->curr_len = 0; |
| 340 | h1m->body_len = 0; |
| 341 | h1m->err_pos = -2; |
| 342 | h1m->err_state = 0; |
| 343 | return h1m; |
| 344 | } |
| 345 | |
| 346 | /* initializes an H1 message for a response */ |
| 347 | static inline struct h1m *h1m_init_res(struct h1m *h1m) |
| 348 | { |
| 349 | h1m->state = H1_MSG_RPBEFORE; |
| 350 | h1m->next = 0; |
| 351 | h1m->flags = H1_MF_RESP; |
| 352 | h1m->curr_len = 0; |
| 353 | h1m->body_len = 0; |
| 354 | h1m->err_pos = -2; |
| 355 | h1m->err_state = 0; |
| 356 | return h1m; |
| 357 | } |
| 358 | |
| 359 | #endif /* _COMMON_H1_H */ |