Willy Tarreau | 0da5b3b | 2017-09-21 09:30:46 +0200 | [diff] [blame] | 1 | /* |
| 2 | * include/proto/h1.h |
| 3 | * This file contains HTTP/1 protocol definitions. |
| 4 | * |
| 5 | * Copyright (C) 2000-2017 Willy Tarreau - w@1wt.eu |
| 6 | * |
| 7 | * This library is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU Lesser General Public |
| 9 | * License as published by the Free Software Foundation, version 2.1 |
| 10 | * exclusively. |
| 11 | * |
| 12 | * This library is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | * Lesser General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU Lesser General Public |
| 18 | * License along with this library; if not, write to the Free Software |
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 | */ |
| 21 | |
| 22 | #ifndef _PROTO_H1_H |
| 23 | #define _PROTO_H1_H |
| 24 | |
Willy Tarreau | db4893d | 2017-09-21 08:40:02 +0200 | [diff] [blame] | 25 | #include <common/buffer.h> |
Willy Tarreau | 0da5b3b | 2017-09-21 09:30:46 +0200 | [diff] [blame] | 26 | #include <common/compiler.h> |
| 27 | #include <common/config.h> |
Willy Tarreau | 35b51c6 | 2018-09-10 15:38:55 +0200 | [diff] [blame] | 28 | #include <common/http.h> |
Willy Tarreau | 794f9af | 2017-07-26 09:07:47 +0200 | [diff] [blame] | 29 | #include <common/http-hdr.h> |
Willy Tarreau | db4893d | 2017-09-21 08:40:02 +0200 | [diff] [blame] | 30 | #include <common/standard.h> |
Willy Tarreau | 0da5b3b | 2017-09-21 09:30:46 +0200 | [diff] [blame] | 31 | #include <types/h1.h> |
Willy Tarreau | 8740c8b | 2017-09-21 10:22:25 +0200 | [diff] [blame] | 32 | #include <proto/hdr_idx.h> |
Willy Tarreau | 0da5b3b | 2017-09-21 09:30:46 +0200 | [diff] [blame] | 33 | |
/* Prototypes of the HTTP/1 parsing helpers. Only their declarations live in
 * this header; the definitions are provided elsewhere.
 */

/* request line and status line parsers (same signature for both) */
const char *http_parse_reqline(struct http_msg *msg, enum h1_state state,
                               const char *ptr, const char *end,
                               unsigned int *ret_ptr, enum h1_state *ret_state);
const char *http_parse_stsline(struct http_msg *msg, enum h1_state state,
                               const char *ptr, const char *end,
                               unsigned int *ret_ptr, enum h1_state *ret_state);

/* message-level helpers working on a struct http_msg */
void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx);
int http_forward_trailers(struct http_msg *msg);

/* helpers working on raw header/trailer bytes */
int h1_headers_to_hdr_list(char *start, const char *stop,
                           struct http_hdr *hdr, unsigned int hdr_num,
                           struct h1m *h1m);
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max);
Willy Tarreau | 0da5b3b | 2017-09-21 09:30:46 +0200 | [diff] [blame] | 46 | |
/* Macros used in the HTTP/1 parser, to check for the expected presence of
 * certain bytes (e.g. LF) or to skip to the next byte and yield in case of
 * failure.
 */
| 50 | |
| 51 | |
/* Checks that the byte at <ptr> is an LF. When it is, execution simply
 * continues after the macro. When it is not, <where> is stored into <state>
 * and control jumps to label <bad>. <ptr> itself is never modified.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)                          \
	do {                                                            \
		if (likely(*(ptr) == '\n')) {                           \
			/* expected byte found, nothing to do */        \
		} else {                                                \
			state = (where);                                \
			goto bad;                                       \
		}                                                       \
	} while (0)
| 62 | |
/* Advances <ptr> by one byte. If the end of the buffer <end> has been reached,
 * <where> is stored into <state> and control goes to label <stop>; otherwise
 * control goes to label <more>. In both cases the macro never falls through.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)      \
	do {                                                            \
		if (unlikely(++(ptr) >= (end))) {                       \
			state = (where);                                \
			goto stop;                                      \
		}                                                       \
		goto more;                                              \
	} while (0)
| 76 | |
| 77 | /* for debugging, reports the HTTP/1 message state name */ |
| 78 | static inline const char *h1_msg_state_str(enum h1_state msg_state) |
| 79 | { |
| 80 | switch (msg_state) { |
| 81 | case HTTP_MSG_RQBEFORE: return "MSG_RQBEFORE"; |
| 82 | case HTTP_MSG_RQBEFORE_CR: return "MSG_RQBEFORE_CR"; |
| 83 | case HTTP_MSG_RQMETH: return "MSG_RQMETH"; |
| 84 | case HTTP_MSG_RQMETH_SP: return "MSG_RQMETH_SP"; |
| 85 | case HTTP_MSG_RQURI: return "MSG_RQURI"; |
| 86 | case HTTP_MSG_RQURI_SP: return "MSG_RQURI_SP"; |
| 87 | case HTTP_MSG_RQVER: return "MSG_RQVER"; |
| 88 | case HTTP_MSG_RQLINE_END: return "MSG_RQLINE_END"; |
| 89 | case HTTP_MSG_RPBEFORE: return "MSG_RPBEFORE"; |
| 90 | case HTTP_MSG_RPBEFORE_CR: return "MSG_RPBEFORE_CR"; |
| 91 | case HTTP_MSG_RPVER: return "MSG_RPVER"; |
| 92 | case HTTP_MSG_RPVER_SP: return "MSG_RPVER_SP"; |
| 93 | case HTTP_MSG_RPCODE: return "MSG_RPCODE"; |
| 94 | case HTTP_MSG_RPCODE_SP: return "MSG_RPCODE_SP"; |
| 95 | case HTTP_MSG_RPREASON: return "MSG_RPREASON"; |
| 96 | case HTTP_MSG_RPLINE_END: return "MSG_RPLINE_END"; |
| 97 | case HTTP_MSG_HDR_FIRST: return "MSG_HDR_FIRST"; |
| 98 | case HTTP_MSG_HDR_NAME: return "MSG_HDR_NAME"; |
| 99 | case HTTP_MSG_HDR_COL: return "MSG_HDR_COL"; |
| 100 | case HTTP_MSG_HDR_L1_SP: return "MSG_HDR_L1_SP"; |
| 101 | case HTTP_MSG_HDR_L1_LF: return "MSG_HDR_L1_LF"; |
| 102 | case HTTP_MSG_HDR_L1_LWS: return "MSG_HDR_L1_LWS"; |
| 103 | case HTTP_MSG_HDR_VAL: return "MSG_HDR_VAL"; |
| 104 | case HTTP_MSG_HDR_L2_LF: return "MSG_HDR_L2_LF"; |
| 105 | case HTTP_MSG_HDR_L2_LWS: return "MSG_HDR_L2_LWS"; |
| 106 | case HTTP_MSG_LAST_LF: return "MSG_LAST_LF"; |
| 107 | case HTTP_MSG_ERROR: return "MSG_ERROR"; |
| 108 | case HTTP_MSG_BODY: return "MSG_BODY"; |
| 109 | case HTTP_MSG_100_SENT: return "MSG_100_SENT"; |
| 110 | case HTTP_MSG_CHUNK_SIZE: return "MSG_CHUNK_SIZE"; |
| 111 | case HTTP_MSG_DATA: return "MSG_DATA"; |
| 112 | case HTTP_MSG_CHUNK_CRLF: return "MSG_CHUNK_CRLF"; |
| 113 | case HTTP_MSG_TRAILERS: return "MSG_TRAILERS"; |
| 114 | case HTTP_MSG_ENDING: return "MSG_ENDING"; |
| 115 | case HTTP_MSG_DONE: return "MSG_DONE"; |
| 116 | case HTTP_MSG_CLOSING: return "MSG_CLOSING"; |
| 117 | case HTTP_MSG_CLOSED: return "MSG_CLOSED"; |
| 118 | case HTTP_MSG_TUNNEL: return "MSG_TUNNEL"; |
| 119 | default: return "MSG_??????"; |
| 120 | } |
| 121 | } |
| 122 | |
/* This function may be called only in HTTP_MSG_CHUNK_CRLF. It reads the CRLF
 * or a possible lone LF found at the end of a chunk. <start> and <stop> are
 * offsets relative to the buffer's head: <start> designates the first byte to
 * parse and <stop> the first byte which must not be read.
 *
 * Return value :
 *   > 0 : number of bytes parsed (1 for LF, 2 for CRLF), so that the caller
 *         can skip them and set msg_state to HTTP_MSG_CHUNK_SIZE. The caller
 *         should adjust msg->next in order to include this part into the next
 *         forwarding phase.
 *   = 0 : not enough data available; nothing was changed.
 *   < 0 : error, the value is the position of the offending byte relative to
 *         <stop>.
 *
 * Note: this function is designed to parse a CRLF which wraps at the end of
 * the buffer.
 */
static inline int h1_skip_chunk_crlf(const struct buffer *buf, int start, int stop)
{
	const char *ptr;
	int bytes = 1;

	/* nothing to look at: do not even read the byte at <start> */
	if (stop <= start)
		return 0;

	ptr = b_peek(buf, start);

	/* An optional CR may precede the LF. The availability of the second
	 * byte is verified right after, before that byte is read.
	 */
	if (*ptr == '\r') {
		bytes++;
		ptr++;
		if (ptr >= b_wrap(buf))
			ptr = b_orig(buf);
	}

	if (bytes > stop - start)
		return 0;

	if (*ptr != '\n') {
		/* The offending byte is the last of the <bytes> examined ones,
		 * i.e. at offset <start> + <bytes> - 1. Its position relative
		 * to <stop> is computed from offsets rather than by
		 * subtracting pointers, because the byte at <stop> may live on
		 * the other side of the wrapping point, in which case a pointer
		 * difference would have the wrong sign. Since <bytes> is not
		 * larger than <stop> - <start> here, the result is always
		 * strictly negative.
		 */
		return start + bytes - 1 - stop;
	}

	return bytes;
}
| 157 | |
/* Parses a chunk size line starting at offset <start> and stopping before
 * offset <stop>, both being relative to the buffer's head. The expected form
 * is the following, though only the size and the line ending matter here :
 *
 *     1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
 *
 * A lone LF is accepted in place of CRLF. The chunk size is returned in <res>
 * and the function's return value tells how many bytes were read :
 *   < 0 : error at this position relative to <stop> (<res> is set to zero)
 *   = 0 : not enough bytes to read a complete chunk size (<res> is untouched)
 *   > 0 : number of bytes successfully read that the caller can skip
 *
 * On success, the caller should adjust its msg->next to point to the first
 * byte of data after the chunk size, so that exactly msg->next bytes can be
 * forwarded, and msg->sol to contain the exact number of bytes forming the
 * chunk size. That way it is always possible to differentiate between the
 * start of the body and the start of the data. Note: this function is designed
 * to parse a chunk size line which wraps at the end of the buffer.
 */
static inline int h1_parse_chunk_size(const struct buffer *buf, int start, int stop, unsigned int *res)
{
	const char *cur = b_peek(buf, start);
	const char *const first = cur;
	const char *const wrap = b_wrap(buf);
	const int avail = stop - start;  /* bytes offered to the parser */
	int left = avail;                /* bytes not consumed yet */
	unsigned int size = 0;

	/* 1*HEXDIGIT */
	for (;;) {
		int digit;

		if (left == 0)
			return 0;

		digit = hex2i(*cur);
		if (digit < 0) /* not a hex digit anymore */
			break;

		cur++;
		if (unlikely(cur >= wrap))
			cur = b_orig(buf);

		/* shifting by 4 more bits would overflow past 2GB */
		if (unlikely(size & 0xF8000000))
			goto error;

		size = (size << 4) + digit;
		left--;
	}

	/* an empty size is not allowed */
	if (unlikely(cur == first))
		goto error;

	/* *WSP */
	while (HTTP_IS_SPHT(*cur)) {
		cur++;
		if (cur >= wrap)
			cur = b_orig(buf);

		left--;
		if (left == 0)
			return 0;
	}

	/* At least one byte is present at *cur. Anything which is not a line
	 * ending must be a chunk extension, which is skipped up to the next
	 * CR or LF.
	 */
	if (unlikely(!HTTP_IS_CRLF(*cur))) {
		if (unlikely(*cur != ';'))
			goto error;

		/* first round skips the ';', next ones the extension bytes */
		do {
			cur++;
			if (cur >= wrap)
				cur = b_orig(buf);

			left--;
			if (left == 0)
				return 0;
		} while (!HTTP_IS_CRLF(*cur));
	}

	/* we now have a CR or an LF at *cur */
	if (likely(*cur == '\r')) {
		cur++;
		if (cur >= wrap)
			cur = b_orig(buf);

		left--;
		if (left == 0)
			return 0;
	}

	if (*cur != '\n')
		goto error;

	/* The LF is consumed as well. The byte following it may or may not be
	 * present, so only the count is updated.
	 */
	left--;

	*res = size;
	return avail - left;

 error:
	*res = 0; /* just to stop gcc's -Wuninitialized warning */
	return -left;
}
| 261 | |
Willy Tarreau | 4093a4d | 2017-09-21 11:46:43 +0200 | [diff] [blame] | 262 | /* initializes an H1 message */ |
| 263 | static inline struct h1m *h1m_init(struct h1m *h1m) |
| 264 | { |
| 265 | h1m->state = HTTP_MSG_RQBEFORE; |
Willy Tarreau | d22e83a | 2017-10-31 08:02:24 +0100 | [diff] [blame] | 266 | h1m->status = 0; |
Willy Tarreau | 4093a4d | 2017-09-21 11:46:43 +0200 | [diff] [blame] | 267 | h1m->flags = 0; |
| 268 | h1m->curr_len = 0; |
| 269 | h1m->body_len = 0; |
| 270 | h1m->err_pos = 0; |
| 271 | h1m->err_state = 0; |
| 272 | return h1m; |
| 273 | } |
Willy Tarreau | 0da5b3b | 2017-09-21 09:30:46 +0200 | [diff] [blame] | 274 | |
| 275 | #endif /* _PROTO_H1_H */ |