Willy Tarreau | 0da5b3b | 2017-09-21 09:30:46 +0200 | [diff] [blame] | 1 | /* |
| 2 | * HTTP/1 protocol analyzer |
| 3 | * |
| 4 | * Copyright 2000-2017 Willy Tarreau <w@1wt.eu> |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | */ |
| 12 | |
| 13 | #include <common/config.h> |
| 14 | |
| 15 | #include <proto/h1.h> |
| 16 | |
| 17 | /* It is about twice as fast on recent architectures to lookup a byte in a |
| 18 | * table than to perform a boolean AND or OR between two tests. Refer to |
| 19 | * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is |
| 20 | * neither a separator nor a CTL char. An http ver_token is any ASCII which can |
| 21 | * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any |
| 22 | * digit. Note: please do not overwrite values in assignment since gcc-2.95 |
| 23 | * will not handle them correctly. It's worth noting that chars 128..255 are |
| 24 | * nothing, not even control chars. |
| 25 | */ |
| 26 | const unsigned char h1_char_classes[256] = { |
| 27 | [ 0] = H1_FLG_CTL, |
| 28 | [ 1] = H1_FLG_CTL, |
| 29 | [ 2] = H1_FLG_CTL, |
| 30 | [ 3] = H1_FLG_CTL, |
| 31 | [ 4] = H1_FLG_CTL, |
| 32 | [ 5] = H1_FLG_CTL, |
| 33 | [ 6] = H1_FLG_CTL, |
| 34 | [ 7] = H1_FLG_CTL, |
| 35 | [ 8] = H1_FLG_CTL, |
| 36 | [ 9] = H1_FLG_SPHT | H1_FLG_LWS | H1_FLG_SEP | H1_FLG_CTL, |
| 37 | [ 10] = H1_FLG_CRLF | H1_FLG_LWS | H1_FLG_CTL, |
| 38 | [ 11] = H1_FLG_CTL, |
| 39 | [ 12] = H1_FLG_CTL, |
| 40 | [ 13] = H1_FLG_CRLF | H1_FLG_LWS | H1_FLG_CTL, |
| 41 | [ 14] = H1_FLG_CTL, |
| 42 | [ 15] = H1_FLG_CTL, |
| 43 | [ 16] = H1_FLG_CTL, |
| 44 | [ 17] = H1_FLG_CTL, |
| 45 | [ 18] = H1_FLG_CTL, |
| 46 | [ 19] = H1_FLG_CTL, |
| 47 | [ 20] = H1_FLG_CTL, |
| 48 | [ 21] = H1_FLG_CTL, |
| 49 | [ 22] = H1_FLG_CTL, |
| 50 | [ 23] = H1_FLG_CTL, |
| 51 | [ 24] = H1_FLG_CTL, |
| 52 | [ 25] = H1_FLG_CTL, |
| 53 | [ 26] = H1_FLG_CTL, |
| 54 | [ 27] = H1_FLG_CTL, |
| 55 | [ 28] = H1_FLG_CTL, |
| 56 | [ 29] = H1_FLG_CTL, |
| 57 | [ 30] = H1_FLG_CTL, |
| 58 | [ 31] = H1_FLG_CTL, |
| 59 | [' '] = H1_FLG_SPHT | H1_FLG_LWS | H1_FLG_SEP, |
| 60 | ['!'] = H1_FLG_TOK, |
| 61 | ['"'] = H1_FLG_SEP, |
| 62 | ['#'] = H1_FLG_TOK, |
| 63 | ['$'] = H1_FLG_TOK, |
| 64 | ['%'] = H1_FLG_TOK, |
| 65 | ['&'] = H1_FLG_TOK, |
| 66 | [ 39] = H1_FLG_TOK, |
| 67 | ['('] = H1_FLG_SEP, |
| 68 | [')'] = H1_FLG_SEP, |
| 69 | ['*'] = H1_FLG_TOK, |
| 70 | ['+'] = H1_FLG_TOK, |
| 71 | [','] = H1_FLG_SEP, |
| 72 | ['-'] = H1_FLG_TOK, |
| 73 | ['.'] = H1_FLG_TOK | H1_FLG_VER, |
| 74 | ['/'] = H1_FLG_SEP | H1_FLG_VER, |
| 75 | ['0'] = H1_FLG_TOK | H1_FLG_VER, |
| 76 | ['1'] = H1_FLG_TOK | H1_FLG_VER, |
| 77 | ['2'] = H1_FLG_TOK | H1_FLG_VER, |
| 78 | ['3'] = H1_FLG_TOK | H1_FLG_VER, |
| 79 | ['4'] = H1_FLG_TOK | H1_FLG_VER, |
| 80 | ['5'] = H1_FLG_TOK | H1_FLG_VER, |
| 81 | ['6'] = H1_FLG_TOK | H1_FLG_VER, |
| 82 | ['7'] = H1_FLG_TOK | H1_FLG_VER, |
| 83 | ['8'] = H1_FLG_TOK | H1_FLG_VER, |
| 84 | ['9'] = H1_FLG_TOK | H1_FLG_VER, |
| 85 | [':'] = H1_FLG_SEP, |
| 86 | [';'] = H1_FLG_SEP, |
| 87 | ['<'] = H1_FLG_SEP, |
| 88 | ['='] = H1_FLG_SEP, |
| 89 | ['>'] = H1_FLG_SEP, |
| 90 | ['?'] = H1_FLG_SEP, |
| 91 | ['@'] = H1_FLG_SEP, |
| 92 | ['A'] = H1_FLG_TOK, |
| 93 | ['B'] = H1_FLG_TOK, |
| 94 | ['C'] = H1_FLG_TOK, |
| 95 | ['D'] = H1_FLG_TOK, |
| 96 | ['E'] = H1_FLG_TOK, |
| 97 | ['F'] = H1_FLG_TOK, |
| 98 | ['G'] = H1_FLG_TOK, |
| 99 | ['H'] = H1_FLG_TOK | H1_FLG_VER, |
| 100 | ['I'] = H1_FLG_TOK, |
| 101 | ['J'] = H1_FLG_TOK, |
| 102 | ['K'] = H1_FLG_TOK, |
| 103 | ['L'] = H1_FLG_TOK, |
| 104 | ['M'] = H1_FLG_TOK, |
| 105 | ['N'] = H1_FLG_TOK, |
| 106 | ['O'] = H1_FLG_TOK, |
| 107 | ['P'] = H1_FLG_TOK | H1_FLG_VER, |
| 108 | ['Q'] = H1_FLG_TOK, |
| 109 | ['R'] = H1_FLG_TOK | H1_FLG_VER, |
| 110 | ['S'] = H1_FLG_TOK | H1_FLG_VER, |
| 111 | ['T'] = H1_FLG_TOK | H1_FLG_VER, |
| 112 | ['U'] = H1_FLG_TOK, |
| 113 | ['V'] = H1_FLG_TOK, |
| 114 | ['W'] = H1_FLG_TOK, |
| 115 | ['X'] = H1_FLG_TOK, |
| 116 | ['Y'] = H1_FLG_TOK, |
| 117 | ['Z'] = H1_FLG_TOK, |
| 118 | ['['] = H1_FLG_SEP, |
| 119 | [ 92] = H1_FLG_SEP, |
| 120 | [']'] = H1_FLG_SEP, |
| 121 | ['^'] = H1_FLG_TOK, |
| 122 | ['_'] = H1_FLG_TOK, |
| 123 | ['`'] = H1_FLG_TOK, |
| 124 | ['a'] = H1_FLG_TOK, |
| 125 | ['b'] = H1_FLG_TOK, |
| 126 | ['c'] = H1_FLG_TOK, |
| 127 | ['d'] = H1_FLG_TOK, |
| 128 | ['e'] = H1_FLG_TOK, |
| 129 | ['f'] = H1_FLG_TOK, |
| 130 | ['g'] = H1_FLG_TOK, |
| 131 | ['h'] = H1_FLG_TOK, |
| 132 | ['i'] = H1_FLG_TOK, |
| 133 | ['j'] = H1_FLG_TOK, |
| 134 | ['k'] = H1_FLG_TOK, |
| 135 | ['l'] = H1_FLG_TOK, |
| 136 | ['m'] = H1_FLG_TOK, |
| 137 | ['n'] = H1_FLG_TOK, |
| 138 | ['o'] = H1_FLG_TOK, |
| 139 | ['p'] = H1_FLG_TOK, |
| 140 | ['q'] = H1_FLG_TOK, |
| 141 | ['r'] = H1_FLG_TOK, |
| 142 | ['s'] = H1_FLG_TOK, |
| 143 | ['t'] = H1_FLG_TOK, |
| 144 | ['u'] = H1_FLG_TOK, |
| 145 | ['v'] = H1_FLG_TOK, |
| 146 | ['w'] = H1_FLG_TOK, |
| 147 | ['x'] = H1_FLG_TOK, |
| 148 | ['y'] = H1_FLG_TOK, |
| 149 | ['z'] = H1_FLG_TOK, |
| 150 | ['{'] = H1_FLG_SEP, |
| 151 | ['|'] = H1_FLG_TOK, |
| 152 | ['}'] = H1_FLG_SEP, |
| 153 | ['~'] = H1_FLG_TOK, |
| 154 | [127] = H1_FLG_CTL, |
| 155 | }; |
Willy Tarreau | db4893d | 2017-09-21 08:40:02 +0200 | [diff] [blame^] | 156 | |
| 157 | |
| 158 | /* This function skips trailers in the buffer associated with HTTP message |
| 159 | * <msg>. The first visited position is msg->next. If the end of the trailers is |
| 160 | * found, the function returns >0. So, the caller can automatically schedul it |
| 161 | * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough |
| 162 | * data are available, the function does not change anything except maybe |
| 163 | * msg->sol if it could parse some lines, and returns zero. If a parse error |
| 164 | * is encountered, the function returns < 0 and does not change anything except |
| 165 | * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS |
| 166 | * state before calling this function, which implies that all non-trailers data |
| 167 | * have already been scheduled for forwarding, and that msg->next exactly |
| 168 | * matches the length of trailers already parsed and not forwarded. It is also |
| 169 | * important to note that this function is designed to be able to parse wrapped |
| 170 | * headers at end of buffer. |
| 171 | */ |
| 172 | int http_forward_trailers(struct http_msg *msg) |
| 173 | { |
| 174 | const struct buffer *buf = msg->chn->buf; |
| 175 | |
| 176 | /* we have msg->next which points to next line. Look for CRLF. But |
| 177 | * first, we reset msg->sol */ |
| 178 | msg->sol = 0; |
| 179 | while (1) { |
| 180 | const char *p1 = NULL, *p2 = NULL; |
| 181 | const char *start = b_ptr(buf, msg->next + msg->sol); |
| 182 | const char *stop = bi_end(buf); |
| 183 | const char *ptr = start; |
| 184 | int bytes = 0; |
| 185 | |
| 186 | /* scan current line and stop at LF or CRLF */ |
| 187 | while (1) { |
| 188 | if (ptr == stop) |
| 189 | return 0; |
| 190 | |
| 191 | if (*ptr == '\n') { |
| 192 | if (!p1) |
| 193 | p1 = ptr; |
| 194 | p2 = ptr; |
| 195 | break; |
| 196 | } |
| 197 | |
| 198 | if (*ptr == '\r') { |
| 199 | if (p1) { |
| 200 | msg->err_pos = buffer_count(buf, buf->p, ptr); |
| 201 | return -1; |
| 202 | } |
| 203 | p1 = ptr; |
| 204 | } |
| 205 | |
| 206 | ptr++; |
| 207 | if (ptr >= buf->data + buf->size) |
| 208 | ptr = buf->data; |
| 209 | } |
| 210 | |
| 211 | /* after LF; point to beginning of next line */ |
| 212 | p2++; |
| 213 | if (p2 >= buf->data + buf->size) |
| 214 | p2 = buf->data; |
| 215 | |
| 216 | bytes = p2 - start; |
| 217 | if (bytes < 0) |
| 218 | bytes += buf->size; |
| 219 | msg->sol += bytes; |
| 220 | |
| 221 | /* LF/CRLF at beginning of line => end of trailers at p2. |
| 222 | * Everything was scheduled for forwarding, there's nothing left |
| 223 | * from this message. */ |
| 224 | if (p1 == start) |
| 225 | return 1; |
| 226 | |
| 227 | /* OK, next line then */ |
| 228 | } |
| 229 | } |