Willy Tarreau | 841bc7d | 2018-12-10 15:26:35 +0100 | [diff] [blame] | 1 | /* |
| 2 | * HPACK encoding table generator. It produces a stream of |
| 3 | * <len><idx><name> and a table pointing to the first <len> of each series. |
| 4 | * The end of the stream is marked by <len>=0. In parallel, a length-indexed |
| 5 | * table is built to access the first entry of each length. |
| 6 | * |
| 7 | * Build like this : |
Willy Tarreau | fc80e30 | 2020-05-27 11:05:33 +0200 | [diff] [blame] | 8 | * gcc -I../../include -o gen-enc gen-enc.c |
Willy Tarreau | 841bc7d | 2018-12-10 15:26:35 +0100 | [diff] [blame] | 9 | */ |
Willy Tarreau | 318adf4 | 2020-05-22 12:09:16 +0200 | [diff] [blame] | 10 | #define HPACK_STANDALONE |
| 11 | |
Willy Tarreau | 841bc7d | 2018-12-10 15:26:35 +0100 | [diff] [blame] | 12 | #include <ctype.h> |
Willy Tarreau | a1bd1fa | 2019-03-29 17:26:33 +0100 | [diff] [blame] | 13 | #include <inttypes.h> |
Willy Tarreau | 841bc7d | 2018-12-10 15:26:35 +0100 | [diff] [blame] | 14 | #include <stdio.h> |
| 15 | #include <stdlib.h> |
| 16 | #include <unistd.h> |
Willy Tarreau | eb6f701 | 2020-05-27 16:21:26 +0200 | [diff] [blame] | 17 | #include <import/ist.h> |
Willy Tarreau | be327fa | 2020-06-03 09:09:57 +0200 | [diff] [blame] | 18 | #include <haproxy/hpack-tbl-t.h> |
Willy Tarreau | 841bc7d | 2018-12-10 15:26:35 +0100 | [diff] [blame] | 19 | #include "../../src/hpack-tbl.c" |
| 20 | |
| 21 | struct idxhdr { |
| 22 | const char *ptr; |
| 23 | int len; |
| 24 | int idx; |
| 25 | }; |
| 26 | |
| 27 | struct idxhdr idxhdr[HPACK_SHT_SIZE]; |
| 28 | static int positions[32]; |
| 29 | static char known_hdr[1024]; |
| 30 | |
| 31 | /* preferred ordering of headers of similar size. Those not mentioned will be |
| 32 | * less prioritized. |
| 33 | */ |
| 34 | const struct { |
| 35 | const char *name; |
| 36 | const int rank; |
| 37 | } ranks[] = { |
| 38 | { .name = "age", .rank = 1 }, |
| 39 | { .name = "via", .rank = 2 }, |
| 40 | |
| 41 | { .name = "date", .rank = 1 }, |
| 42 | { .name = "host", .rank = 2 }, |
| 43 | |
| 44 | { .name = "accept", .rank = 1 }, |
| 45 | { .name = "server", .rank = 2 }, |
| 46 | { .name = "cookie", .rank = 3 }, |
| 47 | |
| 48 | { .name = "referer", .rank = 1 }, |
| 49 | { .name = "expires", .rank = 2 }, |
| 50 | |
| 51 | { .name = "location", .rank = 1 }, |
| 52 | |
| 53 | { .name = "user-agent", .rank = 1 }, |
| 54 | { .name = "set-cookie", .rank = 2 }, |
| 55 | |
| 56 | { .name = "content-type", .rank = 1 }, |
| 57 | |
| 58 | { .name = "cache-control", .rank = 1 }, |
| 59 | { .name = "last-modified", .rank = 2 }, |
| 60 | { .name = "accept-ranges", .rank = 3 }, |
| 61 | { .name = "if-none-match", .rank = 4 }, |
| 62 | |
| 63 | { .name = "content-length", .rank = 1 }, |
| 64 | |
| 65 | { .name = "accept-encoding", .rank = 1 }, |
| 66 | { .name = "accept-language", .rank = 2 }, |
| 67 | |
| 68 | { .name = "content-encoding", .rank = 1 }, |
| 69 | |
| 70 | { .name = "transfer-encoding", .rank = 1 }, |
| 71 | { .name = "if-modified-since", .rank = 2 }, |
| 72 | |
| 73 | { .name = "content-disposition", .rank = 1 }, |
| 74 | }; |
| 75 | |
| 76 | /* returns the rank of header <name> or 255 if not found */ |
| 77 | int get_hdr_rank(const char *name) |
| 78 | { |
| 79 | int i; |
| 80 | |
| 81 | for (i = 0; i < sizeof(ranks) / sizeof(ranks[0]); i++) { |
| 82 | if (strcmp(ranks[i].name, name) == 0) |
| 83 | return ranks[i].rank; |
| 84 | } |
| 85 | return 255; |
| 86 | } |
| 87 | |
| 88 | /* sorts first on the length, second on the name, and third on the idx, so that |
| 89 | * headers which appear with multiple occurrences are always met first. |
| 90 | */ |
| 91 | int cmp_idx(const void *l, const void *r) |
| 92 | { |
| 93 | const struct idxhdr *a = l, *b = r; |
| 94 | int ranka, rankb; |
| 95 | int ret; |
| 96 | |
| 97 | if (a->len < b->len) |
| 98 | return -1; |
| 99 | else if (a->len > b->len) |
| 100 | return 1; |
| 101 | |
| 102 | ranka = get_hdr_rank(a->ptr); |
| 103 | rankb = get_hdr_rank(b->ptr); |
| 104 | |
| 105 | if (ranka < rankb) |
| 106 | return -1; |
| 107 | else if (ranka > rankb) |
| 108 | return 1; |
| 109 | |
| 110 | /* same rank, check for duplicates and use index */ |
| 111 | ret = strcmp(a->ptr, b->ptr); |
| 112 | if (ret != 0) |
| 113 | return ret; |
| 114 | |
| 115 | if (a->idx < b->idx) |
| 116 | return -1; |
| 117 | else if (a->idx > b->idx) |
| 118 | return 1; |
| 119 | else |
| 120 | return 0; |
| 121 | } |
| 122 | |
| 123 | int main(int argc, char **argv) |
| 124 | { |
| 125 | int pos; |
| 126 | int prev; |
| 127 | int len; |
| 128 | int i; |
| 129 | |
| 130 | for (len = 0; len < 32; len++) |
| 131 | positions[len] = -1; |
| 132 | |
| 133 | for (i = 0; i < HPACK_SHT_SIZE; i++) { |
| 134 | idxhdr[i].ptr = hpack_sht[i].n.ptr; |
| 135 | idxhdr[i].len = hpack_sht[i].n.len; |
| 136 | idxhdr[i].idx = i; |
| 137 | } |
| 138 | |
| 139 | /* sorts all header names by length first, then by name, and finally by |
| 140 | * idx so that we meet smaller headers first, that within a length they |
| 141 | * appear in frequency order, and that multiple occurrences appear with |
| 142 | * the smallest index first. |
| 143 | */ |
| 144 | qsort(&idxhdr[1], HPACK_SHT_SIZE - 1, sizeof(idxhdr[0]), cmp_idx); |
| 145 | |
| 146 | pos = 0; |
| 147 | prev = -1; |
| 148 | for (i = 1; i < HPACK_SHT_SIZE; i++) { |
| 149 | len = idxhdr[i].len; |
| 150 | if (len > 31) { |
| 151 | //printf("skipping %s (len=%d)\n", idxhdr[i].ptr, idxhdr[i].len); |
| 152 | continue; |
| 153 | } |
| 154 | |
| 155 | /* first occurrence of this length? */ |
| 156 | if (positions[len] == -1) |
| 157 | positions[len] = pos; |
| 158 | else if (prev >= 0 && |
| 159 | memcmp(&known_hdr[prev] + 2, idxhdr[i].ptr, len) == 0) { |
| 160 | /* duplicate header field */ |
| 161 | continue; |
| 162 | } |
| 163 | |
| 164 | /* store <len> <idx> <name> in the output array */ |
| 165 | |
| 166 | if (pos + 1 + len + 2 >= sizeof(known_hdr)) |
| 167 | abort(); |
| 168 | |
| 169 | prev = pos; |
| 170 | known_hdr[pos++] = len; |
| 171 | known_hdr[pos++] = idxhdr[i].idx; |
| 172 | memcpy(&known_hdr[pos], idxhdr[i].ptr, len); |
| 173 | pos += len; |
| 174 | //printf("%d %d %s\n", len, idxhdr[i].idx, idxhdr[i].ptr); |
| 175 | } |
| 176 | |
| 177 | if (pos + 1 >= sizeof(known_hdr)) |
| 178 | abort(); |
| 179 | known_hdr[pos++] = 0; // size zero ends the stream |
| 180 | |
| 181 | printf("const char hpack_enc_stream[%d] = {\n", pos); |
| 182 | for (i = 0; i < pos; i++) { |
| 183 | if ((i & 7) == 0) |
| 184 | printf("\t /* % 4d: */", i); |
| 185 | |
| 186 | printf(" 0x%02x,", known_hdr[i]); |
| 187 | |
| 188 | if ((i & 7) == 7 || (i == pos - 1)) |
| 189 | putchar('\n'); |
| 190 | } |
| 191 | printf("};\n\n"); |
| 192 | |
| 193 | printf("const signed short hpack_pos_len[32] = {\n"); |
| 194 | for (i = 0; i < 32; i++) { |
| 195 | if ((i & 7) == 0) |
| 196 | printf("\t /* % 4d: */", i); |
| 197 | |
| 198 | printf(" % 4d,", positions[i]); |
| 199 | |
| 200 | if ((i & 7) == 7 || (i == pos - 1)) |
| 201 | putchar('\n'); |
| 202 | } |
| 203 | printf("};\n\n"); |
| 204 | return 0; |
| 205 | } |