blob: 32f33769e893139b92f92a304260beb91c9ee691 [file] [log] [blame]
Tim Duesterhusdbd25c32021-04-15 21:45:55 +02001/*
2 * HTTP request URI normalization.
3 *
4 * Copyright 2021 Tim Duesterhus <tim@bastelstu.be>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Tim Duesterhusd371e992021-04-15 21:45:58 +020013#include <import/ist.h>
14
Tim Duesterhusdbd25c32021-04-15 21:45:55 +020015#include <haproxy/api.h>
Tim Duesterhusd7b89be2021-04-15 21:46:01 +020016#include <haproxy/buf.h>
17#include <haproxy/chunk.h>
Tim Duesterhusa4071932021-04-15 21:46:02 +020018#include <haproxy/tools.h>
Tim Duesterhusdbd25c32021-04-15 21:45:55 +020019#include <haproxy/uri_normalizer.h>
20
Tim Duesterhusa4071932021-04-15 21:46:02 +020021/* Uppercases letters used in percent encoding.
22 *
23 * If `strict` is set to 0 then percent characters that are not followed by a
24 * hexadecimal digit are returned as-is without modifying the following letters.
25 * If `strict` is set to 1 then `URI_NORMALIZER_ERR_INVALID_INPUT` is returned
26 * for invalid sequences.
27 */
28enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, int strict, struct ist *dst)
29{
30 enum uri_normalizer_err err;
31
32 const size_t size = istclear(dst);
33 struct ist output = *dst;
34
35 struct ist scanner = input;
36
37 /* The output will have the same length. */
38 if (size < istlen(input)) {
39 err = URI_NORMALIZER_ERR_ALLOC;
40 goto fail;
41 }
42
43 while (istlen(scanner)) {
44 const char current = istshift(&scanner);
45
46 if (current == '%') {
47 if (istlen(scanner) >= 2) {
48 if (ishex(istptr(scanner)[0]) && ishex(istptr(scanner)[1])) {
49 output = __istappend(output, current);
50 output = __istappend(output, toupper(istshift(&scanner)));
51 output = __istappend(output, toupper(istshift(&scanner)));
52 continue;
53 }
54 }
55
56 if (strict) {
57 err = URI_NORMALIZER_ERR_INVALID_INPUT;
58 goto fail;
59 }
60 else {
61 output = __istappend(output, current);
62 }
63 }
64 else {
65 output = __istappend(output, current);
66 }
67 }
68
69 *dst = output;
70
71 return URI_NORMALIZER_ERR_NONE;
72
73 fail:
74
75 return err;
76}
77
Tim Duesterhus560e1a62021-04-15 21:46:00 +020078/* Merges `/../` with preceding path segments.
79 *
80 * If `full` is set to `0` then `/../` will be printed at the start of the resulting
81 * path if the number of `/../` exceeds the number of other segments. If `full` is
82 * set to `1` these will not be printed.
83 */
84enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, int full, struct ist *dst)
Tim Duesterhus9982fc22021-04-15 21:45:59 +020085{
86 enum uri_normalizer_err err;
87
88 const size_t size = istclear(dst);
89 char * const tail = istptr(*dst) + size;
90 char *head = tail;
91
92 ssize_t offset = istlen(path) - 1;
93
94 int up = 0;
95
96 /* The path will either be shortened or have the same length. */
97 if (size < istlen(path)) {
98 err = URI_NORMALIZER_ERR_ALLOC;
99 goto fail;
100 }
101
102 /* Handle `/..` at the end of the path without a trailing slash. */
103 if (offset >= 2 && istmatch(istadv(path, offset - 2), ist("/.."))) {
104 up++;
105 offset -= 2;
106 }
107
108 while (offset >= 0) {
109 if (offset >= 3 && istmatch(istadv(path, offset - 3), ist("/../"))) {
110 up++;
111 offset -= 3;
112 continue;
113 }
114
115 if (up > 0) {
116 /* Skip the slash. */
117 offset--;
118
119 /* First check whether we already reached the start of the path,
120 * before popping the current `/../`.
121 */
122 if (offset >= 0) {
123 up--;
124
125 /* Skip the current path segment. */
126 while (offset >= 0 && istptr(path)[offset] != '/')
127 offset--;
128 }
129 }
130 else {
131 /* Prepend the slash. */
132 *(--head) = istptr(path)[offset];
133 offset--;
134
135 /* Prepend the current path segment. */
136 while (offset >= 0 && istptr(path)[offset] != '/') {
137 *(--head) = istptr(path)[offset];
138 offset--;
139 }
140 }
141 }
142
143 if (up > 0) {
144 /* Prepend a trailing slash. */
145 *(--head) = '/';
146
Tim Duesterhus560e1a62021-04-15 21:46:00 +0200147 if (!full) {
148 /* Prepend unconsumed `/..`. */
149 do {
150 *(--head) = '.';
151 *(--head) = '.';
152 *(--head) = '/';
153 up--;
154 } while (up > 0);
155 }
Tim Duesterhus9982fc22021-04-15 21:45:59 +0200156 }
157
158 *dst = ist2(head, tail - head);
159
160 return URI_NORMALIZER_ERR_NONE;
161
162 fail:
163
164 return err;
165}
166
Tim Duesterhusd371e992021-04-15 21:45:58 +0200167/* Merges adjacent slashes in the given path. */
168enum uri_normalizer_err uri_normalizer_path_merge_slashes(const struct ist path, struct ist *dst)
169{
170 enum uri_normalizer_err err;
171
172 const size_t size = istclear(dst);
173 struct ist newpath = *dst;
174
175 struct ist scanner = path;
176
177 /* The path will either be shortened or have the same length. */
178 if (size < istlen(path)) {
179 err = URI_NORMALIZER_ERR_ALLOC;
180 goto fail;
181 }
182
183 while (istlen(scanner) > 0) {
184 const char current = istshift(&scanner);
185
186 if (current == '/') {
187 while (istlen(scanner) > 0 && *istptr(scanner) == '/')
188 scanner = istnext(scanner);
189 }
190
191 newpath = __istappend(newpath, current);
192 }
193
194 *dst = newpath;
195
196 return URI_NORMALIZER_ERR_NONE;
197
198 fail:
199
200 return err;
201}
202
Tim Duesterhusd7b89be2021-04-15 21:46:01 +0200203/* Compares two query parameters by name. Query parameters are ordered
204 * as with memcmp. Shorter parameter names are ordered lower. Identical
205 * parameter names are compared by their pointer to maintain a stable
206 * sort.
207 */
208static int query_param_cmp(const void *a, const void *b)
209{
210 const struct ist param_a = *(struct ist*)a;
211 const struct ist param_b = *(struct ist*)b;
212 const struct ist param_a_name = iststop(param_a, '=');
213 const struct ist param_b_name = iststop(param_b, '=');
214
215 int cmp = istdiff(param_a_name, param_b_name);
216
217 if (cmp != 0)
218 return cmp;
219
220 /* The contents are identical: Compare the pointer. */
221 if (istptr(param_a) < istptr(param_b))
222 return -1;
223
224 if (istptr(param_a) > istptr(param_b))
225 return 1;
226
227 return 0;
228}
229
230/* Sorts the parameters within the given query string. */
231enum uri_normalizer_err uri_normalizer_query_sort(const struct ist query, const char delim, struct ist *dst)
232{
233 enum uri_normalizer_err err;
234
235 const size_t size = istclear(dst);
236 struct ist newquery = *dst;
237
238 struct ist scanner = query;
239
240 const struct buffer *trash = get_trash_chunk();
241 struct ist *params = (struct ist *)b_orig(trash);
242 const size_t max_param = b_size(trash) / sizeof(*params);
243 size_t param_count = 0;
244
245 size_t i;
246
247 /* The query will have the same length. */
248 if (size < istlen(query)) {
249 err = URI_NORMALIZER_ERR_ALLOC;
250 goto fail;
251 }
252
253 /* Handle the leading '?'. */
254 newquery = __istappend(newquery, istshift(&scanner));
255
256 while (istlen(scanner) > 0) {
257 const struct ist param = istsplit(&scanner, delim);
258
259 if (param_count + 1 > max_param) {
260 err = URI_NORMALIZER_ERR_ALLOC;
261 goto fail;
262 }
263
264 params[param_count] = param;
265 param_count++;
266 }
267
268 qsort(params, param_count, sizeof(*params), query_param_cmp);
269
270 for (i = 0; i < param_count; i++) {
271 if (i > 0)
Maximilian Mader11f6f852021-04-21 00:22:48 +0200272 newquery = __istappend(newquery, delim);
Tim Duesterhusd7b89be2021-04-15 21:46:01 +0200273
274 if (istcat(&newquery, params[i], size) < 0) {
275 /* This is impossible, because we checked the size of the destination buffer. */
276 my_unreachable();
277 err = URI_NORMALIZER_ERR_INTERNAL_ERROR;
278 goto fail;
279 }
280 }
281
282 *dst = newquery;
283
284 return URI_NORMALIZER_ERR_NONE;
285
286 fail:
287
288 return err;
289}
Tim Duesterhusd371e992021-04-15 21:45:58 +0200290
Tim Duesterhusdbd25c32021-04-15 21:45:55 +0200291/*
292 * Local variables:
293 * c-indent-level: 8
294 * c-basic-offset: 8
295 * End:
296 */