blob: 8d959365eccdebf55f994ebd455cf9aff72b619c [file] [log] [blame]
Tim Duesterhusdbd25c32021-04-15 21:45:55 +02001/*
2 * HTTP request URI normalization.
3 *
4 * Copyright 2021 Tim Duesterhus <tim@bastelstu.be>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Tim Duesterhusd371e992021-04-15 21:45:58 +020013#include <import/ist.h>
14
Tim Duesterhusdbd25c32021-04-15 21:45:55 +020015#include <haproxy/api.h>
Tim Duesterhusd7b89be2021-04-15 21:46:01 +020016#include <haproxy/buf.h>
17#include <haproxy/chunk.h>
Tim Duesterhusa4071932021-04-15 21:46:02 +020018#include <haproxy/tools.h>
Tim Duesterhusdbd25c32021-04-15 21:45:55 +020019#include <haproxy/uri_normalizer.h>
20
Tim Duesterhusa4071932021-04-15 21:46:02 +020021/* Uppercases letters used in percent encoding.
22 *
23 * If `strict` is set to 0 then percent characters that are not followed by a
24 * hexadecimal digit are returned as-is without modifying the following letters.
25 * If `strict` is set to 1 then `URI_NORMALIZER_ERR_INVALID_INPUT` is returned
26 * for invalid sequences.
27 */
28enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, int strict, struct ist *dst)
29{
30 enum uri_normalizer_err err;
31
32 const size_t size = istclear(dst);
33 struct ist output = *dst;
34
35 struct ist scanner = input;
36
37 /* The output will have the same length. */
38 if (size < istlen(input)) {
39 err = URI_NORMALIZER_ERR_ALLOC;
40 goto fail;
41 }
42
43 while (istlen(scanner)) {
44 const char current = istshift(&scanner);
45
46 if (current == '%') {
47 if (istlen(scanner) >= 2) {
48 if (ishex(istptr(scanner)[0]) && ishex(istptr(scanner)[1])) {
49 output = __istappend(output, current);
50 output = __istappend(output, toupper(istshift(&scanner)));
51 output = __istappend(output, toupper(istshift(&scanner)));
52 continue;
53 }
54 }
55
56 if (strict) {
57 err = URI_NORMALIZER_ERR_INVALID_INPUT;
58 goto fail;
59 }
60 else {
61 output = __istappend(output, current);
62 }
63 }
64 else {
65 output = __istappend(output, current);
66 }
67 }
68
69 *dst = output;
70
71 return URI_NORMALIZER_ERR_NONE;
72
73 fail:
74
75 return err;
76}
77
Maximilian Maderff3bb8b2021-04-21 00:22:50 +020078/* Removes `/./` from the given path. */
79enum uri_normalizer_err uri_normalizer_path_dot(const struct ist path, struct ist *dst)
80{
81 enum uri_normalizer_err err;
82
83 const size_t size = istclear(dst);
84 struct ist newpath = *dst;
85
86 struct ist scanner = path;
87
88 /* The path will either be shortened or have the same length. */
89 if (size < istlen(path)) {
90 err = URI_NORMALIZER_ERR_ALLOC;
91 goto fail;
92 }
93
94 while (istlen(scanner) > 0) {
95 const struct ist segment = istsplit(&scanner, '/');
96
97 if (!isteq(segment, ist("."))) {
98 if (istcat(&newpath, segment, size) < 0) {
99 /* This is impossible, because we checked the size of the destination buffer. */
100 my_unreachable();
101 err = URI_NORMALIZER_ERR_INTERNAL_ERROR;
102 goto fail;
103 }
104
105 if (istend(segment) != istend(scanner))
106 newpath = __istappend(newpath, '/');
107 }
108 }
109
110 *dst = newpath;
111
112 return URI_NORMALIZER_ERR_NONE;
113
114 fail:
115
116 return err;
117}
118
Tim Duesterhus560e1a62021-04-15 21:46:00 +0200119/* Merges `/../` with preceding path segments.
120 *
121 * If `full` is set to `0` then `/../` will be printed at the start of the resulting
122 * path if the number of `/../` exceeds the number of other segments. If `full` is
123 * set to `1` these will not be printed.
124 */
125enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, int full, struct ist *dst)
Tim Duesterhus9982fc22021-04-15 21:45:59 +0200126{
127 enum uri_normalizer_err err;
128
129 const size_t size = istclear(dst);
130 char * const tail = istptr(*dst) + size;
131 char *head = tail;
132
133 ssize_t offset = istlen(path) - 1;
134
135 int up = 0;
136
137 /* The path will either be shortened or have the same length. */
138 if (size < istlen(path)) {
139 err = URI_NORMALIZER_ERR_ALLOC;
140 goto fail;
141 }
142
143 /* Handle `/..` at the end of the path without a trailing slash. */
144 if (offset >= 2 && istmatch(istadv(path, offset - 2), ist("/.."))) {
145 up++;
146 offset -= 2;
147 }
148
149 while (offset >= 0) {
150 if (offset >= 3 && istmatch(istadv(path, offset - 3), ist("/../"))) {
151 up++;
152 offset -= 3;
153 continue;
154 }
155
156 if (up > 0) {
157 /* Skip the slash. */
158 offset--;
159
160 /* First check whether we already reached the start of the path,
161 * before popping the current `/../`.
162 */
163 if (offset >= 0) {
164 up--;
165
166 /* Skip the current path segment. */
167 while (offset >= 0 && istptr(path)[offset] != '/')
168 offset--;
169 }
170 }
171 else {
172 /* Prepend the slash. */
173 *(--head) = istptr(path)[offset];
174 offset--;
175
176 /* Prepend the current path segment. */
177 while (offset >= 0 && istptr(path)[offset] != '/') {
178 *(--head) = istptr(path)[offset];
179 offset--;
180 }
181 }
182 }
183
184 if (up > 0) {
185 /* Prepend a trailing slash. */
186 *(--head) = '/';
187
Tim Duesterhus560e1a62021-04-15 21:46:00 +0200188 if (!full) {
189 /* Prepend unconsumed `/..`. */
190 do {
191 *(--head) = '.';
192 *(--head) = '.';
193 *(--head) = '/';
194 up--;
195 } while (up > 0);
196 }
Tim Duesterhus9982fc22021-04-15 21:45:59 +0200197 }
198
199 *dst = ist2(head, tail - head);
200
201 return URI_NORMALIZER_ERR_NONE;
202
203 fail:
204
205 return err;
206}
207
Tim Duesterhusd371e992021-04-15 21:45:58 +0200208/* Merges adjacent slashes in the given path. */
209enum uri_normalizer_err uri_normalizer_path_merge_slashes(const struct ist path, struct ist *dst)
210{
211 enum uri_normalizer_err err;
212
213 const size_t size = istclear(dst);
214 struct ist newpath = *dst;
215
216 struct ist scanner = path;
217
218 /* The path will either be shortened or have the same length. */
219 if (size < istlen(path)) {
220 err = URI_NORMALIZER_ERR_ALLOC;
221 goto fail;
222 }
223
224 while (istlen(scanner) > 0) {
225 const char current = istshift(&scanner);
226
227 if (current == '/') {
228 while (istlen(scanner) > 0 && *istptr(scanner) == '/')
229 scanner = istnext(scanner);
230 }
231
232 newpath = __istappend(newpath, current);
233 }
234
235 *dst = newpath;
236
237 return URI_NORMALIZER_ERR_NONE;
238
239 fail:
240
241 return err;
242}
243
Tim Duesterhusd7b89be2021-04-15 21:46:01 +0200244/* Compares two query parameters by name. Query parameters are ordered
245 * as with memcmp. Shorter parameter names are ordered lower. Identical
246 * parameter names are compared by their pointer to maintain a stable
247 * sort.
248 */
249static int query_param_cmp(const void *a, const void *b)
250{
251 const struct ist param_a = *(struct ist*)a;
252 const struct ist param_b = *(struct ist*)b;
253 const struct ist param_a_name = iststop(param_a, '=');
254 const struct ist param_b_name = iststop(param_b, '=');
255
256 int cmp = istdiff(param_a_name, param_b_name);
257
258 if (cmp != 0)
259 return cmp;
260
261 /* The contents are identical: Compare the pointer. */
262 if (istptr(param_a) < istptr(param_b))
263 return -1;
264
265 if (istptr(param_a) > istptr(param_b))
266 return 1;
267
268 return 0;
269}
270
271/* Sorts the parameters within the given query string. */
272enum uri_normalizer_err uri_normalizer_query_sort(const struct ist query, const char delim, struct ist *dst)
273{
274 enum uri_normalizer_err err;
275
276 const size_t size = istclear(dst);
277 struct ist newquery = *dst;
278
279 struct ist scanner = query;
280
281 const struct buffer *trash = get_trash_chunk();
282 struct ist *params = (struct ist *)b_orig(trash);
Maximilian Maderc9c79572021-04-21 00:22:49 +0200283 const size_t max_param = b_size(trash) / sizeof(*params);
Tim Duesterhusd7b89be2021-04-15 21:46:01 +0200284 size_t param_count = 0;
285
286 size_t i;
287
288 /* The query will have the same length. */
289 if (size < istlen(query)) {
290 err = URI_NORMALIZER_ERR_ALLOC;
291 goto fail;
292 }
293
294 /* Handle the leading '?'. */
295 newquery = __istappend(newquery, istshift(&scanner));
296
297 while (istlen(scanner) > 0) {
298 const struct ist param = istsplit(&scanner, delim);
299
300 if (param_count + 1 > max_param) {
301 err = URI_NORMALIZER_ERR_ALLOC;
302 goto fail;
303 }
304
305 params[param_count] = param;
306 param_count++;
307 }
308
309 qsort(params, param_count, sizeof(*params), query_param_cmp);
310
311 for (i = 0; i < param_count; i++) {
312 if (i > 0)
Maximilian Mader11f6f852021-04-21 00:22:48 +0200313 newquery = __istappend(newquery, delim);
Tim Duesterhusd7b89be2021-04-15 21:46:01 +0200314
315 if (istcat(&newquery, params[i], size) < 0) {
316 /* This is impossible, because we checked the size of the destination buffer. */
317 my_unreachable();
318 err = URI_NORMALIZER_ERR_INTERNAL_ERROR;
319 goto fail;
320 }
321 }
322
323 *dst = newquery;
324
325 return URI_NORMALIZER_ERR_NONE;
326
327 fail:
328
329 return err;
330}
Tim Duesterhusd371e992021-04-15 21:45:58 +0200331
Tim Duesterhusdbd25c32021-04-15 21:45:55 +0200332/*
333 * Local variables:
334 * c-indent-level: 8
335 * c-basic-offset: 8
336 * End:
337 */