Blame - src/uri_normalizer.c - haproxy

blob: ea9632b268b479e6de3854859319bee32ca2947a [file] [log] [blame]

Tim Duesterhus	dbd25c3	2021-04-15 21:45:55 +0200	[diff] [blame]	1	/*
				2	* HTTP request URI normalization.
				3	*
				4	* Copyright 2021 Tim Duesterhus <tim@bastelstu.be>
				5	*
				6	* This program is free software; you can redistribute it and/or
				7	* modify it under the terms of the GNU General Public License
				8	* as published by the Free Software Foundation; either version
				9	* 2 of the License, or (at your option) any later version.
				10	*
				11	*/
				12
Tim Duesterhus	d371e99	2021-04-15 21:45:58 +0200	[diff] [blame]	13	#include <import/ist.h>
				14
Tim Duesterhus	dbd25c3	2021-04-15 21:45:55 +0200	[diff] [blame]	15	#include <haproxy/api.h>
Tim Duesterhus	d7b89be	2021-04-15 21:46:01 +0200	[diff] [blame]	16	#include <haproxy/buf.h>
				17	#include <haproxy/chunk.h>
Tim Duesterhus	a407193	2021-04-15 21:46:02 +0200	[diff] [blame]	18	#include <haproxy/tools.h>
Tim Duesterhus	dbd25c3	2021-04-15 21:45:55 +0200	[diff] [blame]	19	#include <haproxy/uri_normalizer.h>
				20
Tim Duesterhus	a407193	2021-04-15 21:46:02 +0200	[diff] [blame]	21	/* Uppercases letters used in percent encoding.
				22	*
				23	* If `strict` is set to 0 then percent characters that are not followed by a
				24	* hexadecimal digit are returned as-is without modifying the following letters.
				25	* If `strict` is set to 1 then `URI_NORMALIZER_ERR_INVALID_INPUT` is returned
				26	* for invalid sequences.
				27	*/
				28	enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, int strict, struct ist *dst)
				29	{
				30	enum uri_normalizer_err err;
				31
				32	const size_t size = istclear(dst);
				33	struct ist output = *dst;
				34
				35	struct ist scanner = input;
				36
				37	/* The output will have the same length. */
				38	if (size < istlen(input)) {
				39	err = URI_NORMALIZER_ERR_ALLOC;
				40	goto fail;
				41	}
				42
				43	while (istlen(scanner)) {
				44	const char current = istshift(&scanner);
				45
				46	if (current == '%') {
				47	if (istlen(scanner) >= 2) {
				48	if (ishex(istptr(scanner)[0]) && ishex(istptr(scanner)[1])) {
				49	output = __istappend(output, current);
				50	output = __istappend(output, toupper(istshift(&scanner)));
				51	output = __istappend(output, toupper(istshift(&scanner)));
				52	continue;
				53	}
				54	}
				55
				56	if (strict) {
				57	err = URI_NORMALIZER_ERR_INVALID_INPUT;
				58	goto fail;
				59	}
				60	else {
				61	output = __istappend(output, current);
				62	}
				63	}
				64	else {
				65	output = __istappend(output, current);
				66	}
				67	}
				68
				69	*dst = output;
				70
				71	return URI_NORMALIZER_ERR_NONE;
				72
				73	fail:
				74
				75	return err;
				76	}
				77
Tim Duesterhus	560e1a6	2021-04-15 21:46:00 +0200	[diff] [blame]	78	/* Merges `/../` with preceding path segments.
				79	*
				80	* If `full` is set to `0` then `/../` will be printed at the start of the resulting
				81	* path if the number of `/../` exceeds the number of other segments. If `full` is
				82	* set to `1` these will not be printed.
				83	*/
				84	enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, int full, struct ist *dst)
Tim Duesterhus	9982fc2	2021-04-15 21:45:59 +0200	[diff] [blame]	85	{
				86	enum uri_normalizer_err err;
				87
				88	const size_t size = istclear(dst);
				89	char * const tail = istptr(*dst) + size;
				90	char *head = tail;
				91
				92	ssize_t offset = istlen(path) - 1;
				93
				94	int up = 0;
				95
				96	/* The path will either be shortened or have the same length. */
				97	if (size < istlen(path)) {
				98	err = URI_NORMALIZER_ERR_ALLOC;
				99	goto fail;
				100	}
				101
				102	/* Handle `/..` at the end of the path without a trailing slash. */
				103	if (offset >= 2 && istmatch(istadv(path, offset - 2), ist("/.."))) {
				104	up++;
				105	offset -= 2;
				106	}
				107
				108	while (offset >= 0) {
				109	if (offset >= 3 && istmatch(istadv(path, offset - 3), ist("/../"))) {
				110	up++;
				111	offset -= 3;
				112	continue;
				113	}
				114
				115	if (up > 0) {
				116	/* Skip the slash. */
				117	offset--;
				118
				119	/* First check whether we already reached the start of the path,
				120	* before popping the current `/../`.
				121	*/
				122	if (offset >= 0) {
				123	up--;
				124
				125	/* Skip the current path segment. */
				126	while (offset >= 0 && istptr(path)[offset] != '/')
				127	offset--;
				128	}
				129	}
				130	else {
				131	/* Prepend the slash. */
				132	*(--head) = istptr(path)[offset];
				133	offset--;
				134
				135	/* Prepend the current path segment. */
				136	while (offset >= 0 && istptr(path)[offset] != '/') {
				137	*(--head) = istptr(path)[offset];
				138	offset--;
				139	}
				140	}
				141	}
				142
				143	if (up > 0) {
				144	/* Prepend a trailing slash. */
				145	*(--head) = '/';
				146
Tim Duesterhus	560e1a6	2021-04-15 21:46:00 +0200	[diff] [blame]	147	if (!full) {
				148	/* Prepend unconsumed `/..`. */
				149	do {
				150	*(--head) = '.';
				151	*(--head) = '.';
				152	*(--head) = '/';
				153	up--;
				154	} while (up > 0);
				155	}
Tim Duesterhus	9982fc2	2021-04-15 21:45:59 +0200	[diff] [blame]	156	}
				157
				158	*dst = ist2(head, tail - head);
				159
				160	return URI_NORMALIZER_ERR_NONE;
				161
				162	fail:
				163
				164	return err;
				165	}
				166
Tim Duesterhus	d371e99	2021-04-15 21:45:58 +0200	[diff] [blame]	167	/* Merges adjacent slashes in the given path. */
				168	enum uri_normalizer_err uri_normalizer_path_merge_slashes(const struct ist path, struct ist *dst)
				169	{
				170	enum uri_normalizer_err err;
				171
				172	const size_t size = istclear(dst);
				173	struct ist newpath = *dst;
				174
				175	struct ist scanner = path;
				176
				177	/* The path will either be shortened or have the same length. */
				178	if (size < istlen(path)) {
				179	err = URI_NORMALIZER_ERR_ALLOC;
				180	goto fail;
				181	}
				182
				183	while (istlen(scanner) > 0) {
				184	const char current = istshift(&scanner);
				185
				186	if (current == '/') {
				187	while (istlen(scanner) > 0 && *istptr(scanner) == '/')
				188	scanner = istnext(scanner);
				189	}
				190
				191	newpath = __istappend(newpath, current);
				192	}
				193
				194	*dst = newpath;
				195
				196	return URI_NORMALIZER_ERR_NONE;
				197
				198	fail:
				199
				200	return err;
				201	}
				202
Tim Duesterhus	d7b89be	2021-04-15 21:46:01 +0200	[diff] [blame]	203	/* Compares two query parameters by name. Query parameters are ordered
				204	* as with memcmp. Shorter parameter names are ordered lower. Identical
				205	* parameter names are compared by their pointer to maintain a stable
				206	* sort.
				207	*/
				208	static int query_param_cmp(const void a, const void b)
				209	{
				210	const struct ist param_a = (struct ist)a;
				211	const struct ist param_b = (struct ist)b;
				212	const struct ist param_a_name = iststop(param_a, '=');
				213	const struct ist param_b_name = iststop(param_b, '=');
				214
				215	int cmp = istdiff(param_a_name, param_b_name);
				216
				217	if (cmp != 0)
				218	return cmp;
				219
				220	/* The contents are identical: Compare the pointer. */
				221	if (istptr(param_a) < istptr(param_b))
				222	return -1;
				223
				224	if (istptr(param_a) > istptr(param_b))
				225	return 1;
				226
				227	return 0;
				228	}
				229
				230	/* Sorts the parameters within the given query string. */
				231	enum uri_normalizer_err uri_normalizer_query_sort(const struct ist query, const char delim, struct ist *dst)
				232	{
				233	enum uri_normalizer_err err;
				234
				235	const size_t size = istclear(dst);
				236	struct ist newquery = *dst;
				237
				238	struct ist scanner = query;
				239
				240	const struct buffer *trash = get_trash_chunk();
				241	struct ist params = (struct ist )b_orig(trash);
				242	const size_t max_param = b_size(trash) / sizeof(*params);
				243	size_t param_count = 0;
				244
				245	size_t i;
				246
				247	/* The query will have the same length. */
				248	if (size < istlen(query)) {
				249	err = URI_NORMALIZER_ERR_ALLOC;
				250	goto fail;
				251	}
				252
				253	/* Handle the leading '?'. */
				254	newquery = __istappend(newquery, istshift(&scanner));
				255
				256	while (istlen(scanner) > 0) {
				257	const struct ist param = istsplit(&scanner, delim);
				258
				259	if (param_count + 1 > max_param) {
				260	err = URI_NORMALIZER_ERR_ALLOC;
				261	goto fail;
				262	}
				263
				264	params[param_count] = param;
				265	param_count++;
				266	}
				267
				268	qsort(params, param_count, sizeof(*params), query_param_cmp);
				269
				270	for (i = 0; i < param_count; i++) {
				271	if (i > 0)
				272	newquery = __istappend(newquery, '&');
				273
				274	if (istcat(&newquery, params[i], size) < 0) {
				275	/* This is impossible, because we checked the size of the destination buffer. */
				276	my_unreachable();
				277	err = URI_NORMALIZER_ERR_INTERNAL_ERROR;
				278	goto fail;
				279	}
				280	}
				281
				282	*dst = newquery;
				283
				284	return URI_NORMALIZER_ERR_NONE;
				285
				286	fail:
				287
				288	return err;
				289	}
Tim Duesterhus	d371e99	2021-04-15 21:45:58 +0200	[diff] [blame]	290
Tim Duesterhus	dbd25c3	2021-04-15 21:45:55 +0200	[diff] [blame]	291	/*
				292	* Local variables:
				293	* c-indent-level: 8
				294	* c-basic-offset: 8
				295	* End:
				296	*/