MINOR: http: use http uri parser for path

Replace http_get_path with the http_uri_parser API. The new function is
named http_parse_path. Replace the duplicated scheme and authority
parsing code with calls to http_parse_scheme/http_parse_authority.

If no scheme is found for a URI detected as an absolute-uri/authority,
consider it to be in authority format: no path will be found. For an
absolute-uri or absolute-path, use the remainder of the string as the
path. A new http_uri_parser state is declared to mark the path parsing
as done.
diff --git a/src/backend.c b/src/backend.c
index f5fec1d..ceb24a7 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -719,7 +719,10 @@
 
 					uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf)));
 					if (s->be->lbprm.arg_opt1 & 2) {
-						uri = http_get_path(uri);
+						struct http_uri_parser parser =
+						  http_uri_parser_init(uri);
+
+						uri = http_parse_path(&parser);
 						if (!isttest(uri))
 							uri = ist("");
 					}
diff --git a/src/hlua.c b/src/hlua.c
index 0c0895e..2ad3d19 100644
--- a/src/hlua.c
+++ b/src/hlua.c
@@ -4125,6 +4125,7 @@
 	struct ist path;
 	unsigned long long len = 0;
 	int32_t pos;
+	struct http_uri_parser parser;
 
 	/* Check stack size. */
 	if (!lua_checkstack(L, 3))
@@ -4193,7 +4194,8 @@
 		return 0;
 	lua_settable(L, -3);
 
-	path = http_get_path(htx_sl_req_uri(sl));
+	parser = http_uri_parser_init(htx_sl_req_uri(sl));
+	path = http_parse_path(&parser);
 	if (isttest(path)) {
 		char *p, *q, *end;
 
diff --git a/src/http.c b/src/http.c
index 8b3d20b..75c899d 100644
--- a/src/http.c
+++ b/src/http.c
@@ -563,50 +563,52 @@
 /* Parse the URI from the given transaction (which is assumed to be in request
  * phase) and look for the "/" beginning the PATH. If not found, ist2(0,0) is
  * returned. Otherwise the pointer and length are returned.
+ *
+ * <parser> must have been initialized via http_uri_parser_init. See the
+ * related http_uri_parser documentation for the specific API usage.
  */
-struct ist http_get_path(const struct ist uri)
+struct ist http_parse_path(struct http_uri_parser *parser)
 {
 	const char *ptr, *end;
 
-	if (!uri.len)
+	if (parser->state >= URI_PARSER_STATE_PATH_DONE)
 		goto not_found;
 
-	ptr = uri.ptr;
-	end = ptr + uri.len;
+	if (parser->format == URI_PARSER_FORMAT_EMPTY ||
+	    parser->format == URI_PARSER_FORMAT_ASTERISK) {
+		goto not_found;
+	}
+
+	ptr = istptr(parser->uri);
+	end = istend(parser->uri);
 
-	/* RFC7230, par. 2.7 :
-	 * Request-URI = "*" | absuri | abspath | authority
+	/* If the uri is in absolute-uri format, first skip the scheme and
+	 * authority parts. No scheme will be found if the uri is in authority
+	 * format, which indicates that the path won't be present.
 	 */
+	if (parser->format == URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY) {
+		if (parser->state < URI_PARSER_STATE_SCHEME_DONE) {
+			/* If no scheme found, uri is in authority format. No
+			 * path is present.
+			 */
+			if (!isttest(http_parse_scheme(parser)))
+				goto not_found;
+		}
 
-	if (*ptr == '*')
-		goto not_found;
+		if (parser->state < URI_PARSER_STATE_AUTHORITY_DONE)
+			http_parse_authority(parser, 1);
 
-	if (isalpha((unsigned char)*ptr)) {
-		/* this is a scheme as described by RFC3986, par. 3.1 */
-		ptr++;
-		while (ptr < end &&
-		       (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
-			ptr++;
-		/* skip '://' */
-		if (ptr == end || *ptr++ != ':')
-			goto not_found;
-		if (ptr == end || *ptr++ != '/')
-			goto not_found;
-		if (ptr == end || *ptr++ != '/')
+		ptr = istptr(parser->uri);
+
+		if (ptr == end)
 			goto not_found;
 	}
-	/* skip [user[:passwd]@]host[:[port]] */
-
-	while (ptr < end && *ptr != '/')
-		ptr++;
 
-	if (ptr == end)
-		goto not_found;
-
-	/* OK, we got the '/' ! */
+	parser->state = URI_PARSER_STATE_PATH_DONE;
 	return ist2(ptr, end - ptr);
 
  not_found:
+	parser->state = URI_PARSER_STATE_PATH_DONE;
 	return IST_NULL;
 }
 
diff --git a/src/http_act.c b/src/http_act.c
index f16b639..c2fee04 100644
--- a/src/http_act.c
+++ b/src/http_act.c
@@ -216,7 +216,8 @@
 
 	switch ((enum act_normalize_uri) rule->action) {
 		case ACT_NORMALIZE_URI_PATH_MERGE_SLASHES: {
-			const struct ist path = http_get_path(uri);
+			struct http_uri_parser parser = http_uri_parser_init(uri);
+			const struct ist path = http_parse_path(&parser);
 			struct ist newpath = ist2(replace->area, replace->size);
 
 			if (!isttest(path))
@@ -233,7 +234,8 @@
 			break;
 		}
 		case ACT_NORMALIZE_URI_PATH_STRIP_DOT: {
-			const struct ist path = http_get_path(uri);
+			struct http_uri_parser parser = http_uri_parser_init(uri);
+			const struct ist path = http_parse_path(&parser);
 			struct ist newpath = ist2(replace->area, replace->size);
 
 			if (!isttest(path))
@@ -251,7 +253,8 @@
 		}
 		case ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT:
 		case ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT_FULL: {
-			const struct ist path = http_get_path(uri);
+			struct http_uri_parser parser = http_uri_parser_init(uri);
+			const struct ist path = http_parse_path(&parser);
 			struct ist newpath = ist2(replace->area, replace->size);
 
 			if (!isttest(path))
@@ -268,7 +271,8 @@
 			break;
 		}
 		case ACT_NORMALIZE_URI_QUERY_SORT_BY_NAME: {
-			const struct ist path = http_get_path(uri);
+			struct http_uri_parser parser = http_uri_parser_init(uri);
+			const struct ist path = http_parse_path(&parser);
 			struct ist newquery = ist2(replace->area, replace->size);
 
 			if (!isttest(path))
@@ -286,7 +290,8 @@
 		}
 		case ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE:
 		case ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT: {
-			const struct ist path = http_get_path(uri);
+			struct http_uri_parser parser = http_uri_parser_init(uri);
+			const struct ist path = http_parse_path(&parser);
 			struct ist newpath = ist2(replace->area, replace->size);
 
 			if (!isttest(path))
@@ -304,7 +309,8 @@
 		}
 		case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED:
 		case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT: {
-			const struct ist path = http_get_path(uri);
+			struct http_uri_parser parser = http_uri_parser_init(uri);
+			const struct ist path = http_parse_path(&parser);
 			struct ist newpath = ist2(replace->area, replace->size);
 
 			if (!isttest(path))
@@ -321,7 +327,8 @@
 			break;
 		}
 		case ACT_NORMALIZE_URI_FRAGMENT_STRIP: {
-			const struct ist path = http_get_path(uri);
+			struct http_uri_parser parser = http_uri_parser_init(uri);
+			const struct ist path = http_parse_path(&parser);
 			struct ist newpath = ist2(replace->area, replace->size);
 
 			if (!isttest(path))
@@ -338,7 +345,8 @@
 			break;
 		}
 		case ACT_NORMALIZE_URI_FRAGMENT_ENCODE: {
-			const struct ist path = http_get_path(uri);
+			struct http_uri_parser parser = http_uri_parser_init(uri);
+			const struct ist path = http_parse_path(&parser);
 			struct ist newpath = ist2(replace->area, replace->size);
 
 			if (!isttest(path))
@@ -517,10 +525,14 @@
 		goto fail_alloc;
 	uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf)));
 
-	if (rule->action == 1) // replace-path
-		uri = iststop(http_get_path(uri), '?');
-	else if (rule->action == 4) // replace-pathq
-		uri = http_get_path(uri);
+	if (rule->action == 1) { // replace-path
+		struct http_uri_parser parser = http_uri_parser_init(uri);
+		uri = iststop(http_parse_path(&parser), '?');
+	}
+	else if (rule->action == 4) { // replace-pathq
+		struct http_uri_parser parser = http_uri_parser_init(uri);
+		uri = http_parse_path(&parser);
+	}
 
 	if (!regex_exec_match2(rule->arg.http.re, uri.ptr, uri.len, MAX_MATCH, pmatch, 0))
 		goto leave;
diff --git a/src/http_ana.c b/src/http_ana.c
index da436e7..5eca741 100644
--- a/src/http_ana.c
+++ b/src/http_ana.c
@@ -206,9 +206,10 @@
 	if (unlikely(sess->fe->monitor_uri_len != 0)) {
 		const struct ist monitor_uri = ist2(sess->fe->monitor_uri,
 		                                    sess->fe->monitor_uri_len);
+		struct http_uri_parser parser = http_uri_parser_init(htx_sl_req_uri(sl));
 
 		if ((istptr(monitor_uri)[0] == '/' &&
-		     isteq(http_get_path(htx_sl_req_uri(sl)), monitor_uri)) ||
+		     isteq(http_parse_path(&parser), monitor_uri)) ||
 		    isteq(htx_sl_req_uri(sl), monitor_uri)) {
 			/*
 			 * We have found the monitor URI
@@ -622,6 +623,7 @@
 	if ((s->be->options & PR_O_HTTP_PROXY) && !(s->flags & SF_ADDR_SET)) {
 		struct htx_sl *sl;
 		struct ist uri, path;
+		struct http_uri_parser parser;
 
 		if (!sockaddr_alloc(&s->target_addr, NULL, 0)) {
 			if (!(s->flags & SF_ERR_MASK))
@@ -630,7 +632,8 @@
 		}
 		sl = http_get_stline(htx);
 		uri = htx_sl_req_uri(sl);
-		path = http_get_path(uri);
+		parser = http_uri_parser_init(uri);
+		path = http_parse_path(&parser);
 
 		if (url2sa(uri.ptr, uri.len - path.len, s->target_addr, NULL) == -1)
 			goto return_bad_req;
@@ -2409,6 +2412,7 @@
 		case REDIRECT_TYPE_SCHEME: {
 			struct http_hdr_ctx ctx;
 			struct ist path, host;
+			struct http_uri_parser parser;
 
 			host = ist("");
 			ctx.blk = NULL;
@@ -2416,7 +2420,8 @@
 				host = ctx.value;
 
 			sl = http_get_stline(htx);
-			path = http_get_path(htx_sl_req_uri(sl));
+			parser = http_uri_parser_init(htx_sl_req_uri(sl));
+			path = http_parse_path(&parser);
 			/* build message using path */
 			if (isttest(path)) {
 				if (rule->flags & REDIRECT_FLAG_DROP_QS) {
@@ -2462,9 +2467,11 @@
 
 		case REDIRECT_TYPE_PREFIX: {
 			struct ist path;
+			struct http_uri_parser parser;
 
 			sl = http_get_stline(htx);
-			path = http_get_path(htx_sl_req_uri(sl));
+			parser = http_uri_parser_init(htx_sl_req_uri(sl));
+			path = http_parse_path(&parser);
 			/* build message using path */
 			if (isttest(path)) {
 				if (rule->flags & REDIRECT_FLAG_DROP_QS) {
@@ -3858,8 +3865,10 @@
 	htx = htxbuf(&s->req.buf);
 	sl = http_get_stline(htx);
 	uri = htx_sl_req_uri(sl);
-	if (*uri_auth->uri_prefix == '/')
-		uri = http_get_path(uri);
+	if (*uri_auth->uri_prefix == '/') {
+		struct http_uri_parser parser = http_uri_parser_init(uri);
+		uri = http_parse_path(&parser);
+	}
 
 	/* check URI size */
 	if (uri_auth->uri_len > uri.len)
@@ -4173,6 +4182,7 @@
 	struct htx_sl *sl;
 	struct ist path, location;
 	unsigned int flags;
+	struct http_uri_parser parser;
 
 	/*
 	 * Create the location
@@ -4190,7 +4200,8 @@
 	/* 2: add the request Path */
 	htx = htxbuf(&req->buf);
 	sl = http_get_stline(htx);
-	path = http_get_path(htx_sl_req_uri(sl));
+	parser = http_uri_parser_init(htx_sl_req_uri(sl));
+	path = http_parse_path(&parser);
 	if (!isttest(path))
 		return;
 
diff --git a/src/http_fetch.c b/src/http_fetch.c
index a28cbaf..d3192aa 100644
--- a/src/http_fetch.c
+++ b/src/http_fetch.c
@@ -1039,17 +1039,18 @@
 	struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
 	struct htx_sl *sl;
 	struct ist path;
+	struct http_uri_parser parser;
 
 	if (!htx)
 		return 0;
 
 	sl = http_get_stline(htx);
-	path = http_get_path(htx_sl_req_uri(sl));
+	parser = http_uri_parser_init(htx_sl_req_uri(sl));
 
 	if (kw[4] == 'q' && (kw[0] == 'p' || kw[0] == 'b')) // pathq or baseq
-		path = http_get_path(htx_sl_req_uri(sl));
+		path = http_parse_path(&parser);
 	else
-		path = iststop(http_get_path(htx_sl_req_uri(sl)), '?');
+		path = iststop(http_parse_path(&parser), '?');
 
 	if (!isttest(path))
 		return 0;
@@ -1077,6 +1078,7 @@
 	struct buffer *temp;
 	struct http_hdr_ctx ctx;
 	struct ist path;
+	struct http_uri_parser parser;
 
 	if (!htx)
 		return 0;
@@ -1091,7 +1093,8 @@
 
 	/* now retrieve the path */
 	sl = http_get_stline(htx);
-	path = http_get_path(htx_sl_req_uri(sl));
+	parser = http_uri_parser_init(htx_sl_req_uri(sl));
+	path = http_parse_path(&parser);
 	if (isttest(path)) {
 		size_t len;
 
@@ -1128,6 +1131,7 @@
 	struct http_hdr_ctx ctx;
 	struct ist path;
 	unsigned int hash = 0;
+	struct http_uri_parser parser;
 
 	if (!htx)
 		return 0;
@@ -1141,7 +1145,8 @@
 
 	/* now retrieve the path */
 	sl = http_get_stline(htx);
-	path = http_get_path(htx_sl_req_uri(sl));
+	parser = http_uri_parser_init(htx_sl_req_uri(sl));
+	path = http_parse_path(&parser);
 	if (isttest(path)) {
 		size_t len;
 
@@ -1486,6 +1491,7 @@
 	struct http_txn *txn;
 	struct ist path;
 	const char *ptr;
+	struct http_uri_parser parser;
 
 	if (!smp->strm)
 		return 0;
@@ -1508,7 +1514,8 @@
 		ptr++;
 	path.len = ptr - path.ptr;
 
-	path = http_get_path(path);
+	parser = http_uri_parser_init(path);
+	path = http_parse_path(&parser);
 	if (!isttest(path))
 		return 0;
 
@@ -1952,6 +1959,7 @@
 	struct htx_sl *sl;
 	struct ist path;
 	unsigned int hash = 0;
+	struct http_uri_parser parser;
 
 	if (!htx)
 		return 0;
@@ -1965,7 +1973,8 @@
 
 	/* now retrieve the path */
 	sl = http_get_stline(htx);
-	path = http_get_path(htx_sl_req_uri(sl));
+	parser = http_uri_parser_init(htx_sl_req_uri(sl));
+	path = http_parse_path(&parser);
 	if (path.len && *(path.ptr) == '/') {
 		while (path.len--)
 			hash = *(path.ptr++) + (hash << 6) + (hash << 16) - hash;
diff --git a/src/http_htx.c b/src/http_htx.c
index 2e39191..bbbac4a 100644
--- a/src/http_htx.c
+++ b/src/http_htx.c
@@ -407,12 +407,14 @@
 	struct htx_sl *sl = http_get_stline(htx);
 	struct ist meth, uri, vsn, p;
 	size_t plen = 0;
+	struct http_uri_parser parser;
 
 	if (!sl)
 		return 0;
 
 	uri = htx_sl_req_uri(sl);
-	p = http_get_path(uri);
+	parser = http_uri_parser_init(uri);
+	p = http_parse_path(&parser);
 	if (!isttest(p))
 		p = uri;
 	if (with_qs)
@@ -1791,7 +1793,7 @@
 		vsn = ist2(temp->area + meth.len, HTX_SL_REQ_VLEN(sl));
 
 		/* reconstruct uri without port */
-		path = http_get_path(uri);
+		path = http_parse_path(&parser);
 		chunk_istcat(temp, scheme);
 		chunk_istcat(temp, host);
 		chunk_istcat(temp, path);
diff --git a/src/log.c b/src/log.c
index 41810f5..2ba645a 100644
--- a/src/log.c
+++ b/src/log.c
@@ -1961,6 +1961,7 @@
 	struct timeval tv;
 	struct strm_logs tmp_strm_log;
 	struct ist path;
+	struct http_uri_parser parser;
 
 	/* FIXME: let's limit ourselves to frontend logging for now. */
 
@@ -2790,7 +2791,8 @@
 				path = ist2(uri, spc - uri);
 
 				// extract relative path without query params from url
-				path = iststop(http_get_path(path), '?');
+				parser = http_uri_parser_init(path);
+				path = iststop(http_parse_path(&parser), '?');
 				if (!txn || !txn->uri || nspaces == 0) {
 					chunk.area = "<BADREQ>";
 					chunk.data = strlen("<BADREQ>");
diff --git a/src/mux_fcgi.c b/src/mux_fcgi.c
index 18a9ff5..1be7b96 100644
--- a/src/mux_fcgi.c
+++ b/src/mux_fcgi.c
@@ -1327,7 +1327,8 @@
 #endif
 	if ((params->mask & FCGI_SP_URI_MASK) != FCGI_SP_URI_MASK) {
 		/* one of scriptname, pathinfo or query_string is no set */
-		struct ist path = http_get_path(params->uri);
+		struct http_uri_parser parser = http_uri_parser_init(params->uri);
+		struct ist path = http_parse_path(&parser);
 		int len;
 
 		/* No scrit_name set but no valid path ==> error */