MINOR: http: use http uri parser for path
Replace http_get_path with the http_uri_parser API. The new function is
named http_parse_path. Replace the duplicated scheme and authority
parsing code with calls to http_parse_scheme/authority.
If no scheme is found for a URI detected as an absolute-uri/authority,
consider it to be in authority format: no path will be found. For an
absolute-uri or absolute-path, use the remainder of the string as the
path. A new http_uri_parser state is declared to mark the path parsing
as done.
diff --git a/src/backend.c b/src/backend.c
index f5fec1d..ceb24a7 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -719,7 +719,10 @@
uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf)));
if (s->be->lbprm.arg_opt1 & 2) {
- uri = http_get_path(uri);
+ struct http_uri_parser parser =
+ http_uri_parser_init(uri);
+
+ uri = http_parse_path(&parser);
if (!isttest(uri))
uri = ist("");
}
diff --git a/src/hlua.c b/src/hlua.c
index 0c0895e..2ad3d19 100644
--- a/src/hlua.c
+++ b/src/hlua.c
@@ -4125,6 +4125,7 @@
struct ist path;
unsigned long long len = 0;
int32_t pos;
+ struct http_uri_parser parser;
/* Check stack size. */
if (!lua_checkstack(L, 3))
@@ -4193,7 +4194,8 @@
return 0;
lua_settable(L, -3);
- path = http_get_path(htx_sl_req_uri(sl));
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
if (isttest(path)) {
char *p, *q, *end;
diff --git a/src/http.c b/src/http.c
index 8b3d20b..75c899d 100644
--- a/src/http.c
+++ b/src/http.c
@@ -563,50 +563,52 @@
/* Parse the URI from the given transaction (which is assumed to be in request
* phase) and look for the "/" beginning the PATH. If not found, ist2(0,0) is
* returned. Otherwise the pointer and length are returned.
+ *
+ * <parser> must have been initialized via http_uri_parser_init. See the
+ * related http_uri_parser documentation for the specific API usage.
*/
-struct ist http_get_path(const struct ist uri)
+struct ist http_parse_path(struct http_uri_parser *parser)
{
const char *ptr, *end;
- if (!uri.len)
+ if (parser->state >= URI_PARSER_STATE_PATH_DONE)
goto not_found;
- ptr = uri.ptr;
- end = ptr + uri.len;
+ if (parser->format == URI_PARSER_FORMAT_EMPTY ||
+ parser->format == URI_PARSER_FORMAT_ASTERISK) {
+ goto not_found;
+ }
+
+ ptr = istptr(parser->uri);
+ end = istend(parser->uri);
- /* RFC7230, par. 2.7 :
- * Request-URI = "*" | absuri | abspath | authority
+ /* If the uri is in absolute-uri format, first skip the scheme and
+ * authority parts. No scheme will be found if the uri is in authority
+ * format, which indicates that the path won't be present.
*/
+ if (parser->format == URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY) {
+ if (parser->state < URI_PARSER_STATE_SCHEME_DONE) {
+ /* If no scheme found, uri is in authority format. No
+ * path is present.
+ */
+ if (!isttest(http_parse_scheme(parser)))
+ goto not_found;
+ }
- if (*ptr == '*')
- goto not_found;
+ if (parser->state < URI_PARSER_STATE_AUTHORITY_DONE)
+ http_parse_authority(parser, 1);
- if (isalpha((unsigned char)*ptr)) {
- /* this is a scheme as described by RFC3986, par. 3.1 */
- ptr++;
- while (ptr < end &&
- (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
- ptr++;
- /* skip '://' */
- if (ptr == end || *ptr++ != ':')
- goto not_found;
- if (ptr == end || *ptr++ != '/')
- goto not_found;
- if (ptr == end || *ptr++ != '/')
+ ptr = istptr(parser->uri);
+
+ if (ptr == end)
goto not_found;
}
- /* skip [user[:passwd]@]host[:[port]] */
-
- while (ptr < end && *ptr != '/')
- ptr++;
- if (ptr == end)
- goto not_found;
-
- /* OK, we got the '/' ! */
+ parser->state = URI_PARSER_STATE_PATH_DONE;
return ist2(ptr, end - ptr);
not_found:
+ parser->state = URI_PARSER_STATE_PATH_DONE;
return IST_NULL;
}
diff --git a/src/http_act.c b/src/http_act.c
index f16b639..c2fee04 100644
--- a/src/http_act.c
+++ b/src/http_act.c
@@ -216,7 +216,8 @@
switch ((enum act_normalize_uri) rule->action) {
case ACT_NORMALIZE_URI_PATH_MERGE_SLASHES: {
- const struct ist path = http_get_path(uri);
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
struct ist newpath = ist2(replace->area, replace->size);
if (!isttest(path))
@@ -233,7 +234,8 @@
break;
}
case ACT_NORMALIZE_URI_PATH_STRIP_DOT: {
- const struct ist path = http_get_path(uri);
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
struct ist newpath = ist2(replace->area, replace->size);
if (!isttest(path))
@@ -251,7 +253,8 @@
}
case ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT:
case ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT_FULL: {
- const struct ist path = http_get_path(uri);
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
struct ist newpath = ist2(replace->area, replace->size);
if (!isttest(path))
@@ -268,7 +271,8 @@
break;
}
case ACT_NORMALIZE_URI_QUERY_SORT_BY_NAME: {
- const struct ist path = http_get_path(uri);
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
struct ist newquery = ist2(replace->area, replace->size);
if (!isttest(path))
@@ -286,7 +290,8 @@
}
case ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE:
case ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT: {
- const struct ist path = http_get_path(uri);
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
struct ist newpath = ist2(replace->area, replace->size);
if (!isttest(path))
@@ -304,7 +309,8 @@
}
case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED:
case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT: {
- const struct ist path = http_get_path(uri);
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
struct ist newpath = ist2(replace->area, replace->size);
if (!isttest(path))
@@ -321,7 +327,8 @@
break;
}
case ACT_NORMALIZE_URI_FRAGMENT_STRIP: {
- const struct ist path = http_get_path(uri);
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
struct ist newpath = ist2(replace->area, replace->size);
if (!isttest(path))
@@ -338,7 +345,8 @@
break;
}
case ACT_NORMALIZE_URI_FRAGMENT_ENCODE: {
- const struct ist path = http_get_path(uri);
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
struct ist newpath = ist2(replace->area, replace->size);
if (!isttest(path))
@@ -517,10 +525,14 @@
goto fail_alloc;
uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf)));
- if (rule->action == 1) // replace-path
- uri = iststop(http_get_path(uri), '?');
- else if (rule->action == 4) // replace-pathq
- uri = http_get_path(uri);
+ if (rule->action == 1) { // replace-path
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ uri = iststop(http_parse_path(&parser), '?');
+ }
+ else if (rule->action == 4) { // replace-pathq
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ uri = http_parse_path(&parser);
+ }
if (!regex_exec_match2(rule->arg.http.re, uri.ptr, uri.len, MAX_MATCH, pmatch, 0))
goto leave;
diff --git a/src/http_ana.c b/src/http_ana.c
index da436e7..5eca741 100644
--- a/src/http_ana.c
+++ b/src/http_ana.c
@@ -206,9 +206,10 @@
if (unlikely(sess->fe->monitor_uri_len != 0)) {
const struct ist monitor_uri = ist2(sess->fe->monitor_uri,
sess->fe->monitor_uri_len);
+ struct http_uri_parser parser = http_uri_parser_init(htx_sl_req_uri(sl));
if ((istptr(monitor_uri)[0] == '/' &&
- isteq(http_get_path(htx_sl_req_uri(sl)), monitor_uri)) ||
+ isteq(http_parse_path(&parser), monitor_uri)) ||
isteq(htx_sl_req_uri(sl), monitor_uri)) {
/*
* We have found the monitor URI
@@ -622,6 +623,7 @@
if ((s->be->options & PR_O_HTTP_PROXY) && !(s->flags & SF_ADDR_SET)) {
struct htx_sl *sl;
struct ist uri, path;
+ struct http_uri_parser parser;
if (!sockaddr_alloc(&s->target_addr, NULL, 0)) {
if (!(s->flags & SF_ERR_MASK))
@@ -630,7 +632,9 @@
}
sl = http_get_stline(htx);
uri = htx_sl_req_uri(sl);
- path = http_get_path(uri);
+ /* the parser copies <uri>, so set it up only once <uri> is assigned */
+ parser = http_uri_parser_init(uri);
+ path = http_parse_path(&parser);
if (url2sa(uri.ptr, uri.len - path.len, s->target_addr, NULL) == -1)
goto return_bad_req;
@@ -2409,6 +2411,7 @@
case REDIRECT_TYPE_SCHEME: {
struct http_hdr_ctx ctx;
struct ist path, host;
+ struct http_uri_parser parser;
host = ist("");
ctx.blk = NULL;
@@ -2416,7 +2419,8 @@
host = ctx.value;
sl = http_get_stline(htx);
- path = http_get_path(htx_sl_req_uri(sl));
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
/* build message using path */
if (isttest(path)) {
if (rule->flags & REDIRECT_FLAG_DROP_QS) {
@@ -2462,9 +2466,11 @@
case REDIRECT_TYPE_PREFIX: {
struct ist path;
+ struct http_uri_parser parser;
sl = http_get_stline(htx);
- path = http_get_path(htx_sl_req_uri(sl));
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
/* build message using path */
if (isttest(path)) {
if (rule->flags & REDIRECT_FLAG_DROP_QS) {
@@ -3858,8 +3864,10 @@
htx = htxbuf(&s->req.buf);
sl = http_get_stline(htx);
uri = htx_sl_req_uri(sl);
- if (*uri_auth->uri_prefix == '/')
- uri = http_get_path(uri);
+ if (*uri_auth->uri_prefix == '/') {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ uri = http_parse_path(&parser);
+ }
/* check URI size */
if (uri_auth->uri_len > uri.len)
@@ -4173,6 +4181,7 @@
struct htx_sl *sl;
struct ist path, location;
unsigned int flags;
+ struct http_uri_parser parser;
/*
* Create the location
@@ -4190,7 +4199,8 @@
/* 2: add the request Path */
htx = htxbuf(&req->buf);
sl = http_get_stline(htx);
- path = http_get_path(htx_sl_req_uri(sl));
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
if (!isttest(path))
return;
diff --git a/src/http_fetch.c b/src/http_fetch.c
index a28cbaf..d3192aa 100644
--- a/src/http_fetch.c
+++ b/src/http_fetch.c
@@ -1039,17 +1039,18 @@
struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
struct htx_sl *sl;
struct ist path;
+ struct http_uri_parser parser;
if (!htx)
return 0;
sl = http_get_stline(htx);
- path = http_get_path(htx_sl_req_uri(sl));
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
if (kw[4] == 'q' && (kw[0] == 'p' || kw[0] == 'b')) // pathq or baseq
- path = http_get_path(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
else
- path = iststop(http_get_path(htx_sl_req_uri(sl)), '?');
+ path = iststop(http_parse_path(&parser), '?');
if (!isttest(path))
return 0;
@@ -1077,6 +1078,7 @@
struct buffer *temp;
struct http_hdr_ctx ctx;
struct ist path;
+ struct http_uri_parser parser;
if (!htx)
return 0;
@@ -1091,7 +1093,8 @@
/* now retrieve the path */
sl = http_get_stline(htx);
- path = http_get_path(htx_sl_req_uri(sl));
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
if (isttest(path)) {
size_t len;
@@ -1128,6 +1131,7 @@
struct http_hdr_ctx ctx;
struct ist path;
unsigned int hash = 0;
+ struct http_uri_parser parser;
if (!htx)
return 0;
@@ -1141,7 +1145,8 @@
/* now retrieve the path */
sl = http_get_stline(htx);
- path = http_get_path(htx_sl_req_uri(sl));
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
if (isttest(path)) {
size_t len;
@@ -1486,6 +1491,7 @@
struct http_txn *txn;
struct ist path;
const char *ptr;
+ struct http_uri_parser parser;
if (!smp->strm)
return 0;
@@ -1508,7 +1514,8 @@
ptr++;
path.len = ptr - path.ptr;
- path = http_get_path(path);
+ parser = http_uri_parser_init(path);
+ path = http_parse_path(&parser);
if (!isttest(path))
return 0;
@@ -1952,6 +1959,7 @@
struct htx_sl *sl;
struct ist path;
unsigned int hash = 0;
+ struct http_uri_parser parser;
if (!htx)
return 0;
@@ -1965,7 +1973,8 @@
/* now retrieve the path */
sl = http_get_stline(htx);
- path = http_get_path(htx_sl_req_uri(sl));
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
if (path.len && *(path.ptr) == '/') {
while (path.len--)
hash = *(path.ptr++) + (hash << 6) + (hash << 16) - hash;
diff --git a/src/http_htx.c b/src/http_htx.c
index 2e39191..bbbac4a 100644
--- a/src/http_htx.c
+++ b/src/http_htx.c
@@ -407,12 +407,14 @@
struct htx_sl *sl = http_get_stline(htx);
struct ist meth, uri, vsn, p;
size_t plen = 0;
+ struct http_uri_parser parser;
if (!sl)
return 0;
uri = htx_sl_req_uri(sl);
- p = http_get_path(uri);
+ parser = http_uri_parser_init(uri);
+ p = http_parse_path(&parser);
if (!isttest(p))
p = uri;
if (with_qs)
@@ -1791,7 +1793,7 @@
vsn = ist2(temp->area + meth.len, HTX_SL_REQ_VLEN(sl));
/* reconstruct uri without port */
- path = http_get_path(uri);
+ path = http_parse_path(&parser);
chunk_istcat(temp, scheme);
chunk_istcat(temp, host);
chunk_istcat(temp, path);
diff --git a/src/log.c b/src/log.c
index 41810f5..2ba645a 100644
--- a/src/log.c
+++ b/src/log.c
@@ -1961,6 +1961,7 @@
struct timeval tv;
struct strm_logs tmp_strm_log;
struct ist path;
+ struct http_uri_parser parser;
/* FIXME: let's limit ourselves to frontend logging for now. */
@@ -2790,7 +2791,8 @@
path = ist2(uri, spc - uri);
// extract relative path without query params from url
- path = iststop(http_get_path(path), '?');
+ parser = http_uri_parser_init(path);
+ path = iststop(http_parse_path(&parser), '?');
if (!txn || !txn->uri || nspaces == 0) {
chunk.area = "<BADREQ>";
chunk.data = strlen("<BADREQ>");
diff --git a/src/mux_fcgi.c b/src/mux_fcgi.c
index 18a9ff5..1be7b96 100644
--- a/src/mux_fcgi.c
+++ b/src/mux_fcgi.c
@@ -1327,7 +1327,8 @@
#endif
if ((params->mask & FCGI_SP_URI_MASK) != FCGI_SP_URI_MASK) {
/* one of scriptname, pathinfo or query_string is no set */
- struct ist path = http_get_path(params->uri);
+ struct http_uri_parser parser = http_uri_parser_init(params->uri);
+ struct ist path = http_parse_path(&parser);
int len;
/* No scrit_name set but no valid path ==> error */