MEDIUM: compression: consider the "q=" attribute in Accept-Encoding
Until now the "q=" attribute was not considered at all. This was problematic
because its first effect was that compression tokens were not even matched
when it was present.
It is important to parse it correctly because we still want to allow
a user-agent to send "q=0" to explicitly disable a compressor, or to
specify its preferences.
Now, q-values are respected in order of precedence, and when several
q-values are equal, the first occurrence is used.
diff --git a/src/proto_http.c b/src/proto_http.c
index 89e75b7..0bca45c 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -2144,6 +2144,38 @@
return 1;
}
+/* Parses a qvalue and returns it multiplied by 1000, from 0 to 1000. If the
+ * value is larger than 1000, it is bound to 1000. The parser consumes up to
+ * 1 digit, one dot and 3 digits and stops on the first invalid character.
+ * Unparsable qvalues return 1000 as "q=1.000".
+ *
+ * <qvalue> points to the text to parse. At most five characters are examined
+ * and the scan ends on the first character which does not fit the expected
+ * form, so anything following the number is ignored. No validation error is
+ * ever reported: a string which does not start with a digit (including an
+ * empty one) yields the default of 1000.
+ *
+ * Examples of returned values :
+ *   "0"     =>    0
+ *   "0."    =>    0
+ *   "0.5"   =>  500
+ *   "0.25"  =>  250
+ *   "1"     => 1000
+ *   "1.5"   => 1000  (1500 bound to 1000)
+ *   "2"     => 1000  (2000 bound to 1000)
+ *   "x"     => 1000  (unparsable)
+ *
+ * NOTE(review): isdigit() is called with a plain char. The C standard only
+ * defines it for values representable as unsigned char (or EOF), so a cast to
+ * unsigned char looks advisable where char is signed -- to be confirmed.
+ */
+int parse_qvalue(const char *qvalue)
+{
+ int q = 1000; /* returned as-is when no leading digit is found */
+
+ if (!isdigit(*qvalue))
+ goto out;
+ q = (*qvalue++ - '0') * 1000; /* integer part, a single digit */
+
+ if (*qvalue++ != '.') /* no fractional part: keep the integer part only */
+ goto out;
+
+ if (!isdigit(*qvalue))
+ goto out;
+ q += (*qvalue++ - '0') * 100; /* first decimal: tenths */
+
+ if (!isdigit(*qvalue))
+ goto out;
+ q += (*qvalue++ - '0') * 10; /* second decimal: hundredths */
+
+ if (!isdigit(*qvalue))
+ goto out;
+ q += (*qvalue++ - '0') * 1; /* third decimal: thousandths */
+ out:
+ if (q > 1000) /* e.g. "1.5" or "9": bound to the maximum of q=1.000 */
+ q = 1000;
+ return q;
+}
/*
* Selects a compression algorithm depending on the client request.
@@ -2175,26 +2207,71 @@
/* search for the algo in the backend in priority or the frontend */
if ((s->be->comp && (comp_algo_back = s->be->comp->algos)) || (s->fe->comp && (comp_algo_back = s->fe->comp->algos))) {
+ int best_q = 0;
+
ctx.idx = 0;
while (http_find_header2("Accept-Encoding", 15, req->p, &txn->hdr_idx, &ctx)) {
- for (comp_algo = comp_algo_back; comp_algo; comp_algo = comp_algo->next) {
- if (word_match(ctx.line + ctx.val, ctx.vlen, comp_algo->name, comp_algo->name_len)) {
- s->comp_algo = comp_algo;
+ const char *qval;
+ int q;
+ int toklen;
+
+ /* try to isolate the token from the optional q-value */
+ toklen = 0;
+ while (toklen < ctx.vlen && http_is_token[(unsigned char)*(ctx.line + ctx.val + toklen)])
+ toklen++;
+
+ qval = ctx.line + ctx.val + toklen;
+ while (1) {
+ while (qval < ctx.line + ctx.val + ctx.vlen && http_is_lws[(unsigned char)*qval])
+ qval++;
+
+ if (qval >= ctx.line + ctx.val + ctx.vlen || *qval != ';') {
+ qval = NULL;
+ break;
+ }
+ qval++;
- /* remove all occurrences of the header when "compression offload" is set */
+ while (qval < ctx.line + ctx.val + ctx.vlen && http_is_lws[(unsigned char)*qval])
+ qval++;
- if ((s->be->comp && s->be->comp->offload) ||
- (s->fe->comp && s->fe->comp->offload)) {
- http_remove_header2(msg, &txn->hdr_idx, &ctx);
- ctx.idx = 0;
- while (http_find_header2("Accept-Encoding", 15, req->p, &txn->hdr_idx, &ctx)) {
- http_remove_header2(msg, &txn->hdr_idx, &ctx);
- }
- }
- return 1;
+ if (qval >= ctx.line + ctx.val + ctx.vlen) {
+ qval = NULL;
+ break;
}
+ if (strncmp(qval, "q=", MIN(ctx.line + ctx.val + ctx.vlen - qval, 2)) == 0)
+ break;
+
+ while (qval < ctx.line + ctx.val + ctx.vlen && *qval != ';')
+ qval++;
+ }
+
+ /* here we have qval pointing to the first "q=" attribute or NULL if not found */
+ q = qval ? parse_qvalue(qval + 2) : 1000;
+
+ if (q <= best_q)
+ continue;
+
+ for (comp_algo = comp_algo_back; comp_algo; comp_algo = comp_algo->next) {
+ if (*(ctx.line + ctx.val) == '*' ||
+ word_match(ctx.line + ctx.val, toklen, comp_algo->name, comp_algo->name_len)) {
+ s->comp_algo = comp_algo;
+ best_q = q;
+ break;
+ }
+ }
+ }
+ }
+
+ /* remove all occurrences of the header when "compression offload" is set */
+ if (s->comp_algo) {
+ if ((s->be->comp && s->be->comp->offload) || (s->fe->comp && s->fe->comp->offload)) {
+ http_remove_header2(msg, &txn->hdr_idx, &ctx);
+ ctx.idx = 0;
+ while (http_find_header2("Accept-Encoding", 15, req->p, &txn->hdr_idx, &ctx)) {
+ http_remove_header2(msg, &txn->hdr_idx, &ctx);
}
}
+ return 1;
}
/* identity is implicit does not require headers */