REORG: http: move the HTTP/1 header block parser to h1.c
Since it still depends on http_msg, it was not renamed yet.
diff --git a/include/proto/h1.h b/include/proto/h1.h
index 3551152..a0fa7a7 100644
--- a/include/proto/h1.h
+++ b/include/proto/h1.h
@@ -28,8 +28,16 @@
#include <common/standard.h>
#include <types/h1.h>
#include <types/proto_http.h>
+#include <proto/hdr_idx.h>
extern const uint8_t h1_char_classes[256];
+const char *http_parse_reqline(struct http_msg *msg,
+ enum h1_state state, const char *ptr, const char *end,
+ unsigned int *ret_ptr, enum h1_state *ret_state);
+const char *http_parse_stsline(struct http_msg *msg,
+ enum h1_state state, const char *ptr, const char *end,
+ unsigned int *ret_ptr, enum h1_state *ret_state);
+void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx);
int http_forward_trailers(struct http_msg *msg);
#define H1_FLG_CTL 0x01
diff --git a/src/h1.c b/src/h1.c
index 7a380c2..bca820c 100644
--- a/src/h1.c
+++ b/src/h1.c
@@ -13,6 +13,7 @@
#include <common/config.h>
#include <proto/h1.h>
+#include <proto/hdr_idx.h>
/* It is about twice as fast on recent architectures to lookup a byte in a
* table than to perform a boolean AND or OR between two tests. Refer to
@@ -155,6 +156,645 @@
};
+/*
+ * This function parses a status line between <ptr> and <end>, starting with
+ * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
+ * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
+ * will give undefined results.
+ * Note that it is the caller's responsibility to ensure that ptr < end, and
+ * that msg->sol holds the offset of the beginning of the response.
+ * If a complete line is found (which implies that at least one CR or LF is
+ * found before <end>), the updated <ptr> is returned, otherwise NULL is
+ * returned, indicating an incomplete or invalid line (which does not mean that
+ * parts have not been updated). In that case, if <ret_ptr> or <ret_state> are
+ * non-NULL, they are fed with the offset of the new <ptr> relative to
+ * msg->chn->buf->p and with the new <state>, to be passed upon next call. On
+ * invalid input the new <state> is HTTP_MSG_ERROR and msg->err_state holds the
+ * state in which the error was met.
+ *
+ * This function was intentionally designed to be called from
+ * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
+ * within its state machine and use the same macros, hence the need for same
+ * labels and variable names. Note that msg->sol is left unchanged.
+ */
+const char *http_parse_stsline(struct http_msg *msg,
+ enum h1_state state, const char *ptr, const char *end,
+ unsigned int *ret_ptr, enum h1_state *ret_state)
+{
+ const char *msg_start = msg->chn->buf->p; /* origin of all offsets stored below */
+
+ switch (state) {
+ case HTTP_MSG_RPVER:
+ http_msg_rpver:
+ if (likely(HTTP_IS_VER_TOKEN(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
+
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ msg->sl.st.v_l = ptr - msg_start;
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
+ }
+ msg->err_state = HTTP_MSG_RPVER;
+ state = HTTP_MSG_ERROR;
+ break;
+
+ case HTTP_MSG_RPVER_SP:
+ http_msg_rpver_sp:
+ if (likely(!HTTP_IS_LWS(*ptr))) {
+ msg->sl.st.c = ptr - msg_start;
+ goto http_msg_rpcode;
+ }
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
+ /* so it's a CR/LF right after the version, this is invalid */
+ msg->err_state = HTTP_MSG_RPVER_SP;
+ state = HTTP_MSG_ERROR;
+ break;
+
+ case HTTP_MSG_RPCODE:
+ http_msg_rpcode:
+ if (likely(!HTTP_IS_LWS(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
+
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
+ }
+
+ /* so it's a CR/LF, so there is no reason phrase */
+ msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
+ http_msg_rsp_reason:
+ /* FIXME: should we support HTTP responses without any reason phrase ? */
+ msg->sl.st.r = ptr - msg_start;
+ msg->sl.st.r_l = 0;
+ goto http_msg_rpline_eol;
+
+ case HTTP_MSG_RPCODE_SP:
+ http_msg_rpcode_sp:
+ if (likely(!HTTP_IS_LWS(*ptr))) {
+ msg->sl.st.r = ptr - msg_start;
+ goto http_msg_rpreason;
+ }
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
+ /* so it's a CR/LF, so there is no reason phrase */
+ goto http_msg_rsp_reason;
+
+ case HTTP_MSG_RPREASON:
+ http_msg_rpreason:
+ if (likely(!HTTP_IS_CRLF(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
+ msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
+ http_msg_rpline_eol:
+ /* We have seen the end of line. Note that we do not
+ * necessarily have the \n yet, but at least we know that we
+ * have EITHER \r OR \n, otherwise the response would not be
+ * complete. We can then record the response length and return
+ * to the caller which will be able to register it.
+ */
+ msg->sl.st.l = ptr - msg_start - msg->sol;
+ return ptr;
+
+ default:
+#ifdef DEBUG_FULL
+ fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
+ exit(1);
+#endif
+ ;
+ }
+
+ http_msg_ood:
+ /* out of valid data, or invalid input with <state> set to HTTP_MSG_ERROR */
+ if (ret_state)
+ *ret_state = state;
+ if (ret_ptr)
+ *ret_ptr = ptr - msg_start;
+ return NULL;
+}
+
+/*
+ * This function parses a request line between <ptr> and <end>, starting with
+ * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
+ * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
+ * will give undefined results.
+ * Note that it is the caller's responsibility to ensure that ptr < end, and
+ * that msg->sol holds the offset of the beginning of the request.
+ * If a complete line is found (which implies that at least one CR or LF is
+ * found before <end>), the updated <ptr> is returned, otherwise NULL is
+ * returned, indicating an incomplete or invalid line (which does not mean that
+ * parts have not been updated). In that case, if <ret_ptr> or <ret_state> are
+ * non-NULL, they are fed with the offset of the new <ptr> relative to
+ * msg->chn->buf->p and with the new <state>, to be passed upon next call. On
+ * invalid input the new <state> is HTTP_MSG_ERROR and msg->err_state holds the
+ * state in which the error was met.
+ *
+ * This function was intentionally designed to be called from
+ * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
+ * within its state machine and use the same macros, hence the need for same
+ * labels and variable names. Note that msg->sol is left unchanged.
+ */
+const char *http_parse_reqline(struct http_msg *msg,
+ enum h1_state state, const char *ptr, const char *end,
+ unsigned int *ret_ptr, enum h1_state *ret_state)
+{
+ const char *msg_start = msg->chn->buf->p; /* origin of all offsets stored below */
+
+ switch (state) {
+ case HTTP_MSG_RQMETH:
+ http_msg_rqmeth:
+ if (likely(HTTP_IS_TOKEN(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
+
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ msg->sl.rq.m_l = ptr - msg_start;
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
+ }
+
+ if (likely(HTTP_IS_CRLF(*ptr))) {
+ /* HTTP 0.9 request: the URI and the version are recorded as empty */
+ msg->sl.rq.m_l = ptr - msg_start;
+ http_msg_req09_uri:
+ msg->sl.rq.u = ptr - msg_start;
+ http_msg_req09_uri_e:
+ msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
+ http_msg_req09_ver:
+ msg->sl.rq.v = ptr - msg_start;
+ msg->sl.rq.v_l = 0;
+ goto http_msg_rqline_eol;
+ }
+ msg->err_state = HTTP_MSG_RQMETH;
+ state = HTTP_MSG_ERROR;
+ break;
+
+ case HTTP_MSG_RQMETH_SP:
+ http_msg_rqmeth_sp:
+ if (likely(!HTTP_IS_LWS(*ptr))) {
+ msg->sl.rq.u = ptr - msg_start;
+ goto http_msg_rquri;
+ }
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
+ /* so it's a CR/LF, meaning an HTTP 0.9 request */
+ goto http_msg_req09_uri;
+
+ case HTTP_MSG_RQURI:
+ http_msg_rquri:
+#if defined(__x86_64__) || \
+ defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
+ defined(__ARM_ARCH_7A__)
+ /* speedup: advance by whole words while their bytes look like
+ * regular URI chars (0x21 to 0x7e); anything else is left to the
+ * byte-by-byte checks below.
+ * NOTE(review): the second test only breaks when no byte at all
+ * has its high bit set after the subtraction; confirm that a
+ * lone 0x7f..0xa0 byte among regular chars cannot be skipped.
+ */
+ while (ptr <= end - sizeof(int)) {
+ int x = *(int *)ptr - 0x21212121;
+ if (x & 0x80808080)
+ break;
+
+ x -= 0x5e5e5e5e;
+ if (!(x & 0x80808080))
+ break;
+
+ ptr += sizeof(int);
+ }
+#endif
+ if (ptr >= end) {
+ state = HTTP_MSG_RQURI;
+ goto http_msg_ood;
+ }
+ http_msg_rquri2:
+ if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
+
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
+ }
+
+ if (likely((unsigned char)*ptr >= 128)) {
+ /* non-ASCII chars are forbidden unless option
+ * accept-invalid-http-request is enabled in the frontend.
+ * Here msg->err_pos below -1 rejects the char, otherwise
+ * it is accepted and the position of the first faulty
+ * char is captured while msg->err_pos is still -1.
+ */
+ if (msg->err_pos < -1)
+ goto invalid_char;
+ if (msg->err_pos == -1)
+ msg->err_pos = ptr - msg_start;
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
+ }
+
+ if (likely(HTTP_IS_CRLF(*ptr))) {
+ /* so it's a CR/LF, meaning an HTTP 0.9 request */
+ goto http_msg_req09_uri_e;
+ }
+
+ /* OK forbidden chars, 0..31 or 127 */
+ invalid_char:
+ msg->err_pos = ptr - msg_start;
+ msg->err_state = HTTP_MSG_RQURI;
+ state = HTTP_MSG_ERROR;
+ break;
+
+ case HTTP_MSG_RQURI_SP:
+ http_msg_rquri_sp:
+ if (likely(!HTTP_IS_LWS(*ptr))) {
+ msg->sl.rq.v = ptr - msg_start;
+ goto http_msg_rqver;
+ }
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
+ /* so it's a CR/LF, meaning an HTTP 0.9 request */
+ goto http_msg_req09_ver;
+
+ case HTTP_MSG_RQVER:
+ http_msg_rqver:
+ if (likely(HTTP_IS_VER_TOKEN(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
+
+ if (likely(HTTP_IS_CRLF(*ptr))) {
+ msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
+ http_msg_rqline_eol:
+ /* We have seen the end of line. Note that we do not
+ * necessarily have the \n yet, but at least we know that we
+ * have EITHER \r OR \n, otherwise the request would not be
+ * complete. We can then record the request length and return
+ * to the caller which will be able to register it.
+ */
+ msg->sl.rq.l = ptr - msg_start - msg->sol;
+ return ptr;
+ }
+
+ /* neither an HTTP_VER token nor a CRLF */
+ msg->err_state = HTTP_MSG_RQVER;
+ state = HTTP_MSG_ERROR;
+ break;
+
+ default:
+#ifdef DEBUG_FULL
+ fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
+ exit(1);
+#endif
+ ;
+ }
+
+ http_msg_ood:
+ /* out of valid data, or invalid input with <state> set to HTTP_MSG_ERROR */
+ if (ret_state)
+ *ret_state = state;
+ if (ret_ptr)
+ *ret_ptr = ptr - msg_start;
+ return NULL;
+}
+
+/*
+ * This function parses an HTTP message, either a request or a response,
+ * depending on the initial msg->msg_state. The caller is responsible for
+ * ensuring that the message does not wrap. The function can be preempted
+ * everywhere when data are missing and recalled at the exact same location
+ * with no information loss. The message may even be realigned between two
+ * calls. The header index is re-initialized here only when switching from
+ * MSG_RPBEFORE to MSG_RPVER (for requests the caller presumably does it, to
+ * be confirmed). It modifies msg->sol among other fields. Note that msg->sol
+ * will be initialized after completing the first state, so that none of the
+ * msg pointers has to be initialized prior to the first call.
+ */
+void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
+{
+ enum h1_state state; /* updated only when leaving the FSM */
+ register char *ptr, *end; /* request pointers, to avoid dereferences */
+ struct buffer *buf;
+
+ state = msg->msg_state;
+ buf = msg->chn->buf;
+ ptr = buf->p + msg->next;
+ end = buf->p + buf->i;
+
+ if (unlikely(ptr >= end))
+ goto http_msg_ood;
+
+ switch (state) {
+ /*
+ * First, states that are specific to the response only.
+ * We check them first so that request and headers are
+ * closer to each other (accessed more often).
+ */
+ case HTTP_MSG_RPBEFORE:
+ http_msg_rpbefore:
+ if (likely(HTTP_IS_TOKEN(*ptr))) {
+ /* we have a start of message, but we have to check
+ * first if we need to remove some CRLF. We can only
+ * do this when o=0.
+ */
+ if (unlikely(ptr != buf->p)) {
+ if (buf->o)
+ goto http_msg_ood;
+ /* Remove empty leading lines, as recommended by RFC2616. */
+ bi_fast_delete(buf, ptr - buf->p);
+ }
+ msg->sol = 0;
+ msg->sl.st.l = 0; /* used in debug mode */
+ hdr_idx_init(idx);
+ state = HTTP_MSG_RPVER;
+ goto http_msg_rpver;
+ }
+
+ if (unlikely(!HTTP_IS_CRLF(*ptr))) {
+ state = HTTP_MSG_RPBEFORE;
+ goto http_msg_invalid;
+ }
+
+ if (unlikely(*ptr == '\n'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
+ /* stop here */
+
+ case HTTP_MSG_RPBEFORE_CR:
+ http_msg_rpbefore_cr:
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
+ /* stop here */
+
+ case HTTP_MSG_RPVER:
+ http_msg_rpver:
+ case HTTP_MSG_RPVER_SP:
+ case HTTP_MSG_RPCODE:
+ case HTTP_MSG_RPCODE_SP:
+ case HTTP_MSG_RPREASON:
+ ptr = (char *)http_parse_stsline(msg,
+ state, ptr, end,
+ &msg->next, &msg->msg_state);
+ if (unlikely(!ptr))
+ return;
+
+ /* we have a full response and we know that we have either a CR
+ * or an LF at <ptr>.
+ */
+ hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
+
+ msg->sol = ptr - buf->p;
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
+ goto http_msg_rpline_end;
+
+ case HTTP_MSG_RPLINE_END:
+ http_msg_rpline_end:
+ /* msg->sol must point to the first of CR or LF. */
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
+ /* stop here */
+
+ /*
+ * Second, states that are specific to the request only
+ */
+ case HTTP_MSG_RQBEFORE:
+ http_msg_rqbefore:
+ if (likely(HTTP_IS_TOKEN(*ptr))) {
+ /* we have a start of message, but we have to check
+ * first if we need to remove some CRLF. We can only
+ * do this when o=0.
+ */
+ if (likely(ptr != buf->p)) {
+ if (buf->o)
+ goto http_msg_ood;
+ /* Remove empty leading lines, as recommended by RFC2616. */
+ bi_fast_delete(buf, ptr - buf->p);
+ }
+ msg->sol = 0;
+ msg->sl.rq.l = 0; /* used in debug mode */
+ state = HTTP_MSG_RQMETH;
+ goto http_msg_rqmeth;
+ }
+
+ if (unlikely(!HTTP_IS_CRLF(*ptr))) {
+ state = HTTP_MSG_RQBEFORE;
+ goto http_msg_invalid;
+ }
+
+ if (unlikely(*ptr == '\n'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
+ /* stop here */
+
+ case HTTP_MSG_RQBEFORE_CR:
+ http_msg_rqbefore_cr:
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
+ /* stop here */
+
+ case HTTP_MSG_RQMETH:
+ http_msg_rqmeth:
+ case HTTP_MSG_RQMETH_SP:
+ case HTTP_MSG_RQURI:
+ case HTTP_MSG_RQURI_SP:
+ case HTTP_MSG_RQVER:
+ ptr = (char *)http_parse_reqline(msg,
+ state, ptr, end,
+ &msg->next, &msg->msg_state);
+ if (unlikely(!ptr))
+ return;
+
+ /* we have a full request and we know that we have either a CR
+ * or an LF at <ptr>.
+ */
+ hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
+
+ msg->sol = ptr - buf->p;
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
+ goto http_msg_rqline_end;
+
+ case HTTP_MSG_RQLINE_END:
+ http_msg_rqline_end:
+ /* check for HTTP/0.9 request : no version information available.
+ * msg->sol must point to the first of CR or LF.
+ */
+ if (unlikely(msg->sl.rq.v_l == 0))
+ goto http_msg_last_lf;
+
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
+ /* stop here */
+
+ /*
+ * Common states below
+ */
+ case HTTP_MSG_HDR_FIRST:
+ http_msg_hdr_first:
+ msg->sol = ptr - buf->p;
+ if (likely(!HTTP_IS_CRLF(*ptr))) {
+ goto http_msg_hdr_name;
+ }
+
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
+ goto http_msg_last_lf;
+
+ case HTTP_MSG_HDR_NAME:
+ http_msg_hdr_name:
+ /* assumes msg->sol points to the first char */
+ if (likely(HTTP_IS_TOKEN(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
+
+ if (likely(*ptr == ':'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
+
+ if (likely(msg->err_pos < -1) || *ptr == '\n') {
+ state = HTTP_MSG_HDR_NAME;
+ goto http_msg_invalid;
+ }
+
+ if (msg->err_pos == -1) /* capture error pointer */
+ msg->err_pos = ptr - buf->p; /* >= 0 now */
+
+ /* and we still accept this non-token character */
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
+
+ case HTTP_MSG_HDR_L1_SP:
+ http_msg_hdr_l1_sp:
+ /* assumes msg->sol points to the first char */
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
+
+ /* header value can be basically anything except CR/LF */
+ msg->sov = ptr - buf->p;
+
+ if (likely(!HTTP_IS_CRLF(*ptr))) {
+ goto http_msg_hdr_val;
+ }
+
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
+ goto http_msg_hdr_l1_lf;
+
+ case HTTP_MSG_HDR_L1_LF:
+ http_msg_hdr_l1_lf:
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
+
+ case HTTP_MSG_HDR_L1_LWS:
+ http_msg_hdr_l1_lws:
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ /* replace HT,CR,LF with spaces */
+ for (; buf->p + msg->sov < ptr; msg->sov++)
+ buf->p[msg->sov] = ' ';
+ goto http_msg_hdr_l1_sp;
+ }
+ /* we had a header consisting only of spaces ! */
+ msg->eol = msg->sov;
+ goto http_msg_complete_header;
+
+ case HTTP_MSG_HDR_VAL:
+ http_msg_hdr_val:
+ /* assumes msg->sol points to the first char, and msg->sov
+ * points to the first character of the value.
+ */
+
+ /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
+ * and lower. In fact since most of the time is spent in the loop, we
+ * also remove the sign bit test so that bytes 0x8e..0xff as well as
+ * 0x00..0x0d break the loop, but we don't care since the former are
+ * very rare in header values. The byte-by-byte code at
+ * http_msg_hdr_val2 then takes over from the first such pack.
+ */
+#if defined(__x86_64__)
+ while (ptr <= end - sizeof(long)) {
+ if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
+ goto http_msg_hdr_val2;
+ ptr += sizeof(long);
+ }
+#endif
+#if defined(__x86_64__) || \
+ defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
+ defined(__ARM_ARCH_7A__)
+ while (ptr <= end - sizeof(int)) {
+ if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
+ goto http_msg_hdr_val2;
+ ptr += sizeof(int);
+ }
+#endif
+ if (ptr >= end) {
+ state = HTTP_MSG_HDR_VAL;
+ goto http_msg_ood;
+ }
+ http_msg_hdr_val2:
+ if (likely(!HTTP_IS_CRLF(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
+
+ msg->eol = ptr - buf->p;
+ /* Note: we could also copy eol into ->eoh so that we have the
+ * real header end in case it ends with lots of LWS, but is this
+ * really needed ?
+ */
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
+ goto http_msg_hdr_l2_lf;
+
+ case HTTP_MSG_HDR_L2_LF:
+ http_msg_hdr_l2_lf:
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
+
+ case HTTP_MSG_HDR_L2_LWS:
+ http_msg_hdr_l2_lws:
+ if (unlikely(HTTP_IS_SPHT(*ptr))) {
+ /* LWS: replace HT,CR,LF with spaces */
+ for (; buf->p + msg->eol < ptr; msg->eol++)
+ buf->p[msg->eol] = ' ';
+ goto http_msg_hdr_val;
+ }
+ http_msg_complete_header:
+ /*
+ * It was a new header, so the last one is finished.
+ * Assumes msg->sol points to the first char, msg->sov points
+ * to the first character of the value and msg->eol to the
+ * first CR or LF so we know how the line ends. We insert last
+ * header into the index, and report an invalid message if the
+ * index refuses it.
+ */
+ if (unlikely(hdr_idx_add(msg->eol - msg->sol, buf->p[msg->eol] == '\r',
+ idx, idx->tail) < 0)) {
+ state = HTTP_MSG_HDR_L2_LWS;
+ goto http_msg_invalid;
+ }
+
+ msg->sol = ptr - buf->p;
+ if (likely(!HTTP_IS_CRLF(*ptr))) {
+ goto http_msg_hdr_name;
+ }
+
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
+ goto http_msg_last_lf;
+
+ case HTTP_MSG_LAST_LF:
+ http_msg_last_lf:
+ /* Assumes msg->sol points to the first of either CR or LF.
+ * Sets ->sov and ->next to the total header length, ->eoh to
+ * the last CRLF, and ->eol to the last CRLF length (1 or 2).
+ * Then resets ->sol to zero and switches the message to
+ * HTTP_MSG_BODY.
+ */
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
+ ptr++;
+ msg->sov = msg->next = ptr - buf->p;
+ msg->eoh = msg->sol;
+ msg->sol = 0;
+ msg->eol = msg->sov - msg->eoh;
+ msg->msg_state = HTTP_MSG_BODY;
+ return;
+
+ case HTTP_MSG_ERROR:
+ /* this may only happen if we call http_msg_analyzer() twice with an error */
+ break;
+
+ default:
+#ifdef DEBUG_FULL
+ fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
+ exit(1);
+#endif
+ ;
+ }
+ http_msg_ood:
+ /* out of data: save the state and the position to resume from */
+ msg->msg_state = state;
+ msg->next = ptr - buf->p;
+ return;
+
+ http_msg_invalid:
+ /* invalid message: remember the faulty state and where parsing stopped */
+ msg->err_state = state;
+ msg->msg_state = HTTP_MSG_ERROR;
+ msg->next = ptr - buf->p;
+ return;
+}
+
/* This function skips trailers in the buffer associated with HTTP message
* <msg>. The first visited position is msg->next. If the end of the trailers is
* found, the function returns >0. So, the caller can automatically schedul it
diff --git a/src/proto_http.c b/src/proto_http.c
index 92e6083..c0532d7 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -1234,288 +1234,6 @@
}
/*
- * This function parses a status line between <ptr> and <end>, starting with
- * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
- * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
- * will give undefined results.
- * Note that it is upon the caller's responsibility to ensure that ptr < end,
- * and that msg->sol points to the beginning of the response.
- * If a complete line is found (which implies that at least one CR or LF is
- * found before <end>, the updated <ptr> is returned, otherwise NULL is
- * returned indicating an incomplete line (which does not mean that parts have
- * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
- * non-NULL, they are fed with the new <ptr> and <state> values to be passed
- * upon next call.
- *
- * This function was intentionally designed to be called from
- * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
- * within its state machine and use the same macros, hence the need for same
- * labels and variable names. Note that msg->sol is left unchanged.
- */
-const char *http_parse_stsline(struct http_msg *msg,
- enum h1_state state, const char *ptr, const char *end,
- unsigned int *ret_ptr, enum h1_state *ret_state)
-{
- const char *msg_start = msg->chn->buf->p;
-
- switch (state) {
- case HTTP_MSG_RPVER:
- http_msg_rpver:
- if (likely(HTTP_IS_VER_TOKEN(*ptr)))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
-
- if (likely(HTTP_IS_SPHT(*ptr))) {
- msg->sl.st.v_l = ptr - msg_start;
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
- }
- msg->err_state = HTTP_MSG_RPVER;
- state = HTTP_MSG_ERROR;
- break;
-
- case HTTP_MSG_RPVER_SP:
- http_msg_rpver_sp:
- if (likely(!HTTP_IS_LWS(*ptr))) {
- msg->sl.st.c = ptr - msg_start;
- goto http_msg_rpcode;
- }
- if (likely(HTTP_IS_SPHT(*ptr)))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
- /* so it's a CR/LF, this is invalid */
- msg->err_state = HTTP_MSG_RPVER_SP;
- state = HTTP_MSG_ERROR;
- break;
-
- case HTTP_MSG_RPCODE:
- http_msg_rpcode:
- if (likely(!HTTP_IS_LWS(*ptr)))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
-
- if (likely(HTTP_IS_SPHT(*ptr))) {
- msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
- }
-
- /* so it's a CR/LF, so there is no reason phrase */
- msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
- http_msg_rsp_reason:
- /* FIXME: should we support HTTP responses without any reason phrase ? */
- msg->sl.st.r = ptr - msg_start;
- msg->sl.st.r_l = 0;
- goto http_msg_rpline_eol;
-
- case HTTP_MSG_RPCODE_SP:
- http_msg_rpcode_sp:
- if (likely(!HTTP_IS_LWS(*ptr))) {
- msg->sl.st.r = ptr - msg_start;
- goto http_msg_rpreason;
- }
- if (likely(HTTP_IS_SPHT(*ptr)))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
- /* so it's a CR/LF, so there is no reason phrase */
- goto http_msg_rsp_reason;
-
- case HTTP_MSG_RPREASON:
- http_msg_rpreason:
- if (likely(!HTTP_IS_CRLF(*ptr)))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
- msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
- http_msg_rpline_eol:
- /* We have seen the end of line. Note that we do not
- * necessarily have the \n yet, but at least we know that we
- * have EITHER \r OR \n, otherwise the response would not be
- * complete. We can then record the response length and return
- * to the caller which will be able to register it.
- */
- msg->sl.st.l = ptr - msg_start - msg->sol;
- return ptr;
-
- default:
-#ifdef DEBUG_FULL
- fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
- exit(1);
-#endif
- ;
- }
-
- http_msg_ood:
- /* out of valid data */
- if (ret_state)
- *ret_state = state;
- if (ret_ptr)
- *ret_ptr = ptr - msg_start;
- return NULL;
-}
-
-/*
- * This function parses a request line between <ptr> and <end>, starting with
- * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
- * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
- * will give undefined results.
- * Note that it is upon the caller's responsibility to ensure that ptr < end,
- * and that msg->sol points to the beginning of the request.
- * If a complete line is found (which implies that at least one CR or LF is
- * found before <end>, the updated <ptr> is returned, otherwise NULL is
- * returned indicating an incomplete line (which does not mean that parts have
- * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
- * non-NULL, they are fed with the new <ptr> and <state> values to be passed
- * upon next call.
- *
- * This function was intentionally designed to be called from
- * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
- * within its state machine and use the same macros, hence the need for same
- * labels and variable names. Note that msg->sol is left unchanged.
- */
-const char *http_parse_reqline(struct http_msg *msg,
- enum h1_state state, const char *ptr, const char *end,
- unsigned int *ret_ptr, enum h1_state *ret_state)
-{
- const char *msg_start = msg->chn->buf->p;
-
- switch (state) {
- case HTTP_MSG_RQMETH:
- http_msg_rqmeth:
- if (likely(HTTP_IS_TOKEN(*ptr)))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
-
- if (likely(HTTP_IS_SPHT(*ptr))) {
- msg->sl.rq.m_l = ptr - msg_start;
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
- }
-
- if (likely(HTTP_IS_CRLF(*ptr))) {
- /* HTTP 0.9 request */
- msg->sl.rq.m_l = ptr - msg_start;
- http_msg_req09_uri:
- msg->sl.rq.u = ptr - msg_start;
- http_msg_req09_uri_e:
- msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
- http_msg_req09_ver:
- msg->sl.rq.v = ptr - msg_start;
- msg->sl.rq.v_l = 0;
- goto http_msg_rqline_eol;
- }
- msg->err_state = HTTP_MSG_RQMETH;
- state = HTTP_MSG_ERROR;
- break;
-
- case HTTP_MSG_RQMETH_SP:
- http_msg_rqmeth_sp:
- if (likely(!HTTP_IS_LWS(*ptr))) {
- msg->sl.rq.u = ptr - msg_start;
- goto http_msg_rquri;
- }
- if (likely(HTTP_IS_SPHT(*ptr)))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
- /* so it's a CR/LF, meaning an HTTP 0.9 request */
- goto http_msg_req09_uri;
-
- case HTTP_MSG_RQURI:
- http_msg_rquri:
-#if defined(__x86_64__) || \
- defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
- defined(__ARM_ARCH_7A__)
- /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
- while (ptr <= end - sizeof(int)) {
- int x = *(int *)ptr - 0x21212121;
- if (x & 0x80808080)
- break;
-
- x -= 0x5e5e5e5e;
- if (!(x & 0x80808080))
- break;
-
- ptr += sizeof(int);
- }
-#endif
- if (ptr >= end) {
- state = HTTP_MSG_RQURI;
- goto http_msg_ood;
- }
- http_msg_rquri2:
- if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
-
- if (likely(HTTP_IS_SPHT(*ptr))) {
- msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
- }
-
- if (likely((unsigned char)*ptr >= 128)) {
- /* non-ASCII chars are forbidden unless option
- * accept-invalid-http-request is enabled in the frontend.
- * In any case, we capture the faulty char.
- */
- if (msg->err_pos < -1)
- goto invalid_char;
- if (msg->err_pos == -1)
- msg->err_pos = ptr - msg_start;
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
- }
-
- if (likely(HTTP_IS_CRLF(*ptr))) {
- /* so it's a CR/LF, meaning an HTTP 0.9 request */
- goto http_msg_req09_uri_e;
- }
-
- /* OK forbidden chars, 0..31 or 127 */
- invalid_char:
- msg->err_pos = ptr - msg_start;
- msg->err_state = HTTP_MSG_RQURI;
- state = HTTP_MSG_ERROR;
- break;
-
- case HTTP_MSG_RQURI_SP:
- http_msg_rquri_sp:
- if (likely(!HTTP_IS_LWS(*ptr))) {
- msg->sl.rq.v = ptr - msg_start;
- goto http_msg_rqver;
- }
- if (likely(HTTP_IS_SPHT(*ptr)))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
- /* so it's a CR/LF, meaning an HTTP 0.9 request */
- goto http_msg_req09_ver;
-
- case HTTP_MSG_RQVER:
- http_msg_rqver:
- if (likely(HTTP_IS_VER_TOKEN(*ptr)))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
-
- if (likely(HTTP_IS_CRLF(*ptr))) {
- msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
- http_msg_rqline_eol:
- /* We have seen the end of line. Note that we do not
- * necessarily have the \n yet, but at least we know that we
- * have EITHER \r OR \n, otherwise the request would not be
- * complete. We can then record the request length and return
- * to the caller which will be able to register it.
- */
- msg->sl.rq.l = ptr - msg_start - msg->sol;
- return ptr;
- }
-
- /* neither an HTTP_VER token nor a CRLF */
- msg->err_state = HTTP_MSG_RQVER;
- state = HTTP_MSG_ERROR;
- break;
-
- default:
-#ifdef DEBUG_FULL
- fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
- exit(1);
-#endif
- ;
- }
-
- http_msg_ood:
- /* out of valid data */
- if (ret_state)
- *ret_state = state;
- if (ret_ptr)
- *ret_ptr = ptr - msg_start;
- return NULL;
-}
-
-/*
* Returns the data from Authorization header. Function may be called more
* than once so data is stored in txn->auth_data. When no header is found
* or auth method is unknown auth_method is set to HTTP_AUTH_WRONG to avoid
@@ -1596,363 +1314,6 @@
return 0;
}
-
-/*
- * This function parses an HTTP message, either a request or a response,
- * depending on the initial msg->msg_state. The caller is responsible for
- * ensuring that the message does not wrap. The function can be preempted
- * everywhere when data are missing and recalled at the exact same location
- * with no information loss. The message may even be realigned between two
- * calls. The header index is re-initialized when switching from
- * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other
- * fields. Note that msg->sol will be initialized after completing the first
- * state, so that none of the msg pointers has to be initialized prior to the
- * first call.
- */
-void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
-{
- enum h1_state state; /* updated only when leaving the FSM */
- register char *ptr, *end; /* request pointers, to avoid dereferences */
- struct buffer *buf;
-
- state = msg->msg_state;
- buf = msg->chn->buf;
- ptr = buf->p + msg->next;
- end = buf->p + buf->i;
-
- if (unlikely(ptr >= end))
- goto http_msg_ood;
-
- switch (state) {
- /*
- * First, states that are specific to the response only.
- * We check them first so that request and headers are
- * closer to each other (accessed more often).
- */
- case HTTP_MSG_RPBEFORE:
- http_msg_rpbefore:
- if (likely(HTTP_IS_TOKEN(*ptr))) {
- /* we have a start of message, but we have to check
- * first if we need to remove some CRLF. We can only
- * do this when o=0.
- */
- if (unlikely(ptr != buf->p)) {
- if (buf->o)
- goto http_msg_ood;
- /* Remove empty leading lines, as recommended by RFC2616. */
- bi_fast_delete(buf, ptr - buf->p);
- }
- msg->sol = 0;
- msg->sl.st.l = 0; /* used in debug mode */
- hdr_idx_init(idx);
- state = HTTP_MSG_RPVER;
- goto http_msg_rpver;
- }
-
- if (unlikely(!HTTP_IS_CRLF(*ptr))) {
- state = HTTP_MSG_RPBEFORE;
- goto http_msg_invalid;
- }
-
- if (unlikely(*ptr == '\n'))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
- /* stop here */
-
- case HTTP_MSG_RPBEFORE_CR:
- http_msg_rpbefore_cr:
- EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
- /* stop here */
-
- case HTTP_MSG_RPVER:
- http_msg_rpver:
- case HTTP_MSG_RPVER_SP:
- case HTTP_MSG_RPCODE:
- case HTTP_MSG_RPCODE_SP:
- case HTTP_MSG_RPREASON:
- ptr = (char *)http_parse_stsline(msg,
- state, ptr, end,
- &msg->next, &msg->msg_state);
- if (unlikely(!ptr))
- return;
-
- /* we have a full response and we know that we have either a CR
- * or an LF at <ptr>.
- */
- hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
-
- msg->sol = ptr - buf->p;
- if (likely(*ptr == '\r'))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
- goto http_msg_rpline_end;
-
- case HTTP_MSG_RPLINE_END:
- http_msg_rpline_end:
- /* msg->sol must point to the first of CR or LF. */
- EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
- /* stop here */
-
- /*
- * Second, states that are specific to the request only
- */
- case HTTP_MSG_RQBEFORE:
- http_msg_rqbefore:
- if (likely(HTTP_IS_TOKEN(*ptr))) {
- /* we have a start of message, but we have to check
- * first if we need to remove some CRLF. We can only
- * do this when o=0.
- */
- if (likely(ptr != buf->p)) {
- if (buf->o)
- goto http_msg_ood;
- /* Remove empty leading lines, as recommended by RFC2616. */
- bi_fast_delete(buf, ptr - buf->p);
- }
- msg->sol = 0;
- msg->sl.rq.l = 0; /* used in debug mode */
- state = HTTP_MSG_RQMETH;
- goto http_msg_rqmeth;
- }
-
- if (unlikely(!HTTP_IS_CRLF(*ptr))) {
- state = HTTP_MSG_RQBEFORE;
- goto http_msg_invalid;
- }
-
- if (unlikely(*ptr == '\n'))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
- /* stop here */
-
- case HTTP_MSG_RQBEFORE_CR:
- http_msg_rqbefore_cr:
- EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
- /* stop here */
-
- case HTTP_MSG_RQMETH:
- http_msg_rqmeth:
- case HTTP_MSG_RQMETH_SP:
- case HTTP_MSG_RQURI:
- case HTTP_MSG_RQURI_SP:
- case HTTP_MSG_RQVER:
- ptr = (char *)http_parse_reqline(msg,
- state, ptr, end,
- &msg->next, &msg->msg_state);
- if (unlikely(!ptr))
- return;
-
- /* we have a full request and we know that we have either a CR
- * or an LF at <ptr>.
- */
- hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
-
- msg->sol = ptr - buf->p;
- if (likely(*ptr == '\r'))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
- goto http_msg_rqline_end;
-
- case HTTP_MSG_RQLINE_END:
- http_msg_rqline_end:
- /* check for HTTP/0.9 request : no version information available.
- * msg->sol must point to the first of CR or LF.
- */
- if (unlikely(msg->sl.rq.v_l == 0))
- goto http_msg_last_lf;
-
- EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
- /* stop here */
-
- /*
- * Common states below
- */
- case HTTP_MSG_HDR_FIRST:
- http_msg_hdr_first:
- msg->sol = ptr - buf->p;
- if (likely(!HTTP_IS_CRLF(*ptr))) {
- goto http_msg_hdr_name;
- }
-
- if (likely(*ptr == '\r'))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
- goto http_msg_last_lf;
-
- case HTTP_MSG_HDR_NAME:
- http_msg_hdr_name:
- /* assumes msg->sol points to the first char */
- if (likely(HTTP_IS_TOKEN(*ptr)))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
-
- if (likely(*ptr == ':'))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
-
- if (likely(msg->err_pos < -1) || *ptr == '\n') {
- state = HTTP_MSG_HDR_NAME;
- goto http_msg_invalid;
- }
-
- if (msg->err_pos == -1) /* capture error pointer */
- msg->err_pos = ptr - buf->p; /* >= 0 now */
-
- /* and we still accept this non-token character */
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
-
- case HTTP_MSG_HDR_L1_SP:
- http_msg_hdr_l1_sp:
- /* assumes msg->sol points to the first char */
- if (likely(HTTP_IS_SPHT(*ptr)))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
-
- /* header value can be basically anything except CR/LF */
- msg->sov = ptr - buf->p;
-
- if (likely(!HTTP_IS_CRLF(*ptr))) {
- goto http_msg_hdr_val;
- }
-
- if (likely(*ptr == '\r'))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
- goto http_msg_hdr_l1_lf;
-
- case HTTP_MSG_HDR_L1_LF:
- http_msg_hdr_l1_lf:
- EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
-
- case HTTP_MSG_HDR_L1_LWS:
- http_msg_hdr_l1_lws:
- if (likely(HTTP_IS_SPHT(*ptr))) {
- /* replace HT,CR,LF with spaces */
- for (; buf->p + msg->sov < ptr; msg->sov++)
- buf->p[msg->sov] = ' ';
- goto http_msg_hdr_l1_sp;
- }
- /* we had a header consisting only in spaces ! */
- msg->eol = msg->sov;
- goto http_msg_complete_header;
-
- case HTTP_MSG_HDR_VAL:
- http_msg_hdr_val:
- /* assumes msg->sol points to the first char, and msg->sov
- * points to the first character of the value.
- */
-
- /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
- * and lower. In fact since most of the time is spent in the loop, we
- * also remove the sign bit test so that bytes 0x8e..0x0d break the
- * loop, but we don't care since they're very rare in header values.
- */
-#if defined(__x86_64__)
- while (ptr <= end - sizeof(long)) {
- if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
- goto http_msg_hdr_val2;
- ptr += sizeof(long);
- }
-#endif
-#if defined(__x86_64__) || \
- defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
- defined(__ARM_ARCH_7A__)
- while (ptr <= end - sizeof(int)) {
- if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
- goto http_msg_hdr_val2;
- ptr += sizeof(int);
- }
-#endif
- if (ptr >= end) {
- state = HTTP_MSG_HDR_VAL;
- goto http_msg_ood;
- }
- http_msg_hdr_val2:
- if (likely(!HTTP_IS_CRLF(*ptr)))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
-
- msg->eol = ptr - buf->p;
- /* Note: we could also copy eol into ->eoh so that we have the
- * real header end in case it ends with lots of LWS, but is this
- * really needed ?
- */
- if (likely(*ptr == '\r'))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
- goto http_msg_hdr_l2_lf;
-
- case HTTP_MSG_HDR_L2_LF:
- http_msg_hdr_l2_lf:
- EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
-
- case HTTP_MSG_HDR_L2_LWS:
- http_msg_hdr_l2_lws:
- if (unlikely(HTTP_IS_SPHT(*ptr))) {
- /* LWS: replace HT,CR,LF with spaces */
- for (; buf->p + msg->eol < ptr; msg->eol++)
- buf->p[msg->eol] = ' ';
- goto http_msg_hdr_val;
- }
- http_msg_complete_header:
- /*
- * It was a new header, so the last one is finished.
- * Assumes msg->sol points to the first char, msg->sov points
- * to the first character of the value and msg->eol to the
- * first CR or LF so we know how the line ends. We insert last
- * header into the index.
- */
- if (unlikely(hdr_idx_add(msg->eol - msg->sol, buf->p[msg->eol] == '\r',
- idx, idx->tail) < 0)) {
- state = HTTP_MSG_HDR_L2_LWS;
- goto http_msg_invalid;
- }
-
- msg->sol = ptr - buf->p;
- if (likely(!HTTP_IS_CRLF(*ptr))) {
- goto http_msg_hdr_name;
- }
-
- if (likely(*ptr == '\r'))
- EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
- goto http_msg_last_lf;
-
- case HTTP_MSG_LAST_LF:
- http_msg_last_lf:
- /* Assumes msg->sol points to the first of either CR or LF.
- * Sets ->sov and ->next to the total header length, ->eoh to
- * the last CRLF, and ->eol to the last CRLF length (1 or 2).
- */
- EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
- ptr++;
- msg->sov = msg->next = ptr - buf->p;
- msg->eoh = msg->sol;
- msg->sol = 0;
- msg->eol = msg->sov - msg->eoh;
- msg->msg_state = HTTP_MSG_BODY;
- return;
-
- case HTTP_MSG_ERROR:
- /* this may only happen if we call http_msg_analyser() twice with an error */
- break;
-
- default:
-#ifdef DEBUG_FULL
- fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
- exit(1);
-#endif
- ;
- }
- http_msg_ood:
- /* out of data */
- msg->msg_state = state;
- msg->next = ptr - buf->p;
- return;
-
- http_msg_invalid:
- /* invalid message */
- msg->err_state = state;
- msg->msg_state = HTTP_MSG_ERROR;
- msg->next = ptr - buf->p;
- return;
-}
/* convert an HTTP/0.9 request into an HTTP/1.0 request. Returns 1 if the
* conversion succeeded, 0 in case of error. If the request was already 1.X,