REORG: http: move the HTTP/1 chunk parser to h1.{c,h}
The inline functions http_parse_chunk_size() and http_skip_chunk_crlf()
were moved to h1.h, and http_forward_trailers() was moved to h1.c, so
that they can be called from outside. The parts that were inline
remained inline as it's critical for performance (+41% perf
difference reported in an earlier test). For now the "http_" prefix
remains in their name since they still depend on the http_msg type.
diff --git a/include/proto/h1.h b/include/proto/h1.h
index 7dff096..3551152 100644
--- a/include/proto/h1.h
+++ b/include/proto/h1.h
@@ -22,11 +22,15 @@
#ifndef _PROTO_H1_H
#define _PROTO_H1_H
+#include <common/buffer.h>
#include <common/compiler.h>
#include <common/config.h>
+#include <common/standard.h>
#include <types/h1.h>
+#include <types/proto_http.h>
extern const uint8_t h1_char_classes[256];
+int http_forward_trailers(struct http_msg *msg);
#define H1_FLG_CTL 0x01
#define H1_FLG_SEP 0x02
@@ -121,5 +125,145 @@
}
}
+/* This function may be called only in HTTP_MSG_CHUNK_CRLF. It reads the CRLF or
+ * a possible LF alone at the end of a chunk. The caller should adjust msg->next
+ * in order to include this part into the next forwarding phase. The first byte
+ * parsed is the one at b_ptr(buf, msg->next). It returns the number of bytes
+ * parsed on success (1 for LF, 2 for CRLF), so the caller can set msg_state to
+ * HTTP_MSG_CHUNK_SIZE. If not enough data are available, the function does not
+ * change anything and returns zero. If a parse error is encountered, the
+ * function sets msg->err_pos and returns < 0. Note: this function is designed
+ * to parse wrapped CRLF at the end of the buffer.
+ */
+static inline int http_skip_chunk_crlf(struct http_msg *msg)
+{
+ const struct buffer *buf = msg->chn->buf;
+ const char *ptr;
+ int bytes;
+
+ /* NB: we'll check data availability at the end. It's not a
+ * problem because whatever we match first will be checked
+ * against the correct length.
+ */
+ bytes = 1; /* a lone LF unless a CR is found first */
+ ptr = b_ptr(buf, msg->next);
+ if (*ptr == '\r') {
+ bytes++;
+ ptr++;
+ if (ptr >= buf->data + buf->size) /* wrap to the start of the storage */
+ ptr = buf->data;
+ }
+
+ if (msg->next + bytes > buf->i) /* the CR/LF bytes are not all there yet */
+ return 0;
+
+ if (*ptr != '\n') {
+ msg->err_pos = buffer_count(buf, buf->p, ptr);
+ return -1;
+ }
+ return bytes;
+}
+
+/* Parse the chunk size at msg->next. Once done, caller should adjust ->next to
+ * point to the first byte of data after the chunk size, so that we know we can
+ * forward exactly msg->next bytes. msg->sol contains the exact number of bytes
+ * forming the chunk size. That way it is always possible to differentiate
+ * between the start of the body and the start of the data. Return the number
+ * of bytes parsed on success, 0 when some data is missing, <0 on error. Note:
+ * this function is designed to parse wrapped CRLF at the end of the buffer.
+ */
+static inline int http_parse_chunk_size(struct http_msg *msg)
+{
+ const struct buffer *buf = msg->chn->buf;
+ const char *ptr = b_ptr(buf, msg->next);
+ const char *ptr_old = ptr; /* where parsing started, used to compute msg->sol */
+ const char *end = buf->data + buf->size; /* wrap point of the storage area */
+ const char *stop = bi_end(buf); /* reaching it means some data is missing */
+ unsigned int chunk = 0;
+
+ /* The chunk size is in the following form, though we are only
+ * interested in the size and CRLF :
+ * 1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
+ */
+ while (1) {
+ int c;
+ if (ptr == stop)
+ return 0;
+ c = hex2i(*ptr);
+ if (c < 0) /* not a hex digit anymore */
+ break;
+ if (unlikely(++ptr >= end))
+ ptr = buf->data;
+ if (unlikely(chunk & 0xF8000000)) /* integer overflow will occur if result >= 2GB */
+ goto error;
+ chunk = (chunk << 4) + c;
+ }
+
+ /* empty size not allowed */
+ if (unlikely(ptr == ptr_old))
+ goto error;
+
+ while (HTTP_IS_SPHT(*ptr)) { /* the "*WSP" part of the form above */
+ if (++ptr >= end)
+ ptr = buf->data;
+ if (unlikely(ptr == stop))
+ return 0;
+ }
+
+ /* Up to there, we know that at least one byte is present at *ptr. Check
+ * for the end of chunk size.
+ */
+ while (1) {
+ if (likely(HTTP_IS_CRLF(*ptr))) {
+ /* we now have a CR or an LF at ptr */
+ if (likely(*ptr == '\r')) {
+ if (++ptr >= end)
+ ptr = buf->data;
+ if (ptr == stop)
+ return 0;
+ }
+
+ if (unlikely(*ptr != '\n'))
+ goto error;
+ if (++ptr >= end)
+ ptr = buf->data;
+ /* done */
+ break;
+ }
+ else if (likely(*ptr == ';')) {
+ /* chunk extension, ends at next CRLF */
+ if (++ptr >= end)
+ ptr = buf->data;
+ if (ptr == stop)
+ return 0;
+
+ while (!HTTP_IS_CRLF(*ptr)) {
+ if (++ptr >= end)
+ ptr = buf->data;
+ if (ptr == stop)
+ return 0;
+ }
+ /* we have a CRLF now, loop above */
+ continue;
+ }
+ else
+ goto error;
+ }
+
+ /* OK we found our CRLF and now <ptr> points to the next byte, which may
+ * or may not be present. We save the number of bytes parsed into
+ * msg->sol.
+ */
+ msg->sol = ptr - ptr_old;
+ if (unlikely(ptr < ptr_old)) /* ptr wrapped: compensate with the buffer size */
+ msg->sol += buf->size;
+ msg->chunk_len = chunk; /* decoded chunk size */
+ msg->body_len += chunk; /* body_len grows by each parsed chunk size */
+ return msg->sol;
+ error:
+ msg->err_pos = buffer_count(buf, buf->p, ptr); /* failing position (ptr), counted from buf->p */
+ return -1;
+}
+
#endif /* _PROTO_H1_H */
diff --git a/include/types/proto_http.h b/include/types/proto_http.h
index 3f99df7..027bfce 100644
--- a/include/types/proto_http.h
+++ b/include/types/proto_http.h
@@ -27,6 +27,7 @@
#include <common/mini-clist.h>
#include <common/regex.h>
+#include <types/channel.h>
#include <types/h1.h>
#include <types/hdr_idx.h>
#include <types/filters.h>
diff --git a/src/h1.c b/src/h1.c
index 044709a..7a380c2 100644
--- a/src/h1.c
+++ b/src/h1.c
@@ -153,3 +153,77 @@
['~'] = H1_FLG_TOK,
[127] = H1_FLG_CTL,
};
+
+
+/* This function skips trailers in the buffer associated with HTTP message
+ * <msg>. The first visited position is msg->next. If the end of the trailers is
+ * found, the function returns >0. So, the caller can automatically schedule it
+ * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
+ * data are available, the function does not change anything except maybe
+ * msg->sol if it could parse some lines, and returns zero. If a parse error
+ * is encountered, the function returns < 0 and does not change anything except
+ * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
+ * state before calling this function, which implies that all non-trailers data
+ * have already been scheduled for forwarding, and that msg->next exactly
+ * matches the length of trailers already parsed and not forwarded. It is also
+ * important to note that this function is designed to be able to parse wrapped
+ * headers at end of buffer.
+ */
+int http_forward_trailers(struct http_msg *msg)
+{
+ const struct buffer *buf = msg->chn->buf;
+
+ /* we have msg->next which points to next line. Look for CRLF. But
+ * first, we reset msg->sol */
+ msg->sol = 0;
+ while (1) {
+ const char *p1 = NULL, *p2 = NULL; /* p1: first CR or LF of the line, p2: its LF */
+ const char *start = b_ptr(buf, msg->next + msg->sol);
+ const char *stop = bi_end(buf);
+ const char *ptr = start;
+ int bytes = 0;
+
+ /* scan current line and stop at LF or CRLF */
+ while (1) {
+ if (ptr == stop)
+ return 0;
+
+ if (*ptr == '\n') {
+ if (!p1)
+ p1 = ptr;
+ p2 = ptr;
+ break;
+ }
+
+ if (*ptr == '\r') {
+ if (p1) { /* a second CR on the same line is an error */
+ msg->err_pos = buffer_count(buf, buf->p, ptr);
+ return -1;
+ }
+ p1 = ptr;
+ }
+
+ ptr++;
+ if (ptr >= buf->data + buf->size)
+ ptr = buf->data;
+ }
+
+ /* after LF; point to beginning of next line */
+ p2++;
+ if (p2 >= buf->data + buf->size)
+ p2 = buf->data;
+
+ bytes = p2 - start;
+ if (bytes < 0) /* p2 wrapped before start: compensate with the buffer size */
+ bytes += buf->size;
+ msg->sol += bytes;
+
+ /* LF/CRLF at beginning of line => end of trailers at p2.
+ * Everything was scheduled for forwarding, there's nothing left
+ * from this message. */
+ if (p1 == start)
+ return 1;
+
+ /* OK, next line then */
+ }
+}
diff --git a/src/proto_http.c b/src/proto_http.c
index a599f65..92e6083 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -2108,219 +2108,6 @@
return;
}
-/* Parse the chunk size at msg->next. Once done, caller should adjust ->next to
- * point to the first byte of data after the chunk size, so that we know we can
- * forward exactly msg->next bytes. msg->sol contains the exact number of bytes
- * forming the chunk size. That way it is always possible to differentiate
- * between the start of the body and the start of the data. Return the number
- * of byte parsed on success, 0 when some data is missing, <0 on error. Note:
- * this function is designed to parse wrapped CRLF at the end of the buffer.
- */
-static inline int http_parse_chunk_size(struct http_msg *msg)
-{
- const struct buffer *buf = msg->chn->buf;
- const char *ptr = b_ptr(buf, msg->next);
- const char *ptr_old = ptr;
- const char *end = buf->data + buf->size;
- const char *stop = bi_end(buf);
- unsigned int chunk = 0;
-
- /* The chunk size is in the following form, though we are only
- * interested in the size and CRLF :
- * 1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
- */
- while (1) {
- int c;
- if (ptr == stop)
- return 0;
- c = hex2i(*ptr);
- if (c < 0) /* not a hex digit anymore */
- break;
- if (unlikely(++ptr >= end))
- ptr = buf->data;
- if (chunk & 0xF8000000) /* integer overflow will occur if result >= 2GB */
- goto error;
- chunk = (chunk << 4) + c;
- }
-
- /* empty size not allowed */
- if (unlikely(ptr == ptr_old))
- goto error;
-
- while (HTTP_IS_SPHT(*ptr)) {
- if (++ptr >= end)
- ptr = buf->data;
- if (unlikely(ptr == stop))
- return 0;
- }
-
- /* Up to there, we know that at least one byte is present at *ptr. Check
- * for the end of chunk size.
- */
- while (1) {
- if (likely(HTTP_IS_CRLF(*ptr))) {
- /* we now have a CR or an LF at ptr */
- if (likely(*ptr == '\r')) {
- if (++ptr >= end)
- ptr = buf->data;
- if (ptr == stop)
- return 0;
- }
-
- if (*ptr != '\n')
- goto error;
- if (++ptr >= end)
- ptr = buf->data;
- /* done */
- break;
- }
- else if (*ptr == ';') {
- /* chunk extension, ends at next CRLF */
- if (++ptr >= end)
- ptr = buf->data;
- if (ptr == stop)
- return 0;
-
- while (!HTTP_IS_CRLF(*ptr)) {
- if (++ptr >= end)
- ptr = buf->data;
- if (ptr == stop)
- return 0;
- }
- /* we have a CRLF now, loop above */
- continue;
- }
- else
- goto error;
- }
-
- /* OK we found our CRLF and now <ptr> points to the next byte, which may
- * or may not be present. We save the number of bytes parsed into
- * msg->sol.
- */
- msg->sol = ptr - ptr_old;
- if (unlikely(ptr < ptr_old))
- msg->sol += buf->size;
- msg->chunk_len = chunk;
- msg->body_len += chunk;
- return msg->sol;
- error:
- msg->err_pos = buffer_count(buf, buf->p, ptr);
- return -1;
-}
-
-/* This function skips trailers in the buffer associated with HTTP message
- * <msg>. The first visited position is msg->next. If the end of the trailers is
- * found, the function returns >0. So, the caller can automatically schedul it
- * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
- * data are available, the function does not change anything except maybe
- * msg->sol if it could parse some lines, and returns zero. If a parse error
- * is encountered, the function returns < 0 and does not change anything except
- * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
- * state before calling this function, which implies that all non-trailers data
- * have already been scheduled for forwarding, and that msg->next exactly
- * matches the length of trailers already parsed and not forwarded. It is also
- * important to note that this function is designed to be able to parse wrapped
- * headers at end of buffer.
- */
-static int http_forward_trailers(struct http_msg *msg)
-{
- const struct buffer *buf = msg->chn->buf;
-
- /* we have msg->next which points to next line. Look for CRLF. But
- * first, we reset msg->sol */
- msg->sol = 0;
- while (1) {
- const char *p1 = NULL, *p2 = NULL;
- const char *start = b_ptr(buf, msg->next + msg->sol);
- const char *stop = bi_end(buf);
- const char *ptr = start;
- int bytes = 0;
-
- /* scan current line and stop at LF or CRLF */
- while (1) {
- if (ptr == stop)
- return 0;
-
- if (*ptr == '\n') {
- if (!p1)
- p1 = ptr;
- p2 = ptr;
- break;
- }
-
- if (*ptr == '\r') {
- if (p1) {
- msg->err_pos = buffer_count(buf, buf->p, ptr);
- return -1;
- }
- p1 = ptr;
- }
-
- ptr++;
- if (ptr >= buf->data + buf->size)
- ptr = buf->data;
- }
-
- /* after LF; point to beginning of next line */
- p2++;
- if (p2 >= buf->data + buf->size)
- p2 = buf->data;
-
- bytes = p2 - start;
- if (bytes < 0)
- bytes += buf->size;
- msg->sol += bytes;
-
- /* LF/CRLF at beginning of line => end of trailers at p2.
- * Everything was scheduled for forwarding, there's nothing left
- * from this message. */
- if (p1 == start)
- return 1;
-
- /* OK, next line then */
- }
-}
-
-/* This function may be called only in HTTP_MSG_CHUNK_CRLF. It reads the CRLF or
- * a possible LF alone at the end of a chunk. The caller should adjust msg->next
- * in order to include this part into the next forwarding phase. Note that the
- * caller must ensure that ->p points to the first byte to parse. It returns
- * the number of bytes parsed on success, so the caller can set msg_state to
- * HTTP_MSG_CHUNK_SIZE. If not enough data are available, the function does not
- * change anything and returns zero. If a parse error is encountered, the
- * function returns < 0. Note: this function is designed to parse wrapped CRLF
- * at the end of the buffer.
- */
-static inline int http_skip_chunk_crlf(struct http_msg *msg)
-{
- const struct buffer *buf = msg->chn->buf;
- const char *ptr;
- int bytes;
-
- /* NB: we'll check data availabilty at the end. It's not a
- * problem because whatever we match first will be checked
- * against the correct length.
- */
- bytes = 1;
- ptr = b_ptr(buf, msg->next);
- if (*ptr == '\r') {
- bytes++;
- ptr++;
- if (ptr >= buf->data + buf->size)
- ptr = buf->data;
- }
-
- if (msg->next + bytes > buf->i)
- return 0;
-
- if (*ptr != '\n') {
- msg->err_pos = buffer_count(buf, buf->p, ptr);
- return -1;
- }
- return bytes;
-}
-
/* Parses a qvalue and returns it multipled by 1000, from 0 to 1000. If the
* value is larger than 1000, it is bound to 1000. The parser consumes up to
* 1 digit, one dot and 3 digits and stops on the first invalid character.