BUG/MEDIUM: mux-h2: split the stream's and connection's window sizes

The SETTINGS frame parser updates all streams' window for each
INITIAL_WINDOW_SIZE setting received on the connection (like h2spec
does in test 6.5.3), which can start to be expensive if repeated when
there are many streams (up to 100 by default). A quick test shows that
it's possible to parse only 35000 settings per second on a 3 GHz core
for 100 streams, which is rather small.

Given that window sizes are relative and may be negative, there's no
point in pre-initializing them for each stream and update them from
the settings. Instead, let's make them relative to the connection's
initial window size so that any change immediately affects all streams.
The only thing that remains needed is to wake up the streams that were
unblocked by the update, which is now done once at the end of
h2_process_demux() instead of once per setting. This now results in
5.7 million settings being processed per second, which is way better.

In order to keep the change small, the h2s' mws field was renamed to
"sws" for "stream window size", and an h2s_mws() function was added
to add it to the connection's initial window setting and determine the
window size to use when muxing. The h2c_update_all_ws() function was
renamed to h2c_unblock_sfctl() since it's now only used to unblock
previously blocked streams.

This needs to be backported to all versions till 1.8.

(cherry picked from commit 1d4a0f88100daeb17dd0c9470c659b1ec288bc07)
[wt: context adjustment, port to legacy parts]
Signed-off-by: Willy Tarreau <w@1wt.eu>
diff --git a/src/mux_h2.c b/src/mux_h2.c
index d605fe9..f90e943 100644
--- a/src/mux_h2.c
+++ b/src/mux_h2.c
@@ -208,7 +208,7 @@
 	struct eb32_node by_id; /* place in h2c's streams_by_id */
 	int32_t id; /* stream ID */
 	uint32_t flags;      /* H2_SF_* */
-	int mws;             /* mux window size for this stream */
+	int sws;             /* stream window size, to be added to the mux's initial window size */
 	enum h2_err errcode; /* H2 err code (H2_ERR_*) */
 	enum h2_ss st;
 	uint16_t status;     /* HTTP response status */
@@ -707,6 +707,14 @@
 	return h2s ? h2s->id : 0;
 }
 
+/* returns the sum of the stream's own window size and the mux's initial
+ * window, which together form the stream's effective window size.
+ */
+static inline int h2s_mws(const struct h2s *h2s)
+{
+	return h2s->sws + h2s->h2c->miw;
+}
+
 /* returns true of the mux is currently busy as seen from stream <h2s> */
 static inline __maybe_unused int h2c_mux_busy(const struct h2c *h2c, const struct h2s *h2s)
 {
@@ -945,7 +953,7 @@
 	LIST_INIT(&h2s->sending_list);
 	h2s->h2c       = h2c;
 	h2s->cs        = NULL;
-	h2s->mws       = h2c->miw;
+	h2s->sws       = 0;
 	h2s->flags     = H2_SF_NONE;
 	h2s->errcode   = H2_ERR_NO_ERROR;
 	h2s->st        = H2_SS_IDLE;
@@ -1543,30 +1551,23 @@
 	}
 }
 
-/* Increase all streams' outgoing window size by the difference passed in
- * argument. This is needed upon receipt of the settings frame if the initial
- * window size is different. The difference may be negative and the resulting
- * window size as well, for the time it takes to receive some window updates.
+/* Wake up all blocked streams whose window size has become positive after the
+ * mux's initial window was adjusted. This should be done after having processed
+ * SETTINGS frames which have updated the mux's initial window size.
  */
-static void h2c_update_all_ws(struct h2c *h2c, int diff)
+static void h2c_unblock_sfctl(struct h2c *h2c)
 {
 	struct h2s *h2s;
 	struct eb32_node *node;
 
-	if (!diff)
-		return;
-
 	node = eb32_first(&h2c->streams_by_id);
 	while (node) {
 		h2s = container_of(node, struct h2s, by_id);
-		h2s->mws += diff;
-
-		if (h2s->mws > 0 && (h2s->flags & H2_SF_BLK_SFCTL)) {
+		if (h2s->flags & H2_SF_BLK_SFCTL && h2s_mws(h2s) > 0) {
 			h2s->flags &= ~H2_SF_BLK_SFCTL;
 			if (h2s->send_wait && !LIST_ADDED(&h2s->list))
 				LIST_ADDQ(&h2c->send_list, &h2s->list);
 		}
-
 		node = eb32_next(node);
 	}
 }
@@ -1607,7 +1608,6 @@
 				error = H2_ERR_FLOW_CONTROL_ERROR;
 				goto fail;
 			}
-			h2c_update_all_ws(h2c, arg - h2c->miw);
 			h2c->miw = arg;
 			break;
 		case H2_SETTINGS_MAX_FRAME_SIZE:
@@ -1869,13 +1869,13 @@
 			goto strm_err;
 		}
 
-		if (h2s->mws >= 0 && h2s->mws + inc < 0) {
+		if (h2s_mws(h2s) >= 0 && h2s_mws(h2s) + inc < 0) {
 			error = H2_ERR_FLOW_CONTROL_ERROR;
 			goto strm_err;
 		}
 
-		h2s->mws += inc;
-		if (h2s->mws > 0 && (h2s->flags & H2_SF_BLK_SFCTL)) {
+		h2s->sws += inc;
+		if (h2s_mws(h2s) > 0 && (h2s->flags & H2_SF_BLK_SFCTL)) {
 			h2s->flags &= ~H2_SF_BLK_SFCTL;
 			if (h2s->send_wait && !LIST_ADDED(&h2s->list))
 				LIST_ADDQ(&h2c->send_list, &h2s->list);
@@ -2237,6 +2237,7 @@
 	struct h2s *h2s = NULL, *tmp_h2s;
 	struct h2_fh hdr;
 	unsigned int padlen = 0;
+	int32_t old_iw = h2c->miw;
 
 	if (h2c->st0 >= H2_CS_ERROR)
 		return;
@@ -2625,6 +2626,9 @@
 		h2s_notify_recv(h2s);
 	}
 
+	if (old_iw != h2c->miw)
+		h2c_unblock_sfctl(h2c);
+
 	h2c_restart_reading(h2c, 0);
 }
 
@@ -4259,8 +4263,8 @@
 	if (size > max)
 		size = max;
 
-	if (size > h2s->mws)
-		size = h2s->mws;
+	if (size > h2s_mws(h2s))
+		size = h2s_mws(h2s);
 
 	if (size <= 0) {
 		h2s->flags |= H2_SF_BLK_SFCTL;
@@ -4362,7 +4366,7 @@
 		ofs += size;
 		total += size;
 		h1m->curr_len -= size;
-		h2s->mws -= size;
+		h2s->sws -= size;
 		h2c->mws -= size;
 
 		if (size && !h1m->curr_len && (h1m->flags & H1_MF_CHNK)) {
@@ -4390,7 +4394,7 @@
 	}
 
  end:
-	trace("[%d] sent simple H2 DATA response (sid=%d) = %d bytes out (%u in, st=%s, ep=%u, es=%s, h2cws=%d h2sws=%d) data=%u", h2c->st0, h2s->id, size+9, (unsigned int)total, h1m_state_str(h1m->state), h1m->err_pos, h1m_state_str(h1m->err_state), h2c->mws, h2s->mws, (unsigned int)b_data(buf));
+	trace("[%d] sent simple H2 DATA response (sid=%d) = %d bytes out (%u in, st=%s, ep=%u, es=%s, h2cws=%d h2sws=%d) data=%u", h2c->st0, h2s->id, size+9, (unsigned int)total, h1m_state_str(h1m->state), h1m->err_pos, h1m_state_str(h1m->err_state), h2c->mws, h2s_mws(h2s), (unsigned int)b_data(buf));
 	return total;
 }
 
@@ -4937,7 +4941,7 @@
 	 */
 	if (unlikely(fsize == count &&
 	             htx->used == 1 && type == HTX_BLK_DATA &&
-	             fsize <= h2s->mws && fsize <= h2c->mws && fsize <= h2c->mfs)) {
+	             fsize <= h2s_mws(h2s) && fsize <= h2c->mws && fsize <= h2c->mfs)) {
 		void *old_area = mbuf->area;
 
 		if (b_data(mbuf)) {
@@ -4972,7 +4976,7 @@
 		h2_set_frame_size(outbuf.area, fsize);
 
 		/* update windows */
-		h2s->mws -= fsize;
+		h2s->sws -= fsize;
 		h2c->mws -= fsize;
 
 		/* and exchange with our old area */
@@ -5024,7 +5028,7 @@
 	if (!fsize)
 		goto send_empty;
 
-	if (h2s->mws <= 0) {
+	if (h2s_mws(h2s) <= 0) {
 		h2s->flags |= H2_SF_BLK_SFCTL;
 		if (LIST_ADDED(&h2s->list))
 			LIST_DEL_INIT(&h2s->list);
@@ -5034,8 +5038,8 @@
 	if (fsize > count)
 		fsize = count;
 
-	if (fsize > h2s->mws)
-		fsize = h2s->mws; // >0
+	if (fsize > h2s_mws(h2s))
+		fsize = h2s_mws(h2s); // >0
 
 	if (h2c->mfs && fsize > h2c->mfs)
 		fsize = h2c->mfs; // >0
@@ -5071,7 +5075,7 @@
 
 	/* now let's copy this this into the output buffer */
 	memcpy(outbuf.area + 9, htx_get_blk_ptr(htx, blk), fsize);
-	h2s->mws -= fsize;
+	h2s->sws -= fsize;
 	h2c->mws -= fsize;
 	count    -= fsize;