blob: d9f0b565f71a6867d2bea522239fc738cabf7d69 [file] [log] [blame]
Willy Tarreau62f52692017-10-08 23:01:42 +02001/*
2 * HTTP/2 mux-demux for connections
3 *
4 * Copyright 2017 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <common/cfgparse.h>
14#include <common/config.h>
Willy Tarreau5ab6b572017-09-22 08:05:00 +020015#include <common/h2.h>
16#include <common/hpack-tbl.h>
Willy Tarreaue4820742017-07-27 13:37:23 +020017#include <common/net_helper.h>
Willy Tarreau35dbd5d2017-09-22 09:13:49 +020018#include <proto/applet.h>
Willy Tarreau62f52692017-10-08 23:01:42 +020019#include <proto/connection.h>
Willy Tarreau3ccf4b22017-10-13 19:07:26 +020020#include <proto/h1.h>
Willy Tarreau62f52692017-10-08 23:01:42 +020021#include <proto/stream.h>
Willy Tarreau5ab6b572017-09-22 08:05:00 +020022#include <eb32tree.h>
Willy Tarreau62f52692017-10-08 23:01:42 +020023
24
Willy Tarreau2a856182017-05-16 15:20:39 +020025/* Dummy streams returned by h2c_st_by_id() for stream IDs which are in the
 * idle or closed state. They are only declared here; their initializers are
 * provided further down in this file.
 */
26static const struct h2s *h2_closed_stream;
27static const struct h2s *h2_idle_stream;
28
Willy Tarreau5ab6b572017-09-22 08:05:00 +020029/* the h2c connection pool, used by h2c_frt_init() and h2_release() */
30static struct pool_head *pool2_h2c;
Willy Tarreau18312642017-10-11 07:57:07 +020031/* the h2s stream pool, used by h2c_stream_new() */
32static struct pool_head *pool2_h2s;
Willy Tarreau5ab6b572017-09-22 08:05:00 +020033
34/* Connection flags (32 bit), in h2c->flags. The flags are grouped by purpose
 * and each group has an aggregate mask which is used to decide whether a
 * direction may be re-enabled once one blocking cause disappears.
 */
35#define H2_CF_NONE 0x00000000
36
Willy Tarreau2e5b60e2017-09-25 11:49:03 +020037/* Flags indicating why writing to the mux is blocked. */
38#define H2_CF_MUX_MALLOC 0x00000001 // mux blocked on lack of connection's mux buffer
39#define H2_CF_MUX_MFULL 0x00000002 // mux blocked on connection's mux buffer full
40#define H2_CF_MUX_BLOCK_ANY 0x00000003 // aggregate of the mux flags above (MALLOC|MFULL)
41
42/* Flags indicating why the demux is blocked. */
43#define H2_CF_DEM_DALLOC 0x00000004 // demux blocked on lack of connection's demux buffer
44#define H2_CF_DEM_DFULL 0x00000008 // demux blocked on connection's demux buffer full
45#define H2_CF_DEM_MBUSY 0x00000010 // demux blocked on connection's mux side busy
46#define H2_CF_DEM_MROOM 0x00000020 // demux blocked on lack of room in mux buffer
47#define H2_CF_DEM_SALLOC 0x00000040 // demux blocked on lack of stream's request buffer
48#define H2_CF_DEM_SFULL 0x00000080 // demux blocked on stream request buffer full
49#define H2_CF_DEM_BLOCK_ANY 0x000000FC // aggregate of the six demux flags above
50
Willy Tarreau081d4722017-05-16 21:51:05 +020051/* other flags, set by h2c_send_goaway_error() */
52#define H2_CF_GOAWAY_SENT 0x00000100 // a GOAWAY frame was successfully sent
53#define H2_CF_GOAWAY_FAILED 0x00000200 // a GOAWAY frame failed to be sent
54
55
Willy Tarreau5ab6b572017-09-22 08:05:00 +020056/* H2 connection state, in h2c->st0. The order of the entries matters: the
 * demux compares states with "<" and ">=" (e.g. st0 < H2_CS_FRAME_H for the
 * handshake phase and st0 >= H2_CS_ERROR for the error states), so new states
 * must be inserted at the right place.
 */
57enum h2_cs {
58 H2_CS_PREFACE, // init done, waiting for connection preface
59 H2_CS_SETTINGS1, // preface OK, waiting for first settings frame
60 H2_CS_FRAME_H, // first settings frame ok, waiting for frame header
61 H2_CS_FRAME_P, // frame header OK, waiting for frame payload
62 H2_CS_FRAME_A, // frame payload OK, trying to send ACK/RST frame
63 H2_CS_ERROR, // send GOAWAY(errcode) and close the connection ASAP
64 H2_CS_ERROR2, // GOAWAY(errcode) sent, close the connection ASAP
65 H2_CS_ENTRIES // must be last
66} __attribute__((packed));
67
68/* H2 connection descriptor. One instance is allocated per connection by
 * h2c_frt_init() and stored in conn->mux_ctx. It holds the connection-wide
 * state plus one set of fields per direction (demux = receive, mux = send).
 */
69struct h2c {
70 struct connection *conn; /* the underlying connection */
71
72 enum h2_cs st0; /* connection state (H2_CS_*) */
73 enum h2_err errcode; /* H2 err code (H2_ERR_*) */
74
75 /* 16 bit hole here */
76 uint32_t flags; /* connection flags: H2_CF_* */
77 int32_t max_id; /* highest ID known on this connection, <0 before preface */
78 uint32_t rcvd_c; /* newly received data to ACK for the connection */
79 uint32_t rcvd_s; /* newly received data to ACK for the current stream (dsi) */
80
81 /* states for the demux direction */
82 struct hpack_dht *ddht; /* demux dynamic header table */
83 struct buffer *dbuf; /* demux buffer, &buf_empty while not allocated */
84
85 int32_t dsi; /* demux stream ID (<0 = idle) */
86 int32_t dfl; /* demux frame length (if dsi >= 0) */
87 int8_t dft; /* demux frame type (if dsi >= 0) */
88 int8_t dff; /* demux frame flags (if dsi >= 0) */
89 /* 16 bit hole here */
90 int32_t last_sid; /* last processed stream ID for GOAWAY, <0 before preface */
91
92 /* states for the mux direction */
93 struct buffer *mbuf; /* mux buffer, &buf_empty while not allocated */
94 int32_t msi; /* mux stream ID (<0 = idle) */
95 int32_t mfl; /* mux frame length (presumably valid if msi >= 0; the original comment said dsi -- TODO confirm) */
96 int8_t mft; /* mux frame type (presumably valid if msi >= 0 -- TODO confirm) */
97 int8_t mff; /* mux frame flags (presumably valid if msi >= 0 -- TODO confirm) */
98 /* 16 bit hole here */
99 int32_t miw; /* mux initial window size for all new streams */
100 int32_t mws; /* mux window size. Can be negative. */
101 int32_t mfs; /* mux's max frame size */
102
103 struct eb_root streams_by_id; /* all active streams by their ID */
104 struct list send_list; /* list of blocked streams requesting to send */
105 struct list fctl_list; /* list of streams blocked by connection's fctl */
Willy Tarreau35dbd5d2017-09-22 09:13:49 +0200106 struct buffer_wait dbuf_wait; /* wait list for demux buffer allocation */
Willy Tarreau14398122017-09-22 14:26:04 +0200107 struct buffer_wait mbuf_wait; /* wait list for mux buffer allocation */
Willy Tarreau5ab6b572017-09-22 08:05:00 +0200108};
109
Willy Tarreau18312642017-10-11 07:57:07 +0200110/* H2 stream state, in h2s->st. The order of the entries matters: h2s_error()
 * only marks an error on states strictly between H2_SS_IDLE and H2_SS_ERROR,
 * so all "live" states must stay between these two.
 */
111enum h2_ss {
112 H2_SS_IDLE = 0, // idle
113 H2_SS_RLOC, // reserved(local)
114 H2_SS_RREM, // reserved(remote)
115 H2_SS_OPEN, // open
116 H2_SS_HREM, // half-closed(remote)
117 H2_SS_HLOC, // half-closed(local)
Willy Tarreau96060ba2017-10-16 18:34:34 +0200118 H2_SS_ERROR, // an error needs to be sent using RST_STREAM
119 H2_SS_RESET, // closed after sending RST_STREAM
Willy Tarreau18312642017-10-11 07:57:07 +0200120 H2_SS_CLOSED, // closed
121 H2_SS_ENTRIES // must be last
122} __attribute__((packed));
123
124/* HTTP/2 stream flags (32 bit), in h2s->flags */
125#define H2_SF_NONE 0x00000000
126#define H2_SF_ES_RCVD 0x00000001 // presumably END_STREAM received -- TODO confirm
127#define H2_SF_ES_SENT 0x00000002 // presumably END_STREAM sent -- TODO confirm
128
129#define H2_SF_RST_RCVD 0x00000004 // received RST_STREAM
130#define H2_SF_RST_SENT 0x00000008 // sent RST_STREAM
131
Willy Tarreau2e5b60e2017-09-25 11:49:03 +0200132/* stream flags indicating the reason the stream is blocked */
133#define H2_SF_BLK_MBUSY 0x00000010 // blocked waiting for mux access (transient)
134#define H2_SF_BLK_MROOM 0x00000020 // blocked waiting for room in the mux
135#define H2_SF_BLK_MFCTL 0x00000040 // blocked due to mux fctl
136#define H2_SF_BLK_SFCTL 0x00000080 // blocked due to stream fctl
137#define H2_SF_BLK_ANY 0x000000F0 // any of the four blocking reasons above
138
Willy Tarreau18312642017-10-11 07:57:07 +0200139/* H2 stream descriptor, describing the stream as it appears in the H2C, and as
140 * it is being processed in the internal HTTP representation (H1 for now).
 * Instances are allocated from pool2_h2s by h2c_stream_new() and indexed by
 * their ID in the connection's streams_by_id tree.
141 */
142struct h2s {
143 struct conn_stream *cs; /* attached conn_stream; cs->ctx points back to this h2s */
144 struct h2c *h2c; /* connection this stream belongs to */
145 struct h1m req, res; /* request and response parser state for H1 */
146 struct eb32_node by_id; /* place in h2c's streams_by_id */
147 struct list list; /* position in active/blocked lists if blocked>0 */
148 int32_t id; /* stream ID */
149 uint32_t flags; /* H2_SF_* */
150 int mws; /* mux window size for this stream, initialized from h2c->miw */
151 enum h2_err errcode; /* H2 err code (H2_ERR_*) */
152 enum h2_ss st; /* stream state (H2_SS_*) */
153};
Willy Tarreau5ab6b572017-09-22 08:05:00 +0200154
Willy Tarreauc6405142017-09-21 20:23:50 +0200155/* descriptor for an h2 frame header, as decoded from the 9-byte wire format
 * by h2_peek_frame_hdr().
 */
156struct h2_fh {
157 uint32_t len; /* length, host order, 24 bits */
158 uint32_t sid; /* stream id, host order, 31 bits */
159 uint8_t ft; /* frame type */
160 uint8_t ff; /* frame flags */
161};
162
Willy Tarreaufe20e5b2017-07-27 11:42:14 +0200163/* a few settings from the global section. h2c_snd_settings() only advertises
 * the first two when they differ from the values below (4096 and 65535), and
 * only advertises max_concurrent_streams when it is non-zero.
 */
164static int h2_settings_header_table_size = 4096; /* initial value */
Willy Tarreaue6baec02017-07-27 11:45:11 +0200165static int h2_settings_initial_window_size = 65535; /* initial value */
Willy Tarreau5242ef82017-07-27 11:47:28 +0200166static int h2_settings_max_concurrent_streams = 100;
Willy Tarreaufe20e5b2017-07-27 11:42:14 +0200167
Willy Tarreau2a856182017-05-16 15:20:39 +0200168/* a dummy closed stream, returned by h2c_st_by_id() for an ID which is not
 * above h2c->max_id and is not found in the streams tree. It has no
 * conn_stream and no connection attached.
 */
169static const struct h2s *h2_closed_stream = &(const struct h2s){
170 .cs = NULL,
171 .h2c = NULL,
172 .st = H2_SS_CLOSED,
173 .id = 0,
174};
175
176/* and a dummy idle stream for use with any unannounced stream, i.e. the one
 * returned by h2c_st_by_id() for an ID greater than h2c->max_id.
 */
177static const struct h2s *h2_idle_stream = &(const struct h2s){
178 .cs = NULL,
179 .h2c = NULL,
180 .st = H2_SS_IDLE,
181 .id = 0,
182};
183
Willy Tarreaufe20e5b2017-07-27 11:42:14 +0200184
Willy Tarreau35dbd5d2017-09-22 09:13:49 +0200185/*****************************************************/
186/* functions below are for dynamic buffer management */
187/*****************************************************/
188
189/* re-enables receiving on mux <target> after a buffer was allocated. It returns
190 * 1 if the allocation succeeds, in which case the connection is woken up, or 0
191 * if it's impossible to wake up and we prefer to be woken up later.
192 */
193static int h2_dbuf_available(void *target)
194{
195 struct h2c *h2c = target;
196
197 /* take the buffer now as we'll get scheduled waiting for ->wake() */
198 if (b_alloc_margin(&h2c->dbuf, 0)) {
Willy Tarreau1b62c5c2017-09-25 11:55:01 +0200199 h2c->flags &= ~H2_CF_DEM_DALLOC;
200 if (!(h2c->flags & H2_CF_DEM_BLOCK_ANY))
201 conn_xprt_want_recv(h2c->conn);
Willy Tarreau35dbd5d2017-09-22 09:13:49 +0200202 return 1;
203 }
204 return 0;
205}
206
207static inline struct buffer *h2_get_dbuf(struct h2c *h2c)
208{
209 struct buffer *buf = NULL;
210
211 if (likely(LIST_ISEMPTY(&h2c->dbuf_wait.list)) &&
212 unlikely((buf = b_alloc_margin(&h2c->dbuf, 0)) == NULL)) {
213 h2c->dbuf_wait.target = h2c->conn;
214 h2c->dbuf_wait.wakeup_cb = h2_dbuf_available;
215 SPIN_LOCK(BUF_WQ_LOCK, &buffer_wq_lock);
216 LIST_ADDQ(&buffer_wq, &h2c->dbuf_wait.list);
217 SPIN_UNLOCK(BUF_WQ_LOCK, &buffer_wq_lock);
218 __conn_xprt_stop_recv(h2c->conn);
219 }
220 return buf;
221}
222
223static inline void h2_release_dbuf(struct h2c *h2c)
224{
225 if (h2c->dbuf->size) {
226 b_free(&h2c->dbuf);
227 offer_buffers(h2c->dbuf_wait.target,
228 tasks_run_queue + applets_active_queue);
229 }
230}
231
Willy Tarreau14398122017-09-22 14:26:04 +0200232/* re-enables sending on mux <target> after a buffer was allocated. It returns
233 * 1 if the allocation succeeds, in which case the connection is woken up, or 0
234 * if it's impossible to wake up and we prefer to be woken up later.
235 */
236static int h2_mbuf_available(void *target)
237{
238 struct h2c *h2c = target;
239
240 /* take the buffer now as we'll get scheduled waiting for ->wake(). */
241 if (b_alloc_margin(&h2c->mbuf, 0)) {
Willy Tarreau1b62c5c2017-09-25 11:55:01 +0200242 if (h2c->flags & H2_CF_MUX_MALLOC) {
243 h2c->flags &= ~H2_CF_MUX_MALLOC;
244 if (!(h2c->flags & H2_CF_MUX_BLOCK_ANY))
245 conn_xprt_want_send(h2c->conn);
246 }
247
248 if (h2c->flags & H2_CF_DEM_MROOM) {
249 h2c->flags &= ~H2_CF_DEM_MROOM;
250 if (!(h2c->flags & H2_CF_DEM_BLOCK_ANY))
251 conn_xprt_want_recv(h2c->conn);
252 }
253
Willy Tarreau14398122017-09-22 14:26:04 +0200254 /* FIXME: we should in fact call something like h2_update_poll()
255 * now to recompte the polling. For now it will be enough like
256 * this.
257 */
Willy Tarreau14398122017-09-22 14:26:04 +0200258 return 1;
259 }
260 return 0;
261}
262
263static inline struct buffer *h2_get_mbuf(struct h2c *h2c)
264{
265 struct buffer *buf = NULL;
266
267 if (likely(LIST_ISEMPTY(&h2c->mbuf_wait.list)) &&
268 unlikely((buf = b_alloc_margin(&h2c->mbuf, 0)) == NULL)) {
269 h2c->mbuf_wait.target = h2c;
270 h2c->mbuf_wait.wakeup_cb = h2_mbuf_available;
271 SPIN_LOCK(BUF_WQ_LOCK, &buffer_wq_lock);
272 LIST_ADDQ(&buffer_wq, &h2c->mbuf_wait.list);
273 SPIN_UNLOCK(BUF_WQ_LOCK, &buffer_wq_lock);
274
275 /* FIXME: we should in fact only block the direction being
276 * currently used. For now it will be enough like this.
277 */
278 __conn_xprt_stop_send(h2c->conn);
279 __conn_xprt_stop_recv(h2c->conn);
280 }
281 return buf;
282}
283
284static inline void h2_release_mbuf(struct h2c *h2c)
285{
286 if (h2c->mbuf->size) {
287 b_free(&h2c->mbuf);
288 offer_buffers(h2c->mbuf_wait.target,
289 tasks_run_queue + applets_active_queue);
290 }
291}
292
Willy Tarreau35dbd5d2017-09-22 09:13:49 +0200293
Willy Tarreau62f52692017-10-08 23:01:42 +0200294/*****************************************************************/
295/* functions below are dedicated to the mux setup and management */
296/*****************************************************************/
297
Willy Tarreau32218eb2017-09-22 08:07:25 +0200298/* tries to initialize the inbound h2c mux. Returns < 0 in case of failure. */
299static int h2c_frt_init(struct connection *conn)
300{
301 struct h2c *h2c;
302
303 h2c = pool_alloc2(pool2_h2c);
304 if (!h2c)
305 goto fail;
306
307 h2c->ddht = hpack_dht_alloc(h2_settings_header_table_size);
308 if (!h2c->ddht)
309 goto fail;
310
311 /* Initialise the context. */
312 h2c->st0 = H2_CS_PREFACE;
313 h2c->conn = conn;
314 h2c->max_id = -1;
315 h2c->errcode = H2_ERR_NO_ERROR;
316 h2c->flags = H2_CF_NONE;
317 h2c->rcvd_c = 0;
318 h2c->rcvd_s = 0;
319
320 h2c->dbuf = &buf_empty;
321 h2c->dsi = -1;
322 h2c->msi = -1;
323 h2c->last_sid = -1;
324
325 h2c->mbuf = &buf_empty;
326 h2c->miw = 65535; /* mux initial window size */
327 h2c->mws = 65535; /* mux window size */
328 h2c->mfs = 16384; /* initial max frame size */
329 h2c->streams_by_id = EB_ROOT_UNIQUE;
330 LIST_INIT(&h2c->send_list);
331 LIST_INIT(&h2c->fctl_list);
Willy Tarreau35dbd5d2017-09-22 09:13:49 +0200332 LIST_INIT(&h2c->dbuf_wait.list);
Willy Tarreau14398122017-09-22 14:26:04 +0200333 LIST_INIT(&h2c->mbuf_wait.list);
Willy Tarreau32218eb2017-09-22 08:07:25 +0200334 conn->mux_ctx = h2c;
335
336 conn_xprt_want_recv(conn);
337 /* mux->wake will be called soon to complete the operation */
338 return 0;
339 fail:
340 pool_free2(pool2_h2c, h2c);
341 return -1;
342}
343
Willy Tarreau62f52692017-10-08 23:01:42 +0200344/* Initialize the mux once it's attached. For outgoing connections, the context
345 * is already initialized before installing the mux, so we detect incoming
346 * connections from the fact that the context is still NULL. Returns < 0 on
347 * error.
348 */
349static int h2_init(struct connection *conn)
350{
351 if (conn->mux_ctx) {
352 /* we don't support outgoing connections for now */
353 return -1;
354 }
355
Willy Tarreau32218eb2017-09-22 08:07:25 +0200356 return h2c_frt_init(conn);
Willy Tarreau62f52692017-10-08 23:01:42 +0200357}
358
Willy Tarreau2373acc2017-10-12 17:35:14 +0200359/* returns the stream associated with id <id> or NULL if not found */
360static inline struct h2s *h2c_st_by_id(struct h2c *h2c, int id)
361{
362 struct eb32_node *node;
363
Willy Tarreau2a856182017-05-16 15:20:39 +0200364 if (id > h2c->max_id)
365 return (struct h2s *)h2_idle_stream;
366
Willy Tarreau2373acc2017-10-12 17:35:14 +0200367 node = eb32_lookup(&h2c->streams_by_id, id);
368 if (!node)
Willy Tarreau2a856182017-05-16 15:20:39 +0200369 return (struct h2s *)h2_closed_stream;
Willy Tarreau2373acc2017-10-12 17:35:14 +0200370
371 return container_of(node, struct h2s, by_id);
372}
373
Willy Tarreau62f52692017-10-08 23:01:42 +0200374/* release function for a connection. This one should be called to free all
375 * resources allocated to the mux.
376 */
377static void h2_release(struct connection *conn)
378{
Willy Tarreau32218eb2017-09-22 08:07:25 +0200379 struct h2c *h2c = conn->mux_ctx;
380
381 LIST_DEL(&conn->list);
382
383 if (h2c) {
384 hpack_dht_free(h2c->ddht);
Willy Tarreau35dbd5d2017-09-22 09:13:49 +0200385 h2_release_dbuf(h2c);
386 SPIN_LOCK(BUF_WQ_LOCK, &buffer_wq_lock);
387 LIST_DEL(&h2c->dbuf_wait.list);
388 SPIN_UNLOCK(BUF_WQ_LOCK, &buffer_wq_lock);
Willy Tarreau14398122017-09-22 14:26:04 +0200389
390 h2_release_mbuf(h2c);
391 SPIN_LOCK(BUF_WQ_LOCK, &buffer_wq_lock);
392 LIST_DEL(&h2c->mbuf_wait.list);
393 SPIN_UNLOCK(BUF_WQ_LOCK, &buffer_wq_lock);
394
Willy Tarreau32218eb2017-09-22 08:07:25 +0200395 pool_free2(pool2_h2c, h2c);
396 }
397
398 conn->mux = NULL;
399 conn->mux_ctx = NULL;
400
401 conn_stop_tracking(conn);
402 conn_full_close(conn);
403 if (conn->destroy_cb)
404 conn->destroy_cb(conn);
405 conn_free(conn);
Willy Tarreau62f52692017-10-08 23:01:42 +0200406}
407
408
Willy Tarreau71681172017-10-23 14:39:06 +0200409/******************************************************/
410/* functions below are for the H2 protocol processing */
411/******************************************************/
412
413/* returns the stream if of stream <h2s> or 0 if <h2s> is NULL */
414static inline int h2s_id(const struct h2s *h2s)
415{
416 return h2s ? h2s->id : 0;
417}
418
Willy Tarreau5b5e6872017-09-25 16:17:25 +0200419/* returns true of the mux is currently busy as seen from stream <h2s> */
420static inline int h2c_mux_busy(const struct h2c *h2c, const struct h2s *h2s)
421{
422 if (h2c->msi < 0)
423 return 0;
424
425 if (h2c->msi == h2s_id(h2s))
426 return 0;
427
428 return 1;
429}
430
Willy Tarreau741d6df2017-10-17 08:00:59 +0200431/* marks an error on the connection */
432static inline void h2c_error(struct h2c *h2c, enum h2_err err)
433{
434 h2c->errcode = err;
435 h2c->st0 = H2_CS_ERROR;
436}
437
Willy Tarreau2e43f082017-10-17 08:03:59 +0200438/* marks an error on the stream */
439static inline void h2s_error(struct h2s *h2s, enum h2_err err)
440{
441 if (h2s->st > H2_SS_IDLE && h2s->st < H2_SS_ERROR) {
442 h2s->errcode = err;
443 h2s->st = H2_SS_ERROR;
444 if (h2s->cs)
445 h2s->cs->flags |= CS_FL_ERROR;
446 }
447}
448
/* Writes the 24-bit frame size <len> at address <frame>: bits 23..16 go into
 * the first byte and the lower 16 bits are written into the next two bytes
 * using write_n16().
 */
static inline void h2_set_frame_size(void *frame, uint32_t len)
{
	uint8_t *dst = frame;

	dst[0] = len >> 16;
	write_n16(&dst[1], len);
}
457
Willy Tarreau54c15062017-10-10 17:10:03 +0200458/* reads <bytes> bytes from buffer <b> starting at relative offset <o> from the
459 * current pointer, dealing with wrapping, and stores the result in <dst>. It's
460 * the caller's responsibility to verify that there are at least <bytes> bytes
461 * available in the buffer's input prior to calling this function.
462 */
463static inline void h2_get_buf_bytes(void *dst, size_t bytes,
464 const struct buffer *b, int o)
465{
466 readv_bytes(dst, bytes, b_ptr(b, o), b_end(b) - b_ptr(b, o), b->data);
467}
468
469static inline uint16_t h2_get_n16(const struct buffer *b, int o)
470{
471 return readv_n16(b_ptr(b, o), b_end(b) - b_ptr(b, o), b->data);
472}
473
474static inline uint32_t h2_get_n32(const struct buffer *b, int o)
475{
476 return readv_n32(b_ptr(b, o), b_end(b) - b_ptr(b, o), b->data);
477}
478
479static inline uint64_t h2_get_n64(const struct buffer *b, int o)
480{
481 return readv_n64(b_ptr(b, o), b_end(b) - b_ptr(b, o), b->data);
482}
483
484
Willy Tarreau715d5312017-07-11 15:20:24 +0200485/* Peeks an H2 frame header from buffer <b> into descriptor <h>. The algorithm
486 * is not obvious. It turns out that H2 headers are neither aligned nor do they
487 * use regular sizes. And to add to the trouble, the buffer may wrap so each
488 * byte read must be checked. The header is formed like this :
489 *
490 * b0 b1 b2 b3 b4 b5..b8
491 * +----------+---------+--------+----+----+----------------------+
492 * |len[23:16]|len[15:8]|len[7:0]|type|flag|sid[31:0] (big endian)|
493 * +----------+---------+--------+----+----+----------------------+
494 *
495 * Here we read a big-endian 64 bit word from h[1]. This way in a single read
496 * we get the sid properly aligned and ordered, and 16 bits of len properly
497 * ordered as well. The type and flags can be extracted using bit shifts from
498 * the word, and only one extra read is needed to fetch len[16:23].
499 * Returns zero if some bytes are missing, otherwise non-zero on success.
500 */
501static int h2_peek_frame_hdr(const struct buffer *b, struct h2_fh *h)
502{
503 uint64_t w;
504
505 if (b->i < 9)
506 return 0;
507
508 w = readv_n64(b_ptr(b,1), b_end(b) - b_ptr(b,1), b->data);
509 h->len = *b->p << 16;
510 h->sid = w & 0x7FFFFFFF; /* RFC7540#4.1: R bit must be ignored */
511 h->ff = w >> 32;
512 h->ft = w >> 40;
513 h->len += w >> 48;
514 return 1;
515}
516
/* Skips the next 9 input bytes of buffer <b>, which correspond to the frame
 * header possibly parsed by h2_peek_frame_hdr().
 */
static inline void h2_skip_frame_hdr(struct buffer *b)
{
	const int hdr_bytes = 9; /* size of an H2 frame header on the wire */

	bi_del(b, hdr_bytes);
}
524
/* Same as h2_peek_frame_hdr() but automatically advances buffer <b> past the
 * frame header on success. Returns the value of h2_peek_frame_hdr(), i.e. zero
 * when bytes are missing (the buffer is then left untouched).
 */
static inline int h2_get_frame_hdr(struct buffer *b, struct h2_fh *h)
{
	int parsed = h2_peek_frame_hdr(b, h);

	if (parsed <= 0)
		return parsed;

	h2_skip_frame_hdr(b);
	return parsed;
}
535
Willy Tarreau3ccf4b22017-10-13 19:07:26 +0200536/* creates a new stream <id> on the h2c connection and returns it, or NULL in
537 * case of memory allocation error.
538 */
539static struct h2s *h2c_stream_new(struct h2c *h2c, int id)
540{
541 struct conn_stream *cs;
542 struct h2s *h2s;
543
544 h2s = pool_alloc2(pool2_h2s);
545 if (!h2s)
546 goto out;
547
548 h2s->h2c = h2c;
549 h2s->mws = h2c->miw;
550 h2s->flags = H2_SF_NONE;
551 h2s->errcode = H2_ERR_NO_ERROR;
552 h2s->st = H2_SS_IDLE;
553 h1m_init(&h2s->req);
554 h1m_init(&h2s->res);
555 h2s->by_id.key = h2s->id = id;
556 h2c->max_id = id;
557 LIST_INIT(&h2s->list);
558
559 eb32_insert(&h2c->streams_by_id, &h2s->by_id);
560
561 cs = cs_new(h2c->conn);
562 if (!cs)
563 goto out_close;
564
565 h2s->cs = cs;
566 cs->ctx = h2s;
567
568 if (stream_create_from_cs(cs) < 0)
569 goto out_free_cs;
570
571 /* OK done, the stream lives its own life now */
572 return h2s;
573
574 out_free_cs:
575 cs_free(cs);
576 out_close:
577 eb32_delete(&h2s->by_id);
578 pool_free2(pool2_h2s, h2s);
579 h2s = NULL;
580 out:
581 return h2s;
582}
583
Willy Tarreaube5b7152017-09-25 16:25:39 +0200584/* try to send a settings frame on the connection. Returns > 0 on success, 0 if
585 * it couldn't do anything. It may return an error in h2c. See RFC7540#11.3 for
586 * the various settings codes.
587 */
588static int h2c_snd_settings(struct h2c *h2c)
589{
590 struct buffer *res;
591 char buf_data[100]; // enough for 15 settings
592 struct chunk buf;
593 int ret;
594
595 if (h2c_mux_busy(h2c, NULL)) {
596 h2c->flags |= H2_CF_DEM_MBUSY;
597 return 0;
598 }
599
600 res = h2_get_mbuf(h2c);
601 if (!res) {
602 h2c->flags |= H2_CF_MUX_MALLOC;
603 h2c->flags |= H2_CF_DEM_MROOM;
604 return 0;
605 }
606
607 chunk_init(&buf, buf_data, sizeof(buf_data));
608 chunk_memcpy(&buf,
609 "\x00\x00\x00" /* length : 0 for now */
610 "\x04\x00" /* type : 4 (settings), flags : 0 */
611 "\x00\x00\x00\x00", /* stream ID : 0 */
612 9);
613
614 if (h2_settings_header_table_size != 4096) {
615 char str[6] = "\x00\x01"; /* header_table_size */
616
617 write_n32(str + 2, h2_settings_header_table_size);
618 chunk_memcat(&buf, str, 6);
619 }
620
621 if (h2_settings_initial_window_size != 65535) {
622 char str[6] = "\x00\x04"; /* initial_window_size */
623
624 write_n32(str + 2, h2_settings_initial_window_size);
625 chunk_memcat(&buf, str, 6);
626 }
627
628 if (h2_settings_max_concurrent_streams != 0) {
629 char str[6] = "\x00\x03"; /* max_concurrent_streams */
630
631 /* Note: 0 means "unlimited" for haproxy's config but not for
632 * the protocol, so never send this value!
633 */
634 write_n32(str + 2, h2_settings_max_concurrent_streams);
635 chunk_memcat(&buf, str, 6);
636 }
637
638 if (global.tune.bufsize != 16384) {
639 char str[6] = "\x00\x05"; /* max_frame_size */
640
641 /* note: similarly we could also emit MAX_HEADER_LIST_SIZE to
642 * match bufsize - rewrite size, but at the moment it seems
643 * that clients don't take care of it.
644 */
645 write_n32(str + 2, global.tune.bufsize);
646 chunk_memcat(&buf, str, 6);
647 }
648
649 h2_set_frame_size(buf.str, buf.len - 9);
650 ret = bo_istput(res, ist2(buf.str, buf.len));
651 if (unlikely(ret <= 0)) {
652 if (!ret) {
653 h2c->flags |= H2_CF_MUX_MFULL;
654 h2c->flags |= H2_CF_DEM_MROOM;
655 return 0;
656 }
657 else {
658 h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
659 return 0;
660 }
661 }
662 return ret;
663}
664
Willy Tarreau52eed752017-09-22 15:05:09 +0200665/* Try to receive a connection preface, then upon success try to send our
666 * preface which is a SETTINGS frame. Returns > 0 on success or zero on
667 * missing data. It may return an error in h2c.
668 */
669static int h2c_frt_recv_preface(struct h2c *h2c)
670{
671 int ret1;
Willy Tarreaube5b7152017-09-25 16:25:39 +0200672 int ret2;
Willy Tarreau52eed752017-09-22 15:05:09 +0200673
674 ret1 = b_isteq(h2c->dbuf, 0, h2c->dbuf->i, ist(H2_CONN_PREFACE));
675
676 if (unlikely(ret1 <= 0)) {
677 if (ret1 < 0 || conn_xprt_read0_pending(h2c->conn))
678 h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
679 return 0;
680 }
681
Willy Tarreaube5b7152017-09-25 16:25:39 +0200682 ret2 = h2c_snd_settings(h2c);
683 if (ret2 > 0)
684 bi_del(h2c->dbuf, ret1);
Willy Tarreau52eed752017-09-22 15:05:09 +0200685
Willy Tarreaube5b7152017-09-25 16:25:39 +0200686 return ret2;
Willy Tarreau52eed752017-09-22 15:05:09 +0200687}
688
Willy Tarreau081d4722017-05-16 21:51:05 +0200689/* try to send a GOAWAY frame on the connection to report an error or a graceful
690 * shutdown, with h2c->errcode as the error code. Returns > 0 on success or zero
691 * if nothing was done. It uses h2c->last_sid as the advertised ID, or copies it
692 * from h2c->max_id if it's not set yet (<0). In case of lack of room to write
693 * the message, it subscribes the requester (either <h2s> or <h2c>) to future
694 * notifications. It sets H2_CF_GOAWAY_SENT on success, and H2_CF_GOAWAY_FAILED
695 * on unrecoverable failure. It will not attempt to send one again in this last
696 * case so that it is safe to use h2c_error() to report such errors.
697 */
698static int h2c_send_goaway_error(struct h2c *h2c, struct h2s *h2s)
699{
700 struct buffer *res;
701 char str[17];
702 int ret;
703
704 if (h2c->flags & H2_CF_GOAWAY_FAILED)
705 return 1; // claim that it worked
706
707 if (h2c_mux_busy(h2c, h2s)) {
708 if (h2s)
709 h2s->flags |= H2_SF_BLK_MBUSY;
710 else
711 h2c->flags |= H2_CF_DEM_MBUSY;
712 return 0;
713 }
714
715 res = h2_get_mbuf(h2c);
716 if (!res) {
717 h2c->flags |= H2_CF_MUX_MALLOC;
718 if (h2s)
719 h2s->flags |= H2_SF_BLK_MROOM;
720 else
721 h2c->flags |= H2_CF_DEM_MROOM;
722 return 0;
723 }
724
725 /* len: 8, type: 7, flags: none, sid: 0 */
726 memcpy(str, "\x00\x00\x08\x07\x00\x00\x00\x00\x00", 9);
727
728 if (h2c->last_sid < 0)
729 h2c->last_sid = h2c->max_id;
730
731 write_n32(str + 9, h2c->last_sid);
732 write_n32(str + 13, h2c->errcode);
733 ret = bo_istput(res, ist2(str, 17));
734 if (unlikely(ret <= 0)) {
735 if (!ret) {
736 h2c->flags |= H2_CF_MUX_MFULL;
737 if (h2s)
738 h2s->flags |= H2_SF_BLK_MROOM;
739 else
740 h2c->flags |= H2_CF_DEM_MROOM;
741 return 0;
742 }
743 else {
744 /* we cannot report this error using GOAWAY, so we mark
745 * it and claim a success.
746 */
747 h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
748 h2c->flags |= H2_CF_GOAWAY_FAILED;
749 return 1;
750 }
751 }
752 h2c->flags |= H2_CF_GOAWAY_SENT;
753 return ret;
754}
755
Willy Tarreau3421aba2017-07-27 15:41:03 +0200756/* Increase all streams' outgoing window size by the difference passed in
757 * argument. This is needed upon receipt of the settings frame if the initial
758 * window size is different. The difference may be negative and the resulting
759 * window size as well, for the time it takes to receive some window updates.
760 */
761static void h2c_update_all_ws(struct h2c *h2c, int diff)
762{
763 struct h2s *h2s;
764 struct eb32_node *node;
765
766 if (!diff)
767 return;
768
769 node = eb32_first(&h2c->streams_by_id);
770 while (node) {
771 h2s = container_of(node, struct h2s, by_id);
772 h2s->mws += diff;
773 node = eb32_next(node);
774 }
775}
776
777/* processes a SETTINGS frame whose payload is <payload> for <plen> bytes, and
778 * ACKs it if needed. Returns > 0 on success or zero on missing data. It may
779 * return an error in h2c. Described in RFC7540#6.5.
780 */
781static int h2c_handle_settings(struct h2c *h2c)
782{
783 unsigned int offset;
784 int error;
785
786 if (h2c->dff & H2_F_SETTINGS_ACK) {
787 if (h2c->dfl) {
788 error = H2_ERR_FRAME_SIZE_ERROR;
789 goto fail;
790 }
791 return 1;
792 }
793
794 if (h2c->dsi != 0) {
795 error = H2_ERR_PROTOCOL_ERROR;
796 goto fail;
797 }
798
799 if (h2c->dfl % 6) {
800 error = H2_ERR_FRAME_SIZE_ERROR;
801 goto fail;
802 }
803
804 /* that's the limit we can process */
805 if (h2c->dfl > global.tune.bufsize) {
806 error = H2_ERR_FRAME_SIZE_ERROR;
807 goto fail;
808 }
809
810 /* process full frame only */
811 if (h2c->dbuf->i < h2c->dfl)
812 return 0;
813
814 /* parse the frame */
815 for (offset = 0; offset < h2c->dfl; offset += 6) {
816 uint16_t type = h2_get_n16(h2c->dbuf, offset);
817 int32_t arg = h2_get_n32(h2c->dbuf, offset + 2);
818
819 switch (type) {
820 case H2_SETTINGS_INITIAL_WINDOW_SIZE:
821 /* we need to update all existing streams with the
822 * difference from the previous iws.
823 */
824 if (arg < 0) { // RFC7540#6.5.2
825 error = H2_ERR_FLOW_CONTROL_ERROR;
826 goto fail;
827 }
828 h2c_update_all_ws(h2c, arg - h2c->miw);
829 h2c->miw = arg;
830 break;
831 case H2_SETTINGS_MAX_FRAME_SIZE:
832 if (arg < 16384 || arg > 16777215) { // RFC7540#6.5.2
833 error = H2_ERR_PROTOCOL_ERROR;
834 goto fail;
835 }
836 h2c->mfs = arg;
837 break;
838 }
839 }
840
841 /* need to ACK this frame now */
842 h2c->st0 = H2_CS_FRAME_A;
843 return 1;
844 fail:
845 h2c_error(h2c, error);
846 return 0;
847}
848
849/* try to send an ACK for a settings frame on the connection. Returns > 0 on
850 * success or one of the h2_status values.
851 */
852static int h2c_ack_settings(struct h2c *h2c)
853{
854 struct buffer *res;
855 char str[9];
856 int ret = -1;
857
858 if (h2c_mux_busy(h2c, NULL)) {
859 h2c->flags |= H2_CF_DEM_MBUSY;
860 return 0;
861 }
862
863 res = h2_get_mbuf(h2c);
864 if (!res) {
865 h2c->flags |= H2_CF_MUX_MALLOC;
866 h2c->flags |= H2_CF_DEM_MROOM;
867 return 0;
868 }
869
870 memcpy(str,
871 "\x00\x00\x00" /* length : 0 (no data) */
872 "\x04" "\x01" /* type : 4, flags : ACK */
873 "\x00\x00\x00\x00" /* stream ID */, 9);
874
875 ret = bo_istput(res, ist2(str, 9));
876 if (unlikely(ret <= 0)) {
877 if (!ret) {
878 h2c->flags |= H2_CF_MUX_MFULL;
879 h2c->flags |= H2_CF_DEM_MROOM;
880 return 0;
881 }
882 else {
883 h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
884 return 0;
885 }
886 }
887 return ret;
888}
889
Willy Tarreaucf68c782017-10-10 17:11:41 +0200890/* processes a PING frame and schedules an ACK if needed. The caller must pass
891 * the pointer to the payload in <payload>. Returns > 0 on success or zero on
892 * missing data. It may return an error in h2c.
893 */
894static int h2c_handle_ping(struct h2c *h2c)
895{
896 /* frame length must be exactly 8 */
897 if (h2c->dfl != 8) {
898 h2c_error(h2c, H2_ERR_FRAME_SIZE_ERROR);
899 return 0;
900 }
901
902 /* schedule a response */
903 if (!(h2c->dft & H2_F_PING_ACK))
904 h2c->st0 = H2_CS_FRAME_A;
905 return 1;
906}
907
908/* try to send an ACK for a ping frame on the connection. Returns > 0 on
909 * success, 0 on missing data or one of the h2_status values.
910 */
911static int h2c_ack_ping(struct h2c *h2c)
912{
913 struct buffer *res;
914 char str[17];
915 int ret = -1;
916
917 if (h2c->dbuf->i < 8)
918 return 0;
919
920 if (h2c_mux_busy(h2c, NULL)) {
921 h2c->flags |= H2_CF_DEM_MBUSY;
922 return 0;
923 }
924
925 res = h2_get_mbuf(h2c);
926 if (!res) {
927 h2c->flags |= H2_CF_MUX_MALLOC;
928 h2c->flags |= H2_CF_DEM_MROOM;
929 return 0;
930 }
931
932 memcpy(str,
933 "\x00\x00\x08" /* length : 8 (same payload) */
934 "\x06" "\x01" /* type : 6, flags : ACK */
935 "\x00\x00\x00\x00" /* stream ID */, 9);
936
937 /* copy the original payload */
938 h2_get_buf_bytes(str + 9, 8, h2c->dbuf, 0);
939
940 ret = bo_istput(res, ist2(str, 17));
941 if (unlikely(ret <= 0)) {
942 if (!ret) {
943 h2c->flags |= H2_CF_MUX_MFULL;
944 h2c->flags |= H2_CF_DEM_MROOM;
945 return 0;
946 }
947 else {
948 h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
949 return 0;
950 }
951 }
952 return ret;
953}
954
Willy Tarreaubc933932017-10-09 16:21:43 +0200955/* process Rx frames to be demultiplexed */
956static void h2_process_demux(struct h2c *h2c)
957{
Willy Tarreauf3ee0692017-10-17 08:18:25 +0200958 struct h2s *h2s;
959
Willy Tarreau081d4722017-05-16 21:51:05 +0200960 if (h2c->st0 >= H2_CS_ERROR)
961 return;
Willy Tarreau52eed752017-09-22 15:05:09 +0200962
963 if (unlikely(h2c->st0 < H2_CS_FRAME_H)) {
964 if (h2c->st0 == H2_CS_PREFACE) {
965 if (unlikely(h2c_frt_recv_preface(h2c) <= 0)) {
966 /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
967 if (h2c->st0 == H2_CS_ERROR)
968 h2c->st0 = H2_CS_ERROR2;
969 goto fail;
970 }
971
972 h2c->max_id = 0;
973 h2c->st0 = H2_CS_SETTINGS1;
974 }
Willy Tarreau4c3690b2017-10-10 15:16:55 +0200975
976 if (h2c->st0 == H2_CS_SETTINGS1) {
977 struct h2_fh hdr;
978
979 /* ensure that what is pending is a valid SETTINGS frame
980 * without an ACK.
981 */
982 if (!h2_get_frame_hdr(h2c->dbuf, &hdr)) {
983 /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
984 if (h2c->st0 == H2_CS_ERROR)
985 h2c->st0 = H2_CS_ERROR2;
986 goto fail;
987 }
988
989 if (hdr.sid || hdr.ft != H2_FT_SETTINGS || hdr.ff & H2_F_SETTINGS_ACK) {
990 /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
991 h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
992 h2c->st0 = H2_CS_ERROR2;
993 goto fail;
994 }
995
996 if ((int)hdr.len < 0 || (int)hdr.len > h2c->mfs) {
997 /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
998 h2c_error(h2c, H2_ERR_FRAME_SIZE_ERROR);
999 h2c->st0 = H2_CS_ERROR2;
1000 goto fail;
1001 }
1002
1003 /* that's OK, switch to FRAME_P to process it */
1004 h2c->dfl = hdr.len;
1005 h2c->dsi = hdr.sid;
1006 h2c->dft = hdr.ft;
1007 h2c->dff = hdr.ff;
1008 h2c->st0 = H2_CS_FRAME_P;
1009 }
Willy Tarreau52eed752017-09-22 15:05:09 +02001010 }
Willy Tarreau7e98c052017-10-10 15:56:59 +02001011
1012 /* process as many incoming frames as possible below */
1013 while (h2c->dbuf->i) {
1014 int ret = 0;
1015
1016 if (h2c->st0 >= H2_CS_ERROR)
1017 break;
1018
1019 if (h2c->st0 == H2_CS_FRAME_H) {
1020 struct h2_fh hdr;
1021
1022 if (!h2_peek_frame_hdr(h2c->dbuf, &hdr))
1023 break;
1024
1025 if ((int)hdr.len < 0 || (int)hdr.len > h2c->mfs) {
1026 h2c_error(h2c, H2_ERR_FRAME_SIZE_ERROR);
1027 h2c->st0 = H2_CS_ERROR;
1028 break;
1029 }
1030
1031 h2c->dfl = hdr.len;
1032 h2c->dsi = hdr.sid;
1033 h2c->dft = hdr.ft;
1034 h2c->dff = hdr.ff;
1035 h2c->st0 = H2_CS_FRAME_P;
1036 h2_skip_frame_hdr(h2c->dbuf);
1037 }
1038
1039 /* Only H2_CS_FRAME_P and H2_CS_FRAME_A here */
Willy Tarreauf3ee0692017-10-17 08:18:25 +02001040 h2s = h2c_st_by_id(h2c, h2c->dsi);
Willy Tarreau7e98c052017-10-10 15:56:59 +02001041
1042 switch (h2c->dft) {
Willy Tarreau3421aba2017-07-27 15:41:03 +02001043 case H2_FT_SETTINGS:
1044 if (h2c->st0 == H2_CS_FRAME_P)
1045 ret = h2c_handle_settings(h2c);
1046
1047 if (h2c->st0 == H2_CS_FRAME_A)
1048 ret = h2c_ack_settings(h2c);
1049 break;
1050
Willy Tarreaucf68c782017-10-10 17:11:41 +02001051 case H2_FT_PING:
1052 if (h2c->st0 == H2_CS_FRAME_P)
1053 ret = h2c_handle_ping(h2c);
1054
1055 if (h2c->st0 == H2_CS_FRAME_A)
1056 ret = h2c_ack_ping(h2c);
1057 break;
1058
Willy Tarreau7e98c052017-10-10 15:56:59 +02001059 /* FIXME: implement all supported frame types here */
1060 default:
1061 /* drop frames that we ignore. They may be larger than
1062 * the buffer so we drain all of their contents until
1063 * we reach the end.
1064 */
1065 ret = MIN(h2c->dbuf->i, h2c->dfl);
1066 bi_del(h2c->dbuf, ret);
1067 h2c->dfl -= ret;
1068 ret = h2c->dfl == 0;
1069 }
1070
1071 /* error or missing data condition met above ? */
1072 if (ret <= 0)
1073 break;
1074
1075 if (h2c->st0 != H2_CS_FRAME_H) {
1076 bi_del(h2c->dbuf, h2c->dfl);
1077 h2c->st0 = H2_CS_FRAME_H;
1078 }
1079 }
Willy Tarreau52eed752017-09-22 15:05:09 +02001080
1081 fail:
1082 /* we can go here on missing data, blocked response or error */
1083 return;
Willy Tarreaubc933932017-10-09 16:21:43 +02001084}
1085
1086/* process Tx frames from streams to be multiplexed. Returns > 0 if it reached
1087 * the end.
1088 */
1089static int h2_process_mux(struct h2c *h2c)
1090{
Willy Tarreaubacdf5a2017-10-17 10:57:04 +02001091 struct h2s *h2s, *h2s_back;
1092
1093 /* First we always process the flow control list because the streams
1094 * waiting there were already elected for immediate emission but were
1095 * blocked just on this.
1096 */
1097
1098 list_for_each_entry_safe(h2s, h2s_back, &h2c->fctl_list, list) {
1099 if (h2c->mws <= 0 || h2c->flags & H2_CF_MUX_BLOCK_ANY ||
1100 h2c->st0 >= H2_CS_ERROR)
1101 break;
1102
1103 /* In theory it's possible that h2s->cs == NULL here :
1104 * - client sends crap that causes a parse error
1105 * - RST_STREAM is produced and CS_FL_ERROR at the same time
1106 * - RST_STREAM cannot be emitted because mux is busy/full
1107 * - stream gets notified, detaches and quits
1108 * - mux buffer gets ready and wakes pending streams up
1109 * - bam!
1110 */
1111 h2s->flags &= ~H2_SF_BLK_ANY;
1112
1113 if (h2s->cs) {
1114 h2s->cs->data_cb->send(h2s->cs);
1115 h2s->cs->data_cb->wake(h2s->cs);
1116 }
1117
1118 /* depending on callee's blocking reasons, we may queue in send
1119 * list or completely dequeue.
1120 */
1121 if ((h2s->flags & H2_SF_BLK_MFCTL) == 0) {
1122 if (h2s->flags & H2_SF_BLK_ANY) {
1123 LIST_DEL(&h2s->list);
1124 LIST_ADDQ(&h2c->send_list, &h2s->list);
1125 }
1126 else {
1127 LIST_DEL(&h2s->list);
1128 LIST_INIT(&h2s->list);
1129 if (h2s->cs)
1130 h2s->cs->flags &= ~CS_FL_DATA_WR_ENA;
1131 }
1132 }
1133 }
1134
1135 list_for_each_entry_safe(h2s, h2s_back, &h2c->send_list, list) {
1136 if (h2c->st0 >= H2_CS_ERROR || h2c->flags & H2_CF_MUX_BLOCK_ANY)
1137 break;
1138
1139 /* In theory it's possible that h2s->cs == NULL here :
1140 * - client sends crap that causes a parse error
1141 * - RST_STREAM is produced and CS_FL_ERROR at the same time
1142 * - RST_STREAM cannot be emitted because mux is busy/full
1143 * - stream gets notified, detaches and quits
1144 * - mux buffer gets ready and wakes pending streams up
1145 * - bam!
1146 */
1147 h2s->flags &= ~H2_SF_BLK_ANY;
1148
1149 if (h2s->cs) {
1150 h2s->cs->data_cb->send(h2s->cs);
1151 h2s->cs->data_cb->wake(h2s->cs);
1152 }
1153 /* depending on callee's blocking reasons, we may queue in fctl
1154 * list or completely dequeue.
1155 */
1156 if (h2s->flags & H2_SF_BLK_MFCTL) {
1157 /* stream hit the connection's flow control */
1158 LIST_DEL(&h2s->list);
1159 LIST_ADDQ(&h2c->fctl_list, &h2s->list);
1160 }
1161 else if (!(h2s->flags & H2_SF_BLK_ANY)) {
1162 LIST_DEL(&h2s->list);
1163 LIST_INIT(&h2s->list);
1164 if (h2s->cs)
1165 h2s->cs->flags &= ~CS_FL_DATA_WR_ENA;
1166 }
1167 }
1168
Willy Tarreau081d4722017-05-16 21:51:05 +02001169 if (unlikely(h2c->st0 > H2_CS_ERROR)) {
1170 if (h2c->st0 == H2_CS_ERROR) {
1171 if (h2c->max_id >= 0) {
1172 h2c_send_goaway_error(h2c, NULL);
1173 if (h2c->flags & H2_CF_MUX_BLOCK_ANY)
1174 return 0;
1175 }
1176
1177 h2c->st0 = H2_CS_ERROR2; // sent (or failed hard) !
1178 }
1179 return 1;
1180 }
Willy Tarreaubacdf5a2017-10-17 10:57:04 +02001181 return (h2c->mws <= 0 || LIST_ISEMPTY(&h2c->fctl_list)) && LIST_ISEMPTY(&h2c->send_list);
Willy Tarreaubc933932017-10-09 16:21:43 +02001182}
1183
Willy Tarreau71681172017-10-23 14:39:06 +02001184
Willy Tarreau62f52692017-10-08 23:01:42 +02001185/*********************************************************/
1186/* functions below are I/O callbacks from the connection */
1187/*********************************************************/
1188
1189/* callback called on recv event by the connection handler */
1190static void h2_recv(struct connection *conn)
1191{
Willy Tarreaua2af5122017-10-09 11:56:46 +02001192 struct h2c *h2c = conn->mux_ctx;
Willy Tarreau35dbd5d2017-09-22 09:13:49 +02001193 struct buffer *buf;
Willy Tarreaua2af5122017-10-09 11:56:46 +02001194 int max;
1195
1196 if (conn->flags & CO_FL_ERROR)
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001197 return;
1198
1199 if (h2c->flags & H2_CF_DEM_BLOCK_ANY)
1200 return;
Willy Tarreaua2af5122017-10-09 11:56:46 +02001201
Willy Tarreau35dbd5d2017-09-22 09:13:49 +02001202 buf = h2_get_dbuf(h2c);
Willy Tarreau1b62c5c2017-09-25 11:55:01 +02001203 if (!buf) {
1204 h2c->flags |= H2_CF_DEM_DALLOC;
Willy Tarreau35dbd5d2017-09-22 09:13:49 +02001205 return;
Willy Tarreau1b62c5c2017-09-25 11:55:01 +02001206 }
Willy Tarreau35dbd5d2017-09-22 09:13:49 +02001207
Willy Tarreaua2af5122017-10-09 11:56:46 +02001208 /* note: buf->o == 0 */
1209 max = buf->size - buf->i;
1210 if (!max) {
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001211 h2c->flags |= H2_CF_DEM_DFULL;
Willy Tarreaua2af5122017-10-09 11:56:46 +02001212 return;
1213 }
1214
1215 conn->xprt->rcv_buf(conn, buf, max);
1216 if (conn->flags & CO_FL_ERROR)
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001217 return;
Willy Tarreaua2af5122017-10-09 11:56:46 +02001218
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001219 if (!buf->i) {
Willy Tarreau35dbd5d2017-09-22 09:13:49 +02001220 h2_release_dbuf(h2c);
Willy Tarreaua2af5122017-10-09 11:56:46 +02001221 return;
1222 }
1223
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001224 if (buf->i == buf->size)
1225 h2c->flags |= H2_CF_DEM_DFULL;
1226
Willy Tarreaubc933932017-10-09 16:21:43 +02001227 h2_process_demux(h2c);
Willy Tarreaua2af5122017-10-09 11:56:46 +02001228
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001229 /* after streams have been processed, we should have made some room */
Willy Tarreau081d4722017-05-16 21:51:05 +02001230 if (h2c->st0 >= H2_CS_ERROR)
1231 buf->i = 0;
1232
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001233 if (buf->i != buf->size)
1234 h2c->flags &= ~H2_CF_DEM_DFULL;
Willy Tarreaua2af5122017-10-09 11:56:46 +02001235 return;
Willy Tarreau62f52692017-10-08 23:01:42 +02001236}
1237
1238/* callback called on send event by the connection handler */
1239static void h2_send(struct connection *conn)
1240{
Willy Tarreaua2af5122017-10-09 11:56:46 +02001241 struct h2c *h2c = conn->mux_ctx;
Willy Tarreaubc933932017-10-09 16:21:43 +02001242 int done;
Willy Tarreaua2af5122017-10-09 11:56:46 +02001243
1244 if (conn->flags & CO_FL_ERROR)
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001245 return;
Willy Tarreaua2af5122017-10-09 11:56:46 +02001246
1247 if (conn->flags & (CO_FL_HANDSHAKE|CO_FL_WAIT_L4_CONN|CO_FL_WAIT_L6_CONN)) {
1248 /* a handshake was requested */
1249 return;
1250 }
1251
Willy Tarreaubc933932017-10-09 16:21:43 +02001252 /* This loop is quite simple : it tries to fill as much as it can from
1253 * pending streams into the existing buffer until it's reportedly full
1254 * or the end of send requests is reached. Then it tries to send this
1255 * buffer's contents out, marks it not full if at least one byte could
1256 * be sent, and tries again.
1257 *
1258 * The snd_buf() function normally takes a "flags" argument which may
1259 * be made of a combination of CO_SFL_MSG_MORE to indicate that more
1260 * data immediately comes and CO_SFL_STREAMER to indicate that the
1261 * connection is streaming lots of data (used to increase TLS record
1262 * size at the expense of latency). The former can be sent any time
1263 * there's a buffer full flag, as it indicates at least one stream
1264 * attempted to send and failed so there are pending data. An
1265 * alternative would be to set it as long as there's an active stream
1266 * but that would be problematic for ACKs until we have an absolute
1267 * guarantee that all waiters have at least one byte to send. The
1268 * latter should possibly not be set for now.
1269 */
1270
1271 done = 0;
1272 while (!done) {
1273 unsigned int flags = 0;
1274
1275 /* fill as much as we can into the current buffer */
1276 while (((h2c->flags & (H2_CF_MUX_MFULL|H2_CF_MUX_MALLOC)) == 0) && !done)
1277 done = h2_process_mux(h2c);
1278
1279 if (conn->flags & CO_FL_ERROR)
1280 break;
1281
1282 if (h2c->flags & (H2_CF_MUX_MFULL | H2_CF_DEM_MBUSY | H2_CF_DEM_MROOM))
1283 flags |= CO_SFL_MSG_MORE;
1284
1285 if (conn->xprt->snd_buf(conn, h2c->mbuf, flags) <= 0)
1286 break;
1287
1288 /* wrote at least one byte, the buffer is not full anymore */
1289 h2c->flags &= ~(H2_CF_MUX_MFULL | H2_CF_DEM_MROOM);
1290 }
1291
Willy Tarreaua2af5122017-10-09 11:56:46 +02001292 if (conn->flags & CO_FL_SOCK_WR_SH) {
1293 /* output closed, nothing to send, clear the buffer to release it */
1294 h2c->mbuf->o = 0;
Willy Tarreaua2af5122017-10-09 11:56:46 +02001295 }
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001296}
Willy Tarreaua2af5122017-10-09 11:56:46 +02001297
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001298/* call the wake up function of all streams attached to the connection */
1299static void h2_wake_all_streams(struct h2c *h2c)
1300{
1301 struct eb32_node *node;
1302 struct h2s *h2s;
1303 unsigned int flags = 0;
Willy Tarreau14398122017-09-22 14:26:04 +02001304
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001305 if (h2c->st0 >= H2_CS_ERROR || h2c->conn->flags & CO_FL_ERROR)
1306 flags |= CS_FL_ERROR;
Willy Tarreaua2af5122017-10-09 11:56:46 +02001307
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001308 if (conn_xprt_read0_pending(h2c->conn))
1309 flags |= CS_FL_EOS;
Willy Tarreaua2af5122017-10-09 11:56:46 +02001310
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001311 node = eb32_first(&h2c->streams_by_id);
1312 while (node) {
1313 h2s = container_of(node, struct h2s, by_id);
1314 node = eb32_next(node);
1315 if (h2s->cs) {
1316 h2s->cs->flags |= flags;
1317 /* recv is used to force to detect CS_FL_EOS that wake()
1318 * doesn't handle in the stream int code.
1319 */
1320 h2s->cs->data_cb->recv(h2s->cs);
1321 h2s->cs->data_cb->wake(h2s->cs);
1322 }
1323 }
Willy Tarreau62f52692017-10-08 23:01:42 +02001324}
1325
1326/* callback called on any event by the connection handler.
1327 * It applies changes and returns zero, or < 0 if it wants immediate
1328 * destruction of the connection (which normally doesn not happen in h2).
1329 */
1330static int h2_wake(struct connection *conn)
1331{
Willy Tarreaua2af5122017-10-09 11:56:46 +02001332 struct h2c *h2c = conn->mux_ctx;
1333
Willy Tarreau26bd7612017-10-09 16:47:04 +02001334 if (conn->flags & CO_FL_ERROR || conn_xprt_read0_pending(conn) ||
Willy Tarreau29a98242017-10-31 06:59:15 +01001335 h2c->st0 == H2_CS_ERROR2 || h2c->flags & H2_CF_GOAWAY_FAILED ||
1336 (eb_is_empty(&h2c->streams_by_id) && h2c->last_sid >= 0 &&
1337 h2c->max_id >= h2c->last_sid)) {
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001338 h2_wake_all_streams(h2c);
1339
1340 if (eb_is_empty(&h2c->streams_by_id)) {
1341 /* no more stream, kill the connection now */
1342 h2_release(conn);
1343 return -1;
1344 }
1345 else {
1346 /* some streams still there, we need to signal them all and
1347 * wait for their departure.
1348 */
1349 __conn_xprt_stop_recv(conn);
1350 __conn_xprt_stop_send(conn);
1351 return 0;
1352 }
1353 }
1354
1355 if (!h2c->dbuf->i)
1356 h2_release_dbuf(h2c);
1357
1358 /* stop being notified of incoming data if we can't process them */
1359 if (h2c->st0 >= H2_CS_ERROR ||
1360 (h2c->flags & H2_CF_DEM_BLOCK_ANY) || conn_xprt_read0_pending(conn)) {
1361 /* FIXME: we should clear a read timeout here */
1362 __conn_xprt_stop_recv(conn);
1363 }
1364 else {
1365 /* FIXME: we should (re-)arm a read timeout here */
1366 __conn_xprt_want_recv(conn);
1367 }
1368
1369 /* adjust output polling */
Willy Tarreau51606832017-10-17 15:30:07 +02001370 if (!(conn->flags & CO_FL_SOCK_WR_SH) &&
1371 (h2c->st0 == H2_CS_ERROR ||
1372 h2c->mbuf->o ||
1373 (h2c->mws > 0 && !LIST_ISEMPTY(&h2c->fctl_list)) ||
1374 (!(h2c->flags & H2_CF_MUX_BLOCK_ANY) && !LIST_ISEMPTY(&h2c->send_list)))) {
Willy Tarreaufbe3b4f2017-10-09 15:14:19 +02001375 /* FIXME: we should (re-)arm a send timeout here */
1376 __conn_xprt_want_send(conn);
1377 }
1378 else {
1379 /* FIXME: we should clear a send timeout here */
1380 h2_release_mbuf(h2c);
1381 __conn_xprt_stop_send(conn);
Willy Tarreaua2af5122017-10-09 11:56:46 +02001382 }
1383
Willy Tarreau62f52692017-10-08 23:01:42 +02001384 return 0;
1385}
1386
1387/*******************************************/
1388/* functions below are used by the streams */
1389/*******************************************/
1390
/*
 * Attach a new stream to a connection (used for outgoing connections).
 * This is only a placeholder for now: no stream is created and NULL is
 * always returned.
 */
static struct conn_stream *h2_attach(struct connection *conn)
{
	return NULL;
}
1399
1400/* callback used to update the mux's polling flags after changing a cs' status.
1401 * The caller (cs_update_mux_polling) will take care of propagating any changes
1402 * to the transport layer.
1403 */
1404static void h2_update_poll(struct conn_stream *cs)
1405{
Willy Tarreau1d393222017-10-17 10:26:19 +02001406 struct h2s *h2s = cs->ctx;
1407
1408 if (!h2s)
1409 return;
1410
Willy Tarreaud7739c82017-10-30 15:38:23 +01001411 /* we may unblock a blocked read */
1412
1413 if (cs->flags & CS_FL_DATA_RD_ENA &&
1414 h2s->h2c->flags & H2_CF_DEM_SFULL && h2s->h2c->dsi == h2s->id) {
1415 h2s->h2c->flags &= ~H2_CF_DEM_SFULL;
1416 conn_xprt_want_recv(cs->conn);
1417 }
1418
Willy Tarreau1d393222017-10-17 10:26:19 +02001419 /* Note: the stream and stream-int code doesn't allow us to perform a
1420 * synchronous send() here unfortunately, because this code is called
1421 * as si_update() from the process_stream() context. This means that
1422 * we have to queue the current cs and defer its processing after the
1423 * connection's cs list is processed anyway.
1424 */
1425
1426 if (cs->flags & CS_FL_DATA_WR_ENA) {
1427 if (LIST_ISEMPTY(&h2s->list)) {
1428 if (LIST_ISEMPTY(&h2s->h2c->send_list) &&
1429 !h2s->h2c->mbuf->o && // not yet subscribed
1430 !(cs->conn->flags & CO_FL_SOCK_WR_SH))
1431 conn_xprt_want_send(cs->conn);
1432 LIST_ADDQ(&h2s->h2c->send_list, &h2s->list);
1433 }
1434 }
1435 else if (!LIST_ISEMPTY(&h2s->list)) {
1436 LIST_DEL(&h2s->list);
1437 LIST_INIT(&h2s->list);
1438 h2s->flags &= ~(H2_SF_BLK_MBUSY | H2_SF_BLK_MROOM | H2_SF_BLK_MFCTL);
1439 }
1440
1441 /* this can happen from within si_chk_snd() */
1442 if (h2s->h2c->mbuf->o && !(cs->conn->flags & CO_FL_XPRT_WR_ENA))
1443 conn_xprt_want_send(cs->conn);
Willy Tarreau62f52692017-10-08 23:01:42 +02001444}
1445
/*
 * Detach the stream from the connection and possibly release the connection.
 * Not implemented yet: this is currently a no-op.
 */
static void h2_detach(struct conn_stream *cs)
{
}
1452
1453static void h2_shutr(struct conn_stream *cs, enum cs_shr_mode mode)
1454{
1455}
1456
1457static void h2_shutw(struct conn_stream *cs, enum cs_shw_mode mode)
1458{
1459}
1460
1461/*
1462 * Called from the upper layer, to get more data
1463 */
1464static int h2_rcv_buf(struct conn_stream *cs, struct buffer *buf, int count)
1465{
1466 /* FIXME: not handled for now */
1467 cs->flags |= CS_FL_ERROR;
1468 return 0;
1469}
1470
1471/* Called from the upper layer, to send data */
1472static int h2_snd_buf(struct conn_stream *cs, struct buffer *buf, int flags)
1473{
1474 /* FIXME: not handled for now */
1475 cs->flags |= CS_FL_ERROR;
1476 return 0;
1477}
1478
1479
1480/*******************************************************/
1481/* functions below are dedicated to the config parsers */
1482/*******************************************************/
1483
Willy Tarreaufe20e5b2017-07-27 11:42:14 +02001484/* config parser for global "tune.h2.header-table-size" */
1485static int h2_parse_header_table_size(char **args, int section_type, struct proxy *curpx,
1486 struct proxy *defpx, const char *file, int line,
1487 char **err)
1488{
1489 if (too_many_args(1, args, err, NULL))
1490 return -1;
1491
1492 h2_settings_header_table_size = atoi(args[1]);
1493 if (h2_settings_header_table_size < 4096 || h2_settings_header_table_size > 65536) {
1494 memprintf(err, "'%s' expects a numeric value between 4096 and 65536.", args[0]);
1495 return -1;
1496 }
1497 return 0;
1498}
Willy Tarreau62f52692017-10-08 23:01:42 +02001499
Willy Tarreaue6baec02017-07-27 11:45:11 +02001500/* config parser for global "tune.h2.initial-window-size" */
1501static int h2_parse_initial_window_size(char **args, int section_type, struct proxy *curpx,
1502 struct proxy *defpx, const char *file, int line,
1503 char **err)
1504{
1505 if (too_many_args(1, args, err, NULL))
1506 return -1;
1507
1508 h2_settings_initial_window_size = atoi(args[1]);
1509 if (h2_settings_initial_window_size < 0) {
1510 memprintf(err, "'%s' expects a positive numeric value.", args[0]);
1511 return -1;
1512 }
1513 return 0;
1514}
1515
Willy Tarreau5242ef82017-07-27 11:47:28 +02001516/* config parser for global "tune.h2.max-concurrent-streams" */
1517static int h2_parse_max_concurrent_streams(char **args, int section_type, struct proxy *curpx,
1518 struct proxy *defpx, const char *file, int line,
1519 char **err)
1520{
1521 if (too_many_args(1, args, err, NULL))
1522 return -1;
1523
1524 h2_settings_max_concurrent_streams = atoi(args[1]);
1525 if (h2_settings_max_concurrent_streams < 0) {
1526 memprintf(err, "'%s' expects a positive numeric value.", args[0]);
1527 return -1;
1528 }
1529 return 0;
1530}
1531
Willy Tarreau62f52692017-10-08 23:01:42 +02001532
/****************************************/
/* MUX initialization and instantiation */
/****************************************/
1536
1537/* The mux operations */
1538const struct mux_ops h2_ops = {
1539 .init = h2_init,
1540 .recv = h2_recv,
1541 .send = h2_send,
1542 .wake = h2_wake,
1543 .update_poll = h2_update_poll,
1544 .rcv_buf = h2_rcv_buf,
1545 .snd_buf = h2_snd_buf,
1546 .attach = h2_attach,
1547 .detach = h2_detach,
1548 .shutr = h2_shutr,
1549 .shutw = h2_shutw,
1550 .release = h2_release,
1551 .name = "H2",
1552};
1553
1554/* ALPN selection : this mux registers ALPN tolen "h2" */
1555static struct alpn_mux_list alpn_mux_h2 =
1556 { .token = IST("h2"), .mode = ALPN_MODE_HTTP, .mux = &h2_ops };
1557
1558/* config keyword parsers */
1559static struct cfg_kw_list cfg_kws = {ILH, {
Willy Tarreaufe20e5b2017-07-27 11:42:14 +02001560 { CFG_GLOBAL, "tune.h2.header-table-size", h2_parse_header_table_size },
Willy Tarreaue6baec02017-07-27 11:45:11 +02001561 { CFG_GLOBAL, "tune.h2.initial-window-size", h2_parse_initial_window_size },
Willy Tarreau5242ef82017-07-27 11:47:28 +02001562 { CFG_GLOBAL, "tune.h2.max-concurrent-streams", h2_parse_max_concurrent_streams },
Willy Tarreau62f52692017-10-08 23:01:42 +02001563 { 0, NULL, NULL }
1564}};
1565
Willy Tarreau5ab6b572017-09-22 08:05:00 +02001566static void __h2_deinit(void)
1567{
Willy Tarreau18312642017-10-11 07:57:07 +02001568 pool_destroy2(pool2_h2s);
Willy Tarreau5ab6b572017-09-22 08:05:00 +02001569 pool_destroy2(pool2_h2c);
1570}
1571
Willy Tarreau62f52692017-10-08 23:01:42 +02001572__attribute__((constructor))
1573static void __h2_init(void)
1574{
1575 alpn_register_mux(&alpn_mux_h2);
1576 cfg_register_keywords(&cfg_kws);
Willy Tarreau5ab6b572017-09-22 08:05:00 +02001577 hap_register_post_deinit(__h2_deinit);
1578 pool2_h2c = create_pool("h2c", sizeof(struct h2c), MEM_F_SHARED);
Willy Tarreau18312642017-10-11 07:57:07 +02001579 pool2_h2s = create_pool("h2s", sizeof(struct h2s), MEM_F_SHARED);
Willy Tarreau62f52692017-10-08 23:01:42 +02001580}