blob: 29d3cef39881aed53168d6fb9654df124e99156e [file] [log] [blame]
Baptiste Assmann325137d2015-04-13 23:40:55 +02001/*
2 * Name server resolution
3 *
Willy Tarreau714f3452021-05-09 06:47:26 +02004 * Copyright 2020 HAProxy Technologies
Baptiste Assmann325137d2015-04-13 23:40:55 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
Baptiste Assmann325137d2015-04-13 23:40:55 +020014#include <stdio.h>
15#include <stdlib.h>
16#include <string.h>
17#include <unistd.h>
18
19#include <sys/types.h>
20
Willy Tarreau122eba92020-06-04 10:15:32 +020021#include <haproxy/action.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020022#include <haproxy/api.h>
Christopher Faulet6b0a0fb2022-04-04 11:29:28 +020023#include <haproxy/applet.h>
Willy Tarreau6be78492020-06-05 00:00:29 +020024#include <haproxy/cfgparse.h>
Willy Tarreauf1d32c42020-06-04 21:07:02 +020025#include <haproxy/channel.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020026#include <haproxy/check.h>
Willy Tarreau83487a82020-06-04 20:19:54 +020027#include <haproxy/cli.h>
Christopher Faulet908628c2022-03-25 16:43:49 +010028#include <haproxy/conn_stream.h>
29#include <haproxy/cs_utils.h>
Willy Tarreau7c18b542020-06-11 09:23:02 +020030#include <haproxy/dgram.h>
Willy Tarreaueb92deb2020-06-04 10:53:16 +020031#include <haproxy/dns.h>
Willy Tarreau8d366972020-05-27 16:10:29 +020032#include <haproxy/errors.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020033#include <haproxy/fd.h>
Willy Tarreauaeed4a82020-06-04 22:01:04 +020034#include <haproxy/log.h>
Emeric Brund26a6232021-01-04 13:32:20 +010035#include <haproxy/ring.h>
Emeric Brunfd647d52021-02-12 20:03:38 +010036#include <haproxy/stream.h>
Willy Tarreau9f9e9fc2021-05-08 13:09:46 +020037#include <haproxy/tools.h>
Baptiste Assmann325137d2015-04-13 23:40:55 +020038
Emeric Brund26a6232021-01-04 13:32:20 +010039static THREAD_LOCAL char *dns_msg_trash;
Baptiste Assmann325137d2015-04-13 23:40:55 +020040
Emeric Brunfd647d52021-02-12 20:03:38 +010041DECLARE_STATIC_POOL(dns_session_pool, "dns_session", sizeof(struct dns_session));
42DECLARE_STATIC_POOL(dns_query_pool, "dns_query", sizeof(struct dns_query));
43DECLARE_STATIC_POOL(dns_msg_buf, "dns_msg_buf", DNS_TCP_MSG_RING_MAX_SIZE);
44
Christopher Faulet67957bd2017-09-27 11:00:59 +020045/* Opens an UDP socket on the namesaver's IP/Port, if required. Returns 0 on
Christopher Faulet1e711be2021-03-04 16:58:35 +010046 * success, -1 otherwise. ns->dgram must be defined.
Baptiste Assmann325137d2015-04-13 23:40:55 +020047 */
Emeric Brund26a6232021-01-04 13:32:20 +010048static int dns_connect_nameserver(struct dns_nameserver *ns)
Baptiste Assmann325137d2015-04-13 23:40:55 +020049{
Christopher Faulet1e711be2021-03-04 16:58:35 +010050 struct dgram_conn *dgram = &ns->dgram->conn;
51 int fd;
Baptiste Assmann325137d2015-04-13 23:40:55 +020052
Christopher Faulet1e711be2021-03-04 16:58:35 +010053 /* Already connected */
54 if (dgram->t.sock.fd != -1)
Emeric Brun526b7922021-02-15 14:28:27 +010055 return 0;
Christopher Faulet1e711be2021-03-04 16:58:35 +010056
57 /* Create an UDP socket and connect it on the nameserver's IP/Port */
58 if ((fd = socket(dgram->addr.to.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
59 send_log(NULL, LOG_WARNING,
60 "DNS : section '%s': can't create socket for nameserver '%s'.\n",
61 ns->counters->pid, ns->id);
62 return -1;
63 }
64 if (connect(fd, (struct sockaddr*)&dgram->addr.to, get_addr_len(&dgram->addr.to)) == -1) {
65 send_log(NULL, LOG_WARNING,
66 "DNS : section '%s': can't connect socket for nameserver '%s'.\n",
67 ns->counters->id, ns->id);
68 close(fd);
69 return -1;
Emeric Brunc9437992021-02-12 19:42:55 +010070 }
Emeric Brun526b7922021-02-15 14:28:27 +010071
Christopher Faulet1e711be2021-03-04 16:58:35 +010072 /* Make the socket non blocking */
Willy Tarreau38247432022-04-26 10:24:14 +020073 fd_set_nonblock(fd);
Christopher Faulet1e711be2021-03-04 16:58:35 +010074
75 /* Add the fd in the fd list and update its parameters */
76 dgram->t.sock.fd = fd;
77 fd_insert(fd, dgram, dgram_fd_handler, MAX_THREADS_MASK);
78 fd_want_recv(fd);
79 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +020080}
81
Emeric Brund26a6232021-01-04 13:32:20 +010082/* Sends a message to a name server
83 * It returns message length on success
84 * or -1 in error case
85 * 0 is returned in case of output ring buffer is full
86 */
87int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len)
88{
89 int ret = -1;
90
91 if (ns->dgram) {
92 struct dgram_conn *dgram = &ns->dgram->conn;
93 int fd = dgram->t.sock.fd;
94
95 if (dgram->t.sock.fd == -1) {
96 if (dns_connect_nameserver(ns) == -1)
97 return -1;
98 fd = dgram->t.sock.fd;
99 }
100
101 ret = send(fd, buf, len, 0);
102 if (ret < 0) {
Willy Tarreauacef5e22022-04-25 20:32:15 +0200103 if (errno == EAGAIN || errno == EWOULDBLOCK) {
Emeric Brund26a6232021-01-04 13:32:20 +0100104 struct ist myist;
105
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100106 myist = ist2(buf, len);
Emeric Brund26a6232021-01-04 13:32:20 +0100107 ret = ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
108 if (!ret) {
109 ns->counters->snd_error++;
110 return -1;
111 }
112 fd_cant_send(fd);
113 return ret;
114 }
115 ns->counters->snd_error++;
116 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100117 dgram->t.sock.fd = -1;
118 return -1;
119 }
120 ns->counters->sent++;
121 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100122 else if (ns->stream) {
123 struct ist myist;
124
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100125 myist = ist2(buf, len);
Emeric Brunfd647d52021-02-12 20:03:38 +0100126 ret = ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
127 if (!ret) {
128 ns->counters->snd_error++;
129 return -1;
130 }
131 task_wakeup(ns->stream->task_req, TASK_WOKEN_MSG);
132 return ret;
133 }
Emeric Brund26a6232021-01-04 13:32:20 +0100134
135 return ret;
136}
137
/* Forward declaration: dns_recv_nameserver() needs it before its definition */
void dns_session_free(struct dns_session *ds);
139
Emeric Brund26a6232021-01-04 13:32:20 +0100140/* Receives a dns message
141 * Returns message length
142 * 0 is returned if no more message available
143 * -1 in error case
144 */
145ssize_t dns_recv_nameserver(struct dns_nameserver *ns, void *data, size_t size)
146{
147 ssize_t ret = -1;
148
149 if (ns->dgram) {
150 struct dgram_conn *dgram = &ns->dgram->conn;
151 int fd = dgram->t.sock.fd;
152
153 if (fd == -1)
154 return -1;
155
156 if ((ret = recv(fd, data, size, 0)) < 0) {
Willy Tarreauacef5e22022-04-25 20:32:15 +0200157 if (errno == EAGAIN || errno == EWOULDBLOCK) {
Emeric Brund26a6232021-01-04 13:32:20 +0100158 fd_cant_recv(fd);
159 return 0;
160 }
161 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100162 dgram->t.sock.fd = -1;
163 return -1;
164 }
165 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100166 else if (ns->stream) {
167 struct dns_stream_server *dss = ns->stream;
168 struct dns_session *ds;
169
170 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
171
172 if (!LIST_ISEMPTY(&dss->wait_sess)) {
173 ds = LIST_NEXT(&dss->wait_sess, struct dns_session *, waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100174 ret = ds->rx_msg.len < size ? ds->rx_msg.len : size;
175 memcpy(data, ds->rx_msg.area, ret);
176
177 ds->rx_msg.len = 0;
178
Willy Tarreaudde1b442021-10-21 14:33:38 +0200179 /* This barrier is here to ensure that all data is
180 * stored if the appctx detect the elem is out of the
181 * list.
182 */
183 __ha_barrier_store();
184
Emeric Brunfd647d52021-02-12 20:03:38 +0100185 LIST_DEL_INIT(&ds->waiter);
186
187 if (ds->appctx) {
Willy Tarreaudde1b442021-10-21 14:33:38 +0200188 /* This second barrier is here to ensure that
189 * the waked up appctx won't miss that the elem
190 * is removed from the list.
191 */
192 __ha_barrier_store();
193
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500194 /* awake appctx because it may have other
Emeric Brunfd647d52021-02-12 20:03:38 +0100195 * message to receive
196 */
197 appctx_wakeup(ds->appctx);
198
199 /* dns_session could already be into free_sess list
200 * so we firstly remove it */
201 LIST_DEL_INIT(&ds->list);
202
203 /* decrease nb_queries to free a slot for a new query on that sess */
204 ds->nb_queries--;
205 if (ds->nb_queries) {
206 /* it remains pipelined unanswered request
207 * into this session but we just decrease
208 * the counter so the session
209 * can not be full of pipelined requests
210 * so we can add if to free_sess list
211 * to receive a new request
212 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200213 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100214 }
215 else {
216 /* there is no more pipelined requests
217 * into this session, so we move it
218 * to idle_sess list */
Willy Tarreau2b718102021-04-21 07:32:39 +0200219 LIST_INSERT(&ds->dss->idle_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100220
221 /* update the counter of idle sessions */
222 ds->dss->idle_conns++;
223
224 /* Note: this is useless there to update
225 * the max_active_conns since we increase
226 * the idle count */
227 }
228 }
229 else {
230 /* there is no more appctx for this session
231 * it means it is ready to die
232 */
233 dns_session_free(ds);
234 }
235
236
237 }
238
239 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
240 }
Emeric Brund26a6232021-01-04 13:32:20 +0100241
242 return ret;
243}
244
245static void dns_resolve_recv(struct dgram_conn *dgram)
246{
247 struct dns_nameserver *ns;
248 int fd;
249
250 fd = dgram->t.sock.fd;
251
252 /* check if ready for reading */
253 if (!fd_recv_ready(fd))
254 return;
255
256 /* no need to go further if we can't retrieve the nameserver */
257 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200258 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100259 fd_stop_recv(fd);
260 return;
261 }
262
263 ns->process_responses(ns);
264}
265
266/* Called when a dns network socket is ready to send data */
267static void dns_resolve_send(struct dgram_conn *dgram)
268{
269 int fd;
270 struct dns_nameserver *ns;
271 struct ring *ring;
272 struct buffer *buf;
273 uint64_t msg_len;
274 size_t len, cnt, ofs;
275
276 fd = dgram->t.sock.fd;
277
278 /* check if ready for sending */
279 if (!fd_send_ready(fd))
280 return;
281
282 /* no need to go further if we can't retrieve the nameserver */
283 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200284 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100285 fd_stop_send(fd);
286 return;
287 }
288
289 ring = ns->dgram->ring_req;
290 buf = &ring->buf;
291
292 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
293 ofs = ns->dgram->ofs_req;
294
295 /* explanation for the initialization below: it would be better to do
296 * this in the parsing function but this would occasionally result in
297 * dropped events because we'd take a reference on the oldest message
298 * and keep it while being scheduled. Thus instead let's take it the
299 * first time we enter here so that we have a chance to pass many
300 * existing messages before grabbing a reference to a location. This
301 * value cannot be produced after initialization.
302 */
303 if (unlikely(ofs == ~0)) {
304 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +0200305 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100306 ofs += ring->ofs;
307 }
308
309 /* we were already there, adjust the offset to be relative to
310 * the buffer's head and remove us from the counter.
311 */
312 ofs -= ring->ofs;
313 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200314 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100315
316 while (ofs + 1 < b_data(buf)) {
317 int ret;
318
319 cnt = 1;
320 len = b_peek_varint(buf, ofs + cnt, &msg_len);
321 if (!len)
322 break;
323 cnt += len;
324 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
325 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
326 /* too large a message to ever fit, let's skip it */
327 ofs += cnt + msg_len;
328 continue;
329 }
330
331 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
332
333 ret = send(fd, dns_msg_trash, len, 0);
334 if (ret < 0) {
Willy Tarreauacef5e22022-04-25 20:32:15 +0200335 if (errno == EAGAIN || errno == EWOULDBLOCK) {
Emeric Brund26a6232021-01-04 13:32:20 +0100336 fd_cant_send(fd);
337 goto out;
338 }
339 ns->counters->snd_error++;
340 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100341 fd = dgram->t.sock.fd = -1;
342 goto out;
343 }
344 ns->counters->sent++;
345
346 ofs += cnt + len;
347 }
348
349 /* we don't want/need to be waked up any more for sending
350 * because all ring content is sent */
351 fd_stop_send(fd);
352
353out:
354
Willy Tarreau4781b152021-04-06 13:53:36 +0200355 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100356 ofs += ring->ofs;
357 ns->dgram->ofs_req = ofs;
358 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
359
360}
361
Emeric Brunc9437992021-02-12 19:42:55 +0100362/* proto_udp callback functions for a DNS resolution */
363struct dgram_data_cb dns_dgram_cb = {
364 .recv = dns_resolve_recv,
365 .send = dns_resolve_send,
366};
Baptiste Assmann325137d2015-04-13 23:40:55 +0200367
Emeric Brunc9437992021-02-12 19:42:55 +0100368int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk)
Baptiste Assmann325137d2015-04-13 23:40:55 +0200369{
Emeric Brunc9437992021-02-12 19:42:55 +0100370 struct dns_dgram_server *dgram;
Baptiste Assmann201c07f2017-05-22 15:17:15 +0200371
Emeric Brunc9437992021-02-12 19:42:55 +0100372 if ((dgram = calloc(1, sizeof(*dgram))) == NULL)
Christopher Faulet67957bd2017-09-27 11:00:59 +0200373 return -1;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200374
Emeric Brunc9437992021-02-12 19:42:55 +0100375 /* Leave dgram partially initialized, no FD attached for
376 * now. */
377 dgram->conn.owner = ns;
378 dgram->conn.data = &dns_dgram_cb;
379 dgram->conn.t.sock.fd = -1;
380 dgram->conn.addr.to = *sk;
381 ns->dgram = dgram;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200382
Emeric Brunc9437992021-02-12 19:42:55 +0100383 dgram->ofs_req = ~0; /* init ring offset */
384 dgram->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
385 if (!dgram->ring_req) {
386 ha_alert("memory allocation error initializing the ring for nameserver.\n");
387 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200388 }
389
Emeric Brunc9437992021-02-12 19:42:55 +0100390 /* attach the task as reader */
391 if (!ring_attach(dgram->ring_req)) {
392 /* mark server attached to the ring */
393 ha_alert("nameserver sets too many watchers > 255 on ring. This is a bug and should not happen.\n");
394 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200395 }
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200396 return 0;
Emeric Brunc9437992021-02-12 19:42:55 +0100397out:
398 if (dgram->ring_req)
399 ring_free(dgram->ring_req);
Christopher Fauletd6c6b5f2020-09-08 10:27:24 +0200400
Emeric Brunc9437992021-02-12 19:42:55 +0100401 free(dgram);
Olivier Houchard2ec2db92018-01-08 16:28:57 +0100402
Emeric Brunfd647d52021-02-12 20:03:38 +0100403 return -1;
404}
405
406/*
407 * IO Handler to handle message push to dns tcp server
408 */
409static void dns_session_io_handler(struct appctx *appctx)
410{
Christopher Faulet908628c2022-03-25 16:43:49 +0100411 struct conn_stream *cs = appctx->owner;
Emeric Brunfd647d52021-02-12 20:03:38 +0100412 struct dns_session *ds = appctx->ctx.sft.ptr;
413 struct ring *ring = &ds->ring;
414 struct buffer *buf = &ring->buf;
415 uint64_t msg_len;
416 int available_room;
417 size_t len, cnt, ofs;
418 int ret = 0;
419
420 /* if stopping was requested, close immediately */
421 if (unlikely(stopping))
422 goto close;
423
424 /* we want to be sure to not miss that we have been awaked for a shutdown */
425 __ha_barrier_load();
426
427 /* that means the connection was requested to shutdown
428 * for instance idle expire */
429 if (ds->shutdown)
430 goto close;
431
432 /* an error was detected */
Christopher Faulet908628c2022-03-25 16:43:49 +0100433 if (unlikely(cs_ic(cs)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
Emeric Brunfd647d52021-02-12 20:03:38 +0100434 goto close;
435
436 /* con closed by server side, we will skip data write and drain data from channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100437 if ((cs_oc(cs)->flags & CF_SHUTW)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100438 goto read;
439 }
440
441 /* if the connection is not established, inform the stream that we want
442 * to be notified whenever the connection completes.
443 */
Christopher Faulet62e75742022-03-31 09:16:34 +0200444 if (cs_opposite(cs)->state < CS_ST_EST) {
Christopher Fauleta0bdec32022-04-04 07:51:21 +0200445 cs_cant_get(cs);
446 cs_rx_conn_blk(cs);
447 cs_rx_endp_more(cs);
Emeric Brunfd647d52021-02-12 20:03:38 +0100448 return;
449 }
450
451
452 ofs = ds->ofs;
453
454 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
455 LIST_DEL_INIT(&appctx->wait_entry);
456 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
457
458 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
459
460 /* explanation for the initialization below: it would be better to do
461 * this in the parsing function but this would occasionally result in
462 * dropped events because we'd take a reference on the oldest message
463 * and keep it while being scheduled. Thus instead let's take it the
464 * first time we enter here so that we have a chance to pass many
465 * existing messages before grabbing a reference to a location. This
466 * value cannot be produced after initialization.
467 */
468 if (unlikely(ofs == ~0)) {
469 ofs = 0;
470
Willy Tarreau4781b152021-04-06 13:53:36 +0200471 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100472 ofs += ring->ofs;
473 }
474
475 /* in this loop, ofs always points to the counter byte that precedes
476 * the message so that we can take our reference there if we have to
477 * stop before the end (ret=0).
478 */
Christopher Faulet62e75742022-03-31 09:16:34 +0200479 if (cs_opposite(cs)->state == CS_ST_EST) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100480 /* we were already there, adjust the offset to be relative to
481 * the buffer's head and remove us from the counter.
482 */
483 ofs -= ring->ofs;
484 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200485 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100486
487 ret = 1;
488 while (ofs + 1 < b_data(buf)) {
489 struct dns_query *query;
490 uint16_t original_qid;
491 uint16_t new_qid;
492
493 cnt = 1;
494 len = b_peek_varint(buf, ofs + cnt, &msg_len);
495 if (!len)
496 break;
497 cnt += len;
498 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
499
500 /* retrieve available room on output channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100501 available_room = channel_recv_max(cs_ic(cs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100502
503 /* tx_msg_offset null means we are at the start of a new message */
504 if (!ds->tx_msg_offset) {
505 uint16_t slen;
506
507 /* check if there is enough room to put message len and query id */
508 if (available_room < sizeof(slen) + sizeof(new_qid)) {
Christopher Fauleta0bdec32022-04-04 07:51:21 +0200509 cs_rx_room_blk(cs);
Emeric Brunfd647d52021-02-12 20:03:38 +0100510 ret = 0;
511 break;
512 }
513
514 /* put msg len into then channel */
515 slen = (uint16_t)msg_len;
516 slen = htons(slen);
Christopher Faulet908628c2022-03-25 16:43:49 +0100517 ci_putblk(cs_ic(cs), (char *)&slen, sizeof(slen));
Emeric Brunfd647d52021-02-12 20:03:38 +0100518 available_room -= sizeof(slen);
519
520 /* backup original query id */
521 len = b_getblk(buf, (char *)&original_qid, sizeof(original_qid), ofs + cnt);
Emeric Brun538bb042021-02-15 13:58:06 +0100522 if (!len) {
523 /* should never happen since messages are atomically
524 * written into ring
525 */
526 ret = 0;
527 break;
528 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100529
530 /* generates new query id */
531 new_qid = ++ds->query_counter;
532 new_qid = htons(new_qid);
533
534 /* put new query id into the channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100535 ci_putblk(cs_ic(cs), (char *)&new_qid, sizeof(new_qid));
Emeric Brunfd647d52021-02-12 20:03:38 +0100536 available_room -= sizeof(new_qid);
537
538 /* keep query id mapping */
539
540 query = pool_alloc(dns_query_pool);
541 if (query) {
542 query->qid.key = new_qid;
543 query->original_qid = original_qid;
544 query->expire = tick_add(now_ms, 5000);
545 LIST_INIT(&query->list);
546 if (LIST_ISEMPTY(&ds->queries)) {
547 /* enable task to handle expire */
548 ds->task_exp->expire = query->expire;
549 /* ensure this will be executed by the same
550 * thread than ds_session_release
551 * to ensure session_release is free
552 * to destroy the task */
553 task_queue(ds->task_exp);
554 }
Willy Tarreau2b718102021-04-21 07:32:39 +0200555 LIST_APPEND(&ds->queries, &query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100556 eb32_insert(&ds->query_ids, &query->qid);
557 ds->onfly_queries++;
558 }
559
560 /* update the tx_offset to handle output in 16k streams */
561 ds->tx_msg_offset = sizeof(original_qid);
562
563 }
564
565 /* check if it remains available room on output chan */
566 if (unlikely(!available_room)) {
Christopher Fauleta0bdec32022-04-04 07:51:21 +0200567 cs_rx_room_blk(cs);
Emeric Brunfd647d52021-02-12 20:03:38 +0100568 ret = 0;
569 break;
570 }
571
572 chunk_reset(&trash);
573 if ((msg_len - ds->tx_msg_offset) > available_room) {
574 /* remaining msg data is too large to be written in output channel at one time */
575
576 len = b_getblk(buf, trash.area, available_room, ofs + cnt + ds->tx_msg_offset);
577
578 /* update offset to complete mesg forwarding later */
579 ds->tx_msg_offset += len;
580 }
581 else {
582 /* remaining msg data can be written in output channel at one time */
583 len = b_getblk(buf, trash.area, msg_len - ds->tx_msg_offset, ofs + cnt + ds->tx_msg_offset);
584
585 /* reset tx_msg_offset to mark forward fully processed */
586 ds->tx_msg_offset = 0;
587 }
588 trash.data += len;
589
Christopher Faulet908628c2022-03-25 16:43:49 +0100590 if (ci_putchk(cs_ic(cs), &trash) == -1) {
Emeric Brun743afee2021-02-15 14:12:06 +0100591 /* should never happen since we
592 * check available_room is large
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500593 * enough here.
Emeric Brun743afee2021-02-15 14:12:06 +0100594 */
Christopher Fauleta0bdec32022-04-04 07:51:21 +0200595 cs_rx_room_blk(cs);
Emeric Brun743afee2021-02-15 14:12:06 +0100596 ret = 0;
597 break;
598 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100599
600 if (ds->tx_msg_offset) {
601 /* msg was not fully processed, we must be awake to drain pending data */
602
Christopher Fauleta0bdec32022-04-04 07:51:21 +0200603 cs_rx_room_blk(cs);
Emeric Brunfd647d52021-02-12 20:03:38 +0100604 ret = 0;
605 break;
606 }
607 /* switch to next message */
608 ofs += cnt + msg_len;
609 }
610
Willy Tarreau4781b152021-04-06 13:53:36 +0200611 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100612 ofs += ring->ofs;
613 ds->ofs = ofs;
614 }
615 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
616
617 if (ret) {
618 /* let's be woken up once new request to write arrived */
619 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
Willy Tarreau62e467c2021-10-20 11:02:13 +0200620 BUG_ON(LIST_INLIST(&appctx->wait_entry));
Willy Tarreau2b718102021-04-21 07:32:39 +0200621 LIST_APPEND(&ring->waiters, &appctx->wait_entry);
Emeric Brunfd647d52021-02-12 20:03:38 +0100622 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
Christopher Fauleta0bdec32022-04-04 07:51:21 +0200623 cs_rx_endp_done(cs);
Emeric Brunfd647d52021-02-12 20:03:38 +0100624 }
625
626read:
627
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500628 /* if session is not a waiter it means there is no committed
Emeric Brunfd647d52021-02-12 20:03:38 +0100629 * message into rx_buf and we are free to use it
630 * Note: we need a load barrier here to not miss the
631 * delete from the list
632 */
Emeric Brun70455902021-10-20 10:49:53 +0200633
Willy Tarreaudde1b442021-10-21 14:33:38 +0200634 __ha_barrier_load();
635 if (!LIST_INLIST_ATOMIC(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100636 while (1) {
637 uint16_t query_id;
638 struct eb32_node *eb;
639 struct dns_query *query;
640
641 if (!ds->rx_msg.len) {
642 /* next message len is not fully available into the channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100643 if (co_data(cs_oc(cs)) < 2)
Emeric Brunfd647d52021-02-12 20:03:38 +0100644 break;
645
646 /* retrieve message len */
Christopher Faulet908628c2022-03-25 16:43:49 +0100647 co_getblk(cs_oc(cs), (char *)&msg_len, 2, 0);
Emeric Brunfd647d52021-02-12 20:03:38 +0100648
649 /* mark as consumed */
Christopher Faulet908628c2022-03-25 16:43:49 +0100650 co_skip(cs_oc(cs), 2);
Emeric Brunfd647d52021-02-12 20:03:38 +0100651
652 /* store message len */
653 ds->rx_msg.len = ntohs(msg_len);
654 }
655
Christopher Faulet908628c2022-03-25 16:43:49 +0100656 if (!co_data(cs_oc(cs))) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100657 /* we need more data but nothing is available */
658 break;
659 }
660
Christopher Faulet908628c2022-03-25 16:43:49 +0100661 if (co_data(cs_oc(cs)) + ds->rx_msg.offset < ds->rx_msg.len) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100662 /* message only partially available */
663
664 /* read available data */
Christopher Faulet908628c2022-03-25 16:43:49 +0100665 co_getblk(cs_oc(cs), ds->rx_msg.area + ds->rx_msg.offset, co_data(cs_oc(cs)), 0);
Emeric Brunfd647d52021-02-12 20:03:38 +0100666
667 /* update message offset */
Christopher Faulet908628c2022-03-25 16:43:49 +0100668 ds->rx_msg.offset += co_data(cs_oc(cs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100669
670 /* consume all pending data from the channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100671 co_skip(cs_oc(cs), co_data(cs_oc(cs)));
Emeric Brunfd647d52021-02-12 20:03:38 +0100672
673 /* we need to wait for more data */
674 break;
675 }
676
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500677 /* enough data is available into the channel to read the message until the end */
Emeric Brunfd647d52021-02-12 20:03:38 +0100678
679 /* read from the channel until the end of the message */
Christopher Faulet908628c2022-03-25 16:43:49 +0100680 co_getblk(cs_oc(cs), ds->rx_msg.area + ds->rx_msg.offset, ds->rx_msg.len - ds->rx_msg.offset, 0);
Emeric Brunfd647d52021-02-12 20:03:38 +0100681
682 /* consume all data until the end of the message from the channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100683 co_skip(cs_oc(cs), ds->rx_msg.len - ds->rx_msg.offset);
Emeric Brunfd647d52021-02-12 20:03:38 +0100684
685 /* reset reader offset to 0 for next message reand */
686 ds->rx_msg.offset = 0;
687
688 /* try remap query id to original */
689 memcpy(&query_id, ds->rx_msg.area, sizeof(query_id));
690 eb = eb32_lookup(&ds->query_ids, query_id);
691 if (!eb) {
692 /* query id not found means we have an unknown corresponding
693 * request, perhaps server's bug or or the query reached
694 * timeout
695 */
696 ds->rx_msg.len = 0;
697 continue;
698 }
699
700 /* re-map the original query id set by the requester */
701 query = eb32_entry(eb, struct dns_query, qid);
702 memcpy(ds->rx_msg.area, &query->original_qid, sizeof(query->original_qid));
703
704 /* remove query ids mapping from pending queries list/tree */
705 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200706 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100707 pool_free(dns_query_pool, query);
708 ds->onfly_queries--;
709
Emeric Brunfd647d52021-02-12 20:03:38 +0100710 /* the dns_session is also added in queue of the
711 * wait_sess list where the task processing
712 * response will pop available responses
713 */
Willy Tarreaudde1b442021-10-21 14:33:38 +0200714 HA_SPIN_LOCK(DNS_LOCK, &ds->dss->lock);
715
Willy Tarreau62e467c2021-10-20 11:02:13 +0200716 BUG_ON(LIST_INLIST(&ds->waiter));
Willy Tarreau2b718102021-04-21 07:32:39 +0200717 LIST_APPEND(&ds->dss->wait_sess, &ds->waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100718
Willy Tarreaudde1b442021-10-21 14:33:38 +0200719 HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
720
Emeric Brunfd647d52021-02-12 20:03:38 +0100721 /* awake the task processing the responses */
722 task_wakeup(ds->dss->task_rsp, TASK_WOKEN_INIT);
723
724 break;
725 }
726
Willy Tarreau2b718102021-04-21 07:32:39 +0200727 if (!LIST_INLIST(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100728 /* there is no more pending data to read and the con was closed by the server side */
Christopher Faulet908628c2022-03-25 16:43:49 +0100729 if (!co_data(cs_oc(cs)) && (cs_oc(cs)->flags & CF_SHUTW)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100730 goto close;
731 }
732 }
733
734 }
735
Emeric Brunfd647d52021-02-12 20:03:38 +0100736 return;
737close:
Christopher Fauletda098e62022-03-31 17:44:45 +0200738 cs_shutw(cs);
739 cs_shutr(cs);
Christopher Faulet908628c2022-03-25 16:43:49 +0100740 cs_ic(cs)->flags |= CF_READ_NULL;
Emeric Brunfd647d52021-02-12 20:03:38 +0100741}
742
743void dns_queries_flush(struct dns_session *ds)
744{
745 struct dns_query *query, *queryb;
746
747 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
748 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200749 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100750 pool_free(dns_query_pool, query);
751 }
752}
753
754void dns_session_free(struct dns_session *ds)
755{
756 if (ds->rx_msg.area)
757 pool_free(dns_msg_buf, ds->rx_msg.area);
758 if (ds->tx_ring_area)
759 pool_free(dns_msg_buf, ds->tx_ring_area);
760 if (ds->task_exp)
761 task_destroy(ds->task_exp);
762
763 dns_queries_flush(ds);
764
Emeric Brund20dc212021-10-19 15:40:10 +0200765 /* Ensure to remove this session from external lists
766 * Note: we are under the lock of dns_stream_server
767 * which own the heads of those lists.
768 */
769 LIST_DEL_INIT(&ds->waiter);
770 LIST_DEL_INIT(&ds->list);
771
Emeric Brunfd647d52021-02-12 20:03:38 +0100772 ds->dss->cur_conns--;
773 /* Note: this is useless to update
774 * max_active_conns here because
775 * we decrease the value
776 */
Willy Tarreau62e467c2021-10-20 11:02:13 +0200777
778 BUG_ON(!LIST_ISEMPTY(&ds->list));
779 BUG_ON(!LIST_ISEMPTY(&ds->waiter));
780 BUG_ON(!LIST_ISEMPTY(&ds->queries));
781 BUG_ON(!LIST_ISEMPTY(&ds->ring.waiters));
782 BUG_ON(!eb_is_empty(&ds->query_ids));
Emeric Brunfd647d52021-02-12 20:03:38 +0100783 pool_free(dns_session_pool, ds);
784}
785
/* Forward declaration: dns_session_release() may need to create a new appctx */
static struct appctx *dns_session_create(struct dns_session *ds);
787
788/*
789 * Function to release a DNS tcp session
790 */
791static void dns_session_release(struct appctx *appctx)
792{
793 struct dns_session *ds = appctx->ctx.sft.ptr;
Willy Tarreaue3e648c2021-02-24 17:38:46 +0100794 struct dns_stream_server *dss __maybe_unused;
Emeric Brunfd647d52021-02-12 20:03:38 +0100795
796 if (!ds)
797 return;
798
Willy Tarreaub56a8782021-10-20 14:38:43 +0200799 /* We do not call ring_appctx_detach here
800 * because we want to keep readers counters
801 * to retry a conn with a different appctx.
802 */
803 HA_RWLOCK_WRLOCK(DNS_LOCK, &ds->ring.lock);
804 LIST_DEL_INIT(&appctx->wait_entry);
805 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ds->ring.lock);
806
Emeric Brunfd647d52021-02-12 20:03:38 +0100807 dss = ds->dss;
808
809 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
810 LIST_DEL_INIT(&ds->list);
811
812 if (stopping) {
813 dns_session_free(ds);
814 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
815 return;
816 }
817
818 if (!ds->nb_queries) {
819 /* this is an idle session */
820 /* Note: this is useless to update max_active_sess
821 * here because we decrease idle_conns but
822 * dns_session_free decrease curconns
823 */
824
825 ds->dss->idle_conns--;
826 dns_session_free(ds);
827 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
828 return;
829 }
830
831 if (ds->onfly_queries == ds->nb_queries) {
832 /* the session can be released because
833 * it means that all queries AND
834 * responses are in fly */
835 dns_session_free(ds);
836 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
837 return;
838 }
839
Emeric Brunfd647d52021-02-12 20:03:38 +0100840 /* if there is no pending complete response
841 * message, ensure to reset
842 * message offsets if the session
843 * was closed with an incomplete pending response
844 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200845 if (!LIST_INLIST(&ds->waiter))
Emeric Brunfd647d52021-02-12 20:03:38 +0100846 ds->rx_msg.len = ds->rx_msg.offset = 0;
847
848 /* we flush pending sent queries because we never
849 * have responses
850 */
851 ds->nb_queries -= ds->onfly_queries;
852 dns_queries_flush(ds);
853
854 /* reset offset to be sure to start from message start */
855 ds->tx_msg_offset = 0;
856
857 /* here the ofs and the attached counter
858 * are kept unchanged
859 */
860
861 /* Create a new appctx, We hope we can
862 * create from the release callback! */
863 ds->appctx = dns_session_create(ds);
864 if (!ds->appctx) {
865 dns_session_free(ds);
866 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
867 return;
868 }
869
870 if (ds->nb_queries < DNS_STREAM_MAX_PIPELINED_REQ)
Willy Tarreau2b718102021-04-21 07:32:39 +0200871 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100872
873 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
874}
875
876/* DNS tcp session applet */
877static struct applet dns_session_applet = {
878 .obj_type = OBJ_TYPE_APPLET,
879 .name = "<STRMDNS>", /* used for logging */
880 .fct = dns_session_io_handler,
881 .release = dns_session_release,
882};
883
884/*
885 * Function used to create an appctx for a DNS session
886 */
887static struct appctx *dns_session_create(struct dns_session *ds)
888{
889 struct appctx *appctx;
890 struct session *sess;
Christopher Faulet13a35e52021-12-20 15:34:16 +0100891 struct conn_stream *cs;
Emeric Brunfd647d52021-02-12 20:03:38 +0100892 struct stream *s;
893 struct applet *applet = &dns_session_applet;
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100894 struct sockaddr_storage *addr = NULL;
Emeric Brunfd647d52021-02-12 20:03:38 +0100895
Christopher Faulet9ec2f4d2022-03-23 15:15:29 +0100896 appctx = appctx_new(applet, NULL);
Christopher Faulet2479e5f2022-01-19 14:50:11 +0100897 if (!appctx)
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100898 goto out_close;
Emeric Brunfd647d52021-02-12 20:03:38 +0100899 appctx->ctx.sft.ptr = (void *)ds;
900
901 sess = session_new(ds->dss->srv->proxy, NULL, &appctx->obj_type);
902 if (!sess) {
Christopher Faulet13a35e52021-12-20 15:34:16 +0100903 ha_alert("out of memory in dns_session_create().\n");
Emeric Brunfd647d52021-02-12 20:03:38 +0100904 goto out_free_appctx;
905 }
906
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100907 if (!sockaddr_alloc(&addr, &ds->dss->srv->addr, sizeof(ds->dss->srv->addr)))
Christopher Faulet2479e5f2022-01-19 14:50:11 +0100908 goto out_free_sess;
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100909
Christopher Faulet9ec2f4d2022-03-23 15:15:29 +0100910 cs = cs_new_from_applet(appctx->endp, sess, &BUF_NULL);
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100911 if (!cs) {
912 ha_alert("Failed to initialize stream in dns_session_create().\n");
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100913 goto out_free_addr;
Christopher Faulet13a35e52021-12-20 15:34:16 +0100914 }
915
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100916 s = DISGUISE(cs_strm(cs));
Christopher Faulet8da67aa2022-03-29 17:53:09 +0200917 s->csb->dst = addr;
Christopher Faulet8abe7122022-03-30 15:10:18 +0200918 s->csb->flags |= CS_FL_NOLINGER;
Emeric Brunfd647d52021-02-12 20:03:38 +0100919 s->target = &ds->dss->srv->obj_type;
Emeric Brunfd647d52021-02-12 20:03:38 +0100920 s->flags = SF_ASSIGNED|SF_ADDR_SET;
Emeric Brunfd647d52021-02-12 20:03:38 +0100921
922 s->do_log = NULL;
923 s->uniq_id = 0;
924
925 s->res.flags |= CF_READ_DONTWAIT;
926 /* for rto and rex to eternity to not expire on idle recv:
927 * We are using a syslog server.
928 */
929 s->res.rto = TICK_ETERNITY;
930 s->res.rex = TICK_ETERNITY;
931 ds->appctx = appctx;
Emeric Brunfd647d52021-02-12 20:03:38 +0100932 return appctx;
933
934 /* Error unrolling */
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100935 out_free_addr:
936 sockaddr_free(&addr);
Emeric Brunfd647d52021-02-12 20:03:38 +0100937 out_free_sess:
938 session_free(sess);
939 out_free_appctx:
940 appctx_free(appctx);
941 out_close:
942 return NULL;
943}
944
945/* Task processing expiration of unresponded queries, this one is supposed
946 * to be stuck on the same thread than the appctx handler
947 */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100948static struct task *dns_process_query_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100949{
950 struct dns_session *ds = (struct dns_session *)context;
951 struct dns_query *query, *queryb;
952
953 t->expire = TICK_ETERNITY;
954
955 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
956 if (tick_is_expired(query->expire, now_ms)) {
957 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200958 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100959 pool_free(dns_query_pool, query);
960 ds->onfly_queries--;
961 }
962 else {
963 t->expire = query->expire;
964 break;
965 }
966 }
967
968 return t;
969}
970
971/* Task processing expiration of idle sessions */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100972static struct task *dns_process_idle_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100973{
974 struct dns_stream_server *dss = (struct dns_stream_server *)context;
975 struct dns_session *ds, *dsb;
976 int target = 0;
977 int cur_active_conns;
978
979 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
980
981
982 cur_active_conns = dss->cur_conns - dss->idle_conns;
983 if (cur_active_conns > dss->max_active_conns)
984 dss->max_active_conns = cur_active_conns;
985
986 target = (dss->max_active_conns - cur_active_conns) / 2;
987 list_for_each_entry_safe(ds, dsb, &dss->idle_sess, list) {
988 if (!target)
989 break;
990
991 /* remove conn to pending list to ensure it won't be reused */
992 LIST_DEL_INIT(&ds->list);
993
994 /* force session shutdown */
995 ds->shutdown = 1;
996
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500997 /* to be sure that the appctx won't miss shutdown */
Emeric Brunfd647d52021-02-12 20:03:38 +0100998 __ha_barrier_store();
999
1000 /* wake appctx to perform the shutdown */
1001 appctx_wakeup(ds->appctx);
1002 }
1003
1004 /* reset max to current active conns */
1005 dss->max_active_conns = cur_active_conns;
1006
1007 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
1008
1009 t->expire = tick_add(now_ms, 5000);
1010
1011 return t;
1012}
1013
1014struct dns_session *dns_session_new(struct dns_stream_server *dss)
1015{
1016 struct dns_session *ds;
1017
1018 if (dss->maxconn && (dss->maxconn <= dss->cur_conns))
1019 return NULL;
1020
1021 ds = pool_alloc(dns_session_pool);
1022 if (!ds)
1023 return NULL;
1024
1025 ds->ofs = ~0;
1026 ds->dss = dss;
1027 LIST_INIT(&ds->list);
1028 LIST_INIT(&ds->queries);
1029 LIST_INIT(&ds->waiter);
1030 ds->rx_msg.offset = ds->rx_msg.len = 0;
1031 ds->rx_msg.area = NULL;
1032 ds->tx_ring_area = NULL;
1033 ds->task_exp = NULL;
1034 ds->appctx = NULL;
1035 ds->shutdown = 0;
1036 ds->nb_queries = 0;
1037 ds->query_ids = EB_ROOT_UNIQUE;
1038 ds->rx_msg.area = pool_alloc(dns_msg_buf);
1039 if (!ds->rx_msg.area)
1040 goto error;
1041
1042 ds->tx_ring_area = pool_alloc(dns_msg_buf);
1043 if (!ds->tx_ring_area)
1044 goto error;
1045
1046 ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE);
Christopher Faulet1a1b6742021-03-04 16:53:27 +01001047 /* never fail because it is the first watcher attached to the ring */
1048 DISGUISE(ring_attach(&ds->ring));
Emeric Brunfd647d52021-02-12 20:03:38 +01001049
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001050 if ((ds->task_exp = task_new_here()) == NULL)
Emeric Brunfd647d52021-02-12 20:03:38 +01001051 goto error;
1052
1053 ds->task_exp->process = dns_process_query_exp;
1054 ds->task_exp->context = ds;
1055
1056 ds->appctx = dns_session_create(ds);
1057 if (!ds->appctx)
1058 goto error;
1059
1060 dss->cur_conns++;
1061
1062 return ds;
1063
1064error:
1065 if (ds->task_exp)
1066 task_destroy(ds->task_exp);
1067 if (ds->rx_msg.area)
1068 pool_free(dns_msg_buf, ds->rx_msg.area);
1069 if (ds->tx_ring_area)
1070 pool_free(dns_msg_buf, ds->tx_ring_area);
1071
1072 pool_free(dns_session_pool, ds);
1073
1074 return NULL;
1075}
1076
1077/*
1078 * Task used to consume pending messages from nameserver ring
1079 * and forward them to dns_session ring.
1080 * Note: If no slot found a new dns_session is allocated
1081 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001082static struct task *dns_process_req(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001083{
1084 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1085 struct dns_stream_server *dss = ns->stream;
1086 struct ring *ring = dss->ring_req;
1087 struct buffer *buf = &ring->buf;
1088 uint64_t msg_len;
1089 size_t len, cnt, ofs;
1090 struct dns_session *ds, *ads;
1091 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
1092
1093 ofs = dss->ofs_req;
1094
1095 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
1096
1097 /* explanation for the initialization below: it would be better to do
1098 * this in the parsing function but this would occasionally result in
1099 * dropped events because we'd take a reference on the oldest message
1100 * and keep it while being scheduled. Thus instead let's take it the
1101 * first time we enter here so that we have a chance to pass many
1102 * existing messages before grabbing a reference to a location. This
1103 * value cannot be produced after initialization.
1104 */
1105 if (unlikely(ofs == ~0)) {
1106 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +02001107 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001108 ofs += ring->ofs;
1109 }
1110
1111 /* we were already there, adjust the offset to be relative to
1112 * the buffer's head and remove us from the counter.
1113 */
1114 ofs -= ring->ofs;
1115 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +02001116 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001117
1118 while (ofs + 1 < b_data(buf)) {
1119 struct ist myist;
1120
1121 cnt = 1;
1122 len = b_peek_varint(buf, ofs + cnt, &msg_len);
1123 if (!len)
1124 break;
1125 cnt += len;
1126 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
1127 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
1128 /* too large a message to ever fit, let's skip it */
1129 ofs += cnt + msg_len;
1130 continue;
1131 }
1132
1133 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
1134
Tim Duesterhus92c696e2021-02-28 16:11:36 +01001135 myist = ist2(dns_msg_trash, len);
Emeric Brunfd647d52021-02-12 20:03:38 +01001136
1137 ads = NULL;
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001138 /* try to push request into active sess with free slot */
Emeric Brunfd647d52021-02-12 20:03:38 +01001139 if (!LIST_ISEMPTY(&dss->free_sess)) {
1140 ds = LIST_NEXT(&dss->free_sess, struct dns_session *, list);
1141
1142 if (ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) {
1143 ds->nb_queries++;
1144 if (ds->nb_queries >= DNS_STREAM_MAX_PIPELINED_REQ)
1145 LIST_DEL_INIT(&ds->list);
1146 ads = ds;
1147 }
1148 else {
1149 /* it means we were unable to put a request in this slot,
1150 * it may be close to be full so we put it at the end
1151 * of free conn list */
1152 LIST_DEL_INIT(&ds->list);
Willy Tarreau2b718102021-04-21 07:32:39 +02001153 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001154 }
1155 }
1156
1157 if (!ads) {
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001158 /* try to push request into idle, this one should have enough free space */
Emeric Brunfd647d52021-02-12 20:03:38 +01001159 if (!LIST_ISEMPTY(&dss->idle_sess)) {
1160 ds = LIST_NEXT(&dss->idle_sess, struct dns_session *, list);
1161
1162 /* ring is empty so this ring_write should never fail */
1163 ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1164 ds->nb_queries++;
1165 LIST_DEL_INIT(&ds->list);
1166
1167 ds->dss->idle_conns--;
1168
1169 /* we may have to update the max_active_conns */
1170 if (ds->dss->max_active_conns < ds->dss->cur_conns - ds->dss->idle_conns)
1171 ds->dss->max_active_conns = ds->dss->cur_conns - ds->dss->idle_conns;
1172
1173 /* since we may unable to find a free list to handle
1174 * this request, this request may be large and fill
1175 * the ring buffer so we prefer to put at the end of free
1176 * list. */
Willy Tarreau2b718102021-04-21 07:32:39 +02001177 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001178 ads = ds;
1179 }
1180 }
1181
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001182 /* we didn't find a session available with large enough room */
Emeric Brunfd647d52021-02-12 20:03:38 +01001183 if (!ads) {
1184 /* allocate a new session */
1185 ads = dns_session_new(dss);
1186 if (ads) {
1187 /* ring is empty so this ring_write should never fail */
1188 ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1189 ads->nb_queries++;
Willy Tarreau2b718102021-04-21 07:32:39 +02001190 LIST_INSERT(&dss->free_sess, &ads->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001191 }
1192 else
1193 ns->counters->snd_error++;
1194 }
1195
1196 if (ads)
1197 ns->counters->sent++;
1198
1199 ofs += cnt + len;
1200 }
1201
Willy Tarreau4781b152021-04-06 13:53:36 +02001202 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001203 ofs += ring->ofs;
1204 dss->ofs_req = ofs;
1205 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
1206
1207
1208 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
1209 return t;
1210}
1211
1212/*
1213 * Task used to consume response
1214 * Note: upper layer callback is called
1215 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001216static struct task *dns_process_rsp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001217{
1218 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1219
1220 ns->process_responses(ns);
1221
1222 return t;
1223}
1224
1225/* Function used to initialize an TCP nameserver */
1226int dns_stream_init(struct dns_nameserver *ns, struct server *srv)
1227{
1228 struct dns_stream_server *dss = NULL;
1229
1230 dss = calloc(1, sizeof(*dss));
1231 if (!dss) {
1232 ha_alert("memory allocation error initializing dns tcp server '%s'.\n", srv->id);
1233 goto out;
1234 }
1235
1236 dss->srv = srv;
1237 dss->maxconn = srv->maxconn;
1238
1239 dss->ofs_req = ~0; /* init ring offset */
1240 dss->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
1241 if (!dss->ring_req) {
1242 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1243 goto out;
1244 }
1245 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001246 if ((dss->task_req = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001247 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1248 goto out;
1249 }
1250
1251 /* Update task's parameters */
1252 dss->task_req->process = dns_process_req;
1253 dss->task_req->context = ns;
1254
1255 /* attach the task as reader */
1256 if (!ring_attach(dss->ring_req)) {
1257 /* mark server attached to the ring */
1258 ha_alert("server '%s': too many watchers for ring. this should never happen.\n", srv->id);
1259 goto out;
1260 }
1261
1262 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001263 if ((dss->task_rsp = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001264 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1265 goto out;
1266 }
1267
1268 /* Update task's parameters */
1269 dss->task_rsp->process = dns_process_rsp;
1270 dss->task_rsp->context = ns;
1271
1272 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001273 if ((dss->task_idle = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001274 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1275 goto out;
1276 }
1277
1278 /* Update task's parameters */
1279 dss->task_idle->process = dns_process_idle_exp;
1280 dss->task_idle->context = dss;
1281 dss->task_idle->expire = tick_add(now_ms, 5000);
1282
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001283 /* let start the task to free idle conns immediately */
Emeric Brunfd647d52021-02-12 20:03:38 +01001284 task_queue(dss->task_idle);
1285
1286 LIST_INIT(&dss->free_sess);
1287 LIST_INIT(&dss->idle_sess);
1288 LIST_INIT(&dss->wait_sess);
1289 HA_SPIN_INIT(&dss->lock);
1290 ns->stream = dss;
1291 return 0;
1292out:
1293 if (dss && dss->task_rsp)
1294 task_destroy(dss->task_rsp);
1295 if (dss && dss->task_req)
1296 task_destroy(dss->task_req);
1297 if (dss && dss->ring_req)
1298 ring_free(dss->ring_req);
1299
1300 free(dss);
Emeric Brunc9437992021-02-12 19:42:55 +01001301 return -1;
Christopher Faulet67957bd2017-09-27 11:00:59 +02001302}
1303
Emeric Brunc9437992021-02-12 19:42:55 +01001304int init_dns_buffers()
Baptiste Assmann325137d2015-04-13 23:40:55 +02001305{
Emeric Brunc9437992021-02-12 19:42:55 +01001306 dns_msg_trash = malloc(DNS_TCP_MSG_MAX_SIZE);
1307 if (!dns_msg_trash)
1308 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +02001309
Emeric Brunc9437992021-02-12 19:42:55 +01001310 return 1;
1311}
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +02001312
Emeric Brunc9437992021-02-12 19:42:55 +01001313void deinit_dns_buffers()
1314{
Willy Tarreau61cfdf42021-02-20 10:46:51 +01001315 ha_free(&dns_msg_trash);
Emeric Brunc9437992021-02-12 19:42:55 +01001316}
Emeric Brund26a6232021-01-04 13:32:20 +01001317
1318REGISTER_PER_THREAD_ALLOC(init_dns_buffers);
1319REGISTER_PER_THREAD_FREE(deinit_dns_buffers);