blob: a4adbf832f306406444cf3e5f093b2f9012a4c31 [file] [log] [blame]
Baptiste Assmann325137d2015-04-13 23:40:55 +02001/*
2 * Name server resolution
3 *
Willy Tarreau714f3452021-05-09 06:47:26 +02004 * Copyright 2020 HAProxy Technologies
Baptiste Assmann325137d2015-04-13 23:40:55 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <unistd.h>
19
20#include <sys/types.h>
21
Willy Tarreau122eba92020-06-04 10:15:32 +020022#include <haproxy/action.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020023#include <haproxy/api.h>
Willy Tarreau6be78492020-06-05 00:00:29 +020024#include <haproxy/cfgparse.h>
Willy Tarreauf1d32c42020-06-04 21:07:02 +020025#include <haproxy/channel.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020026#include <haproxy/check.h>
Willy Tarreau83487a82020-06-04 20:19:54 +020027#include <haproxy/cli.h>
Christopher Faulet908628c2022-03-25 16:43:49 +010028#include <haproxy/conn_stream.h>
29#include <haproxy/cs_utils.h>
Willy Tarreau7c18b542020-06-11 09:23:02 +020030#include <haproxy/dgram.h>
Willy Tarreaueb92deb2020-06-04 10:53:16 +020031#include <haproxy/dns.h>
Willy Tarreau8d366972020-05-27 16:10:29 +020032#include <haproxy/errors.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020033#include <haproxy/fd.h>
Willy Tarreauaeed4a82020-06-04 22:01:04 +020034#include <haproxy/log.h>
Emeric Brund26a6232021-01-04 13:32:20 +010035#include <haproxy/ring.h>
Emeric Brunfd647d52021-02-12 20:03:38 +010036#include <haproxy/stream.h>
37#include <haproxy/stream_interface.h>
Willy Tarreau9f9e9fc2021-05-08 13:09:46 +020038#include <haproxy/tools.h>
Baptiste Assmann325137d2015-04-13 23:40:55 +020039
Emeric Brund26a6232021-01-04 13:32:20 +010040static THREAD_LOCAL char *dns_msg_trash;
Baptiste Assmann325137d2015-04-13 23:40:55 +020041
Emeric Brunfd647d52021-02-12 20:03:38 +010042DECLARE_STATIC_POOL(dns_session_pool, "dns_session", sizeof(struct dns_session));
43DECLARE_STATIC_POOL(dns_query_pool, "dns_query", sizeof(struct dns_query));
44DECLARE_STATIC_POOL(dns_msg_buf, "dns_msg_buf", DNS_TCP_MSG_RING_MAX_SIZE);
45
Christopher Faulet67957bd2017-09-27 11:00:59 +020046/* Opens an UDP socket on the namesaver's IP/Port, if required. Returns 0 on
Christopher Faulet1e711be2021-03-04 16:58:35 +010047 * success, -1 otherwise. ns->dgram must be defined.
Baptiste Assmann325137d2015-04-13 23:40:55 +020048 */
Emeric Brund26a6232021-01-04 13:32:20 +010049static int dns_connect_nameserver(struct dns_nameserver *ns)
Baptiste Assmann325137d2015-04-13 23:40:55 +020050{
Christopher Faulet1e711be2021-03-04 16:58:35 +010051 struct dgram_conn *dgram = &ns->dgram->conn;
52 int fd;
Baptiste Assmann325137d2015-04-13 23:40:55 +020053
Christopher Faulet1e711be2021-03-04 16:58:35 +010054 /* Already connected */
55 if (dgram->t.sock.fd != -1)
Emeric Brun526b7922021-02-15 14:28:27 +010056 return 0;
Christopher Faulet1e711be2021-03-04 16:58:35 +010057
58 /* Create an UDP socket and connect it on the nameserver's IP/Port */
59 if ((fd = socket(dgram->addr.to.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
60 send_log(NULL, LOG_WARNING,
61 "DNS : section '%s': can't create socket for nameserver '%s'.\n",
62 ns->counters->pid, ns->id);
63 return -1;
64 }
65 if (connect(fd, (struct sockaddr*)&dgram->addr.to, get_addr_len(&dgram->addr.to)) == -1) {
66 send_log(NULL, LOG_WARNING,
67 "DNS : section '%s': can't connect socket for nameserver '%s'.\n",
68 ns->counters->id, ns->id);
69 close(fd);
70 return -1;
Emeric Brunc9437992021-02-12 19:42:55 +010071 }
Emeric Brun526b7922021-02-15 14:28:27 +010072
Christopher Faulet1e711be2021-03-04 16:58:35 +010073 /* Make the socket non blocking */
74 fcntl(fd, F_SETFL, O_NONBLOCK);
75
76 /* Add the fd in the fd list and update its parameters */
77 dgram->t.sock.fd = fd;
78 fd_insert(fd, dgram, dgram_fd_handler, MAX_THREADS_MASK);
79 fd_want_recv(fd);
80 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +020081}
82
Emeric Brund26a6232021-01-04 13:32:20 +010083/* Sends a message to a name server
84 * It returns message length on success
85 * or -1 in error case
86 * 0 is returned in case of output ring buffer is full
87 */
88int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len)
89{
90 int ret = -1;
91
92 if (ns->dgram) {
93 struct dgram_conn *dgram = &ns->dgram->conn;
94 int fd = dgram->t.sock.fd;
95
96 if (dgram->t.sock.fd == -1) {
97 if (dns_connect_nameserver(ns) == -1)
98 return -1;
99 fd = dgram->t.sock.fd;
100 }
101
102 ret = send(fd, buf, len, 0);
103 if (ret < 0) {
104 if (errno == EAGAIN) {
105 struct ist myist;
106
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100107 myist = ist2(buf, len);
Emeric Brund26a6232021-01-04 13:32:20 +0100108 ret = ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
109 if (!ret) {
110 ns->counters->snd_error++;
111 return -1;
112 }
113 fd_cant_send(fd);
114 return ret;
115 }
116 ns->counters->snd_error++;
117 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100118 dgram->t.sock.fd = -1;
119 return -1;
120 }
121 ns->counters->sent++;
122 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100123 else if (ns->stream) {
124 struct ist myist;
125
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100126 myist = ist2(buf, len);
Emeric Brunfd647d52021-02-12 20:03:38 +0100127 ret = ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
128 if (!ret) {
129 ns->counters->snd_error++;
130 return -1;
131 }
132 task_wakeup(ns->stream->task_req, TASK_WOKEN_MSG);
133 return ret;
134 }
Emeric Brund26a6232021-01-04 13:32:20 +0100135
136 return ret;
137}
138
Emeric Brunfd647d52021-02-12 20:03:38 +0100139void dns_session_free(struct dns_session *);
140
Emeric Brund26a6232021-01-04 13:32:20 +0100141/* Receives a dns message
142 * Returns message length
143 * 0 is returned if no more message available
144 * -1 in error case
145 */
146ssize_t dns_recv_nameserver(struct dns_nameserver *ns, void *data, size_t size)
147{
148 ssize_t ret = -1;
149
150 if (ns->dgram) {
151 struct dgram_conn *dgram = &ns->dgram->conn;
152 int fd = dgram->t.sock.fd;
153
154 if (fd == -1)
155 return -1;
156
157 if ((ret = recv(fd, data, size, 0)) < 0) {
158 if (errno == EAGAIN) {
159 fd_cant_recv(fd);
160 return 0;
161 }
162 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100163 dgram->t.sock.fd = -1;
164 return -1;
165 }
166 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100167 else if (ns->stream) {
168 struct dns_stream_server *dss = ns->stream;
169 struct dns_session *ds;
170
171 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
172
173 if (!LIST_ISEMPTY(&dss->wait_sess)) {
174 ds = LIST_NEXT(&dss->wait_sess, struct dns_session *, waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100175 ret = ds->rx_msg.len < size ? ds->rx_msg.len : size;
176 memcpy(data, ds->rx_msg.area, ret);
177
178 ds->rx_msg.len = 0;
179
Willy Tarreaudde1b442021-10-21 14:33:38 +0200180 /* This barrier is here to ensure that all data is
181 * stored if the appctx detect the elem is out of the
182 * list.
183 */
184 __ha_barrier_store();
185
Emeric Brunfd647d52021-02-12 20:03:38 +0100186 LIST_DEL_INIT(&ds->waiter);
187
188 if (ds->appctx) {
Willy Tarreaudde1b442021-10-21 14:33:38 +0200189 /* This second barrier is here to ensure that
190 * the waked up appctx won't miss that the elem
191 * is removed from the list.
192 */
193 __ha_barrier_store();
194
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500195 /* awake appctx because it may have other
Emeric Brunfd647d52021-02-12 20:03:38 +0100196 * message to receive
197 */
198 appctx_wakeup(ds->appctx);
199
200 /* dns_session could already be into free_sess list
201 * so we firstly remove it */
202 LIST_DEL_INIT(&ds->list);
203
204 /* decrease nb_queries to free a slot for a new query on that sess */
205 ds->nb_queries--;
206 if (ds->nb_queries) {
207 /* it remains pipelined unanswered request
208 * into this session but we just decrease
209 * the counter so the session
210 * can not be full of pipelined requests
211 * so we can add if to free_sess list
212 * to receive a new request
213 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200214 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100215 }
216 else {
217 /* there is no more pipelined requests
218 * into this session, so we move it
219 * to idle_sess list */
Willy Tarreau2b718102021-04-21 07:32:39 +0200220 LIST_INSERT(&ds->dss->idle_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100221
222 /* update the counter of idle sessions */
223 ds->dss->idle_conns++;
224
225 /* Note: this is useless there to update
226 * the max_active_conns since we increase
227 * the idle count */
228 }
229 }
230 else {
231 /* there is no more appctx for this session
232 * it means it is ready to die
233 */
234 dns_session_free(ds);
235 }
236
237
238 }
239
240 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
241 }
Emeric Brund26a6232021-01-04 13:32:20 +0100242
243 return ret;
244}
245
246static void dns_resolve_recv(struct dgram_conn *dgram)
247{
248 struct dns_nameserver *ns;
249 int fd;
250
251 fd = dgram->t.sock.fd;
252
253 /* check if ready for reading */
254 if (!fd_recv_ready(fd))
255 return;
256
257 /* no need to go further if we can't retrieve the nameserver */
258 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200259 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100260 fd_stop_recv(fd);
261 return;
262 }
263
264 ns->process_responses(ns);
265}
266
267/* Called when a dns network socket is ready to send data */
268static void dns_resolve_send(struct dgram_conn *dgram)
269{
270 int fd;
271 struct dns_nameserver *ns;
272 struct ring *ring;
273 struct buffer *buf;
274 uint64_t msg_len;
275 size_t len, cnt, ofs;
276
277 fd = dgram->t.sock.fd;
278
279 /* check if ready for sending */
280 if (!fd_send_ready(fd))
281 return;
282
283 /* no need to go further if we can't retrieve the nameserver */
284 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200285 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100286 fd_stop_send(fd);
287 return;
288 }
289
290 ring = ns->dgram->ring_req;
291 buf = &ring->buf;
292
293 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
294 ofs = ns->dgram->ofs_req;
295
296 /* explanation for the initialization below: it would be better to do
297 * this in the parsing function but this would occasionally result in
298 * dropped events because we'd take a reference on the oldest message
299 * and keep it while being scheduled. Thus instead let's take it the
300 * first time we enter here so that we have a chance to pass many
301 * existing messages before grabbing a reference to a location. This
302 * value cannot be produced after initialization.
303 */
304 if (unlikely(ofs == ~0)) {
305 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +0200306 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100307 ofs += ring->ofs;
308 }
309
310 /* we were already there, adjust the offset to be relative to
311 * the buffer's head and remove us from the counter.
312 */
313 ofs -= ring->ofs;
314 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200315 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100316
317 while (ofs + 1 < b_data(buf)) {
318 int ret;
319
320 cnt = 1;
321 len = b_peek_varint(buf, ofs + cnt, &msg_len);
322 if (!len)
323 break;
324 cnt += len;
325 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
326 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
327 /* too large a message to ever fit, let's skip it */
328 ofs += cnt + msg_len;
329 continue;
330 }
331
332 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
333
334 ret = send(fd, dns_msg_trash, len, 0);
335 if (ret < 0) {
336 if (errno == EAGAIN) {
337 fd_cant_send(fd);
338 goto out;
339 }
340 ns->counters->snd_error++;
341 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100342 fd = dgram->t.sock.fd = -1;
343 goto out;
344 }
345 ns->counters->sent++;
346
347 ofs += cnt + len;
348 }
349
350 /* we don't want/need to be waked up any more for sending
351 * because all ring content is sent */
352 fd_stop_send(fd);
353
354out:
355
Willy Tarreau4781b152021-04-06 13:53:36 +0200356 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100357 ofs += ring->ofs;
358 ns->dgram->ofs_req = ofs;
359 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
360
361}
362
Emeric Brunc9437992021-02-12 19:42:55 +0100363/* proto_udp callback functions for a DNS resolution */
364struct dgram_data_cb dns_dgram_cb = {
365 .recv = dns_resolve_recv,
366 .send = dns_resolve_send,
367};
Baptiste Assmann325137d2015-04-13 23:40:55 +0200368
Emeric Brunc9437992021-02-12 19:42:55 +0100369int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk)
Baptiste Assmann325137d2015-04-13 23:40:55 +0200370{
Emeric Brunc9437992021-02-12 19:42:55 +0100371 struct dns_dgram_server *dgram;
Baptiste Assmann201c07f2017-05-22 15:17:15 +0200372
Emeric Brunc9437992021-02-12 19:42:55 +0100373 if ((dgram = calloc(1, sizeof(*dgram))) == NULL)
Christopher Faulet67957bd2017-09-27 11:00:59 +0200374 return -1;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200375
Emeric Brunc9437992021-02-12 19:42:55 +0100376 /* Leave dgram partially initialized, no FD attached for
377 * now. */
378 dgram->conn.owner = ns;
379 dgram->conn.data = &dns_dgram_cb;
380 dgram->conn.t.sock.fd = -1;
381 dgram->conn.addr.to = *sk;
382 ns->dgram = dgram;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200383
Emeric Brunc9437992021-02-12 19:42:55 +0100384 dgram->ofs_req = ~0; /* init ring offset */
385 dgram->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
386 if (!dgram->ring_req) {
387 ha_alert("memory allocation error initializing the ring for nameserver.\n");
388 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200389 }
390
Emeric Brunc9437992021-02-12 19:42:55 +0100391 /* attach the task as reader */
392 if (!ring_attach(dgram->ring_req)) {
393 /* mark server attached to the ring */
394 ha_alert("nameserver sets too many watchers > 255 on ring. This is a bug and should not happen.\n");
395 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200396 }
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200397 return 0;
Emeric Brunc9437992021-02-12 19:42:55 +0100398out:
399 if (dgram->ring_req)
400 ring_free(dgram->ring_req);
Christopher Fauletd6c6b5f2020-09-08 10:27:24 +0200401
Emeric Brunc9437992021-02-12 19:42:55 +0100402 free(dgram);
Olivier Houchard2ec2db92018-01-08 16:28:57 +0100403
Emeric Brunfd647d52021-02-12 20:03:38 +0100404 return -1;
405}
406
407/*
408 * IO Handler to handle message push to dns tcp server
409 */
410static void dns_session_io_handler(struct appctx *appctx)
411{
Christopher Faulet908628c2022-03-25 16:43:49 +0100412 struct conn_stream *cs = appctx->owner;
Emeric Brunfd647d52021-02-12 20:03:38 +0100413 struct dns_session *ds = appctx->ctx.sft.ptr;
414 struct ring *ring = &ds->ring;
415 struct buffer *buf = &ring->buf;
416 uint64_t msg_len;
417 int available_room;
418 size_t len, cnt, ofs;
419 int ret = 0;
420
421 /* if stopping was requested, close immediately */
422 if (unlikely(stopping))
423 goto close;
424
425 /* we want to be sure to not miss that we have been awaked for a shutdown */
426 __ha_barrier_load();
427
428 /* that means the connection was requested to shutdown
429 * for instance idle expire */
430 if (ds->shutdown)
431 goto close;
432
433 /* an error was detected */
Christopher Faulet908628c2022-03-25 16:43:49 +0100434 if (unlikely(cs_ic(cs)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
Emeric Brunfd647d52021-02-12 20:03:38 +0100435 goto close;
436
437 /* con closed by server side, we will skip data write and drain data from channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100438 if ((cs_oc(cs)->flags & CF_SHUTW)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100439 goto read;
440 }
441
442 /* if the connection is not established, inform the stream that we want
443 * to be notified whenever the connection completes.
444 */
Christopher Faulet62e75742022-03-31 09:16:34 +0200445 if (cs_opposite(cs)->state < CS_ST_EST) {
Christopher Faulet908628c2022-03-25 16:43:49 +0100446 si_cant_get(cs->si);
447 si_rx_conn_blk(cs->si);
448 si_rx_endp_more(cs->si);
Emeric Brunfd647d52021-02-12 20:03:38 +0100449 return;
450 }
451
452
453 ofs = ds->ofs;
454
455 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
456 LIST_DEL_INIT(&appctx->wait_entry);
457 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
458
459 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
460
461 /* explanation for the initialization below: it would be better to do
462 * this in the parsing function but this would occasionally result in
463 * dropped events because we'd take a reference on the oldest message
464 * and keep it while being scheduled. Thus instead let's take it the
465 * first time we enter here so that we have a chance to pass many
466 * existing messages before grabbing a reference to a location. This
467 * value cannot be produced after initialization.
468 */
469 if (unlikely(ofs == ~0)) {
470 ofs = 0;
471
Willy Tarreau4781b152021-04-06 13:53:36 +0200472 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100473 ofs += ring->ofs;
474 }
475
476 /* in this loop, ofs always points to the counter byte that precedes
477 * the message so that we can take our reference there if we have to
478 * stop before the end (ret=0).
479 */
Christopher Faulet62e75742022-03-31 09:16:34 +0200480 if (cs_opposite(cs)->state == CS_ST_EST) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100481 /* we were already there, adjust the offset to be relative to
482 * the buffer's head and remove us from the counter.
483 */
484 ofs -= ring->ofs;
485 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200486 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100487
488 ret = 1;
489 while (ofs + 1 < b_data(buf)) {
490 struct dns_query *query;
491 uint16_t original_qid;
492 uint16_t new_qid;
493
494 cnt = 1;
495 len = b_peek_varint(buf, ofs + cnt, &msg_len);
496 if (!len)
497 break;
498 cnt += len;
499 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
500
501 /* retrieve available room on output channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100502 available_room = channel_recv_max(cs_ic(cs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100503
504 /* tx_msg_offset null means we are at the start of a new message */
505 if (!ds->tx_msg_offset) {
506 uint16_t slen;
507
508 /* check if there is enough room to put message len and query id */
509 if (available_room < sizeof(slen) + sizeof(new_qid)) {
Christopher Faulet908628c2022-03-25 16:43:49 +0100510 si_rx_room_blk(cs->si);
Emeric Brunfd647d52021-02-12 20:03:38 +0100511 ret = 0;
512 break;
513 }
514
515 /* put msg len into then channel */
516 slen = (uint16_t)msg_len;
517 slen = htons(slen);
Christopher Faulet908628c2022-03-25 16:43:49 +0100518 ci_putblk(cs_ic(cs), (char *)&slen, sizeof(slen));
Emeric Brunfd647d52021-02-12 20:03:38 +0100519 available_room -= sizeof(slen);
520
521 /* backup original query id */
522 len = b_getblk(buf, (char *)&original_qid, sizeof(original_qid), ofs + cnt);
Emeric Brun538bb042021-02-15 13:58:06 +0100523 if (!len) {
524 /* should never happen since messages are atomically
525 * written into ring
526 */
527 ret = 0;
528 break;
529 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100530
531 /* generates new query id */
532 new_qid = ++ds->query_counter;
533 new_qid = htons(new_qid);
534
535 /* put new query id into the channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100536 ci_putblk(cs_ic(cs), (char *)&new_qid, sizeof(new_qid));
Emeric Brunfd647d52021-02-12 20:03:38 +0100537 available_room -= sizeof(new_qid);
538
539 /* keep query id mapping */
540
541 query = pool_alloc(dns_query_pool);
542 if (query) {
543 query->qid.key = new_qid;
544 query->original_qid = original_qid;
545 query->expire = tick_add(now_ms, 5000);
546 LIST_INIT(&query->list);
547 if (LIST_ISEMPTY(&ds->queries)) {
548 /* enable task to handle expire */
549 ds->task_exp->expire = query->expire;
550 /* ensure this will be executed by the same
551 * thread than ds_session_release
552 * to ensure session_release is free
553 * to destroy the task */
554 task_queue(ds->task_exp);
555 }
Willy Tarreau2b718102021-04-21 07:32:39 +0200556 LIST_APPEND(&ds->queries, &query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100557 eb32_insert(&ds->query_ids, &query->qid);
558 ds->onfly_queries++;
559 }
560
561 /* update the tx_offset to handle output in 16k streams */
562 ds->tx_msg_offset = sizeof(original_qid);
563
564 }
565
566 /* check if it remains available room on output chan */
567 if (unlikely(!available_room)) {
Christopher Faulet908628c2022-03-25 16:43:49 +0100568 si_rx_room_blk(cs->si);
Emeric Brunfd647d52021-02-12 20:03:38 +0100569 ret = 0;
570 break;
571 }
572
573 chunk_reset(&trash);
574 if ((msg_len - ds->tx_msg_offset) > available_room) {
575 /* remaining msg data is too large to be written in output channel at one time */
576
577 len = b_getblk(buf, trash.area, available_room, ofs + cnt + ds->tx_msg_offset);
578
579 /* update offset to complete mesg forwarding later */
580 ds->tx_msg_offset += len;
581 }
582 else {
583 /* remaining msg data can be written in output channel at one time */
584 len = b_getblk(buf, trash.area, msg_len - ds->tx_msg_offset, ofs + cnt + ds->tx_msg_offset);
585
586 /* reset tx_msg_offset to mark forward fully processed */
587 ds->tx_msg_offset = 0;
588 }
589 trash.data += len;
590
Christopher Faulet908628c2022-03-25 16:43:49 +0100591 if (ci_putchk(cs_ic(cs), &trash) == -1) {
Emeric Brun743afee2021-02-15 14:12:06 +0100592 /* should never happen since we
593 * check available_room is large
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500594 * enough here.
Emeric Brun743afee2021-02-15 14:12:06 +0100595 */
Christopher Faulet908628c2022-03-25 16:43:49 +0100596 si_rx_room_blk(cs->si);
Emeric Brun743afee2021-02-15 14:12:06 +0100597 ret = 0;
598 break;
599 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100600
601 if (ds->tx_msg_offset) {
602 /* msg was not fully processed, we must be awake to drain pending data */
603
Christopher Faulet908628c2022-03-25 16:43:49 +0100604 si_rx_room_blk(cs->si);
Emeric Brunfd647d52021-02-12 20:03:38 +0100605 ret = 0;
606 break;
607 }
608 /* switch to next message */
609 ofs += cnt + msg_len;
610 }
611
Willy Tarreau4781b152021-04-06 13:53:36 +0200612 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100613 ofs += ring->ofs;
614 ds->ofs = ofs;
615 }
616 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
617
618 if (ret) {
619 /* let's be woken up once new request to write arrived */
620 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
Willy Tarreau62e467c2021-10-20 11:02:13 +0200621 BUG_ON(LIST_INLIST(&appctx->wait_entry));
Willy Tarreau2b718102021-04-21 07:32:39 +0200622 LIST_APPEND(&ring->waiters, &appctx->wait_entry);
Emeric Brunfd647d52021-02-12 20:03:38 +0100623 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
Christopher Faulet908628c2022-03-25 16:43:49 +0100624 si_rx_endp_done(cs->si);
Emeric Brunfd647d52021-02-12 20:03:38 +0100625 }
626
627read:
628
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500629 /* if session is not a waiter it means there is no committed
Emeric Brunfd647d52021-02-12 20:03:38 +0100630 * message into rx_buf and we are free to use it
631 * Note: we need a load barrier here to not miss the
632 * delete from the list
633 */
Emeric Brun70455902021-10-20 10:49:53 +0200634
Willy Tarreaudde1b442021-10-21 14:33:38 +0200635 __ha_barrier_load();
636 if (!LIST_INLIST_ATOMIC(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100637 while (1) {
638 uint16_t query_id;
639 struct eb32_node *eb;
640 struct dns_query *query;
641
642 if (!ds->rx_msg.len) {
643 /* next message len is not fully available into the channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100644 if (co_data(cs_oc(cs)) < 2)
Emeric Brunfd647d52021-02-12 20:03:38 +0100645 break;
646
647 /* retrieve message len */
Christopher Faulet908628c2022-03-25 16:43:49 +0100648 co_getblk(cs_oc(cs), (char *)&msg_len, 2, 0);
Emeric Brunfd647d52021-02-12 20:03:38 +0100649
650 /* mark as consumed */
Christopher Faulet908628c2022-03-25 16:43:49 +0100651 co_skip(cs_oc(cs), 2);
Emeric Brunfd647d52021-02-12 20:03:38 +0100652
653 /* store message len */
654 ds->rx_msg.len = ntohs(msg_len);
655 }
656
Christopher Faulet908628c2022-03-25 16:43:49 +0100657 if (!co_data(cs_oc(cs))) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100658 /* we need more data but nothing is available */
659 break;
660 }
661
Christopher Faulet908628c2022-03-25 16:43:49 +0100662 if (co_data(cs_oc(cs)) + ds->rx_msg.offset < ds->rx_msg.len) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100663 /* message only partially available */
664
665 /* read available data */
Christopher Faulet908628c2022-03-25 16:43:49 +0100666 co_getblk(cs_oc(cs), ds->rx_msg.area + ds->rx_msg.offset, co_data(cs_oc(cs)), 0);
Emeric Brunfd647d52021-02-12 20:03:38 +0100667
668 /* update message offset */
Christopher Faulet908628c2022-03-25 16:43:49 +0100669 ds->rx_msg.offset += co_data(cs_oc(cs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100670
671 /* consume all pending data from the channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100672 co_skip(cs_oc(cs), co_data(cs_oc(cs)));
Emeric Brunfd647d52021-02-12 20:03:38 +0100673
674 /* we need to wait for more data */
675 break;
676 }
677
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500678 /* enough data is available into the channel to read the message until the end */
Emeric Brunfd647d52021-02-12 20:03:38 +0100679
680 /* read from the channel until the end of the message */
Christopher Faulet908628c2022-03-25 16:43:49 +0100681 co_getblk(cs_oc(cs), ds->rx_msg.area + ds->rx_msg.offset, ds->rx_msg.len - ds->rx_msg.offset, 0);
Emeric Brunfd647d52021-02-12 20:03:38 +0100682
683 /* consume all data until the end of the message from the channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100684 co_skip(cs_oc(cs), ds->rx_msg.len - ds->rx_msg.offset);
Emeric Brunfd647d52021-02-12 20:03:38 +0100685
686 /* reset reader offset to 0 for next message reand */
687 ds->rx_msg.offset = 0;
688
689 /* try remap query id to original */
690 memcpy(&query_id, ds->rx_msg.area, sizeof(query_id));
691 eb = eb32_lookup(&ds->query_ids, query_id);
692 if (!eb) {
693 /* query id not found means we have an unknown corresponding
694 * request, perhaps server's bug or or the query reached
695 * timeout
696 */
697 ds->rx_msg.len = 0;
698 continue;
699 }
700
701 /* re-map the original query id set by the requester */
702 query = eb32_entry(eb, struct dns_query, qid);
703 memcpy(ds->rx_msg.area, &query->original_qid, sizeof(query->original_qid));
704
705 /* remove query ids mapping from pending queries list/tree */
706 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200707 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100708 pool_free(dns_query_pool, query);
709 ds->onfly_queries--;
710
Emeric Brunfd647d52021-02-12 20:03:38 +0100711 /* the dns_session is also added in queue of the
712 * wait_sess list where the task processing
713 * response will pop available responses
714 */
Willy Tarreaudde1b442021-10-21 14:33:38 +0200715 HA_SPIN_LOCK(DNS_LOCK, &ds->dss->lock);
716
Willy Tarreau62e467c2021-10-20 11:02:13 +0200717 BUG_ON(LIST_INLIST(&ds->waiter));
Willy Tarreau2b718102021-04-21 07:32:39 +0200718 LIST_APPEND(&ds->dss->wait_sess, &ds->waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100719
Willy Tarreaudde1b442021-10-21 14:33:38 +0200720 HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
721
Emeric Brunfd647d52021-02-12 20:03:38 +0100722 /* awake the task processing the responses */
723 task_wakeup(ds->dss->task_rsp, TASK_WOKEN_INIT);
724
725 break;
726 }
727
Willy Tarreau2b718102021-04-21 07:32:39 +0200728 if (!LIST_INLIST(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100729 /* there is no more pending data to read and the con was closed by the server side */
Christopher Faulet908628c2022-03-25 16:43:49 +0100730 if (!co_data(cs_oc(cs)) && (cs_oc(cs)->flags & CF_SHUTW)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100731 goto close;
732 }
733 }
734
735 }
736
Emeric Brunfd647d52021-02-12 20:03:38 +0100737 return;
738close:
Christopher Faulet908628c2022-03-25 16:43:49 +0100739 si_shutw(cs->si);
740 si_shutr(cs->si);
741 cs_ic(cs)->flags |= CF_READ_NULL;
Emeric Brunfd647d52021-02-12 20:03:38 +0100742}
743
744void dns_queries_flush(struct dns_session *ds)
745{
746 struct dns_query *query, *queryb;
747
748 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
749 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200750 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100751 pool_free(dns_query_pool, query);
752 }
753}
754
755void dns_session_free(struct dns_session *ds)
756{
757 if (ds->rx_msg.area)
758 pool_free(dns_msg_buf, ds->rx_msg.area);
759 if (ds->tx_ring_area)
760 pool_free(dns_msg_buf, ds->tx_ring_area);
761 if (ds->task_exp)
762 task_destroy(ds->task_exp);
763
764 dns_queries_flush(ds);
765
Emeric Brund20dc212021-10-19 15:40:10 +0200766 /* Ensure to remove this session from external lists
767 * Note: we are under the lock of dns_stream_server
768 * which own the heads of those lists.
769 */
770 LIST_DEL_INIT(&ds->waiter);
771 LIST_DEL_INIT(&ds->list);
772
Emeric Brunfd647d52021-02-12 20:03:38 +0100773 ds->dss->cur_conns--;
774 /* Note: this is useless to update
775 * max_active_conns here because
776 * we decrease the value
777 */
Willy Tarreau62e467c2021-10-20 11:02:13 +0200778
779 BUG_ON(!LIST_ISEMPTY(&ds->list));
780 BUG_ON(!LIST_ISEMPTY(&ds->waiter));
781 BUG_ON(!LIST_ISEMPTY(&ds->queries));
782 BUG_ON(!LIST_ISEMPTY(&ds->ring.waiters));
783 BUG_ON(!eb_is_empty(&ds->query_ids));
Emeric Brunfd647d52021-02-12 20:03:38 +0100784 pool_free(dns_session_pool, ds);
785}
786
787static struct appctx *dns_session_create(struct dns_session *ds);
788
789/*
790 * Function to release a DNS tcp session
791 */
792static void dns_session_release(struct appctx *appctx)
793{
794 struct dns_session *ds = appctx->ctx.sft.ptr;
Willy Tarreaue3e648c2021-02-24 17:38:46 +0100795 struct dns_stream_server *dss __maybe_unused;
Emeric Brunfd647d52021-02-12 20:03:38 +0100796
797 if (!ds)
798 return;
799
Willy Tarreaub56a8782021-10-20 14:38:43 +0200800 /* We do not call ring_appctx_detach here
801 * because we want to keep readers counters
802 * to retry a conn with a different appctx.
803 */
804 HA_RWLOCK_WRLOCK(DNS_LOCK, &ds->ring.lock);
805 LIST_DEL_INIT(&appctx->wait_entry);
806 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ds->ring.lock);
807
Emeric Brunfd647d52021-02-12 20:03:38 +0100808 dss = ds->dss;
809
810 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
811 LIST_DEL_INIT(&ds->list);
812
813 if (stopping) {
814 dns_session_free(ds);
815 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
816 return;
817 }
818
819 if (!ds->nb_queries) {
820 /* this is an idle session */
821 /* Note: this is useless to update max_active_sess
822 * here because we decrease idle_conns but
823 * dns_session_free decrease curconns
824 */
825
826 ds->dss->idle_conns--;
827 dns_session_free(ds);
828 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
829 return;
830 }
831
832 if (ds->onfly_queries == ds->nb_queries) {
833 /* the session can be released because
834 * it means that all queries AND
835 * responses are in fly */
836 dns_session_free(ds);
837 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
838 return;
839 }
840
Emeric Brunfd647d52021-02-12 20:03:38 +0100841 /* if there is no pending complete response
842 * message, ensure to reset
843 * message offsets if the session
844 * was closed with an incomplete pending response
845 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200846 if (!LIST_INLIST(&ds->waiter))
Emeric Brunfd647d52021-02-12 20:03:38 +0100847 ds->rx_msg.len = ds->rx_msg.offset = 0;
848
849 /* we flush pending sent queries because we never
850 * have responses
851 */
852 ds->nb_queries -= ds->onfly_queries;
853 dns_queries_flush(ds);
854
855 /* reset offset to be sure to start from message start */
856 ds->tx_msg_offset = 0;
857
858 /* here the ofs and the attached counter
859 * are kept unchanged
860 */
861
862 /* Create a new appctx, We hope we can
863 * create from the release callback! */
864 ds->appctx = dns_session_create(ds);
865 if (!ds->appctx) {
866 dns_session_free(ds);
867 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
868 return;
869 }
870
871 if (ds->nb_queries < DNS_STREAM_MAX_PIPELINED_REQ)
Willy Tarreau2b718102021-04-21 07:32:39 +0200872 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100873
874 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
875}
876
877/* DNS tcp session applet */
878static struct applet dns_session_applet = {
879 .obj_type = OBJ_TYPE_APPLET,
880 .name = "<STRMDNS>", /* used for logging */
881 .fct = dns_session_io_handler,
882 .release = dns_session_release,
883};
884
885/*
886 * Function used to create an appctx for a DNS session
887 */
888static struct appctx *dns_session_create(struct dns_session *ds)
889{
890 struct appctx *appctx;
891 struct session *sess;
Christopher Faulet13a35e52021-12-20 15:34:16 +0100892 struct conn_stream *cs;
Emeric Brunfd647d52021-02-12 20:03:38 +0100893 struct stream *s;
894 struct applet *applet = &dns_session_applet;
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100895 struct sockaddr_storage *addr = NULL;
Emeric Brunfd647d52021-02-12 20:03:38 +0100896
Christopher Faulet9ec2f4d2022-03-23 15:15:29 +0100897 appctx = appctx_new(applet, NULL);
Christopher Faulet2479e5f2022-01-19 14:50:11 +0100898 if (!appctx)
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100899 goto out_close;
Emeric Brunfd647d52021-02-12 20:03:38 +0100900 appctx->ctx.sft.ptr = (void *)ds;
901
902 sess = session_new(ds->dss->srv->proxy, NULL, &appctx->obj_type);
903 if (!sess) {
Christopher Faulet13a35e52021-12-20 15:34:16 +0100904 ha_alert("out of memory in dns_session_create().\n");
Emeric Brunfd647d52021-02-12 20:03:38 +0100905 goto out_free_appctx;
906 }
907
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100908 if (!sockaddr_alloc(&addr, &ds->dss->srv->addr, sizeof(ds->dss->srv->addr)))
Christopher Faulet2479e5f2022-01-19 14:50:11 +0100909 goto out_free_sess;
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100910
Christopher Faulet9ec2f4d2022-03-23 15:15:29 +0100911 cs = cs_new_from_applet(appctx->endp, sess, &BUF_NULL);
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100912 if (!cs) {
913 ha_alert("Failed to initialize stream in dns_session_create().\n");
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100914 goto out_free_addr;
Christopher Faulet13a35e52021-12-20 15:34:16 +0100915 }
916
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100917 s = DISGUISE(cs_strm(cs));
Christopher Faulet8da67aa2022-03-29 17:53:09 +0200918 s->csb->dst = addr;
Christopher Faulet8abe7122022-03-30 15:10:18 +0200919 s->csb->flags |= CS_FL_NOLINGER;
Emeric Brunfd647d52021-02-12 20:03:38 +0100920 s->target = &ds->dss->srv->obj_type;
Emeric Brunfd647d52021-02-12 20:03:38 +0100921 s->flags = SF_ASSIGNED|SF_ADDR_SET;
Emeric Brunfd647d52021-02-12 20:03:38 +0100922
923 s->do_log = NULL;
924 s->uniq_id = 0;
925
926 s->res.flags |= CF_READ_DONTWAIT;
927 /* for rto and rex to eternity to not expire on idle recv:
928 * We are using a syslog server.
929 */
930 s->res.rto = TICK_ETERNITY;
931 s->res.rex = TICK_ETERNITY;
932 ds->appctx = appctx;
Emeric Brunfd647d52021-02-12 20:03:38 +0100933 return appctx;
934
935 /* Error unrolling */
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100936 out_free_addr:
937 sockaddr_free(&addr);
Emeric Brunfd647d52021-02-12 20:03:38 +0100938 out_free_sess:
939 session_free(sess);
940 out_free_appctx:
941 appctx_free(appctx);
942 out_close:
943 return NULL;
944}
945
946/* Task processing expiration of unresponded queries, this one is supposed
947 * to be stuck on the same thread than the appctx handler
948 */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100949static struct task *dns_process_query_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100950{
951 struct dns_session *ds = (struct dns_session *)context;
952 struct dns_query *query, *queryb;
953
954 t->expire = TICK_ETERNITY;
955
956 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
957 if (tick_is_expired(query->expire, now_ms)) {
958 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200959 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100960 pool_free(dns_query_pool, query);
961 ds->onfly_queries--;
962 }
963 else {
964 t->expire = query->expire;
965 break;
966 }
967 }
968
969 return t;
970}
971
972/* Task processing expiration of idle sessions */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100973static struct task *dns_process_idle_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100974{
975 struct dns_stream_server *dss = (struct dns_stream_server *)context;
976 struct dns_session *ds, *dsb;
977 int target = 0;
978 int cur_active_conns;
979
980 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
981
982
983 cur_active_conns = dss->cur_conns - dss->idle_conns;
984 if (cur_active_conns > dss->max_active_conns)
985 dss->max_active_conns = cur_active_conns;
986
987 target = (dss->max_active_conns - cur_active_conns) / 2;
988 list_for_each_entry_safe(ds, dsb, &dss->idle_sess, list) {
989 if (!target)
990 break;
991
992 /* remove conn to pending list to ensure it won't be reused */
993 LIST_DEL_INIT(&ds->list);
994
995 /* force session shutdown */
996 ds->shutdown = 1;
997
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500998 /* to be sure that the appctx won't miss shutdown */
Emeric Brunfd647d52021-02-12 20:03:38 +0100999 __ha_barrier_store();
1000
1001 /* wake appctx to perform the shutdown */
1002 appctx_wakeup(ds->appctx);
1003 }
1004
1005 /* reset max to current active conns */
1006 dss->max_active_conns = cur_active_conns;
1007
1008 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
1009
1010 t->expire = tick_add(now_ms, 5000);
1011
1012 return t;
1013}
1014
1015struct dns_session *dns_session_new(struct dns_stream_server *dss)
1016{
1017 struct dns_session *ds;
1018
1019 if (dss->maxconn && (dss->maxconn <= dss->cur_conns))
1020 return NULL;
1021
1022 ds = pool_alloc(dns_session_pool);
1023 if (!ds)
1024 return NULL;
1025
1026 ds->ofs = ~0;
1027 ds->dss = dss;
1028 LIST_INIT(&ds->list);
1029 LIST_INIT(&ds->queries);
1030 LIST_INIT(&ds->waiter);
1031 ds->rx_msg.offset = ds->rx_msg.len = 0;
1032 ds->rx_msg.area = NULL;
1033 ds->tx_ring_area = NULL;
1034 ds->task_exp = NULL;
1035 ds->appctx = NULL;
1036 ds->shutdown = 0;
1037 ds->nb_queries = 0;
1038 ds->query_ids = EB_ROOT_UNIQUE;
1039 ds->rx_msg.area = pool_alloc(dns_msg_buf);
1040 if (!ds->rx_msg.area)
1041 goto error;
1042
1043 ds->tx_ring_area = pool_alloc(dns_msg_buf);
1044 if (!ds->tx_ring_area)
1045 goto error;
1046
1047 ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE);
Christopher Faulet1a1b6742021-03-04 16:53:27 +01001048 /* never fail because it is the first watcher attached to the ring */
1049 DISGUISE(ring_attach(&ds->ring));
Emeric Brunfd647d52021-02-12 20:03:38 +01001050
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001051 if ((ds->task_exp = task_new_here()) == NULL)
Emeric Brunfd647d52021-02-12 20:03:38 +01001052 goto error;
1053
1054 ds->task_exp->process = dns_process_query_exp;
1055 ds->task_exp->context = ds;
1056
1057 ds->appctx = dns_session_create(ds);
1058 if (!ds->appctx)
1059 goto error;
1060
1061 dss->cur_conns++;
1062
1063 return ds;
1064
1065error:
1066 if (ds->task_exp)
1067 task_destroy(ds->task_exp);
1068 if (ds->rx_msg.area)
1069 pool_free(dns_msg_buf, ds->rx_msg.area);
1070 if (ds->tx_ring_area)
1071 pool_free(dns_msg_buf, ds->tx_ring_area);
1072
1073 pool_free(dns_session_pool, ds);
1074
1075 return NULL;
1076}
1077
1078/*
1079 * Task used to consume pending messages from nameserver ring
1080 * and forward them to dns_session ring.
1081 * Note: If no slot found a new dns_session is allocated
1082 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001083static struct task *dns_process_req(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001084{
1085 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1086 struct dns_stream_server *dss = ns->stream;
1087 struct ring *ring = dss->ring_req;
1088 struct buffer *buf = &ring->buf;
1089 uint64_t msg_len;
1090 size_t len, cnt, ofs;
1091 struct dns_session *ds, *ads;
1092 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
1093
1094 ofs = dss->ofs_req;
1095
1096 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
1097
1098 /* explanation for the initialization below: it would be better to do
1099 * this in the parsing function but this would occasionally result in
1100 * dropped events because we'd take a reference on the oldest message
1101 * and keep it while being scheduled. Thus instead let's take it the
1102 * first time we enter here so that we have a chance to pass many
1103 * existing messages before grabbing a reference to a location. This
1104 * value cannot be produced after initialization.
1105 */
1106 if (unlikely(ofs == ~0)) {
1107 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +02001108 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001109 ofs += ring->ofs;
1110 }
1111
1112 /* we were already there, adjust the offset to be relative to
1113 * the buffer's head and remove us from the counter.
1114 */
1115 ofs -= ring->ofs;
1116 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +02001117 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001118
1119 while (ofs + 1 < b_data(buf)) {
1120 struct ist myist;
1121
1122 cnt = 1;
1123 len = b_peek_varint(buf, ofs + cnt, &msg_len);
1124 if (!len)
1125 break;
1126 cnt += len;
1127 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
1128 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
1129 /* too large a message to ever fit, let's skip it */
1130 ofs += cnt + msg_len;
1131 continue;
1132 }
1133
1134 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
1135
Tim Duesterhus92c696e2021-02-28 16:11:36 +01001136 myist = ist2(dns_msg_trash, len);
Emeric Brunfd647d52021-02-12 20:03:38 +01001137
1138 ads = NULL;
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001139 /* try to push request into active sess with free slot */
Emeric Brunfd647d52021-02-12 20:03:38 +01001140 if (!LIST_ISEMPTY(&dss->free_sess)) {
1141 ds = LIST_NEXT(&dss->free_sess, struct dns_session *, list);
1142
1143 if (ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) {
1144 ds->nb_queries++;
1145 if (ds->nb_queries >= DNS_STREAM_MAX_PIPELINED_REQ)
1146 LIST_DEL_INIT(&ds->list);
1147 ads = ds;
1148 }
1149 else {
1150 /* it means we were unable to put a request in this slot,
1151 * it may be close to be full so we put it at the end
1152 * of free conn list */
1153 LIST_DEL_INIT(&ds->list);
Willy Tarreau2b718102021-04-21 07:32:39 +02001154 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001155 }
1156 }
1157
1158 if (!ads) {
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001159 /* try to push request into idle, this one should have enough free space */
Emeric Brunfd647d52021-02-12 20:03:38 +01001160 if (!LIST_ISEMPTY(&dss->idle_sess)) {
1161 ds = LIST_NEXT(&dss->idle_sess, struct dns_session *, list);
1162
1163 /* ring is empty so this ring_write should never fail */
1164 ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1165 ds->nb_queries++;
1166 LIST_DEL_INIT(&ds->list);
1167
1168 ds->dss->idle_conns--;
1169
1170 /* we may have to update the max_active_conns */
1171 if (ds->dss->max_active_conns < ds->dss->cur_conns - ds->dss->idle_conns)
1172 ds->dss->max_active_conns = ds->dss->cur_conns - ds->dss->idle_conns;
1173
1174 /* since we may unable to find a free list to handle
1175 * this request, this request may be large and fill
1176 * the ring buffer so we prefer to put at the end of free
1177 * list. */
Willy Tarreau2b718102021-04-21 07:32:39 +02001178 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001179 ads = ds;
1180 }
1181 }
1182
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001183 /* we didn't find a session available with large enough room */
Emeric Brunfd647d52021-02-12 20:03:38 +01001184 if (!ads) {
1185 /* allocate a new session */
1186 ads = dns_session_new(dss);
1187 if (ads) {
1188 /* ring is empty so this ring_write should never fail */
1189 ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1190 ads->nb_queries++;
Willy Tarreau2b718102021-04-21 07:32:39 +02001191 LIST_INSERT(&dss->free_sess, &ads->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001192 }
1193 else
1194 ns->counters->snd_error++;
1195 }
1196
1197 if (ads)
1198 ns->counters->sent++;
1199
1200 ofs += cnt + len;
1201 }
1202
Willy Tarreau4781b152021-04-06 13:53:36 +02001203 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001204 ofs += ring->ofs;
1205 dss->ofs_req = ofs;
1206 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
1207
1208
1209 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
1210 return t;
1211}
1212
1213/*
1214 * Task used to consume response
1215 * Note: upper layer callback is called
1216 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001217static struct task *dns_process_rsp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001218{
1219 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1220
1221 ns->process_responses(ns);
1222
1223 return t;
1224}
1225
1226/* Function used to initialize an TCP nameserver */
1227int dns_stream_init(struct dns_nameserver *ns, struct server *srv)
1228{
1229 struct dns_stream_server *dss = NULL;
1230
1231 dss = calloc(1, sizeof(*dss));
1232 if (!dss) {
1233 ha_alert("memory allocation error initializing dns tcp server '%s'.\n", srv->id);
1234 goto out;
1235 }
1236
1237 dss->srv = srv;
1238 dss->maxconn = srv->maxconn;
1239
1240 dss->ofs_req = ~0; /* init ring offset */
1241 dss->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
1242 if (!dss->ring_req) {
1243 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1244 goto out;
1245 }
1246 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001247 if ((dss->task_req = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001248 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1249 goto out;
1250 }
1251
1252 /* Update task's parameters */
1253 dss->task_req->process = dns_process_req;
1254 dss->task_req->context = ns;
1255
1256 /* attach the task as reader */
1257 if (!ring_attach(dss->ring_req)) {
1258 /* mark server attached to the ring */
1259 ha_alert("server '%s': too many watchers for ring. this should never happen.\n", srv->id);
1260 goto out;
1261 }
1262
1263 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001264 if ((dss->task_rsp = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001265 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1266 goto out;
1267 }
1268
1269 /* Update task's parameters */
1270 dss->task_rsp->process = dns_process_rsp;
1271 dss->task_rsp->context = ns;
1272
1273 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001274 if ((dss->task_idle = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001275 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1276 goto out;
1277 }
1278
1279 /* Update task's parameters */
1280 dss->task_idle->process = dns_process_idle_exp;
1281 dss->task_idle->context = dss;
1282 dss->task_idle->expire = tick_add(now_ms, 5000);
1283
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001284 /* let start the task to free idle conns immediately */
Emeric Brunfd647d52021-02-12 20:03:38 +01001285 task_queue(dss->task_idle);
1286
1287 LIST_INIT(&dss->free_sess);
1288 LIST_INIT(&dss->idle_sess);
1289 LIST_INIT(&dss->wait_sess);
1290 HA_SPIN_INIT(&dss->lock);
1291 ns->stream = dss;
1292 return 0;
1293out:
1294 if (dss && dss->task_rsp)
1295 task_destroy(dss->task_rsp);
1296 if (dss && dss->task_req)
1297 task_destroy(dss->task_req);
1298 if (dss && dss->ring_req)
1299 ring_free(dss->ring_req);
1300
1301 free(dss);
Emeric Brunc9437992021-02-12 19:42:55 +01001302 return -1;
Christopher Faulet67957bd2017-09-27 11:00:59 +02001303}
1304
Emeric Brunc9437992021-02-12 19:42:55 +01001305int init_dns_buffers()
Baptiste Assmann325137d2015-04-13 23:40:55 +02001306{
Emeric Brunc9437992021-02-12 19:42:55 +01001307 dns_msg_trash = malloc(DNS_TCP_MSG_MAX_SIZE);
1308 if (!dns_msg_trash)
1309 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +02001310
Emeric Brunc9437992021-02-12 19:42:55 +01001311 return 1;
1312}
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +02001313
Emeric Brunc9437992021-02-12 19:42:55 +01001314void deinit_dns_buffers()
1315{
Willy Tarreau61cfdf42021-02-20 10:46:51 +01001316 ha_free(&dns_msg_trash);
Emeric Brunc9437992021-02-12 19:42:55 +01001317}
Emeric Brund26a6232021-01-04 13:32:20 +01001318
1319REGISTER_PER_THREAD_ALLOC(init_dns_buffers);
1320REGISTER_PER_THREAD_FREE(deinit_dns_buffers);