blob: fa6f2b907363f324403fca3d577270bdf37dcbcb [file] [log] [blame]
Baptiste Assmann325137d2015-04-13 23:40:55 +02001/*
2 * Name server resolution
3 *
Willy Tarreau714f3452021-05-09 06:47:26 +02004 * Copyright 2020 HAProxy Technologies
Baptiste Assmann325137d2015-04-13 23:40:55 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <unistd.h>
19
20#include <sys/types.h>
21
Willy Tarreau122eba92020-06-04 10:15:32 +020022#include <haproxy/action.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020023#include <haproxy/api.h>
Willy Tarreau6be78492020-06-05 00:00:29 +020024#include <haproxy/cfgparse.h>
Willy Tarreauf1d32c42020-06-04 21:07:02 +020025#include <haproxy/channel.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020026#include <haproxy/check.h>
Willy Tarreau83487a82020-06-04 20:19:54 +020027#include <haproxy/cli.h>
Willy Tarreau7c18b542020-06-11 09:23:02 +020028#include <haproxy/dgram.h>
Willy Tarreaueb92deb2020-06-04 10:53:16 +020029#include <haproxy/dns.h>
Willy Tarreau8d366972020-05-27 16:10:29 +020030#include <haproxy/errors.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020031#include <haproxy/fd.h>
Willy Tarreauaeed4a82020-06-04 22:01:04 +020032#include <haproxy/log.h>
Emeric Brund26a6232021-01-04 13:32:20 +010033#include <haproxy/ring.h>
Emeric Brunfd647d52021-02-12 20:03:38 +010034#include <haproxy/stream.h>
35#include <haproxy/stream_interface.h>
Willy Tarreau9f9e9fc2021-05-08 13:09:46 +020036#include <haproxy/tools.h>
Baptiste Assmann325137d2015-04-13 23:40:55 +020037
/* Per-thread scratch area: dns_resolve_send() copies each queued message into
 * it before passing it to send(). Its allocation is not visible in this part
 * of the file.
 */
static THREAD_LOCAL char *dns_msg_trash;

/* Memory pools of this file: one with entries sized for a struct dns_session,
 * one for a struct dns_query, and one with entries of
 * DNS_TCP_MSG_RING_MAX_SIZE bytes (dns_session_free() releases a session's
 * rx_msg.area and tx_ring_area to the latter).
 */
DECLARE_STATIC_POOL(dns_session_pool, "dns_session", sizeof(struct dns_session));
DECLARE_STATIC_POOL(dns_query_pool, "dns_query", sizeof(struct dns_query));
DECLARE_STATIC_POOL(dns_msg_buf, "dns_msg_buf", DNS_TCP_MSG_RING_MAX_SIZE);
43
Christopher Faulet67957bd2017-09-27 11:00:59 +020044/* Opens an UDP socket on the namesaver's IP/Port, if required. Returns 0 on
Christopher Faulet1e711be2021-03-04 16:58:35 +010045 * success, -1 otherwise. ns->dgram must be defined.
Baptiste Assmann325137d2015-04-13 23:40:55 +020046 */
Emeric Brund26a6232021-01-04 13:32:20 +010047static int dns_connect_nameserver(struct dns_nameserver *ns)
Baptiste Assmann325137d2015-04-13 23:40:55 +020048{
Christopher Faulet1e711be2021-03-04 16:58:35 +010049 struct dgram_conn *dgram = &ns->dgram->conn;
50 int fd;
Baptiste Assmann325137d2015-04-13 23:40:55 +020051
Christopher Faulet1e711be2021-03-04 16:58:35 +010052 /* Already connected */
53 if (dgram->t.sock.fd != -1)
Emeric Brun526b7922021-02-15 14:28:27 +010054 return 0;
Christopher Faulet1e711be2021-03-04 16:58:35 +010055
56 /* Create an UDP socket and connect it on the nameserver's IP/Port */
57 if ((fd = socket(dgram->addr.to.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
58 send_log(NULL, LOG_WARNING,
59 "DNS : section '%s': can't create socket for nameserver '%s'.\n",
60 ns->counters->pid, ns->id);
61 return -1;
62 }
63 if (connect(fd, (struct sockaddr*)&dgram->addr.to, get_addr_len(&dgram->addr.to)) == -1) {
64 send_log(NULL, LOG_WARNING,
65 "DNS : section '%s': can't connect socket for nameserver '%s'.\n",
66 ns->counters->id, ns->id);
67 close(fd);
68 return -1;
Emeric Brunc9437992021-02-12 19:42:55 +010069 }
Emeric Brun526b7922021-02-15 14:28:27 +010070
Christopher Faulet1e711be2021-03-04 16:58:35 +010071 /* Make the socket non blocking */
72 fcntl(fd, F_SETFL, O_NONBLOCK);
73
74 /* Add the fd in the fd list and update its parameters */
75 dgram->t.sock.fd = fd;
76 fd_insert(fd, dgram, dgram_fd_handler, MAX_THREADS_MASK);
77 fd_want_recv(fd);
78 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +020079}
80
Emeric Brund26a6232021-01-04 13:32:20 +010081/* Sends a message to a name server
82 * It returns message length on success
83 * or -1 in error case
84 * 0 is returned in case of output ring buffer is full
85 */
86int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len)
87{
88 int ret = -1;
89
90 if (ns->dgram) {
91 struct dgram_conn *dgram = &ns->dgram->conn;
92 int fd = dgram->t.sock.fd;
93
94 if (dgram->t.sock.fd == -1) {
95 if (dns_connect_nameserver(ns) == -1)
96 return -1;
97 fd = dgram->t.sock.fd;
98 }
99
100 ret = send(fd, buf, len, 0);
101 if (ret < 0) {
102 if (errno == EAGAIN) {
103 struct ist myist;
104
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100105 myist = ist2(buf, len);
Emeric Brund26a6232021-01-04 13:32:20 +0100106 ret = ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
107 if (!ret) {
108 ns->counters->snd_error++;
109 return -1;
110 }
111 fd_cant_send(fd);
112 return ret;
113 }
114 ns->counters->snd_error++;
115 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100116 dgram->t.sock.fd = -1;
117 return -1;
118 }
119 ns->counters->sent++;
120 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100121 else if (ns->stream) {
122 struct ist myist;
123
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100124 myist = ist2(buf, len);
Emeric Brunfd647d52021-02-12 20:03:38 +0100125 ret = ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
126 if (!ret) {
127 ns->counters->snd_error++;
128 return -1;
129 }
130 task_wakeup(ns->stream->task_req, TASK_WOKEN_MSG);
131 return ret;
132 }
Emeric Brund26a6232021-01-04 13:32:20 +0100133
134 return ret;
135}
136
/* forward declaration: dns_recv_nameserver() calls it before its definition */
void dns_session_free(struct dns_session *);
138
Emeric Brund26a6232021-01-04 13:32:20 +0100139/* Receives a dns message
140 * Returns message length
141 * 0 is returned if no more message available
142 * -1 in error case
143 */
144ssize_t dns_recv_nameserver(struct dns_nameserver *ns, void *data, size_t size)
145{
146 ssize_t ret = -1;
147
148 if (ns->dgram) {
149 struct dgram_conn *dgram = &ns->dgram->conn;
150 int fd = dgram->t.sock.fd;
151
152 if (fd == -1)
153 return -1;
154
155 if ((ret = recv(fd, data, size, 0)) < 0) {
156 if (errno == EAGAIN) {
157 fd_cant_recv(fd);
158 return 0;
159 }
160 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100161 dgram->t.sock.fd = -1;
162 return -1;
163 }
164 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100165 else if (ns->stream) {
166 struct dns_stream_server *dss = ns->stream;
167 struct dns_session *ds;
168
169 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
170
171 if (!LIST_ISEMPTY(&dss->wait_sess)) {
172 ds = LIST_NEXT(&dss->wait_sess, struct dns_session *, waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100173 ret = ds->rx_msg.len < size ? ds->rx_msg.len : size;
174 memcpy(data, ds->rx_msg.area, ret);
175
176 ds->rx_msg.len = 0;
177
178 /* This barrier is here to ensure that all data is
179 * stored if the appctx detect the elem is out of the list */
180 __ha_barrier_store();
181
182 LIST_DEL_INIT(&ds->waiter);
183
184 if (ds->appctx) {
185 /* This second barrier is here to ensure that
186 * the waked up appctx won't miss that the
187 * elem is removed from the list */
188 __ha_barrier_store();
189
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500190 /* awake appctx because it may have other
Emeric Brunfd647d52021-02-12 20:03:38 +0100191 * message to receive
192 */
193 appctx_wakeup(ds->appctx);
194
195 /* dns_session could already be into free_sess list
196 * so we firstly remove it */
197 LIST_DEL_INIT(&ds->list);
198
199 /* decrease nb_queries to free a slot for a new query on that sess */
200 ds->nb_queries--;
201 if (ds->nb_queries) {
202 /* it remains pipelined unanswered request
203 * into this session but we just decrease
204 * the counter so the session
205 * can not be full of pipelined requests
206 * so we can add if to free_sess list
207 * to receive a new request
208 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200209 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100210 }
211 else {
212 /* there is no more pipelined requests
213 * into this session, so we move it
214 * to idle_sess list */
Willy Tarreau2b718102021-04-21 07:32:39 +0200215 LIST_INSERT(&ds->dss->idle_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100216
217 /* update the counter of idle sessions */
218 ds->dss->idle_conns++;
219
220 /* Note: this is useless there to update
221 * the max_active_conns since we increase
222 * the idle count */
223 }
224 }
225 else {
226 /* there is no more appctx for this session
227 * it means it is ready to die
228 */
229 dns_session_free(ds);
230 }
231
232
233 }
234
235 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
236 }
Emeric Brund26a6232021-01-04 13:32:20 +0100237
238 return ret;
239}
240
241static void dns_resolve_recv(struct dgram_conn *dgram)
242{
243 struct dns_nameserver *ns;
244 int fd;
245
246 fd = dgram->t.sock.fd;
247
248 /* check if ready for reading */
249 if (!fd_recv_ready(fd))
250 return;
251
252 /* no need to go further if we can't retrieve the nameserver */
253 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200254 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100255 fd_stop_recv(fd);
256 return;
257 }
258
259 ns->process_responses(ns);
260}
261
262/* Called when a dns network socket is ready to send data */
263static void dns_resolve_send(struct dgram_conn *dgram)
264{
265 int fd;
266 struct dns_nameserver *ns;
267 struct ring *ring;
268 struct buffer *buf;
269 uint64_t msg_len;
270 size_t len, cnt, ofs;
271
272 fd = dgram->t.sock.fd;
273
274 /* check if ready for sending */
275 if (!fd_send_ready(fd))
276 return;
277
278 /* no need to go further if we can't retrieve the nameserver */
279 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200280 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100281 fd_stop_send(fd);
282 return;
283 }
284
285 ring = ns->dgram->ring_req;
286 buf = &ring->buf;
287
288 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
289 ofs = ns->dgram->ofs_req;
290
291 /* explanation for the initialization below: it would be better to do
292 * this in the parsing function but this would occasionally result in
293 * dropped events because we'd take a reference on the oldest message
294 * and keep it while being scheduled. Thus instead let's take it the
295 * first time we enter here so that we have a chance to pass many
296 * existing messages before grabbing a reference to a location. This
297 * value cannot be produced after initialization.
298 */
299 if (unlikely(ofs == ~0)) {
300 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +0200301 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100302 ofs += ring->ofs;
303 }
304
305 /* we were already there, adjust the offset to be relative to
306 * the buffer's head and remove us from the counter.
307 */
308 ofs -= ring->ofs;
309 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200310 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100311
312 while (ofs + 1 < b_data(buf)) {
313 int ret;
314
315 cnt = 1;
316 len = b_peek_varint(buf, ofs + cnt, &msg_len);
317 if (!len)
318 break;
319 cnt += len;
320 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
321 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
322 /* too large a message to ever fit, let's skip it */
323 ofs += cnt + msg_len;
324 continue;
325 }
326
327 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
328
329 ret = send(fd, dns_msg_trash, len, 0);
330 if (ret < 0) {
331 if (errno == EAGAIN) {
332 fd_cant_send(fd);
333 goto out;
334 }
335 ns->counters->snd_error++;
336 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100337 fd = dgram->t.sock.fd = -1;
338 goto out;
339 }
340 ns->counters->sent++;
341
342 ofs += cnt + len;
343 }
344
345 /* we don't want/need to be waked up any more for sending
346 * because all ring content is sent */
347 fd_stop_send(fd);
348
349out:
350
Willy Tarreau4781b152021-04-06 13:53:36 +0200351 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100352 ofs += ring->ofs;
353 ns->dgram->ofs_req = ofs;
354 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
355
356}
357
/* proto_udp callback functions for a DNS resolution: the receive and send
 * handlers defined in this file. dns_dgram_init() installs this structure as
 * the ->data of the datagram connection it sets up.
 */
struct dgram_data_cb dns_dgram_cb = {
	.recv = dns_resolve_recv,
	.send = dns_resolve_send,
};
Baptiste Assmann325137d2015-04-13 23:40:55 +0200363
Emeric Brunc9437992021-02-12 19:42:55 +0100364int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk)
Baptiste Assmann325137d2015-04-13 23:40:55 +0200365{
Emeric Brunc9437992021-02-12 19:42:55 +0100366 struct dns_dgram_server *dgram;
Baptiste Assmann201c07f2017-05-22 15:17:15 +0200367
Emeric Brunc9437992021-02-12 19:42:55 +0100368 if ((dgram = calloc(1, sizeof(*dgram))) == NULL)
Christopher Faulet67957bd2017-09-27 11:00:59 +0200369 return -1;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200370
Emeric Brunc9437992021-02-12 19:42:55 +0100371 /* Leave dgram partially initialized, no FD attached for
372 * now. */
373 dgram->conn.owner = ns;
374 dgram->conn.data = &dns_dgram_cb;
375 dgram->conn.t.sock.fd = -1;
376 dgram->conn.addr.to = *sk;
377 ns->dgram = dgram;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200378
Emeric Brunc9437992021-02-12 19:42:55 +0100379 dgram->ofs_req = ~0; /* init ring offset */
380 dgram->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
381 if (!dgram->ring_req) {
382 ha_alert("memory allocation error initializing the ring for nameserver.\n");
383 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200384 }
385
Emeric Brunc9437992021-02-12 19:42:55 +0100386 /* attach the task as reader */
387 if (!ring_attach(dgram->ring_req)) {
388 /* mark server attached to the ring */
389 ha_alert("nameserver sets too many watchers > 255 on ring. This is a bug and should not happen.\n");
390 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200391 }
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200392 return 0;
Emeric Brunc9437992021-02-12 19:42:55 +0100393out:
394 if (dgram->ring_req)
395 ring_free(dgram->ring_req);
Christopher Fauletd6c6b5f2020-09-08 10:27:24 +0200396
Emeric Brunc9437992021-02-12 19:42:55 +0100397 free(dgram);
Olivier Houchard2ec2db92018-01-08 16:28:57 +0100398
Emeric Brunfd647d52021-02-12 20:03:38 +0100399 return -1;
400}
401
402/*
403 * IO Handler to handle message push to dns tcp server
404 */
405static void dns_session_io_handler(struct appctx *appctx)
406{
407 struct stream_interface *si = appctx->owner;
408 struct dns_session *ds = appctx->ctx.sft.ptr;
409 struct ring *ring = &ds->ring;
410 struct buffer *buf = &ring->buf;
411 uint64_t msg_len;
412 int available_room;
413 size_t len, cnt, ofs;
414 int ret = 0;
415
416 /* if stopping was requested, close immediately */
417 if (unlikely(stopping))
418 goto close;
419
420 /* we want to be sure to not miss that we have been awaked for a shutdown */
421 __ha_barrier_load();
422
423 /* that means the connection was requested to shutdown
424 * for instance idle expire */
425 if (ds->shutdown)
426 goto close;
427
428 /* an error was detected */
429 if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
430 goto close;
431
432 /* con closed by server side, we will skip data write and drain data from channel */
433 if ((si_oc(si)->flags & CF_SHUTW)) {
434 goto read;
435 }
436
437 /* if the connection is not established, inform the stream that we want
438 * to be notified whenever the connection completes.
439 */
440 if (si_opposite(si)->state < SI_ST_EST) {
441 si_cant_get(si);
442 si_rx_conn_blk(si);
443 si_rx_endp_more(si);
444 return;
445 }
446
447
448 ofs = ds->ofs;
449
450 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
451 LIST_DEL_INIT(&appctx->wait_entry);
452 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
453
454 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
455
456 /* explanation for the initialization below: it would be better to do
457 * this in the parsing function but this would occasionally result in
458 * dropped events because we'd take a reference on the oldest message
459 * and keep it while being scheduled. Thus instead let's take it the
460 * first time we enter here so that we have a chance to pass many
461 * existing messages before grabbing a reference to a location. This
462 * value cannot be produced after initialization.
463 */
464 if (unlikely(ofs == ~0)) {
465 ofs = 0;
466
Willy Tarreau4781b152021-04-06 13:53:36 +0200467 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100468 ofs += ring->ofs;
469 }
470
471 /* in this loop, ofs always points to the counter byte that precedes
472 * the message so that we can take our reference there if we have to
473 * stop before the end (ret=0).
474 */
475 if (si_opposite(si)->state == SI_ST_EST) {
476 /* we were already there, adjust the offset to be relative to
477 * the buffer's head and remove us from the counter.
478 */
479 ofs -= ring->ofs;
480 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200481 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100482
483 ret = 1;
484 while (ofs + 1 < b_data(buf)) {
485 struct dns_query *query;
486 uint16_t original_qid;
487 uint16_t new_qid;
488
489 cnt = 1;
490 len = b_peek_varint(buf, ofs + cnt, &msg_len);
491 if (!len)
492 break;
493 cnt += len;
494 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
495
496 /* retrieve available room on output channel */
497 available_room = channel_recv_max(si_ic(si));
498
499 /* tx_msg_offset null means we are at the start of a new message */
500 if (!ds->tx_msg_offset) {
501 uint16_t slen;
502
503 /* check if there is enough room to put message len and query id */
504 if (available_room < sizeof(slen) + sizeof(new_qid)) {
505 si_rx_room_blk(si);
506 ret = 0;
507 break;
508 }
509
510 /* put msg len into then channel */
511 slen = (uint16_t)msg_len;
512 slen = htons(slen);
513 ci_putblk(si_ic(si), (char *)&slen, sizeof(slen));
514 available_room -= sizeof(slen);
515
516 /* backup original query id */
517 len = b_getblk(buf, (char *)&original_qid, sizeof(original_qid), ofs + cnt);
Emeric Brun538bb042021-02-15 13:58:06 +0100518 if (!len) {
519 /* should never happen since messages are atomically
520 * written into ring
521 */
522 ret = 0;
523 break;
524 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100525
526 /* generates new query id */
527 new_qid = ++ds->query_counter;
528 new_qid = htons(new_qid);
529
530 /* put new query id into the channel */
531 ci_putblk(si_ic(si), (char *)&new_qid, sizeof(new_qid));
532 available_room -= sizeof(new_qid);
533
534 /* keep query id mapping */
535
536 query = pool_alloc(dns_query_pool);
537 if (query) {
538 query->qid.key = new_qid;
539 query->original_qid = original_qid;
540 query->expire = tick_add(now_ms, 5000);
541 LIST_INIT(&query->list);
542 if (LIST_ISEMPTY(&ds->queries)) {
543 /* enable task to handle expire */
544 ds->task_exp->expire = query->expire;
545 /* ensure this will be executed by the same
546 * thread than ds_session_release
547 * to ensure session_release is free
548 * to destroy the task */
549 task_queue(ds->task_exp);
550 }
Willy Tarreau2b718102021-04-21 07:32:39 +0200551 LIST_APPEND(&ds->queries, &query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100552 eb32_insert(&ds->query_ids, &query->qid);
553 ds->onfly_queries++;
554 }
555
556 /* update the tx_offset to handle output in 16k streams */
557 ds->tx_msg_offset = sizeof(original_qid);
558
559 }
560
561 /* check if it remains available room on output chan */
562 if (unlikely(!available_room)) {
563 si_rx_room_blk(si);
564 ret = 0;
565 break;
566 }
567
568 chunk_reset(&trash);
569 if ((msg_len - ds->tx_msg_offset) > available_room) {
570 /* remaining msg data is too large to be written in output channel at one time */
571
572 len = b_getblk(buf, trash.area, available_room, ofs + cnt + ds->tx_msg_offset);
573
574 /* update offset to complete mesg forwarding later */
575 ds->tx_msg_offset += len;
576 }
577 else {
578 /* remaining msg data can be written in output channel at one time */
579 len = b_getblk(buf, trash.area, msg_len - ds->tx_msg_offset, ofs + cnt + ds->tx_msg_offset);
580
581 /* reset tx_msg_offset to mark forward fully processed */
582 ds->tx_msg_offset = 0;
583 }
584 trash.data += len;
585
Emeric Brun743afee2021-02-15 14:12:06 +0100586 if (ci_putchk(si_ic(si), &trash) == -1) {
587 /* should never happen since we
588 * check available_room is large
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500589 * enough here.
Emeric Brun743afee2021-02-15 14:12:06 +0100590 */
591 si_rx_room_blk(si);
592 ret = 0;
593 break;
594 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100595
596 if (ds->tx_msg_offset) {
597 /* msg was not fully processed, we must be awake to drain pending data */
598
599 si_rx_room_blk(si);
600 ret = 0;
601 break;
602 }
603 /* switch to next message */
604 ofs += cnt + msg_len;
605 }
606
Willy Tarreau4781b152021-04-06 13:53:36 +0200607 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100608 ofs += ring->ofs;
609 ds->ofs = ofs;
610 }
611 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
612
613 if (ret) {
614 /* let's be woken up once new request to write arrived */
615 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
Willy Tarreau2b718102021-04-21 07:32:39 +0200616 LIST_APPEND(&ring->waiters, &appctx->wait_entry);
Emeric Brunfd647d52021-02-12 20:03:38 +0100617 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
618 si_rx_endp_done(si);
619 }
620
621read:
622
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500623 /* if session is not a waiter it means there is no committed
Emeric Brunfd647d52021-02-12 20:03:38 +0100624 * message into rx_buf and we are free to use it
625 * Note: we need a load barrier here to not miss the
626 * delete from the list
627 */
628 __ha_barrier_load();
Willy Tarreau2b718102021-04-21 07:32:39 +0200629 if (!LIST_INLIST(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100630 while (1) {
631 uint16_t query_id;
632 struct eb32_node *eb;
633 struct dns_query *query;
634
635 if (!ds->rx_msg.len) {
636 /* next message len is not fully available into the channel */
637 if (co_data(si_oc(si)) < 2)
638 break;
639
640 /* retrieve message len */
641 co_getblk(si_oc(si), (char *)&msg_len, 2, 0);
642
643 /* mark as consumed */
644 co_skip(si_oc(si), 2);
645
646 /* store message len */
647 ds->rx_msg.len = ntohs(msg_len);
648 }
649
650 if (!co_data(si_oc(si))) {
651 /* we need more data but nothing is available */
652 break;
653 }
654
655 if (co_data(si_oc(si)) + ds->rx_msg.offset < ds->rx_msg.len) {
656 /* message only partially available */
657
658 /* read available data */
659 co_getblk(si_oc(si), ds->rx_msg.area + ds->rx_msg.offset, co_data(si_oc(si)), 0);
660
661 /* update message offset */
662 ds->rx_msg.offset += co_data(si_oc(si));
663
664 /* consume all pending data from the channel */
665 co_skip(si_oc(si), co_data(si_oc(si)));
666
667 /* we need to wait for more data */
668 break;
669 }
670
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500671 /* enough data is available into the channel to read the message until the end */
Emeric Brunfd647d52021-02-12 20:03:38 +0100672
673 /* read from the channel until the end of the message */
674 co_getblk(si_oc(si), ds->rx_msg.area + ds->rx_msg.offset, ds->rx_msg.len - ds->rx_msg.offset, 0);
675
676 /* consume all data until the end of the message from the channel */
677 co_skip(si_oc(si), ds->rx_msg.len - ds->rx_msg.offset);
678
679 /* reset reader offset to 0 for next message reand */
680 ds->rx_msg.offset = 0;
681
682 /* try remap query id to original */
683 memcpy(&query_id, ds->rx_msg.area, sizeof(query_id));
684 eb = eb32_lookup(&ds->query_ids, query_id);
685 if (!eb) {
686 /* query id not found means we have an unknown corresponding
687 * request, perhaps server's bug or or the query reached
688 * timeout
689 */
690 ds->rx_msg.len = 0;
691 continue;
692 }
693
694 /* re-map the original query id set by the requester */
695 query = eb32_entry(eb, struct dns_query, qid);
696 memcpy(ds->rx_msg.area, &query->original_qid, sizeof(query->original_qid));
697
698 /* remove query ids mapping from pending queries list/tree */
699 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200700 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100701 pool_free(dns_query_pool, query);
702 ds->onfly_queries--;
703
704 /* lock the dns_stream_server containing lists heads */
705 HA_SPIN_LOCK(DNS_LOCK, &ds->dss->lock);
706
707 /* the dns_session is also added in queue of the
708 * wait_sess list where the task processing
709 * response will pop available responses
710 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200711 LIST_APPEND(&ds->dss->wait_sess, &ds->waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100712
713 /* lock the dns_stream_server containing lists heads */
714 HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
715
716 /* awake the task processing the responses */
717 task_wakeup(ds->dss->task_rsp, TASK_WOKEN_INIT);
718
719 break;
720 }
721
Willy Tarreau2b718102021-04-21 07:32:39 +0200722 if (!LIST_INLIST(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100723 /* there is no more pending data to read and the con was closed by the server side */
724 if (!co_data(si_oc(si)) && (si_oc(si)->flags & CF_SHUTW)) {
725 goto close;
726 }
727 }
728
729 }
730
731
732 return;
733close:
734 si_shutw(si);
735 si_shutr(si);
736 si_ic(si)->flags |= CF_READ_NULL;
737}
738
739void dns_queries_flush(struct dns_session *ds)
740{
741 struct dns_query *query, *queryb;
742
743 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
744 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200745 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100746 pool_free(dns_query_pool, query);
747 }
748}
749
750void dns_session_free(struct dns_session *ds)
751{
752 if (ds->rx_msg.area)
753 pool_free(dns_msg_buf, ds->rx_msg.area);
754 if (ds->tx_ring_area)
755 pool_free(dns_msg_buf, ds->tx_ring_area);
756 if (ds->task_exp)
757 task_destroy(ds->task_exp);
758
759 dns_queries_flush(ds);
760
761 ds->dss->cur_conns--;
762 /* Note: this is useless to update
763 * max_active_conns here because
764 * we decrease the value
765 */
766 pool_free(dns_session_pool, ds);
767}
768
/* forward declaration: dns_session_release() calls it before its definition */
static struct appctx *dns_session_create(struct dns_session *ds);
770
771/*
772 * Function to release a DNS tcp session
773 */
774static void dns_session_release(struct appctx *appctx)
775{
776 struct dns_session *ds = appctx->ctx.sft.ptr;
Willy Tarreaue3e648c2021-02-24 17:38:46 +0100777 struct dns_stream_server *dss __maybe_unused;
Emeric Brunfd647d52021-02-12 20:03:38 +0100778
779 if (!ds)
780 return;
781
782 dss = ds->dss;
783
784 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
785 LIST_DEL_INIT(&ds->list);
786
787 if (stopping) {
788 dns_session_free(ds);
789 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
790 return;
791 }
792
793 if (!ds->nb_queries) {
794 /* this is an idle session */
795 /* Note: this is useless to update max_active_sess
796 * here because we decrease idle_conns but
797 * dns_session_free decrease curconns
798 */
799
800 ds->dss->idle_conns--;
801 dns_session_free(ds);
802 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
803 return;
804 }
805
806 if (ds->onfly_queries == ds->nb_queries) {
807 /* the session can be released because
808 * it means that all queries AND
809 * responses are in fly */
810 dns_session_free(ds);
811 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
812 return;
813 }
814
815 /* We do not call ring_appctx_detach here
816 * because we want to keep readers counters
817 * to retry a con with a different appctx*/
818 HA_RWLOCK_WRLOCK(DNS_LOCK, &ds->ring.lock);
819 LIST_DEL_INIT(&appctx->wait_entry);
820 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ds->ring.lock);
821
822 /* if there is no pending complete response
823 * message, ensure to reset
824 * message offsets if the session
825 * was closed with an incomplete pending response
826 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200827 if (!LIST_INLIST(&ds->waiter))
Emeric Brunfd647d52021-02-12 20:03:38 +0100828 ds->rx_msg.len = ds->rx_msg.offset = 0;
829
830 /* we flush pending sent queries because we never
831 * have responses
832 */
833 ds->nb_queries -= ds->onfly_queries;
834 dns_queries_flush(ds);
835
836 /* reset offset to be sure to start from message start */
837 ds->tx_msg_offset = 0;
838
839 /* here the ofs and the attached counter
840 * are kept unchanged
841 */
842
843 /* Create a new appctx, We hope we can
844 * create from the release callback! */
845 ds->appctx = dns_session_create(ds);
846 if (!ds->appctx) {
847 dns_session_free(ds);
848 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
849 return;
850 }
851
852 if (ds->nb_queries < DNS_STREAM_MAX_PIPELINED_REQ)
Willy Tarreau2b718102021-04-21 07:32:39 +0200853 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100854
855 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
856}
857
/* DNS tcp session applet: dns_session_create() passes it to appctx_new().
 * Its I/O handler and release callback are the two functions defined above.
 */
static struct applet dns_session_applet = {
	.obj_type = OBJ_TYPE_APPLET,
	.name = "<STRMDNS>", /* used for logging */
	.fct = dns_session_io_handler,
	.release = dns_session_release,
};
865
866/*
867 * Function used to create an appctx for a DNS session
868 */
869static struct appctx *dns_session_create(struct dns_session *ds)
870{
871 struct appctx *appctx;
872 struct session *sess;
873 struct stream *s;
874 struct applet *applet = &dns_session_applet;
875
Willy Tarreaue6124462021-09-13 10:07:38 +0200876 appctx = appctx_new(applet);
Emeric Brunfd647d52021-02-12 20:03:38 +0100877 if (!appctx)
878 goto out_close;
879
880 appctx->ctx.sft.ptr = (void *)ds;
881
882 sess = session_new(ds->dss->srv->proxy, NULL, &appctx->obj_type);
883 if (!sess) {
884 ha_alert("out of memory in peer_session_create().\n");
885 goto out_free_appctx;
886 }
887
888 if ((s = stream_new(sess, &appctx->obj_type, &BUF_NULL)) == NULL) {
889 ha_alert("Failed to initialize stream in peer_session_create().\n");
890 goto out_free_sess;
891 }
892
893
894 s->target = &ds->dss->srv->obj_type;
895 if (!sockaddr_alloc(&s->target_addr, &ds->dss->srv->addr, sizeof(ds->dss->srv->addr)))
896 goto out_free_strm;
897 s->flags = SF_ASSIGNED|SF_ADDR_SET;
898 s->si[1].flags |= SI_FL_NOLINGER;
899
900 s->do_log = NULL;
901 s->uniq_id = 0;
902
903 s->res.flags |= CF_READ_DONTWAIT;
904 /* for rto and rex to eternity to not expire on idle recv:
905 * We are using a syslog server.
906 */
907 s->res.rto = TICK_ETERNITY;
908 s->res.rex = TICK_ETERNITY;
909 ds->appctx = appctx;
910 task_wakeup(s->task, TASK_WOKEN_INIT);
911 return appctx;
912
913 /* Error unrolling */
914 out_free_strm:
Willy Tarreau2b718102021-04-21 07:32:39 +0200915 LIST_DELETE(&s->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100916 pool_free(pool_head_stream, s);
917 out_free_sess:
918 session_free(sess);
919 out_free_appctx:
920 appctx_free(appctx);
921 out_close:
922 return NULL;
923}
924
925/* Task processing expiration of unresponded queries, this one is supposed
926 * to be stuck on the same thread than the appctx handler
927 */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100928static struct task *dns_process_query_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100929{
930 struct dns_session *ds = (struct dns_session *)context;
931 struct dns_query *query, *queryb;
932
933 t->expire = TICK_ETERNITY;
934
935 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
936 if (tick_is_expired(query->expire, now_ms)) {
937 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200938 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100939 pool_free(dns_query_pool, query);
940 ds->onfly_queries--;
941 }
942 else {
943 t->expire = query->expire;
944 break;
945 }
946 }
947
948 return t;
949}
950
951/* Task processing expiration of idle sessions */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100952static struct task *dns_process_idle_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100953{
954 struct dns_stream_server *dss = (struct dns_stream_server *)context;
955 struct dns_session *ds, *dsb;
956 int target = 0;
957 int cur_active_conns;
958
959 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
960
961
962 cur_active_conns = dss->cur_conns - dss->idle_conns;
963 if (cur_active_conns > dss->max_active_conns)
964 dss->max_active_conns = cur_active_conns;
965
966 target = (dss->max_active_conns - cur_active_conns) / 2;
967 list_for_each_entry_safe(ds, dsb, &dss->idle_sess, list) {
968 if (!target)
969 break;
970
971 /* remove conn to pending list to ensure it won't be reused */
972 LIST_DEL_INIT(&ds->list);
973
974 /* force session shutdown */
975 ds->shutdown = 1;
976
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500977 /* to be sure that the appctx won't miss shutdown */
Emeric Brunfd647d52021-02-12 20:03:38 +0100978 __ha_barrier_store();
979
980 /* wake appctx to perform the shutdown */
981 appctx_wakeup(ds->appctx);
982 }
983
984 /* reset max to current active conns */
985 dss->max_active_conns = cur_active_conns;
986
987 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
988
989 t->expire = tick_add(now_ms, 5000);
990
991 return t;
992}
993
994struct dns_session *dns_session_new(struct dns_stream_server *dss)
995{
996 struct dns_session *ds;
997
998 if (dss->maxconn && (dss->maxconn <= dss->cur_conns))
999 return NULL;
1000
1001 ds = pool_alloc(dns_session_pool);
1002 if (!ds)
1003 return NULL;
1004
1005 ds->ofs = ~0;
1006 ds->dss = dss;
1007 LIST_INIT(&ds->list);
1008 LIST_INIT(&ds->queries);
1009 LIST_INIT(&ds->waiter);
1010 ds->rx_msg.offset = ds->rx_msg.len = 0;
1011 ds->rx_msg.area = NULL;
1012 ds->tx_ring_area = NULL;
1013 ds->task_exp = NULL;
1014 ds->appctx = NULL;
1015 ds->shutdown = 0;
1016 ds->nb_queries = 0;
1017 ds->query_ids = EB_ROOT_UNIQUE;
1018 ds->rx_msg.area = pool_alloc(dns_msg_buf);
1019 if (!ds->rx_msg.area)
1020 goto error;
1021
1022 ds->tx_ring_area = pool_alloc(dns_msg_buf);
1023 if (!ds->tx_ring_area)
1024 goto error;
1025
1026 ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE);
Christopher Faulet1a1b6742021-03-04 16:53:27 +01001027 /* never fail because it is the first watcher attached to the ring */
1028 DISGUISE(ring_attach(&ds->ring));
Emeric Brunfd647d52021-02-12 20:03:38 +01001029
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001030 if ((ds->task_exp = task_new_here()) == NULL)
Emeric Brunfd647d52021-02-12 20:03:38 +01001031 goto error;
1032
1033 ds->task_exp->process = dns_process_query_exp;
1034 ds->task_exp->context = ds;
1035
1036 ds->appctx = dns_session_create(ds);
1037 if (!ds->appctx)
1038 goto error;
1039
1040 dss->cur_conns++;
1041
1042 return ds;
1043
1044error:
1045 if (ds->task_exp)
1046 task_destroy(ds->task_exp);
1047 if (ds->rx_msg.area)
1048 pool_free(dns_msg_buf, ds->rx_msg.area);
1049 if (ds->tx_ring_area)
1050 pool_free(dns_msg_buf, ds->tx_ring_area);
1051
1052 pool_free(dns_session_pool, ds);
1053
1054 return NULL;
1055}
1056
1057/*
1058 * Task used to consume pending messages from nameserver ring
1059 * and forward them to dns_session ring.
1060 * Note: If no slot found a new dns_session is allocated
1061 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001062static struct task *dns_process_req(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001063{
1064 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1065 struct dns_stream_server *dss = ns->stream;
1066 struct ring *ring = dss->ring_req;
1067 struct buffer *buf = &ring->buf;
1068 uint64_t msg_len;
1069 size_t len, cnt, ofs;
1070 struct dns_session *ds, *ads;
1071 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
1072
1073 ofs = dss->ofs_req;
1074
1075 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
1076
1077 /* explanation for the initialization below: it would be better to do
1078 * this in the parsing function but this would occasionally result in
1079 * dropped events because we'd take a reference on the oldest message
1080 * and keep it while being scheduled. Thus instead let's take it the
1081 * first time we enter here so that we have a chance to pass many
1082 * existing messages before grabbing a reference to a location. This
1083 * value cannot be produced after initialization.
1084 */
1085 if (unlikely(ofs == ~0)) {
1086 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +02001087 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001088 ofs += ring->ofs;
1089 }
1090
1091 /* we were already there, adjust the offset to be relative to
1092 * the buffer's head and remove us from the counter.
1093 */
1094 ofs -= ring->ofs;
1095 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +02001096 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001097
1098 while (ofs + 1 < b_data(buf)) {
1099 struct ist myist;
1100
1101 cnt = 1;
1102 len = b_peek_varint(buf, ofs + cnt, &msg_len);
1103 if (!len)
1104 break;
1105 cnt += len;
1106 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
1107 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
1108 /* too large a message to ever fit, let's skip it */
1109 ofs += cnt + msg_len;
1110 continue;
1111 }
1112
1113 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
1114
Tim Duesterhus92c696e2021-02-28 16:11:36 +01001115 myist = ist2(dns_msg_trash, len);
Emeric Brunfd647d52021-02-12 20:03:38 +01001116
1117 ads = NULL;
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001118 /* try to push request into active sess with free slot */
Emeric Brunfd647d52021-02-12 20:03:38 +01001119 if (!LIST_ISEMPTY(&dss->free_sess)) {
1120 ds = LIST_NEXT(&dss->free_sess, struct dns_session *, list);
1121
1122 if (ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) {
1123 ds->nb_queries++;
1124 if (ds->nb_queries >= DNS_STREAM_MAX_PIPELINED_REQ)
1125 LIST_DEL_INIT(&ds->list);
1126 ads = ds;
1127 }
1128 else {
1129 /* it means we were unable to put a request in this slot,
1130 * it may be close to be full so we put it at the end
1131 * of free conn list */
1132 LIST_DEL_INIT(&ds->list);
Willy Tarreau2b718102021-04-21 07:32:39 +02001133 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001134 }
1135 }
1136
1137 if (!ads) {
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001138 /* try to push request into idle, this one should have enough free space */
Emeric Brunfd647d52021-02-12 20:03:38 +01001139 if (!LIST_ISEMPTY(&dss->idle_sess)) {
1140 ds = LIST_NEXT(&dss->idle_sess, struct dns_session *, list);
1141
1142 /* ring is empty so this ring_write should never fail */
1143 ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1144 ds->nb_queries++;
1145 LIST_DEL_INIT(&ds->list);
1146
1147 ds->dss->idle_conns--;
1148
1149 /* we may have to update the max_active_conns */
1150 if (ds->dss->max_active_conns < ds->dss->cur_conns - ds->dss->idle_conns)
1151 ds->dss->max_active_conns = ds->dss->cur_conns - ds->dss->idle_conns;
1152
1153 /* since we may unable to find a free list to handle
1154 * this request, this request may be large and fill
1155 * the ring buffer so we prefer to put at the end of free
1156 * list. */
Willy Tarreau2b718102021-04-21 07:32:39 +02001157 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001158 ads = ds;
1159 }
1160 }
1161
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001162 /* we didn't find a session available with large enough room */
Emeric Brunfd647d52021-02-12 20:03:38 +01001163 if (!ads) {
1164 /* allocate a new session */
1165 ads = dns_session_new(dss);
1166 if (ads) {
1167 /* ring is empty so this ring_write should never fail */
1168 ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1169 ads->nb_queries++;
Willy Tarreau2b718102021-04-21 07:32:39 +02001170 LIST_INSERT(&dss->free_sess, &ads->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001171 }
1172 else
1173 ns->counters->snd_error++;
1174 }
1175
1176 if (ads)
1177 ns->counters->sent++;
1178
1179 ofs += cnt + len;
1180 }
1181
Willy Tarreau4781b152021-04-06 13:53:36 +02001182 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001183 ofs += ring->ofs;
1184 dss->ofs_req = ofs;
1185 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
1186
1187
1188 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
1189 return t;
1190}
1191
1192/*
1193 * Task used to consume response
1194 * Note: upper layer callback is called
1195 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001196static struct task *dns_process_rsp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001197{
1198 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1199
1200 ns->process_responses(ns);
1201
1202 return t;
1203}
1204
1205/* Function used to initialize an TCP nameserver */
1206int dns_stream_init(struct dns_nameserver *ns, struct server *srv)
1207{
1208 struct dns_stream_server *dss = NULL;
1209
1210 dss = calloc(1, sizeof(*dss));
1211 if (!dss) {
1212 ha_alert("memory allocation error initializing dns tcp server '%s'.\n", srv->id);
1213 goto out;
1214 }
1215
1216 dss->srv = srv;
1217 dss->maxconn = srv->maxconn;
1218
1219 dss->ofs_req = ~0; /* init ring offset */
1220 dss->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
1221 if (!dss->ring_req) {
1222 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1223 goto out;
1224 }
1225 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001226 if ((dss->task_req = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001227 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1228 goto out;
1229 }
1230
1231 /* Update task's parameters */
1232 dss->task_req->process = dns_process_req;
1233 dss->task_req->context = ns;
1234
1235 /* attach the task as reader */
1236 if (!ring_attach(dss->ring_req)) {
1237 /* mark server attached to the ring */
1238 ha_alert("server '%s': too many watchers for ring. this should never happen.\n", srv->id);
1239 goto out;
1240 }
1241
1242 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001243 if ((dss->task_rsp = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001244 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1245 goto out;
1246 }
1247
1248 /* Update task's parameters */
1249 dss->task_rsp->process = dns_process_rsp;
1250 dss->task_rsp->context = ns;
1251
1252 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001253 if ((dss->task_idle = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001254 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1255 goto out;
1256 }
1257
1258 /* Update task's parameters */
1259 dss->task_idle->process = dns_process_idle_exp;
1260 dss->task_idle->context = dss;
1261 dss->task_idle->expire = tick_add(now_ms, 5000);
1262
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001263 /* let start the task to free idle conns immediately */
Emeric Brunfd647d52021-02-12 20:03:38 +01001264 task_queue(dss->task_idle);
1265
1266 LIST_INIT(&dss->free_sess);
1267 LIST_INIT(&dss->idle_sess);
1268 LIST_INIT(&dss->wait_sess);
1269 HA_SPIN_INIT(&dss->lock);
1270 ns->stream = dss;
1271 return 0;
1272out:
1273 if (dss && dss->task_rsp)
1274 task_destroy(dss->task_rsp);
1275 if (dss && dss->task_req)
1276 task_destroy(dss->task_req);
1277 if (dss && dss->ring_req)
1278 ring_free(dss->ring_req);
1279
1280 free(dss);
Emeric Brunc9437992021-02-12 19:42:55 +01001281 return -1;
Christopher Faulet67957bd2017-09-27 11:00:59 +02001282}
1283
Emeric Brunc9437992021-02-12 19:42:55 +01001284int init_dns_buffers()
Baptiste Assmann325137d2015-04-13 23:40:55 +02001285{
Emeric Brunc9437992021-02-12 19:42:55 +01001286 dns_msg_trash = malloc(DNS_TCP_MSG_MAX_SIZE);
1287 if (!dns_msg_trash)
1288 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +02001289
Emeric Brunc9437992021-02-12 19:42:55 +01001290 return 1;
1291}
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +02001292
Emeric Brunc9437992021-02-12 19:42:55 +01001293void deinit_dns_buffers()
1294{
Willy Tarreau61cfdf42021-02-20 10:46:51 +01001295 ha_free(&dns_msg_trash);
Emeric Brunc9437992021-02-12 19:42:55 +01001296}
Emeric Brund26a6232021-01-04 13:32:20 +01001297
/* register the allocation and release of the DNS trash buffer through the
 * per-thread alloc/free hooks.
 */
REGISTER_PER_THREAD_ALLOC(init_dns_buffers);
REGISTER_PER_THREAD_FREE(deinit_dns_buffers);