blob: 1f03dee797dffc038f7a4a4853d3abb06d6b9df4 [file] [log] [blame]
Baptiste Assmann325137d2015-04-13 23:40:55 +02001/*
2 * Name server resolution
3 *
Willy Tarreau714f3452021-05-09 06:47:26 +02004 * Copyright 2020 HAProxy Technologies
Baptiste Assmann325137d2015-04-13 23:40:55 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <unistd.h>
19
20#include <sys/types.h>
21
Willy Tarreau122eba92020-06-04 10:15:32 +020022#include <haproxy/action.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020023#include <haproxy/api.h>
Willy Tarreau6be78492020-06-05 00:00:29 +020024#include <haproxy/cfgparse.h>
Willy Tarreauf1d32c42020-06-04 21:07:02 +020025#include <haproxy/channel.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020026#include <haproxy/check.h>
Willy Tarreau83487a82020-06-04 20:19:54 +020027#include <haproxy/cli.h>
Willy Tarreau7c18b542020-06-11 09:23:02 +020028#include <haproxy/dgram.h>
Willy Tarreaueb92deb2020-06-04 10:53:16 +020029#include <haproxy/dns.h>
Willy Tarreau8d366972020-05-27 16:10:29 +020030#include <haproxy/errors.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020031#include <haproxy/fd.h>
Willy Tarreauaeed4a82020-06-04 22:01:04 +020032#include <haproxy/log.h>
Emeric Brund26a6232021-01-04 13:32:20 +010033#include <haproxy/ring.h>
Emeric Brunfd647d52021-02-12 20:03:38 +010034#include <haproxy/stream.h>
35#include <haproxy/stream_interface.h>
Willy Tarreau9f9e9fc2021-05-08 13:09:46 +020036#include <haproxy/tools.h>
Baptiste Assmann325137d2015-04-13 23:40:55 +020037
/* Per-thread scratch area: dns_resolve_send() copies each queued message out
 * of the request ring into it before handing it to send(). It is not
 * allocated anywhere in this part of the file.
 */
static THREAD_LOCAL char *dns_msg_trash;

/* Pool of TCP DNS sessions (struct dns_session), released by dns_session_free() */
DECLARE_STATIC_POOL(dns_session_pool, "dns_session", sizeof(struct dns_session));
/* Pool of in-flight query descriptors holding the query id mapping */
DECLARE_STATIC_POOL(dns_query_pool, "dns_query", sizeof(struct dns_query));
/* Pool of DNS_TCP_MSG_RING_MAX_SIZE-byte areas; dns_session_free() returns a
 * session's rx_msg.area and tx_ring_area to it.
 */
DECLARE_STATIC_POOL(dns_msg_buf, "dns_msg_buf", DNS_TCP_MSG_RING_MAX_SIZE);
43
Christopher Faulet67957bd2017-09-27 11:00:59 +020044/* Opens an UDP socket on the namesaver's IP/Port, if required. Returns 0 on
Christopher Faulet1e711be2021-03-04 16:58:35 +010045 * success, -1 otherwise. ns->dgram must be defined.
Baptiste Assmann325137d2015-04-13 23:40:55 +020046 */
Emeric Brund26a6232021-01-04 13:32:20 +010047static int dns_connect_nameserver(struct dns_nameserver *ns)
Baptiste Assmann325137d2015-04-13 23:40:55 +020048{
Christopher Faulet1e711be2021-03-04 16:58:35 +010049 struct dgram_conn *dgram = &ns->dgram->conn;
50 int fd;
Baptiste Assmann325137d2015-04-13 23:40:55 +020051
Christopher Faulet1e711be2021-03-04 16:58:35 +010052 /* Already connected */
53 if (dgram->t.sock.fd != -1)
Emeric Brun526b7922021-02-15 14:28:27 +010054 return 0;
Christopher Faulet1e711be2021-03-04 16:58:35 +010055
56 /* Create an UDP socket and connect it on the nameserver's IP/Port */
57 if ((fd = socket(dgram->addr.to.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
58 send_log(NULL, LOG_WARNING,
59 "DNS : section '%s': can't create socket for nameserver '%s'.\n",
60 ns->counters->pid, ns->id);
61 return -1;
62 }
63 if (connect(fd, (struct sockaddr*)&dgram->addr.to, get_addr_len(&dgram->addr.to)) == -1) {
64 send_log(NULL, LOG_WARNING,
65 "DNS : section '%s': can't connect socket for nameserver '%s'.\n",
66 ns->counters->id, ns->id);
67 close(fd);
68 return -1;
Emeric Brunc9437992021-02-12 19:42:55 +010069 }
Emeric Brun526b7922021-02-15 14:28:27 +010070
Christopher Faulet1e711be2021-03-04 16:58:35 +010071 /* Make the socket non blocking */
72 fcntl(fd, F_SETFL, O_NONBLOCK);
73
74 /* Add the fd in the fd list and update its parameters */
75 dgram->t.sock.fd = fd;
76 fd_insert(fd, dgram, dgram_fd_handler, MAX_THREADS_MASK);
77 fd_want_recv(fd);
78 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +020079}
80
Emeric Brund26a6232021-01-04 13:32:20 +010081/* Sends a message to a name server
82 * It returns message length on success
83 * or -1 in error case
84 * 0 is returned in case of output ring buffer is full
85 */
86int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len)
87{
88 int ret = -1;
89
90 if (ns->dgram) {
91 struct dgram_conn *dgram = &ns->dgram->conn;
92 int fd = dgram->t.sock.fd;
93
94 if (dgram->t.sock.fd == -1) {
95 if (dns_connect_nameserver(ns) == -1)
96 return -1;
97 fd = dgram->t.sock.fd;
98 }
99
100 ret = send(fd, buf, len, 0);
101 if (ret < 0) {
102 if (errno == EAGAIN) {
103 struct ist myist;
104
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100105 myist = ist2(buf, len);
Emeric Brund26a6232021-01-04 13:32:20 +0100106 ret = ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
107 if (!ret) {
108 ns->counters->snd_error++;
109 return -1;
110 }
111 fd_cant_send(fd);
112 return ret;
113 }
114 ns->counters->snd_error++;
115 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100116 dgram->t.sock.fd = -1;
117 return -1;
118 }
119 ns->counters->sent++;
120 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100121 else if (ns->stream) {
122 struct ist myist;
123
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100124 myist = ist2(buf, len);
Emeric Brunfd647d52021-02-12 20:03:38 +0100125 ret = ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
126 if (!ret) {
127 ns->counters->snd_error++;
128 return -1;
129 }
130 task_wakeup(ns->stream->task_req, TASK_WOKEN_MSG);
131 return ret;
132 }
Emeric Brund26a6232021-01-04 13:32:20 +0100133
134 return ret;
135}
136
Emeric Brunfd647d52021-02-12 20:03:38 +0100137void dns_session_free(struct dns_session *);
138
Emeric Brund26a6232021-01-04 13:32:20 +0100139/* Receives a dns message
140 * Returns message length
141 * 0 is returned if no more message available
142 * -1 in error case
143 */
144ssize_t dns_recv_nameserver(struct dns_nameserver *ns, void *data, size_t size)
145{
146 ssize_t ret = -1;
147
148 if (ns->dgram) {
149 struct dgram_conn *dgram = &ns->dgram->conn;
150 int fd = dgram->t.sock.fd;
151
152 if (fd == -1)
153 return -1;
154
155 if ((ret = recv(fd, data, size, 0)) < 0) {
156 if (errno == EAGAIN) {
157 fd_cant_recv(fd);
158 return 0;
159 }
160 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100161 dgram->t.sock.fd = -1;
162 return -1;
163 }
164 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100165 else if (ns->stream) {
166 struct dns_stream_server *dss = ns->stream;
167 struct dns_session *ds;
168
169 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
170
171 if (!LIST_ISEMPTY(&dss->wait_sess)) {
172 ds = LIST_NEXT(&dss->wait_sess, struct dns_session *, waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100173 ret = ds->rx_msg.len < size ? ds->rx_msg.len : size;
174 memcpy(data, ds->rx_msg.area, ret);
175
176 ds->rx_msg.len = 0;
177
Emeric Brunfd647d52021-02-12 20:03:38 +0100178 LIST_DEL_INIT(&ds->waiter);
179
180 if (ds->appctx) {
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500181 /* awake appctx because it may have other
Emeric Brunfd647d52021-02-12 20:03:38 +0100182 * message to receive
183 */
184 appctx_wakeup(ds->appctx);
185
186 /* dns_session could already be into free_sess list
187 * so we firstly remove it */
188 LIST_DEL_INIT(&ds->list);
189
190 /* decrease nb_queries to free a slot for a new query on that sess */
191 ds->nb_queries--;
192 if (ds->nb_queries) {
193 /* it remains pipelined unanswered request
194 * into this session but we just decrease
195 * the counter so the session
196 * can not be full of pipelined requests
197 * so we can add if to free_sess list
198 * to receive a new request
199 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200200 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100201 }
202 else {
203 /* there is no more pipelined requests
204 * into this session, so we move it
205 * to idle_sess list */
Willy Tarreau2b718102021-04-21 07:32:39 +0200206 LIST_INSERT(&ds->dss->idle_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100207
208 /* update the counter of idle sessions */
209 ds->dss->idle_conns++;
210
211 /* Note: this is useless there to update
212 * the max_active_conns since we increase
213 * the idle count */
214 }
215 }
216 else {
217 /* there is no more appctx for this session
218 * it means it is ready to die
219 */
220 dns_session_free(ds);
221 }
222
223
224 }
225
226 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
227 }
Emeric Brund26a6232021-01-04 13:32:20 +0100228
229 return ret;
230}
231
232static void dns_resolve_recv(struct dgram_conn *dgram)
233{
234 struct dns_nameserver *ns;
235 int fd;
236
237 fd = dgram->t.sock.fd;
238
239 /* check if ready for reading */
240 if (!fd_recv_ready(fd))
241 return;
242
243 /* no need to go further if we can't retrieve the nameserver */
244 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200245 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100246 fd_stop_recv(fd);
247 return;
248 }
249
250 ns->process_responses(ns);
251}
252
253/* Called when a dns network socket is ready to send data */
254static void dns_resolve_send(struct dgram_conn *dgram)
255{
256 int fd;
257 struct dns_nameserver *ns;
258 struct ring *ring;
259 struct buffer *buf;
260 uint64_t msg_len;
261 size_t len, cnt, ofs;
262
263 fd = dgram->t.sock.fd;
264
265 /* check if ready for sending */
266 if (!fd_send_ready(fd))
267 return;
268
269 /* no need to go further if we can't retrieve the nameserver */
270 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200271 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100272 fd_stop_send(fd);
273 return;
274 }
275
276 ring = ns->dgram->ring_req;
277 buf = &ring->buf;
278
279 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
280 ofs = ns->dgram->ofs_req;
281
282 /* explanation for the initialization below: it would be better to do
283 * this in the parsing function but this would occasionally result in
284 * dropped events because we'd take a reference on the oldest message
285 * and keep it while being scheduled. Thus instead let's take it the
286 * first time we enter here so that we have a chance to pass many
287 * existing messages before grabbing a reference to a location. This
288 * value cannot be produced after initialization.
289 */
290 if (unlikely(ofs == ~0)) {
291 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +0200292 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100293 ofs += ring->ofs;
294 }
295
296 /* we were already there, adjust the offset to be relative to
297 * the buffer's head and remove us from the counter.
298 */
299 ofs -= ring->ofs;
300 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200301 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100302
303 while (ofs + 1 < b_data(buf)) {
304 int ret;
305
306 cnt = 1;
307 len = b_peek_varint(buf, ofs + cnt, &msg_len);
308 if (!len)
309 break;
310 cnt += len;
311 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
312 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
313 /* too large a message to ever fit, let's skip it */
314 ofs += cnt + msg_len;
315 continue;
316 }
317
318 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
319
320 ret = send(fd, dns_msg_trash, len, 0);
321 if (ret < 0) {
322 if (errno == EAGAIN) {
323 fd_cant_send(fd);
324 goto out;
325 }
326 ns->counters->snd_error++;
327 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100328 fd = dgram->t.sock.fd = -1;
329 goto out;
330 }
331 ns->counters->sent++;
332
333 ofs += cnt + len;
334 }
335
336 /* we don't want/need to be waked up any more for sending
337 * because all ring content is sent */
338 fd_stop_send(fd);
339
340out:
341
Willy Tarreau4781b152021-04-06 13:53:36 +0200342 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100343 ofs += ring->ofs;
344 ns->dgram->ofs_req = ofs;
345 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
346
347}
348
Emeric Brunc9437992021-02-12 19:42:55 +0100349/* proto_udp callback functions for a DNS resolution */
350struct dgram_data_cb dns_dgram_cb = {
351 .recv = dns_resolve_recv,
352 .send = dns_resolve_send,
353};
Baptiste Assmann325137d2015-04-13 23:40:55 +0200354
Emeric Brunc9437992021-02-12 19:42:55 +0100355int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk)
Baptiste Assmann325137d2015-04-13 23:40:55 +0200356{
Emeric Brunc9437992021-02-12 19:42:55 +0100357 struct dns_dgram_server *dgram;
Baptiste Assmann201c07f2017-05-22 15:17:15 +0200358
Emeric Brunc9437992021-02-12 19:42:55 +0100359 if ((dgram = calloc(1, sizeof(*dgram))) == NULL)
Christopher Faulet67957bd2017-09-27 11:00:59 +0200360 return -1;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200361
Emeric Brunc9437992021-02-12 19:42:55 +0100362 /* Leave dgram partially initialized, no FD attached for
363 * now. */
364 dgram->conn.owner = ns;
365 dgram->conn.data = &dns_dgram_cb;
366 dgram->conn.t.sock.fd = -1;
367 dgram->conn.addr.to = *sk;
368 ns->dgram = dgram;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200369
Emeric Brunc9437992021-02-12 19:42:55 +0100370 dgram->ofs_req = ~0; /* init ring offset */
371 dgram->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
372 if (!dgram->ring_req) {
373 ha_alert("memory allocation error initializing the ring for nameserver.\n");
374 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200375 }
376
Emeric Brunc9437992021-02-12 19:42:55 +0100377 /* attach the task as reader */
378 if (!ring_attach(dgram->ring_req)) {
379 /* mark server attached to the ring */
380 ha_alert("nameserver sets too many watchers > 255 on ring. This is a bug and should not happen.\n");
381 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200382 }
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200383 return 0;
Emeric Brunc9437992021-02-12 19:42:55 +0100384out:
385 if (dgram->ring_req)
386 ring_free(dgram->ring_req);
Christopher Fauletd6c6b5f2020-09-08 10:27:24 +0200387
Emeric Brunc9437992021-02-12 19:42:55 +0100388 free(dgram);
Olivier Houchard2ec2db92018-01-08 16:28:57 +0100389
Emeric Brunfd647d52021-02-12 20:03:38 +0100390 return -1;
391}
392
393/*
394 * IO Handler to handle message push to dns tcp server
395 */
396static void dns_session_io_handler(struct appctx *appctx)
397{
398 struct stream_interface *si = appctx->owner;
399 struct dns_session *ds = appctx->ctx.sft.ptr;
400 struct ring *ring = &ds->ring;
401 struct buffer *buf = &ring->buf;
402 uint64_t msg_len;
403 int available_room;
404 size_t len, cnt, ofs;
405 int ret = 0;
406
407 /* if stopping was requested, close immediately */
408 if (unlikely(stopping))
409 goto close;
410
411 /* we want to be sure to not miss that we have been awaked for a shutdown */
412 __ha_barrier_load();
413
414 /* that means the connection was requested to shutdown
415 * for instance idle expire */
416 if (ds->shutdown)
417 goto close;
418
419 /* an error was detected */
420 if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
421 goto close;
422
423 /* con closed by server side, we will skip data write and drain data from channel */
424 if ((si_oc(si)->flags & CF_SHUTW)) {
425 goto read;
426 }
427
428 /* if the connection is not established, inform the stream that we want
429 * to be notified whenever the connection completes.
430 */
431 if (si_opposite(si)->state < SI_ST_EST) {
432 si_cant_get(si);
433 si_rx_conn_blk(si);
434 si_rx_endp_more(si);
435 return;
436 }
437
438
439 ofs = ds->ofs;
440
441 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
442 LIST_DEL_INIT(&appctx->wait_entry);
443 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
444
445 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
446
447 /* explanation for the initialization below: it would be better to do
448 * this in the parsing function but this would occasionally result in
449 * dropped events because we'd take a reference on the oldest message
450 * and keep it while being scheduled. Thus instead let's take it the
451 * first time we enter here so that we have a chance to pass many
452 * existing messages before grabbing a reference to a location. This
453 * value cannot be produced after initialization.
454 */
455 if (unlikely(ofs == ~0)) {
456 ofs = 0;
457
Willy Tarreau4781b152021-04-06 13:53:36 +0200458 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100459 ofs += ring->ofs;
460 }
461
462 /* in this loop, ofs always points to the counter byte that precedes
463 * the message so that we can take our reference there if we have to
464 * stop before the end (ret=0).
465 */
466 if (si_opposite(si)->state == SI_ST_EST) {
467 /* we were already there, adjust the offset to be relative to
468 * the buffer's head and remove us from the counter.
469 */
470 ofs -= ring->ofs;
471 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200472 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100473
474 ret = 1;
475 while (ofs + 1 < b_data(buf)) {
476 struct dns_query *query;
477 uint16_t original_qid;
478 uint16_t new_qid;
479
480 cnt = 1;
481 len = b_peek_varint(buf, ofs + cnt, &msg_len);
482 if (!len)
483 break;
484 cnt += len;
485 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
486
487 /* retrieve available room on output channel */
488 available_room = channel_recv_max(si_ic(si));
489
490 /* tx_msg_offset null means we are at the start of a new message */
491 if (!ds->tx_msg_offset) {
492 uint16_t slen;
493
494 /* check if there is enough room to put message len and query id */
495 if (available_room < sizeof(slen) + sizeof(new_qid)) {
496 si_rx_room_blk(si);
497 ret = 0;
498 break;
499 }
500
501 /* put msg len into then channel */
502 slen = (uint16_t)msg_len;
503 slen = htons(slen);
504 ci_putblk(si_ic(si), (char *)&slen, sizeof(slen));
505 available_room -= sizeof(slen);
506
507 /* backup original query id */
508 len = b_getblk(buf, (char *)&original_qid, sizeof(original_qid), ofs + cnt);
Emeric Brun538bb042021-02-15 13:58:06 +0100509 if (!len) {
510 /* should never happen since messages are atomically
511 * written into ring
512 */
513 ret = 0;
514 break;
515 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100516
517 /* generates new query id */
518 new_qid = ++ds->query_counter;
519 new_qid = htons(new_qid);
520
521 /* put new query id into the channel */
522 ci_putblk(si_ic(si), (char *)&new_qid, sizeof(new_qid));
523 available_room -= sizeof(new_qid);
524
525 /* keep query id mapping */
526
527 query = pool_alloc(dns_query_pool);
528 if (query) {
529 query->qid.key = new_qid;
530 query->original_qid = original_qid;
531 query->expire = tick_add(now_ms, 5000);
532 LIST_INIT(&query->list);
533 if (LIST_ISEMPTY(&ds->queries)) {
534 /* enable task to handle expire */
535 ds->task_exp->expire = query->expire;
536 /* ensure this will be executed by the same
537 * thread than ds_session_release
538 * to ensure session_release is free
539 * to destroy the task */
540 task_queue(ds->task_exp);
541 }
Willy Tarreau2b718102021-04-21 07:32:39 +0200542 LIST_APPEND(&ds->queries, &query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100543 eb32_insert(&ds->query_ids, &query->qid);
544 ds->onfly_queries++;
545 }
546
547 /* update the tx_offset to handle output in 16k streams */
548 ds->tx_msg_offset = sizeof(original_qid);
549
550 }
551
552 /* check if it remains available room on output chan */
553 if (unlikely(!available_room)) {
554 si_rx_room_blk(si);
555 ret = 0;
556 break;
557 }
558
559 chunk_reset(&trash);
560 if ((msg_len - ds->tx_msg_offset) > available_room) {
561 /* remaining msg data is too large to be written in output channel at one time */
562
563 len = b_getblk(buf, trash.area, available_room, ofs + cnt + ds->tx_msg_offset);
564
565 /* update offset to complete mesg forwarding later */
566 ds->tx_msg_offset += len;
567 }
568 else {
569 /* remaining msg data can be written in output channel at one time */
570 len = b_getblk(buf, trash.area, msg_len - ds->tx_msg_offset, ofs + cnt + ds->tx_msg_offset);
571
572 /* reset tx_msg_offset to mark forward fully processed */
573 ds->tx_msg_offset = 0;
574 }
575 trash.data += len;
576
Emeric Brun743afee2021-02-15 14:12:06 +0100577 if (ci_putchk(si_ic(si), &trash) == -1) {
578 /* should never happen since we
579 * check available_room is large
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500580 * enough here.
Emeric Brun743afee2021-02-15 14:12:06 +0100581 */
582 si_rx_room_blk(si);
583 ret = 0;
584 break;
585 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100586
587 if (ds->tx_msg_offset) {
588 /* msg was not fully processed, we must be awake to drain pending data */
589
590 si_rx_room_blk(si);
591 ret = 0;
592 break;
593 }
594 /* switch to next message */
595 ofs += cnt + msg_len;
596 }
597
Willy Tarreau4781b152021-04-06 13:53:36 +0200598 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100599 ofs += ring->ofs;
600 ds->ofs = ofs;
601 }
602 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
603
604 if (ret) {
605 /* let's be woken up once new request to write arrived */
606 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
Willy Tarreau62e467c2021-10-20 11:02:13 +0200607 BUG_ON(LIST_INLIST(&appctx->wait_entry));
Willy Tarreau2b718102021-04-21 07:32:39 +0200608 LIST_APPEND(&ring->waiters, &appctx->wait_entry);
Emeric Brunfd647d52021-02-12 20:03:38 +0100609 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
610 si_rx_endp_done(si);
611 }
612
613read:
614
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500615 /* if session is not a waiter it means there is no committed
Emeric Brunfd647d52021-02-12 20:03:38 +0100616 * message into rx_buf and we are free to use it
617 * Note: we need a load barrier here to not miss the
618 * delete from the list
619 */
Emeric Brun70455902021-10-20 10:49:53 +0200620
621 /* lock the dns_stream_server containing lists heads */
622 HA_SPIN_LOCK(DNS_LOCK, &ds->dss->lock);
623
Willy Tarreau2b718102021-04-21 07:32:39 +0200624 if (!LIST_INLIST(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100625 while (1) {
626 uint16_t query_id;
627 struct eb32_node *eb;
628 struct dns_query *query;
629
630 if (!ds->rx_msg.len) {
631 /* next message len is not fully available into the channel */
632 if (co_data(si_oc(si)) < 2)
633 break;
634
635 /* retrieve message len */
636 co_getblk(si_oc(si), (char *)&msg_len, 2, 0);
637
638 /* mark as consumed */
639 co_skip(si_oc(si), 2);
640
641 /* store message len */
642 ds->rx_msg.len = ntohs(msg_len);
643 }
644
645 if (!co_data(si_oc(si))) {
646 /* we need more data but nothing is available */
647 break;
648 }
649
650 if (co_data(si_oc(si)) + ds->rx_msg.offset < ds->rx_msg.len) {
651 /* message only partially available */
652
653 /* read available data */
654 co_getblk(si_oc(si), ds->rx_msg.area + ds->rx_msg.offset, co_data(si_oc(si)), 0);
655
656 /* update message offset */
657 ds->rx_msg.offset += co_data(si_oc(si));
658
659 /* consume all pending data from the channel */
660 co_skip(si_oc(si), co_data(si_oc(si)));
661
662 /* we need to wait for more data */
663 break;
664 }
665
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500666 /* enough data is available into the channel to read the message until the end */
Emeric Brunfd647d52021-02-12 20:03:38 +0100667
668 /* read from the channel until the end of the message */
669 co_getblk(si_oc(si), ds->rx_msg.area + ds->rx_msg.offset, ds->rx_msg.len - ds->rx_msg.offset, 0);
670
671 /* consume all data until the end of the message from the channel */
672 co_skip(si_oc(si), ds->rx_msg.len - ds->rx_msg.offset);
673
674 /* reset reader offset to 0 for next message reand */
675 ds->rx_msg.offset = 0;
676
677 /* try remap query id to original */
678 memcpy(&query_id, ds->rx_msg.area, sizeof(query_id));
679 eb = eb32_lookup(&ds->query_ids, query_id);
680 if (!eb) {
681 /* query id not found means we have an unknown corresponding
682 * request, perhaps server's bug or or the query reached
683 * timeout
684 */
685 ds->rx_msg.len = 0;
686 continue;
687 }
688
689 /* re-map the original query id set by the requester */
690 query = eb32_entry(eb, struct dns_query, qid);
691 memcpy(ds->rx_msg.area, &query->original_qid, sizeof(query->original_qid));
692
693 /* remove query ids mapping from pending queries list/tree */
694 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200695 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100696 pool_free(dns_query_pool, query);
697 ds->onfly_queries--;
698
Emeric Brunfd647d52021-02-12 20:03:38 +0100699 /* the dns_session is also added in queue of the
700 * wait_sess list where the task processing
701 * response will pop available responses
702 */
Willy Tarreau62e467c2021-10-20 11:02:13 +0200703 BUG_ON(LIST_INLIST(&ds->waiter));
Willy Tarreau2b718102021-04-21 07:32:39 +0200704 LIST_APPEND(&ds->dss->wait_sess, &ds->waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100705
Emeric Brunfd647d52021-02-12 20:03:38 +0100706 /* awake the task processing the responses */
707 task_wakeup(ds->dss->task_rsp, TASK_WOKEN_INIT);
708
709 break;
710 }
711
Willy Tarreau2b718102021-04-21 07:32:39 +0200712 if (!LIST_INLIST(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100713 /* there is no more pending data to read and the con was closed by the server side */
714 if (!co_data(si_oc(si)) && (si_oc(si)->flags & CF_SHUTW)) {
Emeric Brun70455902021-10-20 10:49:53 +0200715 HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
Emeric Brunfd647d52021-02-12 20:03:38 +0100716 goto close;
717 }
718 }
719
720 }
721
Emeric Brun70455902021-10-20 10:49:53 +0200722 HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
Emeric Brunfd647d52021-02-12 20:03:38 +0100723 return;
724close:
725 si_shutw(si);
726 si_shutr(si);
727 si_ic(si)->flags |= CF_READ_NULL;
728}
729
730void dns_queries_flush(struct dns_session *ds)
731{
732 struct dns_query *query, *queryb;
733
734 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
735 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200736 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100737 pool_free(dns_query_pool, query);
738 }
739}
740
741void dns_session_free(struct dns_session *ds)
742{
743 if (ds->rx_msg.area)
744 pool_free(dns_msg_buf, ds->rx_msg.area);
745 if (ds->tx_ring_area)
746 pool_free(dns_msg_buf, ds->tx_ring_area);
747 if (ds->task_exp)
748 task_destroy(ds->task_exp);
749
750 dns_queries_flush(ds);
751
Emeric Brund20dc212021-10-19 15:40:10 +0200752 /* Ensure to remove this session from external lists
753 * Note: we are under the lock of dns_stream_server
754 * which own the heads of those lists.
755 */
756 LIST_DEL_INIT(&ds->waiter);
757 LIST_DEL_INIT(&ds->list);
758
Emeric Brunfd647d52021-02-12 20:03:38 +0100759 ds->dss->cur_conns--;
760 /* Note: this is useless to update
761 * max_active_conns here because
762 * we decrease the value
763 */
Willy Tarreau62e467c2021-10-20 11:02:13 +0200764
765 BUG_ON(!LIST_ISEMPTY(&ds->list));
766 BUG_ON(!LIST_ISEMPTY(&ds->waiter));
767 BUG_ON(!LIST_ISEMPTY(&ds->queries));
768 BUG_ON(!LIST_ISEMPTY(&ds->ring.waiters));
769 BUG_ON(!eb_is_empty(&ds->query_ids));
Emeric Brunfd647d52021-02-12 20:03:38 +0100770 pool_free(dns_session_pool, ds);
771}
772
773static struct appctx *dns_session_create(struct dns_session *ds);
774
775/*
776 * Function to release a DNS tcp session
777 */
778static void dns_session_release(struct appctx *appctx)
779{
780 struct dns_session *ds = appctx->ctx.sft.ptr;
Willy Tarreaue3e648c2021-02-24 17:38:46 +0100781 struct dns_stream_server *dss __maybe_unused;
Emeric Brunfd647d52021-02-12 20:03:38 +0100782
783 if (!ds)
784 return;
785
Willy Tarreaub56a8782021-10-20 14:38:43 +0200786 /* We do not call ring_appctx_detach here
787 * because we want to keep readers counters
788 * to retry a conn with a different appctx.
789 */
790 HA_RWLOCK_WRLOCK(DNS_LOCK, &ds->ring.lock);
791 LIST_DEL_INIT(&appctx->wait_entry);
792 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ds->ring.lock);
793
Emeric Brunfd647d52021-02-12 20:03:38 +0100794 dss = ds->dss;
795
796 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
797 LIST_DEL_INIT(&ds->list);
798
799 if (stopping) {
800 dns_session_free(ds);
801 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
802 return;
803 }
804
805 if (!ds->nb_queries) {
806 /* this is an idle session */
807 /* Note: this is useless to update max_active_sess
808 * here because we decrease idle_conns but
809 * dns_session_free decrease curconns
810 */
811
812 ds->dss->idle_conns--;
813 dns_session_free(ds);
814 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
815 return;
816 }
817
818 if (ds->onfly_queries == ds->nb_queries) {
819 /* the session can be released because
820 * it means that all queries AND
821 * responses are in fly */
822 dns_session_free(ds);
823 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
824 return;
825 }
826
Emeric Brunfd647d52021-02-12 20:03:38 +0100827 /* if there is no pending complete response
828 * message, ensure to reset
829 * message offsets if the session
830 * was closed with an incomplete pending response
831 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200832 if (!LIST_INLIST(&ds->waiter))
Emeric Brunfd647d52021-02-12 20:03:38 +0100833 ds->rx_msg.len = ds->rx_msg.offset = 0;
834
835 /* we flush pending sent queries because we never
836 * have responses
837 */
838 ds->nb_queries -= ds->onfly_queries;
839 dns_queries_flush(ds);
840
841 /* reset offset to be sure to start from message start */
842 ds->tx_msg_offset = 0;
843
844 /* here the ofs and the attached counter
845 * are kept unchanged
846 */
847
848 /* Create a new appctx, We hope we can
849 * create from the release callback! */
850 ds->appctx = dns_session_create(ds);
851 if (!ds->appctx) {
852 dns_session_free(ds);
853 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
854 return;
855 }
856
857 if (ds->nb_queries < DNS_STREAM_MAX_PIPELINED_REQ)
Willy Tarreau2b718102021-04-21 07:32:39 +0200858 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100859
860 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
861}
862
863/* DNS tcp session applet */
864static struct applet dns_session_applet = {
865 .obj_type = OBJ_TYPE_APPLET,
866 .name = "<STRMDNS>", /* used for logging */
867 .fct = dns_session_io_handler,
868 .release = dns_session_release,
869};
870
871/*
872 * Function used to create an appctx for a DNS session
873 */
874static struct appctx *dns_session_create(struct dns_session *ds)
875{
876 struct appctx *appctx;
877 struct session *sess;
878 struct stream *s;
879 struct applet *applet = &dns_session_applet;
880
Willy Tarreaue6124462021-09-13 10:07:38 +0200881 appctx = appctx_new(applet);
Emeric Brunfd647d52021-02-12 20:03:38 +0100882 if (!appctx)
883 goto out_close;
884
885 appctx->ctx.sft.ptr = (void *)ds;
886
887 sess = session_new(ds->dss->srv->proxy, NULL, &appctx->obj_type);
888 if (!sess) {
889 ha_alert("out of memory in peer_session_create().\n");
890 goto out_free_appctx;
891 }
892
893 if ((s = stream_new(sess, &appctx->obj_type, &BUF_NULL)) == NULL) {
894 ha_alert("Failed to initialize stream in peer_session_create().\n");
895 goto out_free_sess;
896 }
897
898
899 s->target = &ds->dss->srv->obj_type;
900 if (!sockaddr_alloc(&s->target_addr, &ds->dss->srv->addr, sizeof(ds->dss->srv->addr)))
901 goto out_free_strm;
902 s->flags = SF_ASSIGNED|SF_ADDR_SET;
903 s->si[1].flags |= SI_FL_NOLINGER;
904
905 s->do_log = NULL;
906 s->uniq_id = 0;
907
908 s->res.flags |= CF_READ_DONTWAIT;
909 /* for rto and rex to eternity to not expire on idle recv:
910 * We are using a syslog server.
911 */
912 s->res.rto = TICK_ETERNITY;
913 s->res.rex = TICK_ETERNITY;
914 ds->appctx = appctx;
915 task_wakeup(s->task, TASK_WOKEN_INIT);
916 return appctx;
917
918 /* Error unrolling */
919 out_free_strm:
Willy Tarreau2b718102021-04-21 07:32:39 +0200920 LIST_DELETE(&s->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100921 pool_free(pool_head_stream, s);
922 out_free_sess:
923 session_free(sess);
924 out_free_appctx:
925 appctx_free(appctx);
926 out_close:
927 return NULL;
928}
929
930/* Task processing expiration of unresponded queries, this one is supposed
931 * to be stuck on the same thread than the appctx handler
932 */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100933static struct task *dns_process_query_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100934{
935 struct dns_session *ds = (struct dns_session *)context;
936 struct dns_query *query, *queryb;
937
938 t->expire = TICK_ETERNITY;
939
940 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
941 if (tick_is_expired(query->expire, now_ms)) {
942 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200943 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100944 pool_free(dns_query_pool, query);
945 ds->onfly_queries--;
946 }
947 else {
948 t->expire = query->expire;
949 break;
950 }
951 }
952
953 return t;
954}
955
956/* Task processing expiration of idle sessions */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100957static struct task *dns_process_idle_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100958{
959 struct dns_stream_server *dss = (struct dns_stream_server *)context;
960 struct dns_session *ds, *dsb;
961 int target = 0;
962 int cur_active_conns;
963
964 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
965
966
967 cur_active_conns = dss->cur_conns - dss->idle_conns;
968 if (cur_active_conns > dss->max_active_conns)
969 dss->max_active_conns = cur_active_conns;
970
971 target = (dss->max_active_conns - cur_active_conns) / 2;
972 list_for_each_entry_safe(ds, dsb, &dss->idle_sess, list) {
973 if (!target)
974 break;
975
976 /* remove conn to pending list to ensure it won't be reused */
977 LIST_DEL_INIT(&ds->list);
978
979 /* force session shutdown */
980 ds->shutdown = 1;
981
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500982 /* to be sure that the appctx won't miss shutdown */
Emeric Brunfd647d52021-02-12 20:03:38 +0100983 __ha_barrier_store();
984
985 /* wake appctx to perform the shutdown */
986 appctx_wakeup(ds->appctx);
987 }
988
989 /* reset max to current active conns */
990 dss->max_active_conns = cur_active_conns;
991
992 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
993
994 t->expire = tick_add(now_ms, 5000);
995
996 return t;
997}
998
999struct dns_session *dns_session_new(struct dns_stream_server *dss)
1000{
1001 struct dns_session *ds;
1002
1003 if (dss->maxconn && (dss->maxconn <= dss->cur_conns))
1004 return NULL;
1005
1006 ds = pool_alloc(dns_session_pool);
1007 if (!ds)
1008 return NULL;
1009
1010 ds->ofs = ~0;
1011 ds->dss = dss;
1012 LIST_INIT(&ds->list);
1013 LIST_INIT(&ds->queries);
1014 LIST_INIT(&ds->waiter);
1015 ds->rx_msg.offset = ds->rx_msg.len = 0;
1016 ds->rx_msg.area = NULL;
1017 ds->tx_ring_area = NULL;
1018 ds->task_exp = NULL;
1019 ds->appctx = NULL;
1020 ds->shutdown = 0;
1021 ds->nb_queries = 0;
1022 ds->query_ids = EB_ROOT_UNIQUE;
1023 ds->rx_msg.area = pool_alloc(dns_msg_buf);
1024 if (!ds->rx_msg.area)
1025 goto error;
1026
1027 ds->tx_ring_area = pool_alloc(dns_msg_buf);
1028 if (!ds->tx_ring_area)
1029 goto error;
1030
1031 ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE);
Christopher Faulet1a1b6742021-03-04 16:53:27 +01001032 /* never fail because it is the first watcher attached to the ring */
1033 DISGUISE(ring_attach(&ds->ring));
Emeric Brunfd647d52021-02-12 20:03:38 +01001034
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001035 if ((ds->task_exp = task_new_here()) == NULL)
Emeric Brunfd647d52021-02-12 20:03:38 +01001036 goto error;
1037
1038 ds->task_exp->process = dns_process_query_exp;
1039 ds->task_exp->context = ds;
1040
1041 ds->appctx = dns_session_create(ds);
1042 if (!ds->appctx)
1043 goto error;
1044
1045 dss->cur_conns++;
1046
1047 return ds;
1048
1049error:
1050 if (ds->task_exp)
1051 task_destroy(ds->task_exp);
1052 if (ds->rx_msg.area)
1053 pool_free(dns_msg_buf, ds->rx_msg.area);
1054 if (ds->tx_ring_area)
1055 pool_free(dns_msg_buf, ds->tx_ring_area);
1056
1057 pool_free(dns_session_pool, ds);
1058
1059 return NULL;
1060}
1061
/*
 * Task used to consume pending messages from nameserver ring
 * and forward them to dns_session ring.
 * Note: If no slot found a new dns_session is allocated
 *
 * <context> is the dns_nameserver. The whole processing is performed with
 * the stream server's spinlock held and the request ring read-locked. For
 * each message, the first session of the free list is tried first, then the
 * first session of the idle list, and finally a new session is created. A
 * message for which no session could be obtained is still consumed and is
 * accounted in the nameserver's snd_error counter.
 */
static struct task *dns_process_req(struct task *t, void *context, unsigned int state)
{
	struct dns_nameserver *ns = (struct dns_nameserver *)context;
	struct dns_stream_server *dss = ns->stream;
	struct ring *ring = dss->ring_req;
	struct buffer *buf = &ring->buf;
	uint64_t msg_len;
	size_t len, cnt, ofs;
	struct dns_session *ds, *ads;
	HA_SPIN_LOCK(DNS_LOCK, &dss->lock);

	/* resume from the offset saved at the end of the previous run */
	ofs = dss->ofs_req;

	HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);

	/* explanation for the initialization below: it would be better to do
	 * this in the parsing function but this would occasionally result in
	 * dropped events because we'd take a reference on the oldest message
	 * and keep it while being scheduled. Thus instead let's take it the
	 * first time we enter here so that we have a chance to pass many
	 * existing messages before grabbing a reference to a location. This
	 * value cannot be produced after initialization.
	 */
	if (unlikely(ofs == ~0)) {
		ofs = 0;
		HA_ATOMIC_INC(b_peek(buf, ofs));
		ofs += ring->ofs;
	}

	/* we were already there, adjust the offset to be relative to
	 * the buffer's head and remove us from the counter.
	 */
	ofs -= ring->ofs;
	BUG_ON(ofs >= buf->size);
	HA_ATOMIC_DEC(b_peek(buf, ofs));

	while (ofs + 1 < b_data(buf)) {
		struct ist myist;

		/* skip the one-byte counter updated by the HA_ATOMIC_INC/DEC
		 * calls above, then read the message length as a varint.
		 */
		cnt = 1;
		len = b_peek_varint(buf, ofs + cnt, &msg_len);
		if (!len)
			break;
		cnt += len;
		BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
		if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
			/* too large a message to ever fit, let's skip it */
			ofs += cnt + msg_len;
			continue;
		}

		/* copy the message into the thread-local trash */
		len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);

		myist = ist2(dns_msg_trash, len);

		/* <ads> is the session which accepted the message, if any */
		ads = NULL;
		/* try to push request into active sess with free slot */
		if (!LIST_ISEMPTY(&dss->free_sess)) {
			ds = LIST_NEXT(&dss->free_sess, struct dns_session *, list);

			if (ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) {
				ds->nb_queries++;
				/* session reached its pipelining limit, it no
				 * longer has a free slot.
				 */
				if (ds->nb_queries >= DNS_STREAM_MAX_PIPELINED_REQ)
					LIST_DEL_INIT(&ds->list);
				ads = ds;
			}
			else {
				/* it means we were unable to put a request in this slot,
				 * it may be close to be full so we put it at the end
				 * of free conn list */
				LIST_DEL_INIT(&ds->list);
				LIST_APPEND(&dss->free_sess, &ds->list);
			}
		}

		if (!ads) {
			/* try to push request into idle, this one should have enough free space */
			if (!LIST_ISEMPTY(&dss->idle_sess)) {
				ds = LIST_NEXT(&dss->idle_sess, struct dns_session *, list);

				/* ring is empty so this ring_write should never fail */
				ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
				ds->nb_queries++;
				LIST_DEL_INIT(&ds->list);

				/* the session is not idle anymore */
				ds->dss->idle_conns--;

				/* we may have to update the max_active_conns */
				if (ds->dss->max_active_conns < ds->dss->cur_conns - ds->dss->idle_conns)
					ds->dss->max_active_conns = ds->dss->cur_conns - ds->dss->idle_conns;

				/* since we may have been unable to find a free
				 * session to handle this request, this request may
				 * be large and fill the ring buffer so we prefer to
				 * put this session at the end of the free list. */
				LIST_APPEND(&dss->free_sess, &ds->list);
				ads = ds;
			}
		}

		/* we didn't find a session available with large enough room */
		if (!ads) {
			/* allocate a new session */
			ads = dns_session_new(dss);
			if (ads) {
				/* ring is empty so this ring_write should never fail */
				ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
				ads->nb_queries++;
				LIST_INSERT(&dss->free_sess, &ads->list);
			}
			else
				ns->counters->snd_error++;
		}

		if (ads)
			ns->counters->sent++;

		/* move to the next message, whether this one was sent or not */
		ofs += cnt + len;
	}

	/* take a reference on the location we stopped at and save the offset
	 * for the next run.
	 */
	HA_ATOMIC_INC(b_peek(buf, ofs));
	ofs += ring->ofs;
	dss->ofs_req = ofs;
	HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);


	HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
	return t;
}
1196
1197/*
1198 * Task used to consume response
1199 * Note: upper layer callback is called
1200 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001201static struct task *dns_process_rsp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001202{
1203 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1204
1205 ns->process_responses(ns);
1206
1207 return t;
1208}
1209
1210/* Function used to initialize an TCP nameserver */
1211int dns_stream_init(struct dns_nameserver *ns, struct server *srv)
1212{
1213 struct dns_stream_server *dss = NULL;
1214
1215 dss = calloc(1, sizeof(*dss));
1216 if (!dss) {
1217 ha_alert("memory allocation error initializing dns tcp server '%s'.\n", srv->id);
1218 goto out;
1219 }
1220
1221 dss->srv = srv;
1222 dss->maxconn = srv->maxconn;
1223
1224 dss->ofs_req = ~0; /* init ring offset */
1225 dss->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
1226 if (!dss->ring_req) {
1227 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1228 goto out;
1229 }
1230 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001231 if ((dss->task_req = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001232 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1233 goto out;
1234 }
1235
1236 /* Update task's parameters */
1237 dss->task_req->process = dns_process_req;
1238 dss->task_req->context = ns;
1239
1240 /* attach the task as reader */
1241 if (!ring_attach(dss->ring_req)) {
1242 /* mark server attached to the ring */
1243 ha_alert("server '%s': too many watchers for ring. this should never happen.\n", srv->id);
1244 goto out;
1245 }
1246
1247 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001248 if ((dss->task_rsp = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001249 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1250 goto out;
1251 }
1252
1253 /* Update task's parameters */
1254 dss->task_rsp->process = dns_process_rsp;
1255 dss->task_rsp->context = ns;
1256
1257 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001258 if ((dss->task_idle = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001259 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1260 goto out;
1261 }
1262
1263 /* Update task's parameters */
1264 dss->task_idle->process = dns_process_idle_exp;
1265 dss->task_idle->context = dss;
1266 dss->task_idle->expire = tick_add(now_ms, 5000);
1267
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001268 /* let start the task to free idle conns immediately */
Emeric Brunfd647d52021-02-12 20:03:38 +01001269 task_queue(dss->task_idle);
1270
1271 LIST_INIT(&dss->free_sess);
1272 LIST_INIT(&dss->idle_sess);
1273 LIST_INIT(&dss->wait_sess);
1274 HA_SPIN_INIT(&dss->lock);
1275 ns->stream = dss;
1276 return 0;
1277out:
1278 if (dss && dss->task_rsp)
1279 task_destroy(dss->task_rsp);
1280 if (dss && dss->task_req)
1281 task_destroy(dss->task_req);
1282 if (dss && dss->ring_req)
1283 ring_free(dss->ring_req);
1284
1285 free(dss);
Emeric Brunc9437992021-02-12 19:42:55 +01001286 return -1;
Christopher Faulet67957bd2017-09-27 11:00:59 +02001287}
1288
Emeric Brunc9437992021-02-12 19:42:55 +01001289int init_dns_buffers()
Baptiste Assmann325137d2015-04-13 23:40:55 +02001290{
Emeric Brunc9437992021-02-12 19:42:55 +01001291 dns_msg_trash = malloc(DNS_TCP_MSG_MAX_SIZE);
1292 if (!dns_msg_trash)
1293 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +02001294
Emeric Brunc9437992021-02-12 19:42:55 +01001295 return 1;
1296}
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +02001297
/* Releases the thread-local trash buffer allocated by init_dns_buffers().
 * Registered as a per-thread free function.
 */
void deinit_dns_buffers()
{
	ha_free(&dns_msg_trash);
}
Emeric Brund26a6232021-01-04 13:32:20 +01001302
/* register the per-thread allocation and release of the DNS trash buffer */
REGISTER_PER_THREAD_ALLOC(init_dns_buffers);
REGISTER_PER_THREAD_FREE(deinit_dns_buffers);