blob: 083850d950c3b4685387e4128f8a5888c16ec005 [file] [log] [blame]
Baptiste Assmann325137d2015-04-13 23:40:55 +02001/*
2 * Name server resolution
3 *
Emeric Brunc9437992021-02-12 19:42:55 +01004 * Copyright 2020 Haproxy Technologies
Baptiste Assmann325137d2015-04-13 23:40:55 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <unistd.h>
19
20#include <sys/types.h>
21
Willy Tarreau122eba92020-06-04 10:15:32 +020022#include <haproxy/action.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020023#include <haproxy/api.h>
Willy Tarreau6be78492020-06-05 00:00:29 +020024#include <haproxy/cfgparse.h>
Willy Tarreauf1d32c42020-06-04 21:07:02 +020025#include <haproxy/channel.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020026#include <haproxy/check.h>
Willy Tarreau83487a82020-06-04 20:19:54 +020027#include <haproxy/cli.h>
Willy Tarreau7c18b542020-06-11 09:23:02 +020028#include <haproxy/dgram.h>
Willy Tarreaueb92deb2020-06-04 10:53:16 +020029#include <haproxy/dns.h>
Willy Tarreau8d366972020-05-27 16:10:29 +020030#include <haproxy/errors.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020031#include <haproxy/fd.h>
Willy Tarreauaeed4a82020-06-04 22:01:04 +020032#include <haproxy/log.h>
Emeric Brund26a6232021-01-04 13:32:20 +010033#include <haproxy/ring.h>
Emeric Brunfd647d52021-02-12 20:03:38 +010034#include <haproxy/stream.h>
35#include <haproxy/stream_interface.h>
Willy Tarreau9f9e9fc2021-05-08 13:09:46 +020036#include <haproxy/tools.h>
Baptiste Assmann325137d2015-04-13 23:40:55 +020037
Emeric Brund26a6232021-01-04 13:32:20 +010038static THREAD_LOCAL char *dns_msg_trash;
Baptiste Assmann325137d2015-04-13 23:40:55 +020039
Emeric Brunfd647d52021-02-12 20:03:38 +010040DECLARE_STATIC_POOL(dns_session_pool, "dns_session", sizeof(struct dns_session));
41DECLARE_STATIC_POOL(dns_query_pool, "dns_query", sizeof(struct dns_query));
42DECLARE_STATIC_POOL(dns_msg_buf, "dns_msg_buf", DNS_TCP_MSG_RING_MAX_SIZE);
43
Christopher Faulet67957bd2017-09-27 11:00:59 +020044/* Opens an UDP socket on the namesaver's IP/Port, if required. Returns 0 on
Christopher Faulet1e711be2021-03-04 16:58:35 +010045 * success, -1 otherwise. ns->dgram must be defined.
Baptiste Assmann325137d2015-04-13 23:40:55 +020046 */
Emeric Brund26a6232021-01-04 13:32:20 +010047static int dns_connect_nameserver(struct dns_nameserver *ns)
Baptiste Assmann325137d2015-04-13 23:40:55 +020048{
Christopher Faulet1e711be2021-03-04 16:58:35 +010049 struct dgram_conn *dgram = &ns->dgram->conn;
50 int fd;
Baptiste Assmann325137d2015-04-13 23:40:55 +020051
Christopher Faulet1e711be2021-03-04 16:58:35 +010052 /* Already connected */
53 if (dgram->t.sock.fd != -1)
Emeric Brun526b7922021-02-15 14:28:27 +010054 return 0;
Christopher Faulet1e711be2021-03-04 16:58:35 +010055
56 /* Create an UDP socket and connect it on the nameserver's IP/Port */
57 if ((fd = socket(dgram->addr.to.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
58 send_log(NULL, LOG_WARNING,
59 "DNS : section '%s': can't create socket for nameserver '%s'.\n",
60 ns->counters->pid, ns->id);
61 return -1;
62 }
63 if (connect(fd, (struct sockaddr*)&dgram->addr.to, get_addr_len(&dgram->addr.to)) == -1) {
64 send_log(NULL, LOG_WARNING,
65 "DNS : section '%s': can't connect socket for nameserver '%s'.\n",
66 ns->counters->id, ns->id);
67 close(fd);
68 return -1;
Emeric Brunc9437992021-02-12 19:42:55 +010069 }
Emeric Brun526b7922021-02-15 14:28:27 +010070
Christopher Faulet1e711be2021-03-04 16:58:35 +010071 /* Make the socket non blocking */
72 fcntl(fd, F_SETFL, O_NONBLOCK);
73
74 /* Add the fd in the fd list and update its parameters */
75 dgram->t.sock.fd = fd;
76 fd_insert(fd, dgram, dgram_fd_handler, MAX_THREADS_MASK);
77 fd_want_recv(fd);
78 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +020079}
80
Emeric Brund26a6232021-01-04 13:32:20 +010081/* Sends a message to a name server
82 * It returns message length on success
83 * or -1 in error case
84 * 0 is returned in case of output ring buffer is full
85 */
86int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len)
87{
88 int ret = -1;
89
90 if (ns->dgram) {
91 struct dgram_conn *dgram = &ns->dgram->conn;
92 int fd = dgram->t.sock.fd;
93
94 if (dgram->t.sock.fd == -1) {
95 if (dns_connect_nameserver(ns) == -1)
96 return -1;
97 fd = dgram->t.sock.fd;
98 }
99
100 ret = send(fd, buf, len, 0);
101 if (ret < 0) {
102 if (errno == EAGAIN) {
103 struct ist myist;
104
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100105 myist = ist2(buf, len);
Emeric Brund26a6232021-01-04 13:32:20 +0100106 ret = ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
107 if (!ret) {
108 ns->counters->snd_error++;
109 return -1;
110 }
111 fd_cant_send(fd);
112 return ret;
113 }
114 ns->counters->snd_error++;
115 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100116 dgram->t.sock.fd = -1;
117 return -1;
118 }
119 ns->counters->sent++;
120 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100121 else if (ns->stream) {
122 struct ist myist;
123
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100124 myist = ist2(buf, len);
Emeric Brunfd647d52021-02-12 20:03:38 +0100125 ret = ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
126 if (!ret) {
127 ns->counters->snd_error++;
128 return -1;
129 }
130 task_wakeup(ns->stream->task_req, TASK_WOKEN_MSG);
131 return ret;
132 }
Emeric Brund26a6232021-01-04 13:32:20 +0100133
134 return ret;
135}
136
Emeric Brunfd647d52021-02-12 20:03:38 +0100137void dns_session_free(struct dns_session *);
138
Emeric Brund26a6232021-01-04 13:32:20 +0100139/* Receives a dns message
140 * Returns message length
141 * 0 is returned if no more message available
142 * -1 in error case
143 */
144ssize_t dns_recv_nameserver(struct dns_nameserver *ns, void *data, size_t size)
145{
146 ssize_t ret = -1;
147
148 if (ns->dgram) {
149 struct dgram_conn *dgram = &ns->dgram->conn;
150 int fd = dgram->t.sock.fd;
151
152 if (fd == -1)
153 return -1;
154
155 if ((ret = recv(fd, data, size, 0)) < 0) {
156 if (errno == EAGAIN) {
157 fd_cant_recv(fd);
158 return 0;
159 }
160 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100161 dgram->t.sock.fd = -1;
162 return -1;
163 }
164 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100165 else if (ns->stream) {
166 struct dns_stream_server *dss = ns->stream;
167 struct dns_session *ds;
168
169 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
170
171 if (!LIST_ISEMPTY(&dss->wait_sess)) {
172 ds = LIST_NEXT(&dss->wait_sess, struct dns_session *, waiter);
173 fprintf(stderr, "ds: %p\n", ds);
174 ret = ds->rx_msg.len < size ? ds->rx_msg.len : size;
175 memcpy(data, ds->rx_msg.area, ret);
176
177 ds->rx_msg.len = 0;
178
179 /* This barrier is here to ensure that all data is
180 * stored if the appctx detect the elem is out of the list */
181 __ha_barrier_store();
182
183 LIST_DEL_INIT(&ds->waiter);
184
185 if (ds->appctx) {
186 /* This second barrier is here to ensure that
187 * the waked up appctx won't miss that the
188 * elem is removed from the list */
189 __ha_barrier_store();
190
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500191 /* awake appctx because it may have other
Emeric Brunfd647d52021-02-12 20:03:38 +0100192 * message to receive
193 */
194 appctx_wakeup(ds->appctx);
195
196 /* dns_session could already be into free_sess list
197 * so we firstly remove it */
198 LIST_DEL_INIT(&ds->list);
199
200 /* decrease nb_queries to free a slot for a new query on that sess */
201 ds->nb_queries--;
202 if (ds->nb_queries) {
203 /* it remains pipelined unanswered request
204 * into this session but we just decrease
205 * the counter so the session
206 * can not be full of pipelined requests
207 * so we can add if to free_sess list
208 * to receive a new request
209 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200210 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100211 }
212 else {
213 /* there is no more pipelined requests
214 * into this session, so we move it
215 * to idle_sess list */
Willy Tarreau2b718102021-04-21 07:32:39 +0200216 LIST_INSERT(&ds->dss->idle_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100217
218 /* update the counter of idle sessions */
219 ds->dss->idle_conns++;
220
221 /* Note: this is useless there to update
222 * the max_active_conns since we increase
223 * the idle count */
224 }
225 }
226 else {
227 /* there is no more appctx for this session
228 * it means it is ready to die
229 */
230 dns_session_free(ds);
231 }
232
233
234 }
235
236 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
237 }
Emeric Brund26a6232021-01-04 13:32:20 +0100238
239 return ret;
240}
241
242static void dns_resolve_recv(struct dgram_conn *dgram)
243{
244 struct dns_nameserver *ns;
245 int fd;
246
247 fd = dgram->t.sock.fd;
248
249 /* check if ready for reading */
250 if (!fd_recv_ready(fd))
251 return;
252
253 /* no need to go further if we can't retrieve the nameserver */
254 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200255 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100256 fd_stop_recv(fd);
257 return;
258 }
259
260 ns->process_responses(ns);
261}
262
263/* Called when a dns network socket is ready to send data */
264static void dns_resolve_send(struct dgram_conn *dgram)
265{
266 int fd;
267 struct dns_nameserver *ns;
268 struct ring *ring;
269 struct buffer *buf;
270 uint64_t msg_len;
271 size_t len, cnt, ofs;
272
273 fd = dgram->t.sock.fd;
274
275 /* check if ready for sending */
276 if (!fd_send_ready(fd))
277 return;
278
279 /* no need to go further if we can't retrieve the nameserver */
280 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200281 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100282 fd_stop_send(fd);
283 return;
284 }
285
286 ring = ns->dgram->ring_req;
287 buf = &ring->buf;
288
289 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
290 ofs = ns->dgram->ofs_req;
291
292 /* explanation for the initialization below: it would be better to do
293 * this in the parsing function but this would occasionally result in
294 * dropped events because we'd take a reference on the oldest message
295 * and keep it while being scheduled. Thus instead let's take it the
296 * first time we enter here so that we have a chance to pass many
297 * existing messages before grabbing a reference to a location. This
298 * value cannot be produced after initialization.
299 */
300 if (unlikely(ofs == ~0)) {
301 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +0200302 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100303 ofs += ring->ofs;
304 }
305
306 /* we were already there, adjust the offset to be relative to
307 * the buffer's head and remove us from the counter.
308 */
309 ofs -= ring->ofs;
310 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200311 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100312
313 while (ofs + 1 < b_data(buf)) {
314 int ret;
315
316 cnt = 1;
317 len = b_peek_varint(buf, ofs + cnt, &msg_len);
318 if (!len)
319 break;
320 cnt += len;
321 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
322 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
323 /* too large a message to ever fit, let's skip it */
324 ofs += cnt + msg_len;
325 continue;
326 }
327
328 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
329
330 ret = send(fd, dns_msg_trash, len, 0);
331 if (ret < 0) {
332 if (errno == EAGAIN) {
333 fd_cant_send(fd);
334 goto out;
335 }
336 ns->counters->snd_error++;
337 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100338 fd = dgram->t.sock.fd = -1;
339 goto out;
340 }
341 ns->counters->sent++;
342
343 ofs += cnt + len;
344 }
345
346 /* we don't want/need to be waked up any more for sending
347 * because all ring content is sent */
348 fd_stop_send(fd);
349
350out:
351
Willy Tarreau4781b152021-04-06 13:53:36 +0200352 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100353 ofs += ring->ofs;
354 ns->dgram->ofs_req = ofs;
355 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
356
357}
358
Emeric Brunc9437992021-02-12 19:42:55 +0100359/* proto_udp callback functions for a DNS resolution */
360struct dgram_data_cb dns_dgram_cb = {
361 .recv = dns_resolve_recv,
362 .send = dns_resolve_send,
363};
Baptiste Assmann325137d2015-04-13 23:40:55 +0200364
Emeric Brunc9437992021-02-12 19:42:55 +0100365int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk)
Baptiste Assmann325137d2015-04-13 23:40:55 +0200366{
Emeric Brunc9437992021-02-12 19:42:55 +0100367 struct dns_dgram_server *dgram;
Baptiste Assmann201c07f2017-05-22 15:17:15 +0200368
Emeric Brunc9437992021-02-12 19:42:55 +0100369 if ((dgram = calloc(1, sizeof(*dgram))) == NULL)
Christopher Faulet67957bd2017-09-27 11:00:59 +0200370 return -1;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200371
Emeric Brunc9437992021-02-12 19:42:55 +0100372 /* Leave dgram partially initialized, no FD attached for
373 * now. */
374 dgram->conn.owner = ns;
375 dgram->conn.data = &dns_dgram_cb;
376 dgram->conn.t.sock.fd = -1;
377 dgram->conn.addr.to = *sk;
378 ns->dgram = dgram;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200379
Emeric Brunc9437992021-02-12 19:42:55 +0100380 dgram->ofs_req = ~0; /* init ring offset */
381 dgram->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
382 if (!dgram->ring_req) {
383 ha_alert("memory allocation error initializing the ring for nameserver.\n");
384 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200385 }
386
Emeric Brunc9437992021-02-12 19:42:55 +0100387 /* attach the task as reader */
388 if (!ring_attach(dgram->ring_req)) {
389 /* mark server attached to the ring */
390 ha_alert("nameserver sets too many watchers > 255 on ring. This is a bug and should not happen.\n");
391 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200392 }
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200393 return 0;
Emeric Brunc9437992021-02-12 19:42:55 +0100394out:
395 if (dgram->ring_req)
396 ring_free(dgram->ring_req);
Christopher Fauletd6c6b5f2020-09-08 10:27:24 +0200397
Emeric Brunc9437992021-02-12 19:42:55 +0100398 free(dgram);
Olivier Houchard2ec2db92018-01-08 16:28:57 +0100399
Emeric Brunfd647d52021-02-12 20:03:38 +0100400 return -1;
401}
402
403/*
404 * IO Handler to handle message push to dns tcp server
405 */
406static void dns_session_io_handler(struct appctx *appctx)
407{
408 struct stream_interface *si = appctx->owner;
409 struct dns_session *ds = appctx->ctx.sft.ptr;
410 struct ring *ring = &ds->ring;
411 struct buffer *buf = &ring->buf;
412 uint64_t msg_len;
413 int available_room;
414 size_t len, cnt, ofs;
415 int ret = 0;
416
417 /* if stopping was requested, close immediately */
418 if (unlikely(stopping))
419 goto close;
420
421 /* we want to be sure to not miss that we have been awaked for a shutdown */
422 __ha_barrier_load();
423
424 /* that means the connection was requested to shutdown
425 * for instance idle expire */
426 if (ds->shutdown)
427 goto close;
428
429 /* an error was detected */
430 if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
431 goto close;
432
433 /* con closed by server side, we will skip data write and drain data from channel */
434 if ((si_oc(si)->flags & CF_SHUTW)) {
435 goto read;
436 }
437
438 /* if the connection is not established, inform the stream that we want
439 * to be notified whenever the connection completes.
440 */
441 if (si_opposite(si)->state < SI_ST_EST) {
442 si_cant_get(si);
443 si_rx_conn_blk(si);
444 si_rx_endp_more(si);
445 return;
446 }
447
448
449 ofs = ds->ofs;
450
451 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
452 LIST_DEL_INIT(&appctx->wait_entry);
453 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
454
455 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
456
457 /* explanation for the initialization below: it would be better to do
458 * this in the parsing function but this would occasionally result in
459 * dropped events because we'd take a reference on the oldest message
460 * and keep it while being scheduled. Thus instead let's take it the
461 * first time we enter here so that we have a chance to pass many
462 * existing messages before grabbing a reference to a location. This
463 * value cannot be produced after initialization.
464 */
465 if (unlikely(ofs == ~0)) {
466 ofs = 0;
467
Willy Tarreau4781b152021-04-06 13:53:36 +0200468 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100469 ofs += ring->ofs;
470 }
471
472 /* in this loop, ofs always points to the counter byte that precedes
473 * the message so that we can take our reference there if we have to
474 * stop before the end (ret=0).
475 */
476 if (si_opposite(si)->state == SI_ST_EST) {
477 /* we were already there, adjust the offset to be relative to
478 * the buffer's head and remove us from the counter.
479 */
480 ofs -= ring->ofs;
481 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200482 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100483
484 ret = 1;
485 while (ofs + 1 < b_data(buf)) {
486 struct dns_query *query;
487 uint16_t original_qid;
488 uint16_t new_qid;
489
490 cnt = 1;
491 len = b_peek_varint(buf, ofs + cnt, &msg_len);
492 if (!len)
493 break;
494 cnt += len;
495 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
496
497 /* retrieve available room on output channel */
498 available_room = channel_recv_max(si_ic(si));
499
500 /* tx_msg_offset null means we are at the start of a new message */
501 if (!ds->tx_msg_offset) {
502 uint16_t slen;
503
504 /* check if there is enough room to put message len and query id */
505 if (available_room < sizeof(slen) + sizeof(new_qid)) {
506 si_rx_room_blk(si);
507 ret = 0;
508 break;
509 }
510
511 /* put msg len into then channel */
512 slen = (uint16_t)msg_len;
513 slen = htons(slen);
514 ci_putblk(si_ic(si), (char *)&slen, sizeof(slen));
515 available_room -= sizeof(slen);
516
517 /* backup original query id */
518 len = b_getblk(buf, (char *)&original_qid, sizeof(original_qid), ofs + cnt);
Emeric Brun538bb042021-02-15 13:58:06 +0100519 if (!len) {
520 /* should never happen since messages are atomically
521 * written into ring
522 */
523 ret = 0;
524 break;
525 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100526
527 /* generates new query id */
528 new_qid = ++ds->query_counter;
529 new_qid = htons(new_qid);
530
531 /* put new query id into the channel */
532 ci_putblk(si_ic(si), (char *)&new_qid, sizeof(new_qid));
533 available_room -= sizeof(new_qid);
534
535 /* keep query id mapping */
536
537 query = pool_alloc(dns_query_pool);
538 if (query) {
539 query->qid.key = new_qid;
540 query->original_qid = original_qid;
541 query->expire = tick_add(now_ms, 5000);
542 LIST_INIT(&query->list);
543 if (LIST_ISEMPTY(&ds->queries)) {
544 /* enable task to handle expire */
545 ds->task_exp->expire = query->expire;
546 /* ensure this will be executed by the same
547 * thread than ds_session_release
548 * to ensure session_release is free
549 * to destroy the task */
550 task_queue(ds->task_exp);
551 }
Willy Tarreau2b718102021-04-21 07:32:39 +0200552 LIST_APPEND(&ds->queries, &query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100553 eb32_insert(&ds->query_ids, &query->qid);
554 ds->onfly_queries++;
555 }
556
557 /* update the tx_offset to handle output in 16k streams */
558 ds->tx_msg_offset = sizeof(original_qid);
559
560 }
561
562 /* check if it remains available room on output chan */
563 if (unlikely(!available_room)) {
564 si_rx_room_blk(si);
565 ret = 0;
566 break;
567 }
568
569 chunk_reset(&trash);
570 if ((msg_len - ds->tx_msg_offset) > available_room) {
571 /* remaining msg data is too large to be written in output channel at one time */
572
573 len = b_getblk(buf, trash.area, available_room, ofs + cnt + ds->tx_msg_offset);
574
575 /* update offset to complete mesg forwarding later */
576 ds->tx_msg_offset += len;
577 }
578 else {
579 /* remaining msg data can be written in output channel at one time */
580 len = b_getblk(buf, trash.area, msg_len - ds->tx_msg_offset, ofs + cnt + ds->tx_msg_offset);
581
582 /* reset tx_msg_offset to mark forward fully processed */
583 ds->tx_msg_offset = 0;
584 }
585 trash.data += len;
586
Emeric Brun743afee2021-02-15 14:12:06 +0100587 if (ci_putchk(si_ic(si), &trash) == -1) {
588 /* should never happen since we
589 * check available_room is large
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500590 * enough here.
Emeric Brun743afee2021-02-15 14:12:06 +0100591 */
592 si_rx_room_blk(si);
593 ret = 0;
594 break;
595 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100596
597 if (ds->tx_msg_offset) {
598 /* msg was not fully processed, we must be awake to drain pending data */
599
600 si_rx_room_blk(si);
601 ret = 0;
602 break;
603 }
604 /* switch to next message */
605 ofs += cnt + msg_len;
606 }
607
Willy Tarreau4781b152021-04-06 13:53:36 +0200608 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100609 ofs += ring->ofs;
610 ds->ofs = ofs;
611 }
612 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
613
614 if (ret) {
615 /* let's be woken up once new request to write arrived */
616 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
Willy Tarreau2b718102021-04-21 07:32:39 +0200617 LIST_APPEND(&ring->waiters, &appctx->wait_entry);
Emeric Brunfd647d52021-02-12 20:03:38 +0100618 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
619 si_rx_endp_done(si);
620 }
621
622read:
623
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500624 /* if session is not a waiter it means there is no committed
Emeric Brunfd647d52021-02-12 20:03:38 +0100625 * message into rx_buf and we are free to use it
626 * Note: we need a load barrier here to not miss the
627 * delete from the list
628 */
629 __ha_barrier_load();
Willy Tarreau2b718102021-04-21 07:32:39 +0200630 if (!LIST_INLIST(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100631 while (1) {
632 uint16_t query_id;
633 struct eb32_node *eb;
634 struct dns_query *query;
635
636 if (!ds->rx_msg.len) {
637 /* next message len is not fully available into the channel */
638 if (co_data(si_oc(si)) < 2)
639 break;
640
641 /* retrieve message len */
642 co_getblk(si_oc(si), (char *)&msg_len, 2, 0);
643
644 /* mark as consumed */
645 co_skip(si_oc(si), 2);
646
647 /* store message len */
648 ds->rx_msg.len = ntohs(msg_len);
649 }
650
651 if (!co_data(si_oc(si))) {
652 /* we need more data but nothing is available */
653 break;
654 }
655
656 if (co_data(si_oc(si)) + ds->rx_msg.offset < ds->rx_msg.len) {
657 /* message only partially available */
658
659 /* read available data */
660 co_getblk(si_oc(si), ds->rx_msg.area + ds->rx_msg.offset, co_data(si_oc(si)), 0);
661
662 /* update message offset */
663 ds->rx_msg.offset += co_data(si_oc(si));
664
665 /* consume all pending data from the channel */
666 co_skip(si_oc(si), co_data(si_oc(si)));
667
668 /* we need to wait for more data */
669 break;
670 }
671
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500672 /* enough data is available into the channel to read the message until the end */
Emeric Brunfd647d52021-02-12 20:03:38 +0100673
674 /* read from the channel until the end of the message */
675 co_getblk(si_oc(si), ds->rx_msg.area + ds->rx_msg.offset, ds->rx_msg.len - ds->rx_msg.offset, 0);
676
677 /* consume all data until the end of the message from the channel */
678 co_skip(si_oc(si), ds->rx_msg.len - ds->rx_msg.offset);
679
680 /* reset reader offset to 0 for next message reand */
681 ds->rx_msg.offset = 0;
682
683 /* try remap query id to original */
684 memcpy(&query_id, ds->rx_msg.area, sizeof(query_id));
685 eb = eb32_lookup(&ds->query_ids, query_id);
686 if (!eb) {
687 /* query id not found means we have an unknown corresponding
688 * request, perhaps server's bug or or the query reached
689 * timeout
690 */
691 ds->rx_msg.len = 0;
692 continue;
693 }
694
695 /* re-map the original query id set by the requester */
696 query = eb32_entry(eb, struct dns_query, qid);
697 memcpy(ds->rx_msg.area, &query->original_qid, sizeof(query->original_qid));
698
699 /* remove query ids mapping from pending queries list/tree */
700 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200701 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100702 pool_free(dns_query_pool, query);
703 ds->onfly_queries--;
704
705 /* lock the dns_stream_server containing lists heads */
706 HA_SPIN_LOCK(DNS_LOCK, &ds->dss->lock);
707
708 /* the dns_session is also added in queue of the
709 * wait_sess list where the task processing
710 * response will pop available responses
711 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200712 LIST_APPEND(&ds->dss->wait_sess, &ds->waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100713
714 /* lock the dns_stream_server containing lists heads */
715 HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
716
717 /* awake the task processing the responses */
718 task_wakeup(ds->dss->task_rsp, TASK_WOKEN_INIT);
719
720 break;
721 }
722
Willy Tarreau2b718102021-04-21 07:32:39 +0200723 if (!LIST_INLIST(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100724 /* there is no more pending data to read and the con was closed by the server side */
725 if (!co_data(si_oc(si)) && (si_oc(si)->flags & CF_SHUTW)) {
726 goto close;
727 }
728 }
729
730 }
731
732
733 return;
734close:
735 si_shutw(si);
736 si_shutr(si);
737 si_ic(si)->flags |= CF_READ_NULL;
738}
739
740void dns_queries_flush(struct dns_session *ds)
741{
742 struct dns_query *query, *queryb;
743
744 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
745 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200746 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100747 pool_free(dns_query_pool, query);
748 }
749}
750
751void dns_session_free(struct dns_session *ds)
752{
753 if (ds->rx_msg.area)
754 pool_free(dns_msg_buf, ds->rx_msg.area);
755 if (ds->tx_ring_area)
756 pool_free(dns_msg_buf, ds->tx_ring_area);
757 if (ds->task_exp)
758 task_destroy(ds->task_exp);
759
760 dns_queries_flush(ds);
761
762 ds->dss->cur_conns--;
763 /* Note: this is useless to update
764 * max_active_conns here because
765 * we decrease the value
766 */
767 pool_free(dns_session_pool, ds);
768}
769
770static struct appctx *dns_session_create(struct dns_session *ds);
771
772/*
773 * Function to release a DNS tcp session
774 */
775static void dns_session_release(struct appctx *appctx)
776{
777 struct dns_session *ds = appctx->ctx.sft.ptr;
Willy Tarreaue3e648c2021-02-24 17:38:46 +0100778 struct dns_stream_server *dss __maybe_unused;
Emeric Brunfd647d52021-02-12 20:03:38 +0100779
780 if (!ds)
781 return;
782
783 dss = ds->dss;
784
785 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
786 LIST_DEL_INIT(&ds->list);
787
788 if (stopping) {
789 dns_session_free(ds);
790 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
791 return;
792 }
793
794 if (!ds->nb_queries) {
795 /* this is an idle session */
796 /* Note: this is useless to update max_active_sess
797 * here because we decrease idle_conns but
798 * dns_session_free decrease curconns
799 */
800
801 ds->dss->idle_conns--;
802 dns_session_free(ds);
803 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
804 return;
805 }
806
807 if (ds->onfly_queries == ds->nb_queries) {
808 /* the session can be released because
809 * it means that all queries AND
810 * responses are in fly */
811 dns_session_free(ds);
812 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
813 return;
814 }
815
816 /* We do not call ring_appctx_detach here
817 * because we want to keep readers counters
818 * to retry a con with a different appctx*/
819 HA_RWLOCK_WRLOCK(DNS_LOCK, &ds->ring.lock);
820 LIST_DEL_INIT(&appctx->wait_entry);
821 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ds->ring.lock);
822
823 /* if there is no pending complete response
824 * message, ensure to reset
825 * message offsets if the session
826 * was closed with an incomplete pending response
827 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200828 if (!LIST_INLIST(&ds->waiter))
Emeric Brunfd647d52021-02-12 20:03:38 +0100829 ds->rx_msg.len = ds->rx_msg.offset = 0;
830
831 /* we flush pending sent queries because we never
832 * have responses
833 */
834 ds->nb_queries -= ds->onfly_queries;
835 dns_queries_flush(ds);
836
837 /* reset offset to be sure to start from message start */
838 ds->tx_msg_offset = 0;
839
840 /* here the ofs and the attached counter
841 * are kept unchanged
842 */
843
844 /* Create a new appctx, We hope we can
845 * create from the release callback! */
846 ds->appctx = dns_session_create(ds);
847 if (!ds->appctx) {
848 dns_session_free(ds);
849 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
850 return;
851 }
852
853 if (ds->nb_queries < DNS_STREAM_MAX_PIPELINED_REQ)
Willy Tarreau2b718102021-04-21 07:32:39 +0200854 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100855
856 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
857}
858
859/* DNS tcp session applet */
860static struct applet dns_session_applet = {
861 .obj_type = OBJ_TYPE_APPLET,
862 .name = "<STRMDNS>", /* used for logging */
863 .fct = dns_session_io_handler,
864 .release = dns_session_release,
865};
866
867/*
868 * Function used to create an appctx for a DNS session
869 */
870static struct appctx *dns_session_create(struct dns_session *ds)
871{
872 struct appctx *appctx;
873 struct session *sess;
874 struct stream *s;
875 struct applet *applet = &dns_session_applet;
876
877 appctx = appctx_new(applet, tid_bit);
878 if (!appctx)
879 goto out_close;
880
881 appctx->ctx.sft.ptr = (void *)ds;
882
883 sess = session_new(ds->dss->srv->proxy, NULL, &appctx->obj_type);
884 if (!sess) {
885 ha_alert("out of memory in peer_session_create().\n");
886 goto out_free_appctx;
887 }
888
889 if ((s = stream_new(sess, &appctx->obj_type, &BUF_NULL)) == NULL) {
890 ha_alert("Failed to initialize stream in peer_session_create().\n");
891 goto out_free_sess;
892 }
893
894
895 s->target = &ds->dss->srv->obj_type;
896 if (!sockaddr_alloc(&s->target_addr, &ds->dss->srv->addr, sizeof(ds->dss->srv->addr)))
897 goto out_free_strm;
898 s->flags = SF_ASSIGNED|SF_ADDR_SET;
899 s->si[1].flags |= SI_FL_NOLINGER;
900
901 s->do_log = NULL;
902 s->uniq_id = 0;
903
904 s->res.flags |= CF_READ_DONTWAIT;
905 /* for rto and rex to eternity to not expire on idle recv:
906 * We are using a syslog server.
907 */
908 s->res.rto = TICK_ETERNITY;
909 s->res.rex = TICK_ETERNITY;
910 ds->appctx = appctx;
911 task_wakeup(s->task, TASK_WOKEN_INIT);
912 return appctx;
913
914 /* Error unrolling */
915 out_free_strm:
Willy Tarreau2b718102021-04-21 07:32:39 +0200916 LIST_DELETE(&s->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100917 pool_free(pool_head_stream, s);
918 out_free_sess:
919 session_free(sess);
920 out_free_appctx:
921 appctx_free(appctx);
922 out_close:
923 return NULL;
924}
925
926/* Task processing expiration of unresponded queries, this one is supposed
927 * to be stuck on the same thread than the appctx handler
928 */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100929static struct task *dns_process_query_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100930{
931 struct dns_session *ds = (struct dns_session *)context;
932 struct dns_query *query, *queryb;
933
934 t->expire = TICK_ETERNITY;
935
936 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
937 if (tick_is_expired(query->expire, now_ms)) {
938 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200939 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100940 pool_free(dns_query_pool, query);
941 ds->onfly_queries--;
942 }
943 else {
944 t->expire = query->expire;
945 break;
946 }
947 }
948
949 return t;
950}
951
952/* Task processing expiration of idle sessions */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100953static struct task *dns_process_idle_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100954{
955 struct dns_stream_server *dss = (struct dns_stream_server *)context;
956 struct dns_session *ds, *dsb;
957 int target = 0;
958 int cur_active_conns;
959
960 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
961
962
963 cur_active_conns = dss->cur_conns - dss->idle_conns;
964 if (cur_active_conns > dss->max_active_conns)
965 dss->max_active_conns = cur_active_conns;
966
967 target = (dss->max_active_conns - cur_active_conns) / 2;
968 list_for_each_entry_safe(ds, dsb, &dss->idle_sess, list) {
969 if (!target)
970 break;
971
972 /* remove conn to pending list to ensure it won't be reused */
973 LIST_DEL_INIT(&ds->list);
974
975 /* force session shutdown */
976 ds->shutdown = 1;
977
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500978 /* to be sure that the appctx won't miss shutdown */
Emeric Brunfd647d52021-02-12 20:03:38 +0100979 __ha_barrier_store();
980
981 /* wake appctx to perform the shutdown */
982 appctx_wakeup(ds->appctx);
983 }
984
985 /* reset max to current active conns */
986 dss->max_active_conns = cur_active_conns;
987
988 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
989
990 t->expire = tick_add(now_ms, 5000);
991
992 return t;
993}
994
995struct dns_session *dns_session_new(struct dns_stream_server *dss)
996{
997 struct dns_session *ds;
998
999 if (dss->maxconn && (dss->maxconn <= dss->cur_conns))
1000 return NULL;
1001
1002 ds = pool_alloc(dns_session_pool);
1003 if (!ds)
1004 return NULL;
1005
1006 ds->ofs = ~0;
1007 ds->dss = dss;
1008 LIST_INIT(&ds->list);
1009 LIST_INIT(&ds->queries);
1010 LIST_INIT(&ds->waiter);
1011 ds->rx_msg.offset = ds->rx_msg.len = 0;
1012 ds->rx_msg.area = NULL;
1013 ds->tx_ring_area = NULL;
1014 ds->task_exp = NULL;
1015 ds->appctx = NULL;
1016 ds->shutdown = 0;
1017 ds->nb_queries = 0;
1018 ds->query_ids = EB_ROOT_UNIQUE;
1019 ds->rx_msg.area = pool_alloc(dns_msg_buf);
1020 if (!ds->rx_msg.area)
1021 goto error;
1022
1023 ds->tx_ring_area = pool_alloc(dns_msg_buf);
1024 if (!ds->tx_ring_area)
1025 goto error;
1026
1027 ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE);
Christopher Faulet1a1b6742021-03-04 16:53:27 +01001028 /* never fail because it is the first watcher attached to the ring */
1029 DISGUISE(ring_attach(&ds->ring));
Emeric Brunfd647d52021-02-12 20:03:38 +01001030
1031 if ((ds->task_exp = task_new(tid_bit)) == NULL)
1032 goto error;
1033
1034 ds->task_exp->process = dns_process_query_exp;
1035 ds->task_exp->context = ds;
1036
1037 ds->appctx = dns_session_create(ds);
1038 if (!ds->appctx)
1039 goto error;
1040
1041 dss->cur_conns++;
1042
1043 return ds;
1044
1045error:
1046 if (ds->task_exp)
1047 task_destroy(ds->task_exp);
1048 if (ds->rx_msg.area)
1049 pool_free(dns_msg_buf, ds->rx_msg.area);
1050 if (ds->tx_ring_area)
1051 pool_free(dns_msg_buf, ds->tx_ring_area);
1052
1053 pool_free(dns_session_pool, ds);
1054
1055 return NULL;
1056}
1057
1058/*
1059 * Task used to consume pending messages from nameserver ring
1060 * and forward them to dns_session ring.
1061 * Note: If no slot found a new dns_session is allocated
1062 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001063static struct task *dns_process_req(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001064{
1065 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1066 struct dns_stream_server *dss = ns->stream;
1067 struct ring *ring = dss->ring_req;
1068 struct buffer *buf = &ring->buf;
1069 uint64_t msg_len;
1070 size_t len, cnt, ofs;
1071 struct dns_session *ds, *ads;
1072 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
1073
1074 ofs = dss->ofs_req;
1075
1076 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
1077
1078 /* explanation for the initialization below: it would be better to do
1079 * this in the parsing function but this would occasionally result in
1080 * dropped events because we'd take a reference on the oldest message
1081 * and keep it while being scheduled. Thus instead let's take it the
1082 * first time we enter here so that we have a chance to pass many
1083 * existing messages before grabbing a reference to a location. This
1084 * value cannot be produced after initialization.
1085 */
1086 if (unlikely(ofs == ~0)) {
1087 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +02001088 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001089 ofs += ring->ofs;
1090 }
1091
1092 /* we were already there, adjust the offset to be relative to
1093 * the buffer's head and remove us from the counter.
1094 */
1095 ofs -= ring->ofs;
1096 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +02001097 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001098
1099 while (ofs + 1 < b_data(buf)) {
1100 struct ist myist;
1101
1102 cnt = 1;
1103 len = b_peek_varint(buf, ofs + cnt, &msg_len);
1104 if (!len)
1105 break;
1106 cnt += len;
1107 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
1108 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
1109 /* too large a message to ever fit, let's skip it */
1110 ofs += cnt + msg_len;
1111 continue;
1112 }
1113
1114 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
1115
Tim Duesterhus92c696e2021-02-28 16:11:36 +01001116 myist = ist2(dns_msg_trash, len);
Emeric Brunfd647d52021-02-12 20:03:38 +01001117
1118 ads = NULL;
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001119 /* try to push request into active sess with free slot */
Emeric Brunfd647d52021-02-12 20:03:38 +01001120 if (!LIST_ISEMPTY(&dss->free_sess)) {
1121 ds = LIST_NEXT(&dss->free_sess, struct dns_session *, list);
1122
1123 if (ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) {
1124 ds->nb_queries++;
1125 if (ds->nb_queries >= DNS_STREAM_MAX_PIPELINED_REQ)
1126 LIST_DEL_INIT(&ds->list);
1127 ads = ds;
1128 }
1129 else {
1130 /* it means we were unable to put a request in this slot,
1131 * it may be close to be full so we put it at the end
1132 * of free conn list */
1133 LIST_DEL_INIT(&ds->list);
Willy Tarreau2b718102021-04-21 07:32:39 +02001134 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001135 }
1136 }
1137
1138 if (!ads) {
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001139 /* try to push request into idle, this one should have enough free space */
Emeric Brunfd647d52021-02-12 20:03:38 +01001140 if (!LIST_ISEMPTY(&dss->idle_sess)) {
1141 ds = LIST_NEXT(&dss->idle_sess, struct dns_session *, list);
1142
1143 /* ring is empty so this ring_write should never fail */
1144 ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1145 ds->nb_queries++;
1146 LIST_DEL_INIT(&ds->list);
1147
1148 ds->dss->idle_conns--;
1149
1150 /* we may have to update the max_active_conns */
1151 if (ds->dss->max_active_conns < ds->dss->cur_conns - ds->dss->idle_conns)
1152 ds->dss->max_active_conns = ds->dss->cur_conns - ds->dss->idle_conns;
1153
1154 /* since we may unable to find a free list to handle
1155 * this request, this request may be large and fill
1156 * the ring buffer so we prefer to put at the end of free
1157 * list. */
Willy Tarreau2b718102021-04-21 07:32:39 +02001158 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001159 ads = ds;
1160 }
1161 }
1162
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001163 /* we didn't find a session available with large enough room */
Emeric Brunfd647d52021-02-12 20:03:38 +01001164 if (!ads) {
1165 /* allocate a new session */
1166 ads = dns_session_new(dss);
1167 if (ads) {
1168 /* ring is empty so this ring_write should never fail */
1169 ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1170 ads->nb_queries++;
Willy Tarreau2b718102021-04-21 07:32:39 +02001171 LIST_INSERT(&dss->free_sess, &ads->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001172 }
1173 else
1174 ns->counters->snd_error++;
1175 }
1176
1177 if (ads)
1178 ns->counters->sent++;
1179
1180 ofs += cnt + len;
1181 }
1182
Willy Tarreau4781b152021-04-06 13:53:36 +02001183 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001184 ofs += ring->ofs;
1185 dss->ofs_req = ofs;
1186 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
1187
1188
1189 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
1190 return t;
1191}
1192
1193/*
1194 * Task used to consume response
1195 * Note: upper layer callback is called
1196 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001197static struct task *dns_process_rsp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001198{
1199 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1200
1201 ns->process_responses(ns);
1202
1203 return t;
1204}
1205
1206/* Function used to initialize an TCP nameserver */
1207int dns_stream_init(struct dns_nameserver *ns, struct server *srv)
1208{
1209 struct dns_stream_server *dss = NULL;
1210
1211 dss = calloc(1, sizeof(*dss));
1212 if (!dss) {
1213 ha_alert("memory allocation error initializing dns tcp server '%s'.\n", srv->id);
1214 goto out;
1215 }
1216
1217 dss->srv = srv;
1218 dss->maxconn = srv->maxconn;
1219
1220 dss->ofs_req = ~0; /* init ring offset */
1221 dss->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
1222 if (!dss->ring_req) {
1223 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1224 goto out;
1225 }
1226 /* Create the task associated to the resolver target handling conns */
1227 if ((dss->task_req = task_new(MAX_THREADS_MASK)) == NULL) {
1228 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1229 goto out;
1230 }
1231
1232 /* Update task's parameters */
1233 dss->task_req->process = dns_process_req;
1234 dss->task_req->context = ns;
1235
1236 /* attach the task as reader */
1237 if (!ring_attach(dss->ring_req)) {
1238 /* mark server attached to the ring */
1239 ha_alert("server '%s': too many watchers for ring. this should never happen.\n", srv->id);
1240 goto out;
1241 }
1242
1243 /* Create the task associated to the resolver target handling conns */
1244 if ((dss->task_rsp = task_new(MAX_THREADS_MASK)) == NULL) {
1245 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1246 goto out;
1247 }
1248
1249 /* Update task's parameters */
1250 dss->task_rsp->process = dns_process_rsp;
1251 dss->task_rsp->context = ns;
1252
1253 /* Create the task associated to the resolver target handling conns */
1254 if ((dss->task_idle = task_new(MAX_THREADS_MASK)) == NULL) {
1255 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1256 goto out;
1257 }
1258
1259 /* Update task's parameters */
1260 dss->task_idle->process = dns_process_idle_exp;
1261 dss->task_idle->context = dss;
1262 dss->task_idle->expire = tick_add(now_ms, 5000);
1263
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001264 /* let start the task to free idle conns immediately */
Emeric Brunfd647d52021-02-12 20:03:38 +01001265 task_queue(dss->task_idle);
1266
1267 LIST_INIT(&dss->free_sess);
1268 LIST_INIT(&dss->idle_sess);
1269 LIST_INIT(&dss->wait_sess);
1270 HA_SPIN_INIT(&dss->lock);
1271 ns->stream = dss;
1272 return 0;
1273out:
1274 if (dss && dss->task_rsp)
1275 task_destroy(dss->task_rsp);
1276 if (dss && dss->task_req)
1277 task_destroy(dss->task_req);
1278 if (dss && dss->ring_req)
1279 ring_free(dss->ring_req);
1280
1281 free(dss);
Emeric Brunc9437992021-02-12 19:42:55 +01001282 return -1;
Christopher Faulet67957bd2017-09-27 11:00:59 +02001283}
1284
Emeric Brunc9437992021-02-12 19:42:55 +01001285int init_dns_buffers()
Baptiste Assmann325137d2015-04-13 23:40:55 +02001286{
Emeric Brunc9437992021-02-12 19:42:55 +01001287 dns_msg_trash = malloc(DNS_TCP_MSG_MAX_SIZE);
1288 if (!dns_msg_trash)
1289 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +02001290
Emeric Brunc9437992021-02-12 19:42:55 +01001291 return 1;
1292}
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +02001293
Emeric Brunc9437992021-02-12 19:42:55 +01001294void deinit_dns_buffers()
1295{
Willy Tarreau61cfdf42021-02-20 10:46:51 +01001296 ha_free(&dns_msg_trash);
Emeric Brunc9437992021-02-12 19:42:55 +01001297}
Emeric Brund26a6232021-01-04 13:32:20 +01001298
1299REGISTER_PER_THREAD_ALLOC(init_dns_buffers);
1300REGISTER_PER_THREAD_FREE(deinit_dns_buffers);