blob: 249db228f8d95f538946884901d53711e7ab2c58 [file] [log] [blame]
Baptiste Assmann325137d2015-04-13 23:40:55 +02001/*
2 * Name server resolution
3 *
Willy Tarreau714f3452021-05-09 06:47:26 +02004 * Copyright 2020 HAProxy Technologies
Baptiste Assmann325137d2015-04-13 23:40:55 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <unistd.h>
19
20#include <sys/types.h>
21
Willy Tarreau122eba92020-06-04 10:15:32 +020022#include <haproxy/action.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020023#include <haproxy/api.h>
Willy Tarreau6be78492020-06-05 00:00:29 +020024#include <haproxy/cfgparse.h>
Willy Tarreauf1d32c42020-06-04 21:07:02 +020025#include <haproxy/channel.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020026#include <haproxy/check.h>
Willy Tarreau83487a82020-06-04 20:19:54 +020027#include <haproxy/cli.h>
Willy Tarreau7c18b542020-06-11 09:23:02 +020028#include <haproxy/dgram.h>
Willy Tarreaueb92deb2020-06-04 10:53:16 +020029#include <haproxy/dns.h>
Willy Tarreau8d366972020-05-27 16:10:29 +020030#include <haproxy/errors.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020031#include <haproxy/fd.h>
Willy Tarreauaeed4a82020-06-04 22:01:04 +020032#include <haproxy/log.h>
Emeric Brund26a6232021-01-04 13:32:20 +010033#include <haproxy/ring.h>
Emeric Brunfd647d52021-02-12 20:03:38 +010034#include <haproxy/stream.h>
35#include <haproxy/stream_interface.h>
Willy Tarreau9f9e9fc2021-05-08 13:09:46 +020036#include <haproxy/tools.h>
Baptiste Assmann325137d2015-04-13 23:40:55 +020037
Emeric Brund26a6232021-01-04 13:32:20 +010038static THREAD_LOCAL char *dns_msg_trash;
Baptiste Assmann325137d2015-04-13 23:40:55 +020039
Emeric Brunfd647d52021-02-12 20:03:38 +010040DECLARE_STATIC_POOL(dns_session_pool, "dns_session", sizeof(struct dns_session));
41DECLARE_STATIC_POOL(dns_query_pool, "dns_query", sizeof(struct dns_query));
42DECLARE_STATIC_POOL(dns_msg_buf, "dns_msg_buf", DNS_TCP_MSG_RING_MAX_SIZE);
43
Christopher Faulet67957bd2017-09-27 11:00:59 +020044/* Opens an UDP socket on the namesaver's IP/Port, if required. Returns 0 on
Christopher Faulet1e711be2021-03-04 16:58:35 +010045 * success, -1 otherwise. ns->dgram must be defined.
Baptiste Assmann325137d2015-04-13 23:40:55 +020046 */
Emeric Brund26a6232021-01-04 13:32:20 +010047static int dns_connect_nameserver(struct dns_nameserver *ns)
Baptiste Assmann325137d2015-04-13 23:40:55 +020048{
Christopher Faulet1e711be2021-03-04 16:58:35 +010049 struct dgram_conn *dgram = &ns->dgram->conn;
50 int fd;
Baptiste Assmann325137d2015-04-13 23:40:55 +020051
Christopher Faulet1e711be2021-03-04 16:58:35 +010052 /* Already connected */
53 if (dgram->t.sock.fd != -1)
Emeric Brun526b7922021-02-15 14:28:27 +010054 return 0;
Christopher Faulet1e711be2021-03-04 16:58:35 +010055
56 /* Create an UDP socket and connect it on the nameserver's IP/Port */
57 if ((fd = socket(dgram->addr.to.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
58 send_log(NULL, LOG_WARNING,
59 "DNS : section '%s': can't create socket for nameserver '%s'.\n",
60 ns->counters->pid, ns->id);
61 return -1;
62 }
63 if (connect(fd, (struct sockaddr*)&dgram->addr.to, get_addr_len(&dgram->addr.to)) == -1) {
64 send_log(NULL, LOG_WARNING,
65 "DNS : section '%s': can't connect socket for nameserver '%s'.\n",
66 ns->counters->id, ns->id);
67 close(fd);
68 return -1;
Emeric Brunc9437992021-02-12 19:42:55 +010069 }
Emeric Brun526b7922021-02-15 14:28:27 +010070
Christopher Faulet1e711be2021-03-04 16:58:35 +010071 /* Make the socket non blocking */
72 fcntl(fd, F_SETFL, O_NONBLOCK);
73
74 /* Add the fd in the fd list and update its parameters */
75 dgram->t.sock.fd = fd;
76 fd_insert(fd, dgram, dgram_fd_handler, MAX_THREADS_MASK);
77 fd_want_recv(fd);
78 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +020079}
80
Emeric Brund26a6232021-01-04 13:32:20 +010081/* Sends a message to a name server
82 * It returns message length on success
83 * or -1 in error case
84 * 0 is returned in case of output ring buffer is full
85 */
86int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len)
87{
88 int ret = -1;
89
90 if (ns->dgram) {
91 struct dgram_conn *dgram = &ns->dgram->conn;
92 int fd = dgram->t.sock.fd;
93
94 if (dgram->t.sock.fd == -1) {
95 if (dns_connect_nameserver(ns) == -1)
96 return -1;
97 fd = dgram->t.sock.fd;
98 }
99
100 ret = send(fd, buf, len, 0);
101 if (ret < 0) {
102 if (errno == EAGAIN) {
103 struct ist myist;
104
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100105 myist = ist2(buf, len);
Emeric Brund26a6232021-01-04 13:32:20 +0100106 ret = ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
107 if (!ret) {
108 ns->counters->snd_error++;
109 return -1;
110 }
111 fd_cant_send(fd);
112 return ret;
113 }
114 ns->counters->snd_error++;
115 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100116 dgram->t.sock.fd = -1;
117 return -1;
118 }
119 ns->counters->sent++;
120 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100121 else if (ns->stream) {
122 struct ist myist;
123
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100124 myist = ist2(buf, len);
Emeric Brunfd647d52021-02-12 20:03:38 +0100125 ret = ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
126 if (!ret) {
127 ns->counters->snd_error++;
128 return -1;
129 }
130 task_wakeup(ns->stream->task_req, TASK_WOKEN_MSG);
131 return ret;
132 }
Emeric Brund26a6232021-01-04 13:32:20 +0100133
134 return ret;
135}
136
Emeric Brunfd647d52021-02-12 20:03:38 +0100137void dns_session_free(struct dns_session *);
138
Emeric Brund26a6232021-01-04 13:32:20 +0100139/* Receives a dns message
140 * Returns message length
141 * 0 is returned if no more message available
142 * -1 in error case
143 */
144ssize_t dns_recv_nameserver(struct dns_nameserver *ns, void *data, size_t size)
145{
146 ssize_t ret = -1;
147
148 if (ns->dgram) {
149 struct dgram_conn *dgram = &ns->dgram->conn;
150 int fd = dgram->t.sock.fd;
151
152 if (fd == -1)
153 return -1;
154
155 if ((ret = recv(fd, data, size, 0)) < 0) {
156 if (errno == EAGAIN) {
157 fd_cant_recv(fd);
158 return 0;
159 }
160 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100161 dgram->t.sock.fd = -1;
162 return -1;
163 }
164 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100165 else if (ns->stream) {
166 struct dns_stream_server *dss = ns->stream;
167 struct dns_session *ds;
168
169 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
170
171 if (!LIST_ISEMPTY(&dss->wait_sess)) {
172 ds = LIST_NEXT(&dss->wait_sess, struct dns_session *, waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100173 ret = ds->rx_msg.len < size ? ds->rx_msg.len : size;
174 memcpy(data, ds->rx_msg.area, ret);
175
176 ds->rx_msg.len = 0;
177
Willy Tarreaudde1b442021-10-21 14:33:38 +0200178 /* This barrier is here to ensure that all data is
179 * stored if the appctx detect the elem is out of the
180 * list.
181 */
182 __ha_barrier_store();
183
Emeric Brunfd647d52021-02-12 20:03:38 +0100184 LIST_DEL_INIT(&ds->waiter);
185
186 if (ds->appctx) {
Willy Tarreaudde1b442021-10-21 14:33:38 +0200187 /* This second barrier is here to ensure that
188 * the waked up appctx won't miss that the elem
189 * is removed from the list.
190 */
191 __ha_barrier_store();
192
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500193 /* awake appctx because it may have other
Emeric Brunfd647d52021-02-12 20:03:38 +0100194 * message to receive
195 */
196 appctx_wakeup(ds->appctx);
197
198 /* dns_session could already be into free_sess list
199 * so we firstly remove it */
200 LIST_DEL_INIT(&ds->list);
201
202 /* decrease nb_queries to free a slot for a new query on that sess */
203 ds->nb_queries--;
204 if (ds->nb_queries) {
205 /* it remains pipelined unanswered request
206 * into this session but we just decrease
207 * the counter so the session
208 * can not be full of pipelined requests
209 * so we can add if to free_sess list
210 * to receive a new request
211 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200212 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100213 }
214 else {
215 /* there is no more pipelined requests
216 * into this session, so we move it
217 * to idle_sess list */
Willy Tarreau2b718102021-04-21 07:32:39 +0200218 LIST_INSERT(&ds->dss->idle_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100219
220 /* update the counter of idle sessions */
221 ds->dss->idle_conns++;
222
223 /* Note: this is useless there to update
224 * the max_active_conns since we increase
225 * the idle count */
226 }
227 }
228 else {
229 /* there is no more appctx for this session
230 * it means it is ready to die
231 */
232 dns_session_free(ds);
233 }
234
235
236 }
237
238 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
239 }
Emeric Brund26a6232021-01-04 13:32:20 +0100240
241 return ret;
242}
243
244static void dns_resolve_recv(struct dgram_conn *dgram)
245{
246 struct dns_nameserver *ns;
247 int fd;
248
249 fd = dgram->t.sock.fd;
250
251 /* check if ready for reading */
252 if (!fd_recv_ready(fd))
253 return;
254
255 /* no need to go further if we can't retrieve the nameserver */
256 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200257 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100258 fd_stop_recv(fd);
259 return;
260 }
261
262 ns->process_responses(ns);
263}
264
265/* Called when a dns network socket is ready to send data */
266static void dns_resolve_send(struct dgram_conn *dgram)
267{
268 int fd;
269 struct dns_nameserver *ns;
270 struct ring *ring;
271 struct buffer *buf;
272 uint64_t msg_len;
273 size_t len, cnt, ofs;
274
275 fd = dgram->t.sock.fd;
276
277 /* check if ready for sending */
278 if (!fd_send_ready(fd))
279 return;
280
281 /* no need to go further if we can't retrieve the nameserver */
282 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200283 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100284 fd_stop_send(fd);
285 return;
286 }
287
288 ring = ns->dgram->ring_req;
289 buf = &ring->buf;
290
291 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
292 ofs = ns->dgram->ofs_req;
293
294 /* explanation for the initialization below: it would be better to do
295 * this in the parsing function but this would occasionally result in
296 * dropped events because we'd take a reference on the oldest message
297 * and keep it while being scheduled. Thus instead let's take it the
298 * first time we enter here so that we have a chance to pass many
299 * existing messages before grabbing a reference to a location. This
300 * value cannot be produced after initialization.
301 */
302 if (unlikely(ofs == ~0)) {
303 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +0200304 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100305 ofs += ring->ofs;
306 }
307
308 /* we were already there, adjust the offset to be relative to
309 * the buffer's head and remove us from the counter.
310 */
311 ofs -= ring->ofs;
312 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200313 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100314
315 while (ofs + 1 < b_data(buf)) {
316 int ret;
317
318 cnt = 1;
319 len = b_peek_varint(buf, ofs + cnt, &msg_len);
320 if (!len)
321 break;
322 cnt += len;
323 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
324 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
325 /* too large a message to ever fit, let's skip it */
326 ofs += cnt + msg_len;
327 continue;
328 }
329
330 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
331
332 ret = send(fd, dns_msg_trash, len, 0);
333 if (ret < 0) {
334 if (errno == EAGAIN) {
335 fd_cant_send(fd);
336 goto out;
337 }
338 ns->counters->snd_error++;
339 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100340 fd = dgram->t.sock.fd = -1;
341 goto out;
342 }
343 ns->counters->sent++;
344
345 ofs += cnt + len;
346 }
347
348 /* we don't want/need to be waked up any more for sending
349 * because all ring content is sent */
350 fd_stop_send(fd);
351
352out:
353
Willy Tarreau4781b152021-04-06 13:53:36 +0200354 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100355 ofs += ring->ofs;
356 ns->dgram->ofs_req = ofs;
357 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
358
359}
360
Emeric Brunc9437992021-02-12 19:42:55 +0100361/* proto_udp callback functions for a DNS resolution */
362struct dgram_data_cb dns_dgram_cb = {
363 .recv = dns_resolve_recv,
364 .send = dns_resolve_send,
365};
Baptiste Assmann325137d2015-04-13 23:40:55 +0200366
Emeric Brunc9437992021-02-12 19:42:55 +0100367int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk)
Baptiste Assmann325137d2015-04-13 23:40:55 +0200368{
Emeric Brunc9437992021-02-12 19:42:55 +0100369 struct dns_dgram_server *dgram;
Baptiste Assmann201c07f2017-05-22 15:17:15 +0200370
Emeric Brunc9437992021-02-12 19:42:55 +0100371 if ((dgram = calloc(1, sizeof(*dgram))) == NULL)
Christopher Faulet67957bd2017-09-27 11:00:59 +0200372 return -1;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200373
Emeric Brunc9437992021-02-12 19:42:55 +0100374 /* Leave dgram partially initialized, no FD attached for
375 * now. */
376 dgram->conn.owner = ns;
377 dgram->conn.data = &dns_dgram_cb;
378 dgram->conn.t.sock.fd = -1;
379 dgram->conn.addr.to = *sk;
380 ns->dgram = dgram;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200381
Emeric Brunc9437992021-02-12 19:42:55 +0100382 dgram->ofs_req = ~0; /* init ring offset */
383 dgram->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
384 if (!dgram->ring_req) {
385 ha_alert("memory allocation error initializing the ring for nameserver.\n");
386 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200387 }
388
Emeric Brunc9437992021-02-12 19:42:55 +0100389 /* attach the task as reader */
390 if (!ring_attach(dgram->ring_req)) {
391 /* mark server attached to the ring */
392 ha_alert("nameserver sets too many watchers > 255 on ring. This is a bug and should not happen.\n");
393 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200394 }
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200395 return 0;
Emeric Brunc9437992021-02-12 19:42:55 +0100396out:
397 if (dgram->ring_req)
398 ring_free(dgram->ring_req);
Christopher Fauletd6c6b5f2020-09-08 10:27:24 +0200399
Emeric Brunc9437992021-02-12 19:42:55 +0100400 free(dgram);
Olivier Houchard2ec2db92018-01-08 16:28:57 +0100401
Emeric Brunfd647d52021-02-12 20:03:38 +0100402 return -1;
403}
404
405/*
406 * IO Handler to handle message push to dns tcp server
407 */
408static void dns_session_io_handler(struct appctx *appctx)
409{
Christopher Faulet86e1c332021-12-20 17:09:39 +0100410 struct stream_interface *si = cs_si(appctx->owner);
Emeric Brunfd647d52021-02-12 20:03:38 +0100411 struct dns_session *ds = appctx->ctx.sft.ptr;
412 struct ring *ring = &ds->ring;
413 struct buffer *buf = &ring->buf;
414 uint64_t msg_len;
415 int available_room;
416 size_t len, cnt, ofs;
417 int ret = 0;
418
419 /* if stopping was requested, close immediately */
420 if (unlikely(stopping))
421 goto close;
422
423 /* we want to be sure to not miss that we have been awaked for a shutdown */
424 __ha_barrier_load();
425
426 /* that means the connection was requested to shutdown
427 * for instance idle expire */
428 if (ds->shutdown)
429 goto close;
430
431 /* an error was detected */
432 if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
433 goto close;
434
435 /* con closed by server side, we will skip data write and drain data from channel */
436 if ((si_oc(si)->flags & CF_SHUTW)) {
437 goto read;
438 }
439
440 /* if the connection is not established, inform the stream that we want
441 * to be notified whenever the connection completes.
442 */
443 if (si_opposite(si)->state < SI_ST_EST) {
444 si_cant_get(si);
445 si_rx_conn_blk(si);
446 si_rx_endp_more(si);
447 return;
448 }
449
450
451 ofs = ds->ofs;
452
453 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
454 LIST_DEL_INIT(&appctx->wait_entry);
455 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
456
457 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
458
459 /* explanation for the initialization below: it would be better to do
460 * this in the parsing function but this would occasionally result in
461 * dropped events because we'd take a reference on the oldest message
462 * and keep it while being scheduled. Thus instead let's take it the
463 * first time we enter here so that we have a chance to pass many
464 * existing messages before grabbing a reference to a location. This
465 * value cannot be produced after initialization.
466 */
467 if (unlikely(ofs == ~0)) {
468 ofs = 0;
469
Willy Tarreau4781b152021-04-06 13:53:36 +0200470 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100471 ofs += ring->ofs;
472 }
473
474 /* in this loop, ofs always points to the counter byte that precedes
475 * the message so that we can take our reference there if we have to
476 * stop before the end (ret=0).
477 */
478 if (si_opposite(si)->state == SI_ST_EST) {
479 /* we were already there, adjust the offset to be relative to
480 * the buffer's head and remove us from the counter.
481 */
482 ofs -= ring->ofs;
483 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200484 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100485
486 ret = 1;
487 while (ofs + 1 < b_data(buf)) {
488 struct dns_query *query;
489 uint16_t original_qid;
490 uint16_t new_qid;
491
492 cnt = 1;
493 len = b_peek_varint(buf, ofs + cnt, &msg_len);
494 if (!len)
495 break;
496 cnt += len;
497 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
498
499 /* retrieve available room on output channel */
500 available_room = channel_recv_max(si_ic(si));
501
502 /* tx_msg_offset null means we are at the start of a new message */
503 if (!ds->tx_msg_offset) {
504 uint16_t slen;
505
506 /* check if there is enough room to put message len and query id */
507 if (available_room < sizeof(slen) + sizeof(new_qid)) {
508 si_rx_room_blk(si);
509 ret = 0;
510 break;
511 }
512
513 /* put msg len into then channel */
514 slen = (uint16_t)msg_len;
515 slen = htons(slen);
516 ci_putblk(si_ic(si), (char *)&slen, sizeof(slen));
517 available_room -= sizeof(slen);
518
519 /* backup original query id */
520 len = b_getblk(buf, (char *)&original_qid, sizeof(original_qid), ofs + cnt);
Emeric Brun538bb042021-02-15 13:58:06 +0100521 if (!len) {
522 /* should never happen since messages are atomically
523 * written into ring
524 */
525 ret = 0;
526 break;
527 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100528
529 /* generates new query id */
530 new_qid = ++ds->query_counter;
531 new_qid = htons(new_qid);
532
533 /* put new query id into the channel */
534 ci_putblk(si_ic(si), (char *)&new_qid, sizeof(new_qid));
535 available_room -= sizeof(new_qid);
536
537 /* keep query id mapping */
538
539 query = pool_alloc(dns_query_pool);
540 if (query) {
541 query->qid.key = new_qid;
542 query->original_qid = original_qid;
543 query->expire = tick_add(now_ms, 5000);
544 LIST_INIT(&query->list);
545 if (LIST_ISEMPTY(&ds->queries)) {
546 /* enable task to handle expire */
547 ds->task_exp->expire = query->expire;
548 /* ensure this will be executed by the same
549 * thread than ds_session_release
550 * to ensure session_release is free
551 * to destroy the task */
552 task_queue(ds->task_exp);
553 }
Willy Tarreau2b718102021-04-21 07:32:39 +0200554 LIST_APPEND(&ds->queries, &query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100555 eb32_insert(&ds->query_ids, &query->qid);
556 ds->onfly_queries++;
557 }
558
559 /* update the tx_offset to handle output in 16k streams */
560 ds->tx_msg_offset = sizeof(original_qid);
561
562 }
563
564 /* check if it remains available room on output chan */
565 if (unlikely(!available_room)) {
566 si_rx_room_blk(si);
567 ret = 0;
568 break;
569 }
570
571 chunk_reset(&trash);
572 if ((msg_len - ds->tx_msg_offset) > available_room) {
573 /* remaining msg data is too large to be written in output channel at one time */
574
575 len = b_getblk(buf, trash.area, available_room, ofs + cnt + ds->tx_msg_offset);
576
577 /* update offset to complete mesg forwarding later */
578 ds->tx_msg_offset += len;
579 }
580 else {
581 /* remaining msg data can be written in output channel at one time */
582 len = b_getblk(buf, trash.area, msg_len - ds->tx_msg_offset, ofs + cnt + ds->tx_msg_offset);
583
584 /* reset tx_msg_offset to mark forward fully processed */
585 ds->tx_msg_offset = 0;
586 }
587 trash.data += len;
588
Emeric Brun743afee2021-02-15 14:12:06 +0100589 if (ci_putchk(si_ic(si), &trash) == -1) {
590 /* should never happen since we
591 * check available_room is large
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500592 * enough here.
Emeric Brun743afee2021-02-15 14:12:06 +0100593 */
594 si_rx_room_blk(si);
595 ret = 0;
596 break;
597 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100598
599 if (ds->tx_msg_offset) {
600 /* msg was not fully processed, we must be awake to drain pending data */
601
602 si_rx_room_blk(si);
603 ret = 0;
604 break;
605 }
606 /* switch to next message */
607 ofs += cnt + msg_len;
608 }
609
Willy Tarreau4781b152021-04-06 13:53:36 +0200610 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100611 ofs += ring->ofs;
612 ds->ofs = ofs;
613 }
614 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
615
616 if (ret) {
617 /* let's be woken up once new request to write arrived */
618 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
Willy Tarreau62e467c2021-10-20 11:02:13 +0200619 BUG_ON(LIST_INLIST(&appctx->wait_entry));
Willy Tarreau2b718102021-04-21 07:32:39 +0200620 LIST_APPEND(&ring->waiters, &appctx->wait_entry);
Emeric Brunfd647d52021-02-12 20:03:38 +0100621 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
622 si_rx_endp_done(si);
623 }
624
625read:
626
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500627 /* if session is not a waiter it means there is no committed
Emeric Brunfd647d52021-02-12 20:03:38 +0100628 * message into rx_buf and we are free to use it
629 * Note: we need a load barrier here to not miss the
630 * delete from the list
631 */
Emeric Brun70455902021-10-20 10:49:53 +0200632
Willy Tarreaudde1b442021-10-21 14:33:38 +0200633 __ha_barrier_load();
634 if (!LIST_INLIST_ATOMIC(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100635 while (1) {
636 uint16_t query_id;
637 struct eb32_node *eb;
638 struct dns_query *query;
639
640 if (!ds->rx_msg.len) {
641 /* next message len is not fully available into the channel */
642 if (co_data(si_oc(si)) < 2)
643 break;
644
645 /* retrieve message len */
646 co_getblk(si_oc(si), (char *)&msg_len, 2, 0);
647
648 /* mark as consumed */
649 co_skip(si_oc(si), 2);
650
651 /* store message len */
652 ds->rx_msg.len = ntohs(msg_len);
653 }
654
655 if (!co_data(si_oc(si))) {
656 /* we need more data but nothing is available */
657 break;
658 }
659
660 if (co_data(si_oc(si)) + ds->rx_msg.offset < ds->rx_msg.len) {
661 /* message only partially available */
662
663 /* read available data */
664 co_getblk(si_oc(si), ds->rx_msg.area + ds->rx_msg.offset, co_data(si_oc(si)), 0);
665
666 /* update message offset */
667 ds->rx_msg.offset += co_data(si_oc(si));
668
669 /* consume all pending data from the channel */
670 co_skip(si_oc(si), co_data(si_oc(si)));
671
672 /* we need to wait for more data */
673 break;
674 }
675
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500676 /* enough data is available into the channel to read the message until the end */
Emeric Brunfd647d52021-02-12 20:03:38 +0100677
678 /* read from the channel until the end of the message */
679 co_getblk(si_oc(si), ds->rx_msg.area + ds->rx_msg.offset, ds->rx_msg.len - ds->rx_msg.offset, 0);
680
681 /* consume all data until the end of the message from the channel */
682 co_skip(si_oc(si), ds->rx_msg.len - ds->rx_msg.offset);
683
684 /* reset reader offset to 0 for next message reand */
685 ds->rx_msg.offset = 0;
686
687 /* try remap query id to original */
688 memcpy(&query_id, ds->rx_msg.area, sizeof(query_id));
689 eb = eb32_lookup(&ds->query_ids, query_id);
690 if (!eb) {
691 /* query id not found means we have an unknown corresponding
692 * request, perhaps server's bug or or the query reached
693 * timeout
694 */
695 ds->rx_msg.len = 0;
696 continue;
697 }
698
699 /* re-map the original query id set by the requester */
700 query = eb32_entry(eb, struct dns_query, qid);
701 memcpy(ds->rx_msg.area, &query->original_qid, sizeof(query->original_qid));
702
703 /* remove query ids mapping from pending queries list/tree */
704 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200705 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100706 pool_free(dns_query_pool, query);
707 ds->onfly_queries--;
708
Emeric Brunfd647d52021-02-12 20:03:38 +0100709 /* the dns_session is also added in queue of the
710 * wait_sess list where the task processing
711 * response will pop available responses
712 */
Willy Tarreaudde1b442021-10-21 14:33:38 +0200713 HA_SPIN_LOCK(DNS_LOCK, &ds->dss->lock);
714
Willy Tarreau62e467c2021-10-20 11:02:13 +0200715 BUG_ON(LIST_INLIST(&ds->waiter));
Willy Tarreau2b718102021-04-21 07:32:39 +0200716 LIST_APPEND(&ds->dss->wait_sess, &ds->waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100717
Willy Tarreaudde1b442021-10-21 14:33:38 +0200718 HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
719
Emeric Brunfd647d52021-02-12 20:03:38 +0100720 /* awake the task processing the responses */
721 task_wakeup(ds->dss->task_rsp, TASK_WOKEN_INIT);
722
723 break;
724 }
725
Willy Tarreau2b718102021-04-21 07:32:39 +0200726 if (!LIST_INLIST(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100727 /* there is no more pending data to read and the con was closed by the server side */
728 if (!co_data(si_oc(si)) && (si_oc(si)->flags & CF_SHUTW)) {
729 goto close;
730 }
731 }
732
733 }
734
Emeric Brunfd647d52021-02-12 20:03:38 +0100735 return;
736close:
737 si_shutw(si);
738 si_shutr(si);
739 si_ic(si)->flags |= CF_READ_NULL;
740}
741
742void dns_queries_flush(struct dns_session *ds)
743{
744 struct dns_query *query, *queryb;
745
746 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
747 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200748 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100749 pool_free(dns_query_pool, query);
750 }
751}
752
753void dns_session_free(struct dns_session *ds)
754{
755 if (ds->rx_msg.area)
756 pool_free(dns_msg_buf, ds->rx_msg.area);
757 if (ds->tx_ring_area)
758 pool_free(dns_msg_buf, ds->tx_ring_area);
759 if (ds->task_exp)
760 task_destroy(ds->task_exp);
761
762 dns_queries_flush(ds);
763
Emeric Brund20dc212021-10-19 15:40:10 +0200764 /* Ensure to remove this session from external lists
765 * Note: we are under the lock of dns_stream_server
766 * which own the heads of those lists.
767 */
768 LIST_DEL_INIT(&ds->waiter);
769 LIST_DEL_INIT(&ds->list);
770
Emeric Brunfd647d52021-02-12 20:03:38 +0100771 ds->dss->cur_conns--;
772 /* Note: this is useless to update
773 * max_active_conns here because
774 * we decrease the value
775 */
Willy Tarreau62e467c2021-10-20 11:02:13 +0200776
777 BUG_ON(!LIST_ISEMPTY(&ds->list));
778 BUG_ON(!LIST_ISEMPTY(&ds->waiter));
779 BUG_ON(!LIST_ISEMPTY(&ds->queries));
780 BUG_ON(!LIST_ISEMPTY(&ds->ring.waiters));
781 BUG_ON(!eb_is_empty(&ds->query_ids));
Emeric Brunfd647d52021-02-12 20:03:38 +0100782 pool_free(dns_session_pool, ds);
783}
784
785static struct appctx *dns_session_create(struct dns_session *ds);
786
787/*
788 * Function to release a DNS tcp session
789 */
790static void dns_session_release(struct appctx *appctx)
791{
792 struct dns_session *ds = appctx->ctx.sft.ptr;
Willy Tarreaue3e648c2021-02-24 17:38:46 +0100793 struct dns_stream_server *dss __maybe_unused;
Emeric Brunfd647d52021-02-12 20:03:38 +0100794
795 if (!ds)
796 return;
797
Willy Tarreaub56a8782021-10-20 14:38:43 +0200798 /* We do not call ring_appctx_detach here
799 * because we want to keep readers counters
800 * to retry a conn with a different appctx.
801 */
802 HA_RWLOCK_WRLOCK(DNS_LOCK, &ds->ring.lock);
803 LIST_DEL_INIT(&appctx->wait_entry);
804 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ds->ring.lock);
805
Emeric Brunfd647d52021-02-12 20:03:38 +0100806 dss = ds->dss;
807
808 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
809 LIST_DEL_INIT(&ds->list);
810
811 if (stopping) {
812 dns_session_free(ds);
813 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
814 return;
815 }
816
817 if (!ds->nb_queries) {
818 /* this is an idle session */
819 /* Note: this is useless to update max_active_sess
820 * here because we decrease idle_conns but
821 * dns_session_free decrease curconns
822 */
823
824 ds->dss->idle_conns--;
825 dns_session_free(ds);
826 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
827 return;
828 }
829
830 if (ds->onfly_queries == ds->nb_queries) {
831 /* the session can be released because
832 * it means that all queries AND
833 * responses are in fly */
834 dns_session_free(ds);
835 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
836 return;
837 }
838
Emeric Brunfd647d52021-02-12 20:03:38 +0100839 /* if there is no pending complete response
840 * message, ensure to reset
841 * message offsets if the session
842 * was closed with an incomplete pending response
843 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200844 if (!LIST_INLIST(&ds->waiter))
Emeric Brunfd647d52021-02-12 20:03:38 +0100845 ds->rx_msg.len = ds->rx_msg.offset = 0;
846
847 /* we flush pending sent queries because we never
848 * have responses
849 */
850 ds->nb_queries -= ds->onfly_queries;
851 dns_queries_flush(ds);
852
853 /* reset offset to be sure to start from message start */
854 ds->tx_msg_offset = 0;
855
856 /* here the ofs and the attached counter
857 * are kept unchanged
858 */
859
860 /* Create a new appctx, We hope we can
861 * create from the release callback! */
862 ds->appctx = dns_session_create(ds);
863 if (!ds->appctx) {
864 dns_session_free(ds);
865 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
866 return;
867 }
868
869 if (ds->nb_queries < DNS_STREAM_MAX_PIPELINED_REQ)
Willy Tarreau2b718102021-04-21 07:32:39 +0200870 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100871
872 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
873}
874
875/* DNS tcp session applet */
876static struct applet dns_session_applet = {
877 .obj_type = OBJ_TYPE_APPLET,
878 .name = "<STRMDNS>", /* used for logging */
879 .fct = dns_session_io_handler,
880 .release = dns_session_release,
881};
882
883/*
884 * Function used to create an appctx for a DNS session
885 */
886static struct appctx *dns_session_create(struct dns_session *ds)
887{
888 struct appctx *appctx;
889 struct session *sess;
Christopher Faulet13a35e52021-12-20 15:34:16 +0100890 struct conn_stream *cs;
Emeric Brunfd647d52021-02-12 20:03:38 +0100891 struct stream *s;
892 struct applet *applet = &dns_session_applet;
893
Willy Tarreaue6124462021-09-13 10:07:38 +0200894 appctx = appctx_new(applet);
Emeric Brunfd647d52021-02-12 20:03:38 +0100895 if (!appctx)
896 goto out_close;
897
898 appctx->ctx.sft.ptr = (void *)ds;
899
900 sess = session_new(ds->dss->srv->proxy, NULL, &appctx->obj_type);
901 if (!sess) {
Christopher Faulet13a35e52021-12-20 15:34:16 +0100902 ha_alert("out of memory in dns_session_create().\n");
Emeric Brunfd647d52021-02-12 20:03:38 +0100903 goto out_free_appctx;
904 }
905
Christopher Fauletcda94ac2021-12-23 17:28:17 +0100906 cs = cs_new();
Christopher Faulet13a35e52021-12-20 15:34:16 +0100907 if (!cs) {
908 ha_alert("out of memory in dns_session_create().\n");
Emeric Brunfd647d52021-02-12 20:03:38 +0100909 goto out_free_sess;
910 }
Christopher Fauletcda94ac2021-12-23 17:28:17 +0100911 cs_attach_endp(cs, &appctx->obj_type, appctx);
Emeric Brunfd647d52021-02-12 20:03:38 +0100912
Christopher Faulet13a35e52021-12-20 15:34:16 +0100913 if ((s = stream_new(sess, cs, &BUF_NULL)) == NULL) {
914 ha_alert("Failed to initialize stream in dns_session_create().\n");
915 goto out_free_cs;
916 }
917
Emeric Brunfd647d52021-02-12 20:03:38 +0100918
919 s->target = &ds->dss->srv->obj_type;
Christopher Faulet8f8f35b2021-12-23 13:40:42 +0100920 if (!sockaddr_alloc(&cs_si(s->csb)->dst, &ds->dss->srv->addr, sizeof(ds->dss->srv->addr)))
Emeric Brunfd647d52021-02-12 20:03:38 +0100921 goto out_free_strm;
922 s->flags = SF_ASSIGNED|SF_ADDR_SET;
Christopher Faulet8f8f35b2021-12-23 13:40:42 +0100923 cs_si(s->csb)->flags |= SI_FL_NOLINGER;
Emeric Brunfd647d52021-02-12 20:03:38 +0100924
925 s->do_log = NULL;
926 s->uniq_id = 0;
927
928 s->res.flags |= CF_READ_DONTWAIT;
929 /* for rto and rex to eternity to not expire on idle recv:
930 * We are using a syslog server.
931 */
932 s->res.rto = TICK_ETERNITY;
933 s->res.rex = TICK_ETERNITY;
934 ds->appctx = appctx;
Emeric Brunfd647d52021-02-12 20:03:38 +0100935 return appctx;
936
937 /* Error unrolling */
938 out_free_strm:
Willy Tarreau2b718102021-04-21 07:32:39 +0200939 LIST_DELETE(&s->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100940 pool_free(pool_head_stream, s);
Christopher Faulet13a35e52021-12-20 15:34:16 +0100941 out_free_cs:
942 cs_free(cs);
Emeric Brunfd647d52021-02-12 20:03:38 +0100943 out_free_sess:
944 session_free(sess);
945 out_free_appctx:
946 appctx_free(appctx);
947 out_close:
948 return NULL;
949}
950
951/* Task processing expiration of unresponded queries, this one is supposed
952 * to be stuck on the same thread than the appctx handler
953 */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100954static struct task *dns_process_query_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100955{
956 struct dns_session *ds = (struct dns_session *)context;
957 struct dns_query *query, *queryb;
958
959 t->expire = TICK_ETERNITY;
960
961 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
962 if (tick_is_expired(query->expire, now_ms)) {
963 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200964 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100965 pool_free(dns_query_pool, query);
966 ds->onfly_queries--;
967 }
968 else {
969 t->expire = query->expire;
970 break;
971 }
972 }
973
974 return t;
975}
976
977/* Task processing expiration of idle sessions */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100978static struct task *dns_process_idle_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100979{
980 struct dns_stream_server *dss = (struct dns_stream_server *)context;
981 struct dns_session *ds, *dsb;
982 int target = 0;
983 int cur_active_conns;
984
985 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
986
987
988 cur_active_conns = dss->cur_conns - dss->idle_conns;
989 if (cur_active_conns > dss->max_active_conns)
990 dss->max_active_conns = cur_active_conns;
991
992 target = (dss->max_active_conns - cur_active_conns) / 2;
993 list_for_each_entry_safe(ds, dsb, &dss->idle_sess, list) {
994 if (!target)
995 break;
996
997 /* remove conn to pending list to ensure it won't be reused */
998 LIST_DEL_INIT(&ds->list);
999
1000 /* force session shutdown */
1001 ds->shutdown = 1;
1002
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001003 /* to be sure that the appctx won't miss shutdown */
Emeric Brunfd647d52021-02-12 20:03:38 +01001004 __ha_barrier_store();
1005
1006 /* wake appctx to perform the shutdown */
1007 appctx_wakeup(ds->appctx);
1008 }
1009
1010 /* reset max to current active conns */
1011 dss->max_active_conns = cur_active_conns;
1012
1013 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
1014
1015 t->expire = tick_add(now_ms, 5000);
1016
1017 return t;
1018}
1019
1020struct dns_session *dns_session_new(struct dns_stream_server *dss)
1021{
1022 struct dns_session *ds;
1023
1024 if (dss->maxconn && (dss->maxconn <= dss->cur_conns))
1025 return NULL;
1026
1027 ds = pool_alloc(dns_session_pool);
1028 if (!ds)
1029 return NULL;
1030
1031 ds->ofs = ~0;
1032 ds->dss = dss;
1033 LIST_INIT(&ds->list);
1034 LIST_INIT(&ds->queries);
1035 LIST_INIT(&ds->waiter);
1036 ds->rx_msg.offset = ds->rx_msg.len = 0;
1037 ds->rx_msg.area = NULL;
1038 ds->tx_ring_area = NULL;
1039 ds->task_exp = NULL;
1040 ds->appctx = NULL;
1041 ds->shutdown = 0;
1042 ds->nb_queries = 0;
1043 ds->query_ids = EB_ROOT_UNIQUE;
1044 ds->rx_msg.area = pool_alloc(dns_msg_buf);
1045 if (!ds->rx_msg.area)
1046 goto error;
1047
1048 ds->tx_ring_area = pool_alloc(dns_msg_buf);
1049 if (!ds->tx_ring_area)
1050 goto error;
1051
1052 ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE);
Christopher Faulet1a1b6742021-03-04 16:53:27 +01001053 /* never fail because it is the first watcher attached to the ring */
1054 DISGUISE(ring_attach(&ds->ring));
Emeric Brunfd647d52021-02-12 20:03:38 +01001055
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001056 if ((ds->task_exp = task_new_here()) == NULL)
Emeric Brunfd647d52021-02-12 20:03:38 +01001057 goto error;
1058
1059 ds->task_exp->process = dns_process_query_exp;
1060 ds->task_exp->context = ds;
1061
1062 ds->appctx = dns_session_create(ds);
1063 if (!ds->appctx)
1064 goto error;
1065
1066 dss->cur_conns++;
1067
1068 return ds;
1069
1070error:
1071 if (ds->task_exp)
1072 task_destroy(ds->task_exp);
1073 if (ds->rx_msg.area)
1074 pool_free(dns_msg_buf, ds->rx_msg.area);
1075 if (ds->tx_ring_area)
1076 pool_free(dns_msg_buf, ds->tx_ring_area);
1077
1078 pool_free(dns_session_pool, ds);
1079
1080 return NULL;
1081}
1082
1083/*
1084 * Task used to consume pending messages from nameserver ring
1085 * and forward them to dns_session ring.
1086 * Note: If no slot found a new dns_session is allocated
1087 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001088static struct task *dns_process_req(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001089{
1090 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1091 struct dns_stream_server *dss = ns->stream;
1092 struct ring *ring = dss->ring_req;
1093 struct buffer *buf = &ring->buf;
1094 uint64_t msg_len;
1095 size_t len, cnt, ofs;
1096 struct dns_session *ds, *ads;
1097 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
1098
1099 ofs = dss->ofs_req;
1100
1101 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
1102
1103 /* explanation for the initialization below: it would be better to do
1104 * this in the parsing function but this would occasionally result in
1105 * dropped events because we'd take a reference on the oldest message
1106 * and keep it while being scheduled. Thus instead let's take it the
1107 * first time we enter here so that we have a chance to pass many
1108 * existing messages before grabbing a reference to a location. This
1109 * value cannot be produced after initialization.
1110 */
1111 if (unlikely(ofs == ~0)) {
1112 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +02001113 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001114 ofs += ring->ofs;
1115 }
1116
1117 /* we were already there, adjust the offset to be relative to
1118 * the buffer's head and remove us from the counter.
1119 */
1120 ofs -= ring->ofs;
1121 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +02001122 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001123
1124 while (ofs + 1 < b_data(buf)) {
1125 struct ist myist;
1126
1127 cnt = 1;
1128 len = b_peek_varint(buf, ofs + cnt, &msg_len);
1129 if (!len)
1130 break;
1131 cnt += len;
1132 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
1133 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
1134 /* too large a message to ever fit, let's skip it */
1135 ofs += cnt + msg_len;
1136 continue;
1137 }
1138
1139 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
1140
Tim Duesterhus92c696e2021-02-28 16:11:36 +01001141 myist = ist2(dns_msg_trash, len);
Emeric Brunfd647d52021-02-12 20:03:38 +01001142
1143 ads = NULL;
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001144 /* try to push request into active sess with free slot */
Emeric Brunfd647d52021-02-12 20:03:38 +01001145 if (!LIST_ISEMPTY(&dss->free_sess)) {
1146 ds = LIST_NEXT(&dss->free_sess, struct dns_session *, list);
1147
1148 if (ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) {
1149 ds->nb_queries++;
1150 if (ds->nb_queries >= DNS_STREAM_MAX_PIPELINED_REQ)
1151 LIST_DEL_INIT(&ds->list);
1152 ads = ds;
1153 }
1154 else {
1155 /* it means we were unable to put a request in this slot,
1156 * it may be close to be full so we put it at the end
1157 * of free conn list */
1158 LIST_DEL_INIT(&ds->list);
Willy Tarreau2b718102021-04-21 07:32:39 +02001159 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001160 }
1161 }
1162
1163 if (!ads) {
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001164 /* try to push request into idle, this one should have enough free space */
Emeric Brunfd647d52021-02-12 20:03:38 +01001165 if (!LIST_ISEMPTY(&dss->idle_sess)) {
1166 ds = LIST_NEXT(&dss->idle_sess, struct dns_session *, list);
1167
1168 /* ring is empty so this ring_write should never fail */
1169 ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1170 ds->nb_queries++;
1171 LIST_DEL_INIT(&ds->list);
1172
1173 ds->dss->idle_conns--;
1174
1175 /* we may have to update the max_active_conns */
1176 if (ds->dss->max_active_conns < ds->dss->cur_conns - ds->dss->idle_conns)
1177 ds->dss->max_active_conns = ds->dss->cur_conns - ds->dss->idle_conns;
1178
1179 /* since we may unable to find a free list to handle
1180 * this request, this request may be large and fill
1181 * the ring buffer so we prefer to put at the end of free
1182 * list. */
Willy Tarreau2b718102021-04-21 07:32:39 +02001183 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001184 ads = ds;
1185 }
1186 }
1187
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001188 /* we didn't find a session available with large enough room */
Emeric Brunfd647d52021-02-12 20:03:38 +01001189 if (!ads) {
1190 /* allocate a new session */
1191 ads = dns_session_new(dss);
1192 if (ads) {
1193 /* ring is empty so this ring_write should never fail */
1194 ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1195 ads->nb_queries++;
Willy Tarreau2b718102021-04-21 07:32:39 +02001196 LIST_INSERT(&dss->free_sess, &ads->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001197 }
1198 else
1199 ns->counters->snd_error++;
1200 }
1201
1202 if (ads)
1203 ns->counters->sent++;
1204
1205 ofs += cnt + len;
1206 }
1207
Willy Tarreau4781b152021-04-06 13:53:36 +02001208 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001209 ofs += ring->ofs;
1210 dss->ofs_req = ofs;
1211 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
1212
1213
1214 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
1215 return t;
1216}
1217
1218/*
1219 * Task used to consume response
1220 * Note: upper layer callback is called
1221 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001222static struct task *dns_process_rsp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001223{
1224 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1225
1226 ns->process_responses(ns);
1227
1228 return t;
1229}
1230
1231/* Function used to initialize an TCP nameserver */
1232int dns_stream_init(struct dns_nameserver *ns, struct server *srv)
1233{
1234 struct dns_stream_server *dss = NULL;
1235
1236 dss = calloc(1, sizeof(*dss));
1237 if (!dss) {
1238 ha_alert("memory allocation error initializing dns tcp server '%s'.\n", srv->id);
1239 goto out;
1240 }
1241
1242 dss->srv = srv;
1243 dss->maxconn = srv->maxconn;
1244
1245 dss->ofs_req = ~0; /* init ring offset */
1246 dss->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
1247 if (!dss->ring_req) {
1248 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1249 goto out;
1250 }
1251 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001252 if ((dss->task_req = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001253 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1254 goto out;
1255 }
1256
1257 /* Update task's parameters */
1258 dss->task_req->process = dns_process_req;
1259 dss->task_req->context = ns;
1260
1261 /* attach the task as reader */
1262 if (!ring_attach(dss->ring_req)) {
1263 /* mark server attached to the ring */
1264 ha_alert("server '%s': too many watchers for ring. this should never happen.\n", srv->id);
1265 goto out;
1266 }
1267
1268 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001269 if ((dss->task_rsp = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001270 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1271 goto out;
1272 }
1273
1274 /* Update task's parameters */
1275 dss->task_rsp->process = dns_process_rsp;
1276 dss->task_rsp->context = ns;
1277
1278 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001279 if ((dss->task_idle = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001280 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1281 goto out;
1282 }
1283
1284 /* Update task's parameters */
1285 dss->task_idle->process = dns_process_idle_exp;
1286 dss->task_idle->context = dss;
1287 dss->task_idle->expire = tick_add(now_ms, 5000);
1288
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001289 /* let start the task to free idle conns immediately */
Emeric Brunfd647d52021-02-12 20:03:38 +01001290 task_queue(dss->task_idle);
1291
1292 LIST_INIT(&dss->free_sess);
1293 LIST_INIT(&dss->idle_sess);
1294 LIST_INIT(&dss->wait_sess);
1295 HA_SPIN_INIT(&dss->lock);
1296 ns->stream = dss;
1297 return 0;
1298out:
1299 if (dss && dss->task_rsp)
1300 task_destroy(dss->task_rsp);
1301 if (dss && dss->task_req)
1302 task_destroy(dss->task_req);
1303 if (dss && dss->ring_req)
1304 ring_free(dss->ring_req);
1305
1306 free(dss);
Emeric Brunc9437992021-02-12 19:42:55 +01001307 return -1;
Christopher Faulet67957bd2017-09-27 11:00:59 +02001308}
1309
Emeric Brunc9437992021-02-12 19:42:55 +01001310int init_dns_buffers()
Baptiste Assmann325137d2015-04-13 23:40:55 +02001311{
Emeric Brunc9437992021-02-12 19:42:55 +01001312 dns_msg_trash = malloc(DNS_TCP_MSG_MAX_SIZE);
1313 if (!dns_msg_trash)
1314 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +02001315
Emeric Brunc9437992021-02-12 19:42:55 +01001316 return 1;
1317}
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +02001318
Emeric Brunc9437992021-02-12 19:42:55 +01001319void deinit_dns_buffers()
1320{
Willy Tarreau61cfdf42021-02-20 10:46:51 +01001321 ha_free(&dns_msg_trash);
Emeric Brunc9437992021-02-12 19:42:55 +01001322}
Emeric Brund26a6232021-01-04 13:32:20 +01001323
/* per-thread allocation and release of the DNS trash buffer */
REGISTER_PER_THREAD_ALLOC(init_dns_buffers);
REGISTER_PER_THREAD_FREE(deinit_dns_buffers);