blob: e76a05cc0d7ddc361a48b47f65ba2a4fe300f1a4 [file] [log] [blame]
/*
 * Name server resolution
 *
 * Copyright 2020 HAProxy Technologies
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <unistd.h>
19
20#include <sys/types.h>
21
Willy Tarreau122eba92020-06-04 10:15:32 +020022#include <haproxy/action.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020023#include <haproxy/api.h>
Willy Tarreau6be78492020-06-05 00:00:29 +020024#include <haproxy/cfgparse.h>
Willy Tarreauf1d32c42020-06-04 21:07:02 +020025#include <haproxy/channel.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020026#include <haproxy/check.h>
Willy Tarreau83487a82020-06-04 20:19:54 +020027#include <haproxy/cli.h>
Willy Tarreau7c18b542020-06-11 09:23:02 +020028#include <haproxy/dgram.h>
Willy Tarreaueb92deb2020-06-04 10:53:16 +020029#include <haproxy/dns.h>
Willy Tarreau8d366972020-05-27 16:10:29 +020030#include <haproxy/errors.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020031#include <haproxy/fd.h>
Willy Tarreauaeed4a82020-06-04 22:01:04 +020032#include <haproxy/log.h>
Emeric Brund26a6232021-01-04 13:32:20 +010033#include <haproxy/ring.h>
Emeric Brunfd647d52021-02-12 20:03:38 +010034#include <haproxy/stream.h>
35#include <haproxy/stream_interface.h>
Willy Tarreau9f9e9fc2021-05-08 13:09:46 +020036#include <haproxy/tools.h>
Baptiste Assmann325137d2015-04-13 23:40:55 +020037
Emeric Brund26a6232021-01-04 13:32:20 +010038static THREAD_LOCAL char *dns_msg_trash;
Baptiste Assmann325137d2015-04-13 23:40:55 +020039
Emeric Brunfd647d52021-02-12 20:03:38 +010040DECLARE_STATIC_POOL(dns_session_pool, "dns_session", sizeof(struct dns_session));
41DECLARE_STATIC_POOL(dns_query_pool, "dns_query", sizeof(struct dns_query));
42DECLARE_STATIC_POOL(dns_msg_buf, "dns_msg_buf", DNS_TCP_MSG_RING_MAX_SIZE);
43
Christopher Faulet67957bd2017-09-27 11:00:59 +020044/* Opens an UDP socket on the namesaver's IP/Port, if required. Returns 0 on
Christopher Faulet1e711be2021-03-04 16:58:35 +010045 * success, -1 otherwise. ns->dgram must be defined.
Baptiste Assmann325137d2015-04-13 23:40:55 +020046 */
Emeric Brund26a6232021-01-04 13:32:20 +010047static int dns_connect_nameserver(struct dns_nameserver *ns)
Baptiste Assmann325137d2015-04-13 23:40:55 +020048{
Christopher Faulet1e711be2021-03-04 16:58:35 +010049 struct dgram_conn *dgram = &ns->dgram->conn;
50 int fd;
Baptiste Assmann325137d2015-04-13 23:40:55 +020051
Christopher Faulet1e711be2021-03-04 16:58:35 +010052 /* Already connected */
53 if (dgram->t.sock.fd != -1)
Emeric Brun526b7922021-02-15 14:28:27 +010054 return 0;
Christopher Faulet1e711be2021-03-04 16:58:35 +010055
56 /* Create an UDP socket and connect it on the nameserver's IP/Port */
57 if ((fd = socket(dgram->addr.to.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
58 send_log(NULL, LOG_WARNING,
59 "DNS : section '%s': can't create socket for nameserver '%s'.\n",
60 ns->counters->pid, ns->id);
61 return -1;
62 }
63 if (connect(fd, (struct sockaddr*)&dgram->addr.to, get_addr_len(&dgram->addr.to)) == -1) {
64 send_log(NULL, LOG_WARNING,
65 "DNS : section '%s': can't connect socket for nameserver '%s'.\n",
66 ns->counters->id, ns->id);
67 close(fd);
68 return -1;
Emeric Brunc9437992021-02-12 19:42:55 +010069 }
Emeric Brun526b7922021-02-15 14:28:27 +010070
Christopher Faulet1e711be2021-03-04 16:58:35 +010071 /* Make the socket non blocking */
72 fcntl(fd, F_SETFL, O_NONBLOCK);
73
74 /* Add the fd in the fd list and update its parameters */
75 dgram->t.sock.fd = fd;
76 fd_insert(fd, dgram, dgram_fd_handler, MAX_THREADS_MASK);
77 fd_want_recv(fd);
78 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +020079}
80
Emeric Brund26a6232021-01-04 13:32:20 +010081/* Sends a message to a name server
82 * It returns message length on success
83 * or -1 in error case
84 * 0 is returned in case of output ring buffer is full
85 */
86int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len)
87{
88 int ret = -1;
89
90 if (ns->dgram) {
91 struct dgram_conn *dgram = &ns->dgram->conn;
Emeric Brunf1d38be2022-05-10 11:35:48 +020092 int fd;
Emeric Brund26a6232021-01-04 13:32:20 +010093
Emeric Brunf1d38be2022-05-10 11:35:48 +020094 HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
95 fd = dgram->t.sock.fd;
96 if (fd == -1) {
97 if (dns_connect_nameserver(ns) == -1) {
98 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +010099 return -1;
Emeric Brunf1d38be2022-05-10 11:35:48 +0200100 }
Emeric Brund26a6232021-01-04 13:32:20 +0100101 fd = dgram->t.sock.fd;
102 }
103
104 ret = send(fd, buf, len, 0);
105 if (ret < 0) {
106 if (errno == EAGAIN) {
107 struct ist myist;
108
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100109 myist = ist2(buf, len);
Emeric Brund26a6232021-01-04 13:32:20 +0100110 ret = ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
111 if (!ret) {
112 ns->counters->snd_error++;
Emeric Brunf1d38be2022-05-10 11:35:48 +0200113 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100114 return -1;
115 }
116 fd_cant_send(fd);
Emeric Brunf1d38be2022-05-10 11:35:48 +0200117 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100118 return ret;
119 }
120 ns->counters->snd_error++;
121 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100122 dgram->t.sock.fd = -1;
Emeric Brunf1d38be2022-05-10 11:35:48 +0200123 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100124 return -1;
125 }
126 ns->counters->sent++;
Emeric Brunf1d38be2022-05-10 11:35:48 +0200127 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100128 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100129 else if (ns->stream) {
130 struct ist myist;
131
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100132 myist = ist2(buf, len);
Emeric Brunfd647d52021-02-12 20:03:38 +0100133 ret = ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
134 if (!ret) {
135 ns->counters->snd_error++;
136 return -1;
137 }
138 task_wakeup(ns->stream->task_req, TASK_WOKEN_MSG);
139 return ret;
140 }
Emeric Brund26a6232021-01-04 13:32:20 +0100141
142 return ret;
143}
144
/* forward declaration: used by dns_recv_nameserver() before its definition */
void dns_session_free(struct dns_session *);
146
Emeric Brund26a6232021-01-04 13:32:20 +0100147/* Receives a dns message
148 * Returns message length
149 * 0 is returned if no more message available
150 * -1 in error case
151 */
152ssize_t dns_recv_nameserver(struct dns_nameserver *ns, void *data, size_t size)
153{
154 ssize_t ret = -1;
155
156 if (ns->dgram) {
157 struct dgram_conn *dgram = &ns->dgram->conn;
Emeric Brunf1d38be2022-05-10 11:35:48 +0200158 int fd;
Emeric Brund26a6232021-01-04 13:32:20 +0100159
Emeric Brunf1d38be2022-05-10 11:35:48 +0200160 HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
161 fd = dgram->t.sock.fd;
162 if (fd == -1) {
163 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100164 return -1;
Emeric Brunf1d38be2022-05-10 11:35:48 +0200165 }
Emeric Brund26a6232021-01-04 13:32:20 +0100166
167 if ((ret = recv(fd, data, size, 0)) < 0) {
168 if (errno == EAGAIN) {
169 fd_cant_recv(fd);
Emeric Brunf1d38be2022-05-10 11:35:48 +0200170 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100171 return 0;
172 }
173 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100174 dgram->t.sock.fd = -1;
Emeric Brunf1d38be2022-05-10 11:35:48 +0200175 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100176 return -1;
177 }
Emeric Brunf1d38be2022-05-10 11:35:48 +0200178 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100179 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100180 else if (ns->stream) {
181 struct dns_stream_server *dss = ns->stream;
182 struct dns_session *ds;
183
184 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
185
186 if (!LIST_ISEMPTY(&dss->wait_sess)) {
187 ds = LIST_NEXT(&dss->wait_sess, struct dns_session *, waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100188 ret = ds->rx_msg.len < size ? ds->rx_msg.len : size;
189 memcpy(data, ds->rx_msg.area, ret);
190
191 ds->rx_msg.len = 0;
192
Emeric Brunfd647d52021-02-12 20:03:38 +0100193 LIST_DEL_INIT(&ds->waiter);
194
195 if (ds->appctx) {
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500196 /* awake appctx because it may have other
Emeric Brunfd647d52021-02-12 20:03:38 +0100197 * message to receive
198 */
199 appctx_wakeup(ds->appctx);
200
201 /* dns_session could already be into free_sess list
202 * so we firstly remove it */
203 LIST_DEL_INIT(&ds->list);
204
205 /* decrease nb_queries to free a slot for a new query on that sess */
206 ds->nb_queries--;
207 if (ds->nb_queries) {
208 /* it remains pipelined unanswered request
209 * into this session but we just decrease
210 * the counter so the session
211 * can not be full of pipelined requests
212 * so we can add if to free_sess list
213 * to receive a new request
214 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200215 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100216 }
217 else {
218 /* there is no more pipelined requests
219 * into this session, so we move it
220 * to idle_sess list */
Willy Tarreau2b718102021-04-21 07:32:39 +0200221 LIST_INSERT(&ds->dss->idle_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100222
223 /* update the counter of idle sessions */
224 ds->dss->idle_conns++;
225
226 /* Note: this is useless there to update
227 * the max_active_conns since we increase
228 * the idle count */
229 }
230 }
231 else {
232 /* there is no more appctx for this session
233 * it means it is ready to die
234 */
235 dns_session_free(ds);
236 }
237
238
239 }
240
241 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
242 }
Emeric Brund26a6232021-01-04 13:32:20 +0100243
244 return ret;
245}
246
247static void dns_resolve_recv(struct dgram_conn *dgram)
248{
249 struct dns_nameserver *ns;
250 int fd;
251
Emeric Brunf1d38be2022-05-10 11:35:48 +0200252 HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
253
Emeric Brund26a6232021-01-04 13:32:20 +0100254 fd = dgram->t.sock.fd;
255
256 /* check if ready for reading */
Emeric Brunf1d38be2022-05-10 11:35:48 +0200257 if ((fd == -1) || !fd_recv_ready(fd)) {
258 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100259 return;
Emeric Brunf1d38be2022-05-10 11:35:48 +0200260 }
Emeric Brund26a6232021-01-04 13:32:20 +0100261
262 /* no need to go further if we can't retrieve the nameserver */
263 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200264 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100265 fd_stop_recv(fd);
Emeric Brunf1d38be2022-05-10 11:35:48 +0200266 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100267 return;
268 }
269
Emeric Brunf1d38be2022-05-10 11:35:48 +0200270 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
271
Emeric Brund26a6232021-01-04 13:32:20 +0100272 ns->process_responses(ns);
273}
274
275/* Called when a dns network socket is ready to send data */
276static void dns_resolve_send(struct dgram_conn *dgram)
277{
278 int fd;
279 struct dns_nameserver *ns;
280 struct ring *ring;
281 struct buffer *buf;
282 uint64_t msg_len;
283 size_t len, cnt, ofs;
284
Emeric Brunf1d38be2022-05-10 11:35:48 +0200285 HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
286
Emeric Brund26a6232021-01-04 13:32:20 +0100287 fd = dgram->t.sock.fd;
288
289 /* check if ready for sending */
Emeric Brunf1d38be2022-05-10 11:35:48 +0200290 if ((fd == -1) || !fd_send_ready(fd)) {
291 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100292 return;
Emeric Brunf1d38be2022-05-10 11:35:48 +0200293 }
Emeric Brund26a6232021-01-04 13:32:20 +0100294
295 /* no need to go further if we can't retrieve the nameserver */
296 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200297 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100298 fd_stop_send(fd);
Emeric Brunf1d38be2022-05-10 11:35:48 +0200299 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100300 return;
301 }
302
303 ring = ns->dgram->ring_req;
304 buf = &ring->buf;
305
306 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
307 ofs = ns->dgram->ofs_req;
308
309 /* explanation for the initialization below: it would be better to do
310 * this in the parsing function but this would occasionally result in
311 * dropped events because we'd take a reference on the oldest message
312 * and keep it while being scheduled. Thus instead let's take it the
313 * first time we enter here so that we have a chance to pass many
314 * existing messages before grabbing a reference to a location. This
315 * value cannot be produced after initialization.
316 */
317 if (unlikely(ofs == ~0)) {
318 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +0200319 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100320 ofs += ring->ofs;
321 }
322
323 /* we were already there, adjust the offset to be relative to
324 * the buffer's head and remove us from the counter.
325 */
326 ofs -= ring->ofs;
327 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200328 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100329
330 while (ofs + 1 < b_data(buf)) {
331 int ret;
332
333 cnt = 1;
334 len = b_peek_varint(buf, ofs + cnt, &msg_len);
335 if (!len)
336 break;
337 cnt += len;
338 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
339 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
340 /* too large a message to ever fit, let's skip it */
341 ofs += cnt + msg_len;
342 continue;
343 }
344
345 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
346
347 ret = send(fd, dns_msg_trash, len, 0);
348 if (ret < 0) {
349 if (errno == EAGAIN) {
350 fd_cant_send(fd);
351 goto out;
352 }
353 ns->counters->snd_error++;
354 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100355 fd = dgram->t.sock.fd = -1;
356 goto out;
357 }
358 ns->counters->sent++;
359
360 ofs += cnt + len;
361 }
362
363 /* we don't want/need to be waked up any more for sending
364 * because all ring content is sent */
365 fd_stop_send(fd);
366
367out:
368
Willy Tarreau4781b152021-04-06 13:53:36 +0200369 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100370 ofs += ring->ofs;
371 ns->dgram->ofs_req = ofs;
372 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
Emeric Brunf1d38be2022-05-10 11:35:48 +0200373 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100374
375}
376
Emeric Brunc9437992021-02-12 19:42:55 +0100377/* proto_udp callback functions for a DNS resolution */
378struct dgram_data_cb dns_dgram_cb = {
379 .recv = dns_resolve_recv,
380 .send = dns_resolve_send,
381};
Baptiste Assmann325137d2015-04-13 23:40:55 +0200382
Emeric Brunc9437992021-02-12 19:42:55 +0100383int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk)
Baptiste Assmann325137d2015-04-13 23:40:55 +0200384{
Emeric Brunc9437992021-02-12 19:42:55 +0100385 struct dns_dgram_server *dgram;
Baptiste Assmann201c07f2017-05-22 15:17:15 +0200386
Emeric Brunc9437992021-02-12 19:42:55 +0100387 if ((dgram = calloc(1, sizeof(*dgram))) == NULL)
Christopher Faulet67957bd2017-09-27 11:00:59 +0200388 return -1;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200389
Emeric Brunc9437992021-02-12 19:42:55 +0100390 /* Leave dgram partially initialized, no FD attached for
391 * now. */
392 dgram->conn.owner = ns;
393 dgram->conn.data = &dns_dgram_cb;
394 dgram->conn.t.sock.fd = -1;
395 dgram->conn.addr.to = *sk;
Emeric Brunf1d38be2022-05-10 11:35:48 +0200396 HA_SPIN_INIT(&dgram->conn.lock);
Emeric Brunc9437992021-02-12 19:42:55 +0100397 ns->dgram = dgram;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200398
Emeric Brunc9437992021-02-12 19:42:55 +0100399 dgram->ofs_req = ~0; /* init ring offset */
400 dgram->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
401 if (!dgram->ring_req) {
402 ha_alert("memory allocation error initializing the ring for nameserver.\n");
403 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200404 }
405
Emeric Brunc9437992021-02-12 19:42:55 +0100406 /* attach the task as reader */
407 if (!ring_attach(dgram->ring_req)) {
408 /* mark server attached to the ring */
409 ha_alert("nameserver sets too many watchers > 255 on ring. This is a bug and should not happen.\n");
410 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200411 }
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200412 return 0;
Emeric Brunc9437992021-02-12 19:42:55 +0100413out:
414 if (dgram->ring_req)
415 ring_free(dgram->ring_req);
Christopher Fauletd6c6b5f2020-09-08 10:27:24 +0200416
Emeric Brunc9437992021-02-12 19:42:55 +0100417 free(dgram);
Olivier Houchard2ec2db92018-01-08 16:28:57 +0100418
Emeric Brunfd647d52021-02-12 20:03:38 +0100419 return -1;
420}
421
422/*
423 * IO Handler to handle message push to dns tcp server
424 */
425static void dns_session_io_handler(struct appctx *appctx)
426{
427 struct stream_interface *si = appctx->owner;
428 struct dns_session *ds = appctx->ctx.sft.ptr;
429 struct ring *ring = &ds->ring;
430 struct buffer *buf = &ring->buf;
431 uint64_t msg_len;
432 int available_room;
433 size_t len, cnt, ofs;
434 int ret = 0;
435
436 /* if stopping was requested, close immediately */
437 if (unlikely(stopping))
438 goto close;
439
440 /* we want to be sure to not miss that we have been awaked for a shutdown */
441 __ha_barrier_load();
442
443 /* that means the connection was requested to shutdown
444 * for instance idle expire */
445 if (ds->shutdown)
446 goto close;
447
448 /* an error was detected */
449 if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
450 goto close;
451
452 /* con closed by server side, we will skip data write and drain data from channel */
453 if ((si_oc(si)->flags & CF_SHUTW)) {
454 goto read;
455 }
456
457 /* if the connection is not established, inform the stream that we want
458 * to be notified whenever the connection completes.
459 */
460 if (si_opposite(si)->state < SI_ST_EST) {
461 si_cant_get(si);
462 si_rx_conn_blk(si);
463 si_rx_endp_more(si);
464 return;
465 }
466
467
468 ofs = ds->ofs;
469
470 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
471 LIST_DEL_INIT(&appctx->wait_entry);
472 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
473
474 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
475
476 /* explanation for the initialization below: it would be better to do
477 * this in the parsing function but this would occasionally result in
478 * dropped events because we'd take a reference on the oldest message
479 * and keep it while being scheduled. Thus instead let's take it the
480 * first time we enter here so that we have a chance to pass many
481 * existing messages before grabbing a reference to a location. This
482 * value cannot be produced after initialization.
483 */
484 if (unlikely(ofs == ~0)) {
485 ofs = 0;
486
Willy Tarreau4781b152021-04-06 13:53:36 +0200487 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100488 ofs += ring->ofs;
489 }
490
491 /* in this loop, ofs always points to the counter byte that precedes
492 * the message so that we can take our reference there if we have to
493 * stop before the end (ret=0).
494 */
495 if (si_opposite(si)->state == SI_ST_EST) {
496 /* we were already there, adjust the offset to be relative to
497 * the buffer's head and remove us from the counter.
498 */
499 ofs -= ring->ofs;
500 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200501 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100502
503 ret = 1;
504 while (ofs + 1 < b_data(buf)) {
505 struct dns_query *query;
506 uint16_t original_qid;
507 uint16_t new_qid;
508
509 cnt = 1;
510 len = b_peek_varint(buf, ofs + cnt, &msg_len);
511 if (!len)
512 break;
513 cnt += len;
514 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
515
516 /* retrieve available room on output channel */
517 available_room = channel_recv_max(si_ic(si));
518
519 /* tx_msg_offset null means we are at the start of a new message */
520 if (!ds->tx_msg_offset) {
521 uint16_t slen;
522
523 /* check if there is enough room to put message len and query id */
524 if (available_room < sizeof(slen) + sizeof(new_qid)) {
525 si_rx_room_blk(si);
526 ret = 0;
527 break;
528 }
529
530 /* put msg len into then channel */
531 slen = (uint16_t)msg_len;
532 slen = htons(slen);
533 ci_putblk(si_ic(si), (char *)&slen, sizeof(slen));
534 available_room -= sizeof(slen);
535
536 /* backup original query id */
537 len = b_getblk(buf, (char *)&original_qid, sizeof(original_qid), ofs + cnt);
Emeric Brun538bb042021-02-15 13:58:06 +0100538 if (!len) {
539 /* should never happen since messages are atomically
540 * written into ring
541 */
542 ret = 0;
543 break;
544 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100545
546 /* generates new query id */
547 new_qid = ++ds->query_counter;
548 new_qid = htons(new_qid);
549
550 /* put new query id into the channel */
551 ci_putblk(si_ic(si), (char *)&new_qid, sizeof(new_qid));
552 available_room -= sizeof(new_qid);
553
554 /* keep query id mapping */
555
556 query = pool_alloc(dns_query_pool);
557 if (query) {
558 query->qid.key = new_qid;
559 query->original_qid = original_qid;
560 query->expire = tick_add(now_ms, 5000);
561 LIST_INIT(&query->list);
562 if (LIST_ISEMPTY(&ds->queries)) {
563 /* enable task to handle expire */
564 ds->task_exp->expire = query->expire;
565 /* ensure this will be executed by the same
566 * thread than ds_session_release
567 * to ensure session_release is free
568 * to destroy the task */
569 task_queue(ds->task_exp);
570 }
Willy Tarreau2b718102021-04-21 07:32:39 +0200571 LIST_APPEND(&ds->queries, &query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100572 eb32_insert(&ds->query_ids, &query->qid);
573 ds->onfly_queries++;
574 }
575
576 /* update the tx_offset to handle output in 16k streams */
577 ds->tx_msg_offset = sizeof(original_qid);
578
579 }
580
581 /* check if it remains available room on output chan */
582 if (unlikely(!available_room)) {
583 si_rx_room_blk(si);
584 ret = 0;
585 break;
586 }
587
588 chunk_reset(&trash);
589 if ((msg_len - ds->tx_msg_offset) > available_room) {
590 /* remaining msg data is too large to be written in output channel at one time */
591
592 len = b_getblk(buf, trash.area, available_room, ofs + cnt + ds->tx_msg_offset);
593
594 /* update offset to complete mesg forwarding later */
595 ds->tx_msg_offset += len;
596 }
597 else {
598 /* remaining msg data can be written in output channel at one time */
599 len = b_getblk(buf, trash.area, msg_len - ds->tx_msg_offset, ofs + cnt + ds->tx_msg_offset);
600
601 /* reset tx_msg_offset to mark forward fully processed */
602 ds->tx_msg_offset = 0;
603 }
604 trash.data += len;
605
Emeric Brun743afee2021-02-15 14:12:06 +0100606 if (ci_putchk(si_ic(si), &trash) == -1) {
607 /* should never happen since we
608 * check available_room is large
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500609 * enough here.
Emeric Brun743afee2021-02-15 14:12:06 +0100610 */
611 si_rx_room_blk(si);
612 ret = 0;
613 break;
614 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100615
616 if (ds->tx_msg_offset) {
617 /* msg was not fully processed, we must be awake to drain pending data */
618
619 si_rx_room_blk(si);
620 ret = 0;
621 break;
622 }
623 /* switch to next message */
624 ofs += cnt + msg_len;
625 }
626
Willy Tarreau4781b152021-04-06 13:53:36 +0200627 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100628 ofs += ring->ofs;
629 ds->ofs = ofs;
630 }
631 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
632
633 if (ret) {
634 /* let's be woken up once new request to write arrived */
635 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
Willy Tarreau2b718102021-04-21 07:32:39 +0200636 LIST_APPEND(&ring->waiters, &appctx->wait_entry);
Emeric Brunfd647d52021-02-12 20:03:38 +0100637 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
638 si_rx_endp_done(si);
639 }
640
641read:
642
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500643 /* if session is not a waiter it means there is no committed
Emeric Brunfd647d52021-02-12 20:03:38 +0100644 * message into rx_buf and we are free to use it
645 * Note: we need a load barrier here to not miss the
646 * delete from the list
647 */
Emeric Brun9a6ac572021-10-20 10:49:53 +0200648
649 /* lock the dns_stream_server containing lists heads */
650 HA_SPIN_LOCK(DNS_LOCK, &ds->dss->lock);
651
Willy Tarreau2b718102021-04-21 07:32:39 +0200652 if (!LIST_INLIST(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100653 while (1) {
654 uint16_t query_id;
655 struct eb32_node *eb;
656 struct dns_query *query;
657
658 if (!ds->rx_msg.len) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100659 /* retrieve message len */
Christopher Fauletcf537f62023-03-30 15:49:30 +0200660 ret = co_getblk(si_oc(si), (char *)&msg_len, 2, 0);
661 if (ret <= 0) {
Christopher Faulet70ab1492023-09-04 17:34:04 +0200662 if (ret == -1) {
663 HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
Christopher Fauletcf537f62023-03-30 15:49:30 +0200664 goto close;
Christopher Faulet70ab1492023-09-04 17:34:04 +0200665 }
Christopher Fauletcf537f62023-03-30 15:49:30 +0200666 si_cant_get(si);
667 break;
668 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100669
670 /* mark as consumed */
671 co_skip(si_oc(si), 2);
672
673 /* store message len */
674 ds->rx_msg.len = ntohs(msg_len);
Christopher Fauletcf537f62023-03-30 15:49:30 +0200675 if (!ds->rx_msg.len)
676 continue;
Emeric Brunfd647d52021-02-12 20:03:38 +0100677 }
678
Emeric Brunfd647d52021-02-12 20:03:38 +0100679 if (co_data(si_oc(si)) + ds->rx_msg.offset < ds->rx_msg.len) {
680 /* message only partially available */
681
682 /* read available data */
Christopher Fauletcf537f62023-03-30 15:49:30 +0200683 ret = co_getblk(si_oc(si), ds->rx_msg.area + ds->rx_msg.offset, co_data(si_oc(si)), 0);
684 if (ret <= 0) {
Christopher Faulet70ab1492023-09-04 17:34:04 +0200685 if (ret == -1) {
686 HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
Christopher Fauletcf537f62023-03-30 15:49:30 +0200687 goto close;
Christopher Faulet70ab1492023-09-04 17:34:04 +0200688 }
Christopher Fauletcf537f62023-03-30 15:49:30 +0200689 si_cant_get(si);
690 break;
691 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100692
693 /* update message offset */
694 ds->rx_msg.offset += co_data(si_oc(si));
695
696 /* consume all pending data from the channel */
697 co_skip(si_oc(si), co_data(si_oc(si)));
698
699 /* we need to wait for more data */
Christopher Fauletcf537f62023-03-30 15:49:30 +0200700 si_cant_get(si);
Emeric Brunfd647d52021-02-12 20:03:38 +0100701 break;
702 }
703
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500704 /* enough data is available into the channel to read the message until the end */
Emeric Brunfd647d52021-02-12 20:03:38 +0100705
706 /* read from the channel until the end of the message */
Christopher Fauletcf537f62023-03-30 15:49:30 +0200707 ret = co_getblk(si_oc(si), ds->rx_msg.area + ds->rx_msg.offset, ds->rx_msg.len - ds->rx_msg.offset, 0);
708 if (ret <= 0) {
Christopher Faulet70ab1492023-09-04 17:34:04 +0200709 if (ret == -1) {
710 HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
Christopher Fauletcf537f62023-03-30 15:49:30 +0200711 goto close;
Christopher Faulet70ab1492023-09-04 17:34:04 +0200712 }
Christopher Fauletcf537f62023-03-30 15:49:30 +0200713 si_cant_get(si);
714 break;
715 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100716
717 /* consume all data until the end of the message from the channel */
718 co_skip(si_oc(si), ds->rx_msg.len - ds->rx_msg.offset);
719
720 /* reset reader offset to 0 for next message reand */
721 ds->rx_msg.offset = 0;
722
723 /* try remap query id to original */
724 memcpy(&query_id, ds->rx_msg.area, sizeof(query_id));
725 eb = eb32_lookup(&ds->query_ids, query_id);
726 if (!eb) {
727 /* query id not found means we have an unknown corresponding
728 * request, perhaps server's bug or or the query reached
729 * timeout
730 */
731 ds->rx_msg.len = 0;
732 continue;
733 }
734
735 /* re-map the original query id set by the requester */
736 query = eb32_entry(eb, struct dns_query, qid);
737 memcpy(ds->rx_msg.area, &query->original_qid, sizeof(query->original_qid));
738
739 /* remove query ids mapping from pending queries list/tree */
740 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200741 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100742 pool_free(dns_query_pool, query);
743 ds->onfly_queries--;
744
Emeric Brunfd647d52021-02-12 20:03:38 +0100745 /* the dns_session is also added in queue of the
746 * wait_sess list where the task processing
747 * response will pop available responses
748 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200749 LIST_APPEND(&ds->dss->wait_sess, &ds->waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100750
Emeric Brunfd647d52021-02-12 20:03:38 +0100751 /* awake the task processing the responses */
752 task_wakeup(ds->dss->task_rsp, TASK_WOKEN_INIT);
753
754 break;
755 }
756
Willy Tarreau2b718102021-04-21 07:32:39 +0200757 if (!LIST_INLIST(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100758 /* there is no more pending data to read and the con was closed by the server side */
759 if (!co_data(si_oc(si)) && (si_oc(si)->flags & CF_SHUTW)) {
Emeric Brun9a6ac572021-10-20 10:49:53 +0200760 HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
Emeric Brunfd647d52021-02-12 20:03:38 +0100761 goto close;
762 }
763 }
764
765 }
766
Emeric Brun9a6ac572021-10-20 10:49:53 +0200767 HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
Emeric Brunfd647d52021-02-12 20:03:38 +0100768 return;
769close:
770 si_shutw(si);
771 si_shutr(si);
772 si_ic(si)->flags |= CF_READ_NULL;
773}
774
775void dns_queries_flush(struct dns_session *ds)
776{
777 struct dns_query *query, *queryb;
778
779 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
780 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200781 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100782 pool_free(dns_query_pool, query);
783 }
784}
785
786void dns_session_free(struct dns_session *ds)
787{
788 if (ds->rx_msg.area)
789 pool_free(dns_msg_buf, ds->rx_msg.area);
790 if (ds->tx_ring_area)
791 pool_free(dns_msg_buf, ds->tx_ring_area);
792 if (ds->task_exp)
793 task_destroy(ds->task_exp);
794
795 dns_queries_flush(ds);
796
Emeric Brunb18a95b2021-10-19 15:40:10 +0200797 /* Ensure to remove this session from external lists
798 * Note: we are under the lock of dns_stream_server
799 * which own the heads of those lists.
800 */
801 LIST_DEL_INIT(&ds->waiter);
802 LIST_DEL_INIT(&ds->list);
803
Emeric Brunfd647d52021-02-12 20:03:38 +0100804 ds->dss->cur_conns--;
805 /* Note: this is useless to update
806 * max_active_conns here because
807 * we decrease the value
808 */
809 pool_free(dns_session_pool, ds);
810}
811
/* forward declaration: used by dns_session_release() before its definition */
static struct appctx *dns_session_create(struct dns_session *ds);
813
814/*
815 * Function to release a DNS tcp session
816 */
817static void dns_session_release(struct appctx *appctx)
818{
819 struct dns_session *ds = appctx->ctx.sft.ptr;
Willy Tarreaue3e648c2021-02-24 17:38:46 +0100820 struct dns_stream_server *dss __maybe_unused;
Emeric Brunfd647d52021-02-12 20:03:38 +0100821
822 if (!ds)
823 return;
824
825 dss = ds->dss;
826
827 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
828 LIST_DEL_INIT(&ds->list);
829
830 if (stopping) {
831 dns_session_free(ds);
832 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
833 return;
834 }
835
836 if (!ds->nb_queries) {
837 /* this is an idle session */
838 /* Note: this is useless to update max_active_sess
839 * here because we decrease idle_conns but
840 * dns_session_free decrease curconns
841 */
842
843 ds->dss->idle_conns--;
844 dns_session_free(ds);
845 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
846 return;
847 }
848
849 if (ds->onfly_queries == ds->nb_queries) {
850 /* the session can be released because
851 * it means that all queries AND
852 * responses are in fly */
853 dns_session_free(ds);
854 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
855 return;
856 }
857
858 /* We do not call ring_appctx_detach here
859 * because we want to keep readers counters
860 * to retry a con with a different appctx*/
861 HA_RWLOCK_WRLOCK(DNS_LOCK, &ds->ring.lock);
862 LIST_DEL_INIT(&appctx->wait_entry);
863 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ds->ring.lock);
864
865 /* if there is no pending complete response
866 * message, ensure to reset
867 * message offsets if the session
868 * was closed with an incomplete pending response
869 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200870 if (!LIST_INLIST(&ds->waiter))
Emeric Brunfd647d52021-02-12 20:03:38 +0100871 ds->rx_msg.len = ds->rx_msg.offset = 0;
872
873 /* we flush pending sent queries because we never
874 * have responses
875 */
876 ds->nb_queries -= ds->onfly_queries;
877 dns_queries_flush(ds);
878
879 /* reset offset to be sure to start from message start */
880 ds->tx_msg_offset = 0;
881
882 /* here the ofs and the attached counter
883 * are kept unchanged
884 */
885
886 /* Create a new appctx, We hope we can
887 * create from the release callback! */
888 ds->appctx = dns_session_create(ds);
889 if (!ds->appctx) {
890 dns_session_free(ds);
891 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
892 return;
893 }
894
895 if (ds->nb_queries < DNS_STREAM_MAX_PIPELINED_REQ)
Willy Tarreau2b718102021-04-21 07:32:39 +0200896 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100897
898 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
899}
900
901/* DNS tcp session applet */
902static struct applet dns_session_applet = {
903 .obj_type = OBJ_TYPE_APPLET,
904 .name = "<STRMDNS>", /* used for logging */
905 .fct = dns_session_io_handler,
906 .release = dns_session_release,
907};
908
909/*
910 * Function used to create an appctx for a DNS session
911 */
912static struct appctx *dns_session_create(struct dns_session *ds)
913{
914 struct appctx *appctx;
915 struct session *sess;
916 struct stream *s;
917 struct applet *applet = &dns_session_applet;
918
919 appctx = appctx_new(applet, tid_bit);
920 if (!appctx)
921 goto out_close;
922
923 appctx->ctx.sft.ptr = (void *)ds;
924
925 sess = session_new(ds->dss->srv->proxy, NULL, &appctx->obj_type);
926 if (!sess) {
927 ha_alert("out of memory in peer_session_create().\n");
928 goto out_free_appctx;
929 }
930
931 if ((s = stream_new(sess, &appctx->obj_type, &BUF_NULL)) == NULL) {
932 ha_alert("Failed to initialize stream in peer_session_create().\n");
933 goto out_free_sess;
934 }
935
936
937 s->target = &ds->dss->srv->obj_type;
938 if (!sockaddr_alloc(&s->target_addr, &ds->dss->srv->addr, sizeof(ds->dss->srv->addr)))
939 goto out_free_strm;
940 s->flags = SF_ASSIGNED|SF_ADDR_SET;
941 s->si[1].flags |= SI_FL_NOLINGER;
942
943 s->do_log = NULL;
944 s->uniq_id = 0;
945
946 s->res.flags |= CF_READ_DONTWAIT;
947 /* for rto and rex to eternity to not expire on idle recv:
948 * We are using a syslog server.
949 */
950 s->res.rto = TICK_ETERNITY;
951 s->res.rex = TICK_ETERNITY;
952 ds->appctx = appctx;
953 task_wakeup(s->task, TASK_WOKEN_INIT);
954 return appctx;
955
956 /* Error unrolling */
957 out_free_strm:
Willy Tarreau2b718102021-04-21 07:32:39 +0200958 LIST_DELETE(&s->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100959 pool_free(pool_head_stream, s);
960 out_free_sess:
961 session_free(sess);
962 out_free_appctx:
963 appctx_free(appctx);
964 out_close:
965 return NULL;
966}
967
968/* Task processing expiration of unresponded queries, this one is supposed
969 * to be stuck on the same thread than the appctx handler
970 */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100971static struct task *dns_process_query_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100972{
973 struct dns_session *ds = (struct dns_session *)context;
974 struct dns_query *query, *queryb;
975
976 t->expire = TICK_ETERNITY;
977
978 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
979 if (tick_is_expired(query->expire, now_ms)) {
980 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200981 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100982 pool_free(dns_query_pool, query);
983 ds->onfly_queries--;
984 }
985 else {
986 t->expire = query->expire;
987 break;
988 }
989 }
990
991 return t;
992}
993
994/* Task processing expiration of idle sessions */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100995static struct task *dns_process_idle_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100996{
997 struct dns_stream_server *dss = (struct dns_stream_server *)context;
998 struct dns_session *ds, *dsb;
999 int target = 0;
1000 int cur_active_conns;
1001
1002 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
1003
1004
1005 cur_active_conns = dss->cur_conns - dss->idle_conns;
1006 if (cur_active_conns > dss->max_active_conns)
1007 dss->max_active_conns = cur_active_conns;
1008
1009 target = (dss->max_active_conns - cur_active_conns) / 2;
1010 list_for_each_entry_safe(ds, dsb, &dss->idle_sess, list) {
1011 if (!target)
1012 break;
1013
1014 /* remove conn to pending list to ensure it won't be reused */
1015 LIST_DEL_INIT(&ds->list);
1016
1017 /* force session shutdown */
1018 ds->shutdown = 1;
1019
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001020 /* to be sure that the appctx won't miss shutdown */
Emeric Brunfd647d52021-02-12 20:03:38 +01001021 __ha_barrier_store();
1022
1023 /* wake appctx to perform the shutdown */
1024 appctx_wakeup(ds->appctx);
1025 }
1026
1027 /* reset max to current active conns */
1028 dss->max_active_conns = cur_active_conns;
1029
1030 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
1031
1032 t->expire = tick_add(now_ms, 5000);
1033
1034 return t;
1035}
1036
1037struct dns_session *dns_session_new(struct dns_stream_server *dss)
1038{
1039 struct dns_session *ds;
1040
1041 if (dss->maxconn && (dss->maxconn <= dss->cur_conns))
1042 return NULL;
1043
Christopher Faulet5dce25f2022-08-03 10:30:06 +02001044 ds = pool_zalloc(dns_session_pool);
Emeric Brunfd647d52021-02-12 20:03:38 +01001045 if (!ds)
1046 return NULL;
1047
1048 ds->ofs = ~0;
1049 ds->dss = dss;
1050 LIST_INIT(&ds->list);
1051 LIST_INIT(&ds->queries);
1052 LIST_INIT(&ds->waiter);
1053 ds->rx_msg.offset = ds->rx_msg.len = 0;
1054 ds->rx_msg.area = NULL;
1055 ds->tx_ring_area = NULL;
1056 ds->task_exp = NULL;
1057 ds->appctx = NULL;
1058 ds->shutdown = 0;
1059 ds->nb_queries = 0;
1060 ds->query_ids = EB_ROOT_UNIQUE;
1061 ds->rx_msg.area = pool_alloc(dns_msg_buf);
1062 if (!ds->rx_msg.area)
1063 goto error;
1064
1065 ds->tx_ring_area = pool_alloc(dns_msg_buf);
1066 if (!ds->tx_ring_area)
1067 goto error;
1068
1069 ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE);
Christopher Faulet1a1b6742021-03-04 16:53:27 +01001070 /* never fail because it is the first watcher attached to the ring */
1071 DISGUISE(ring_attach(&ds->ring));
Emeric Brunfd647d52021-02-12 20:03:38 +01001072
1073 if ((ds->task_exp = task_new(tid_bit)) == NULL)
1074 goto error;
1075
1076 ds->task_exp->process = dns_process_query_exp;
1077 ds->task_exp->context = ds;
1078
1079 ds->appctx = dns_session_create(ds);
1080 if (!ds->appctx)
1081 goto error;
1082
1083 dss->cur_conns++;
1084
1085 return ds;
1086
1087error:
1088 if (ds->task_exp)
1089 task_destroy(ds->task_exp);
1090 if (ds->rx_msg.area)
1091 pool_free(dns_msg_buf, ds->rx_msg.area);
1092 if (ds->tx_ring_area)
1093 pool_free(dns_msg_buf, ds->tx_ring_area);
1094
1095 pool_free(dns_session_pool, ds);
1096
1097 return NULL;
1098}
1099
1100/*
1101 * Task used to consume pending messages from nameserver ring
1102 * and forward them to dns_session ring.
1103 * Note: If no slot found a new dns_session is allocated
1104 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001105static struct task *dns_process_req(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001106{
1107 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1108 struct dns_stream_server *dss = ns->stream;
1109 struct ring *ring = dss->ring_req;
1110 struct buffer *buf = &ring->buf;
1111 uint64_t msg_len;
1112 size_t len, cnt, ofs;
1113 struct dns_session *ds, *ads;
1114 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
1115
1116 ofs = dss->ofs_req;
1117
1118 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
1119
1120 /* explanation for the initialization below: it would be better to do
1121 * this in the parsing function but this would occasionally result in
1122 * dropped events because we'd take a reference on the oldest message
1123 * and keep it while being scheduled. Thus instead let's take it the
1124 * first time we enter here so that we have a chance to pass many
1125 * existing messages before grabbing a reference to a location. This
1126 * value cannot be produced after initialization.
1127 */
1128 if (unlikely(ofs == ~0)) {
1129 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +02001130 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001131 ofs += ring->ofs;
1132 }
1133
1134 /* we were already there, adjust the offset to be relative to
1135 * the buffer's head and remove us from the counter.
1136 */
1137 ofs -= ring->ofs;
1138 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +02001139 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001140
1141 while (ofs + 1 < b_data(buf)) {
1142 struct ist myist;
1143
1144 cnt = 1;
1145 len = b_peek_varint(buf, ofs + cnt, &msg_len);
1146 if (!len)
1147 break;
1148 cnt += len;
1149 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
1150 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
1151 /* too large a message to ever fit, let's skip it */
1152 ofs += cnt + msg_len;
1153 continue;
1154 }
1155
1156 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
1157
Tim Duesterhus92c696e2021-02-28 16:11:36 +01001158 myist = ist2(dns_msg_trash, len);
Emeric Brunfd647d52021-02-12 20:03:38 +01001159
1160 ads = NULL;
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001161 /* try to push request into active sess with free slot */
Emeric Brunfd647d52021-02-12 20:03:38 +01001162 if (!LIST_ISEMPTY(&dss->free_sess)) {
1163 ds = LIST_NEXT(&dss->free_sess, struct dns_session *, list);
1164
1165 if (ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) {
1166 ds->nb_queries++;
1167 if (ds->nb_queries >= DNS_STREAM_MAX_PIPELINED_REQ)
1168 LIST_DEL_INIT(&ds->list);
1169 ads = ds;
1170 }
1171 else {
1172 /* it means we were unable to put a request in this slot,
1173 * it may be close to be full so we put it at the end
1174 * of free conn list */
1175 LIST_DEL_INIT(&ds->list);
Willy Tarreau2b718102021-04-21 07:32:39 +02001176 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001177 }
1178 }
1179
1180 if (!ads) {
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001181 /* try to push request into idle, this one should have enough free space */
Emeric Brunfd647d52021-02-12 20:03:38 +01001182 if (!LIST_ISEMPTY(&dss->idle_sess)) {
1183 ds = LIST_NEXT(&dss->idle_sess, struct dns_session *, list);
1184
1185 /* ring is empty so this ring_write should never fail */
1186 ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1187 ds->nb_queries++;
1188 LIST_DEL_INIT(&ds->list);
1189
1190 ds->dss->idle_conns--;
1191
1192 /* we may have to update the max_active_conns */
1193 if (ds->dss->max_active_conns < ds->dss->cur_conns - ds->dss->idle_conns)
1194 ds->dss->max_active_conns = ds->dss->cur_conns - ds->dss->idle_conns;
1195
1196 /* since we may unable to find a free list to handle
1197 * this request, this request may be large and fill
1198 * the ring buffer so we prefer to put at the end of free
1199 * list. */
Willy Tarreau2b718102021-04-21 07:32:39 +02001200 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001201 ads = ds;
1202 }
1203 }
1204
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001205 /* we didn't find a session available with large enough room */
Emeric Brunfd647d52021-02-12 20:03:38 +01001206 if (!ads) {
1207 /* allocate a new session */
1208 ads = dns_session_new(dss);
1209 if (ads) {
1210 /* ring is empty so this ring_write should never fail */
1211 ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1212 ads->nb_queries++;
Willy Tarreau2b718102021-04-21 07:32:39 +02001213 LIST_INSERT(&dss->free_sess, &ads->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001214 }
1215 else
1216 ns->counters->snd_error++;
1217 }
1218
1219 if (ads)
1220 ns->counters->sent++;
1221
1222 ofs += cnt + len;
1223 }
1224
Willy Tarreau4781b152021-04-06 13:53:36 +02001225 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001226 ofs += ring->ofs;
1227 dss->ofs_req = ofs;
1228 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
1229
1230
1231 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
1232 return t;
1233}
1234
1235/*
1236 * Task used to consume response
1237 * Note: upper layer callback is called
1238 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001239static struct task *dns_process_rsp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001240{
1241 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1242
1243 ns->process_responses(ns);
1244
1245 return t;
1246}
1247
1248/* Function used to initialize an TCP nameserver */
1249int dns_stream_init(struct dns_nameserver *ns, struct server *srv)
1250{
1251 struct dns_stream_server *dss = NULL;
1252
1253 dss = calloc(1, sizeof(*dss));
1254 if (!dss) {
1255 ha_alert("memory allocation error initializing dns tcp server '%s'.\n", srv->id);
1256 goto out;
1257 }
1258
1259 dss->srv = srv;
1260 dss->maxconn = srv->maxconn;
1261
1262 dss->ofs_req = ~0; /* init ring offset */
1263 dss->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
1264 if (!dss->ring_req) {
1265 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1266 goto out;
1267 }
1268 /* Create the task associated to the resolver target handling conns */
1269 if ((dss->task_req = task_new(MAX_THREADS_MASK)) == NULL) {
1270 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1271 goto out;
1272 }
1273
1274 /* Update task's parameters */
1275 dss->task_req->process = dns_process_req;
1276 dss->task_req->context = ns;
1277
1278 /* attach the task as reader */
1279 if (!ring_attach(dss->ring_req)) {
1280 /* mark server attached to the ring */
1281 ha_alert("server '%s': too many watchers for ring. this should never happen.\n", srv->id);
1282 goto out;
1283 }
1284
1285 /* Create the task associated to the resolver target handling conns */
1286 if ((dss->task_rsp = task_new(MAX_THREADS_MASK)) == NULL) {
1287 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1288 goto out;
1289 }
1290
1291 /* Update task's parameters */
1292 dss->task_rsp->process = dns_process_rsp;
1293 dss->task_rsp->context = ns;
1294
1295 /* Create the task associated to the resolver target handling conns */
1296 if ((dss->task_idle = task_new(MAX_THREADS_MASK)) == NULL) {
1297 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1298 goto out;
1299 }
1300
1301 /* Update task's parameters */
1302 dss->task_idle->process = dns_process_idle_exp;
1303 dss->task_idle->context = dss;
1304 dss->task_idle->expire = tick_add(now_ms, 5000);
1305
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001306 /* let start the task to free idle conns immediately */
Emeric Brunfd647d52021-02-12 20:03:38 +01001307 task_queue(dss->task_idle);
1308
1309 LIST_INIT(&dss->free_sess);
1310 LIST_INIT(&dss->idle_sess);
1311 LIST_INIT(&dss->wait_sess);
1312 HA_SPIN_INIT(&dss->lock);
1313 ns->stream = dss;
1314 return 0;
1315out:
1316 if (dss && dss->task_rsp)
1317 task_destroy(dss->task_rsp);
1318 if (dss && dss->task_req)
1319 task_destroy(dss->task_req);
1320 if (dss && dss->ring_req)
1321 ring_free(dss->ring_req);
1322
1323 free(dss);
Emeric Brunc9437992021-02-12 19:42:55 +01001324 return -1;
Christopher Faulet67957bd2017-09-27 11:00:59 +02001325}
1326
Emeric Brunc9437992021-02-12 19:42:55 +01001327int init_dns_buffers()
Baptiste Assmann325137d2015-04-13 23:40:55 +02001328{
Emeric Brunc9437992021-02-12 19:42:55 +01001329 dns_msg_trash = malloc(DNS_TCP_MSG_MAX_SIZE);
1330 if (!dns_msg_trash)
1331 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +02001332
Emeric Brunc9437992021-02-12 19:42:55 +01001333 return 1;
1334}
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +02001335
Emeric Brunc9437992021-02-12 19:42:55 +01001336void deinit_dns_buffers()
1337{
Willy Tarreau61cfdf42021-02-20 10:46:51 +01001338 ha_free(&dns_msg_trash);
Emeric Brunc9437992021-02-12 19:42:55 +01001339}
Emeric Brund26a6232021-01-04 13:32:20 +01001340
/* per-thread allocation and release of the dns_msg_trash buffer */
REGISTER_PER_THREAD_ALLOC(init_dns_buffers);
REGISTER_PER_THREAD_FREE(deinit_dns_buffers);