blob: 712cc564eb5b14d260d4ea0231f694845ffe7cbc [file] [log] [blame]
Baptiste Assmann325137d2015-04-13 23:40:55 +02001/*
2 * Name server resolution
3 *
Willy Tarreau714f3452021-05-09 06:47:26 +02004 * Copyright 2020 HAProxy Technologies
Baptiste Assmann325137d2015-04-13 23:40:55 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
Baptiste Assmann325137d2015-04-13 23:40:55 +020014#include <stdio.h>
15#include <stdlib.h>
16#include <string.h>
17#include <unistd.h>
18
19#include <sys/types.h>
20
Willy Tarreau122eba92020-06-04 10:15:32 +020021#include <haproxy/action.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020022#include <haproxy/api.h>
Christopher Faulet6b0a0fb2022-04-04 11:29:28 +020023#include <haproxy/applet.h>
Willy Tarreau6be78492020-06-05 00:00:29 +020024#include <haproxy/cfgparse.h>
Willy Tarreauf1d32c42020-06-04 21:07:02 +020025#include <haproxy/channel.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020026#include <haproxy/check.h>
Willy Tarreau83487a82020-06-04 20:19:54 +020027#include <haproxy/cli.h>
Christopher Faulet908628c2022-03-25 16:43:49 +010028#include <haproxy/conn_stream.h>
29#include <haproxy/cs_utils.h>
Willy Tarreau7c18b542020-06-11 09:23:02 +020030#include <haproxy/dgram.h>
Willy Tarreaueb92deb2020-06-04 10:53:16 +020031#include <haproxy/dns.h>
Willy Tarreau8d366972020-05-27 16:10:29 +020032#include <haproxy/errors.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020033#include <haproxy/fd.h>
Willy Tarreauaeed4a82020-06-04 22:01:04 +020034#include <haproxy/log.h>
Emeric Brund26a6232021-01-04 13:32:20 +010035#include <haproxy/ring.h>
Emeric Brunfd647d52021-02-12 20:03:38 +010036#include <haproxy/stream.h>
Willy Tarreau9f9e9fc2021-05-08 13:09:46 +020037#include <haproxy/tools.h>
Baptiste Assmann325137d2015-04-13 23:40:55 +020038
Emeric Brund26a6232021-01-04 13:32:20 +010039static THREAD_LOCAL char *dns_msg_trash;
Baptiste Assmann325137d2015-04-13 23:40:55 +020040
Emeric Brunfd647d52021-02-12 20:03:38 +010041DECLARE_STATIC_POOL(dns_session_pool, "dns_session", sizeof(struct dns_session));
42DECLARE_STATIC_POOL(dns_query_pool, "dns_query", sizeof(struct dns_query));
43DECLARE_STATIC_POOL(dns_msg_buf, "dns_msg_buf", DNS_TCP_MSG_RING_MAX_SIZE);
44
Christopher Faulet67957bd2017-09-27 11:00:59 +020045/* Opens an UDP socket on the namesaver's IP/Port, if required. Returns 0 on
Christopher Faulet1e711be2021-03-04 16:58:35 +010046 * success, -1 otherwise. ns->dgram must be defined.
Baptiste Assmann325137d2015-04-13 23:40:55 +020047 */
Emeric Brund26a6232021-01-04 13:32:20 +010048static int dns_connect_nameserver(struct dns_nameserver *ns)
Baptiste Assmann325137d2015-04-13 23:40:55 +020049{
Christopher Faulet1e711be2021-03-04 16:58:35 +010050 struct dgram_conn *dgram = &ns->dgram->conn;
51 int fd;
Baptiste Assmann325137d2015-04-13 23:40:55 +020052
Christopher Faulet1e711be2021-03-04 16:58:35 +010053 /* Already connected */
54 if (dgram->t.sock.fd != -1)
Emeric Brun526b7922021-02-15 14:28:27 +010055 return 0;
Christopher Faulet1e711be2021-03-04 16:58:35 +010056
57 /* Create an UDP socket and connect it on the nameserver's IP/Port */
58 if ((fd = socket(dgram->addr.to.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
59 send_log(NULL, LOG_WARNING,
60 "DNS : section '%s': can't create socket for nameserver '%s'.\n",
61 ns->counters->pid, ns->id);
62 return -1;
63 }
64 if (connect(fd, (struct sockaddr*)&dgram->addr.to, get_addr_len(&dgram->addr.to)) == -1) {
65 send_log(NULL, LOG_WARNING,
66 "DNS : section '%s': can't connect socket for nameserver '%s'.\n",
67 ns->counters->id, ns->id);
68 close(fd);
69 return -1;
Emeric Brunc9437992021-02-12 19:42:55 +010070 }
Emeric Brun526b7922021-02-15 14:28:27 +010071
Christopher Faulet1e711be2021-03-04 16:58:35 +010072 /* Make the socket non blocking */
Willy Tarreau38247432022-04-26 10:24:14 +020073 fd_set_nonblock(fd);
Christopher Faulet1e711be2021-03-04 16:58:35 +010074
75 /* Add the fd in the fd list and update its parameters */
76 dgram->t.sock.fd = fd;
77 fd_insert(fd, dgram, dgram_fd_handler, MAX_THREADS_MASK);
78 fd_want_recv(fd);
79 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +020080}
81
Emeric Brund26a6232021-01-04 13:32:20 +010082/* Sends a message to a name server
83 * It returns message length on success
84 * or -1 in error case
85 * 0 is returned in case of output ring buffer is full
86 */
87int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len)
88{
89 int ret = -1;
90
91 if (ns->dgram) {
92 struct dgram_conn *dgram = &ns->dgram->conn;
Emeric Brun314e6ec2022-05-10 11:35:48 +020093 int fd;
Emeric Brund26a6232021-01-04 13:32:20 +010094
Emeric Brun314e6ec2022-05-10 11:35:48 +020095 HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
96 fd = dgram->t.sock.fd;
97 if (fd == -1) {
98 if (dns_connect_nameserver(ns) == -1) {
99 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100100 return -1;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200101 }
Emeric Brund26a6232021-01-04 13:32:20 +0100102 fd = dgram->t.sock.fd;
103 }
104
105 ret = send(fd, buf, len, 0);
106 if (ret < 0) {
Willy Tarreauacef5e22022-04-25 20:32:15 +0200107 if (errno == EAGAIN || errno == EWOULDBLOCK) {
Emeric Brund26a6232021-01-04 13:32:20 +0100108 struct ist myist;
109
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100110 myist = ist2(buf, len);
Emeric Brund26a6232021-01-04 13:32:20 +0100111 ret = ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
112 if (!ret) {
113 ns->counters->snd_error++;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200114 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100115 return -1;
116 }
117 fd_cant_send(fd);
Emeric Brun314e6ec2022-05-10 11:35:48 +0200118 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100119 return ret;
120 }
121 ns->counters->snd_error++;
122 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100123 dgram->t.sock.fd = -1;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200124 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100125 return -1;
126 }
127 ns->counters->sent++;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200128 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100129 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100130 else if (ns->stream) {
131 struct ist myist;
132
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100133 myist = ist2(buf, len);
Emeric Brunfd647d52021-02-12 20:03:38 +0100134 ret = ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
135 if (!ret) {
136 ns->counters->snd_error++;
137 return -1;
138 }
139 task_wakeup(ns->stream->task_req, TASK_WOKEN_MSG);
140 return ret;
141 }
Emeric Brund26a6232021-01-04 13:32:20 +0100142
143 return ret;
144}
145
Emeric Brunfd647d52021-02-12 20:03:38 +0100146void dns_session_free(struct dns_session *);
147
Emeric Brund26a6232021-01-04 13:32:20 +0100148/* Receives a dns message
149 * Returns message length
150 * 0 is returned if no more message available
151 * -1 in error case
152 */
153ssize_t dns_recv_nameserver(struct dns_nameserver *ns, void *data, size_t size)
154{
155 ssize_t ret = -1;
156
157 if (ns->dgram) {
158 struct dgram_conn *dgram = &ns->dgram->conn;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200159 int fd;
Emeric Brund26a6232021-01-04 13:32:20 +0100160
Emeric Brun314e6ec2022-05-10 11:35:48 +0200161 HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
162 fd = dgram->t.sock.fd;
163 if (fd == -1) {
164 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100165 return -1;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200166 }
Emeric Brund26a6232021-01-04 13:32:20 +0100167
168 if ((ret = recv(fd, data, size, 0)) < 0) {
Willy Tarreauacef5e22022-04-25 20:32:15 +0200169 if (errno == EAGAIN || errno == EWOULDBLOCK) {
Emeric Brund26a6232021-01-04 13:32:20 +0100170 fd_cant_recv(fd);
Emeric Brun314e6ec2022-05-10 11:35:48 +0200171 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100172 return 0;
173 }
174 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100175 dgram->t.sock.fd = -1;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200176 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100177 return -1;
178 }
Emeric Brun314e6ec2022-05-10 11:35:48 +0200179 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100180 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100181 else if (ns->stream) {
182 struct dns_stream_server *dss = ns->stream;
183 struct dns_session *ds;
184
185 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
186
187 if (!LIST_ISEMPTY(&dss->wait_sess)) {
188 ds = LIST_NEXT(&dss->wait_sess, struct dns_session *, waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100189 ret = ds->rx_msg.len < size ? ds->rx_msg.len : size;
190 memcpy(data, ds->rx_msg.area, ret);
191
192 ds->rx_msg.len = 0;
193
Willy Tarreaudde1b442021-10-21 14:33:38 +0200194 /* This barrier is here to ensure that all data is
195 * stored if the appctx detect the elem is out of the
196 * list.
197 */
198 __ha_barrier_store();
199
Emeric Brunfd647d52021-02-12 20:03:38 +0100200 LIST_DEL_INIT(&ds->waiter);
201
202 if (ds->appctx) {
Willy Tarreaudde1b442021-10-21 14:33:38 +0200203 /* This second barrier is here to ensure that
204 * the waked up appctx won't miss that the elem
205 * is removed from the list.
206 */
207 __ha_barrier_store();
208
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500209 /* awake appctx because it may have other
Emeric Brunfd647d52021-02-12 20:03:38 +0100210 * message to receive
211 */
212 appctx_wakeup(ds->appctx);
213
214 /* dns_session could already be into free_sess list
215 * so we firstly remove it */
216 LIST_DEL_INIT(&ds->list);
217
218 /* decrease nb_queries to free a slot for a new query on that sess */
219 ds->nb_queries--;
220 if (ds->nb_queries) {
221 /* it remains pipelined unanswered request
222 * into this session but we just decrease
223 * the counter so the session
224 * can not be full of pipelined requests
225 * so we can add if to free_sess list
226 * to receive a new request
227 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200228 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100229 }
230 else {
231 /* there is no more pipelined requests
232 * into this session, so we move it
233 * to idle_sess list */
Willy Tarreau2b718102021-04-21 07:32:39 +0200234 LIST_INSERT(&ds->dss->idle_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100235
236 /* update the counter of idle sessions */
237 ds->dss->idle_conns++;
238
239 /* Note: this is useless there to update
240 * the max_active_conns since we increase
241 * the idle count */
242 }
243 }
244 else {
245 /* there is no more appctx for this session
246 * it means it is ready to die
247 */
248 dns_session_free(ds);
249 }
250
251
252 }
253
254 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
255 }
Emeric Brund26a6232021-01-04 13:32:20 +0100256
257 return ret;
258}
259
260static void dns_resolve_recv(struct dgram_conn *dgram)
261{
262 struct dns_nameserver *ns;
263 int fd;
264
Emeric Brun314e6ec2022-05-10 11:35:48 +0200265 HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
266
Emeric Brund26a6232021-01-04 13:32:20 +0100267 fd = dgram->t.sock.fd;
268
269 /* check if ready for reading */
Emeric Brun314e6ec2022-05-10 11:35:48 +0200270 if ((fd == -1) || !fd_recv_ready(fd)) {
271 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100272 return;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200273 }
Emeric Brund26a6232021-01-04 13:32:20 +0100274
275 /* no need to go further if we can't retrieve the nameserver */
276 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200277 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100278 fd_stop_recv(fd);
Emeric Brun314e6ec2022-05-10 11:35:48 +0200279 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100280 return;
281 }
282
Emeric Brun314e6ec2022-05-10 11:35:48 +0200283 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
284
Emeric Brund26a6232021-01-04 13:32:20 +0100285 ns->process_responses(ns);
286}
287
288/* Called when a dns network socket is ready to send data */
289static void dns_resolve_send(struct dgram_conn *dgram)
290{
291 int fd;
292 struct dns_nameserver *ns;
293 struct ring *ring;
294 struct buffer *buf;
295 uint64_t msg_len;
296 size_t len, cnt, ofs;
297
Emeric Brun314e6ec2022-05-10 11:35:48 +0200298 HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
299
Emeric Brund26a6232021-01-04 13:32:20 +0100300 fd = dgram->t.sock.fd;
301
302 /* check if ready for sending */
Emeric Brun314e6ec2022-05-10 11:35:48 +0200303 if ((fd == -1) || !fd_send_ready(fd)) {
304 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100305 return;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200306 }
Emeric Brund26a6232021-01-04 13:32:20 +0100307
308 /* no need to go further if we can't retrieve the nameserver */
309 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200310 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100311 fd_stop_send(fd);
Emeric Brun314e6ec2022-05-10 11:35:48 +0200312 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100313 return;
314 }
315
316 ring = ns->dgram->ring_req;
317 buf = &ring->buf;
318
319 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
320 ofs = ns->dgram->ofs_req;
321
322 /* explanation for the initialization below: it would be better to do
323 * this in the parsing function but this would occasionally result in
324 * dropped events because we'd take a reference on the oldest message
325 * and keep it while being scheduled. Thus instead let's take it the
326 * first time we enter here so that we have a chance to pass many
327 * existing messages before grabbing a reference to a location. This
328 * value cannot be produced after initialization.
329 */
330 if (unlikely(ofs == ~0)) {
331 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +0200332 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100333 ofs += ring->ofs;
334 }
335
336 /* we were already there, adjust the offset to be relative to
337 * the buffer's head and remove us from the counter.
338 */
339 ofs -= ring->ofs;
340 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200341 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100342
343 while (ofs + 1 < b_data(buf)) {
344 int ret;
345
346 cnt = 1;
347 len = b_peek_varint(buf, ofs + cnt, &msg_len);
348 if (!len)
349 break;
350 cnt += len;
351 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
352 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
353 /* too large a message to ever fit, let's skip it */
354 ofs += cnt + msg_len;
355 continue;
356 }
357
358 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
359
360 ret = send(fd, dns_msg_trash, len, 0);
361 if (ret < 0) {
Willy Tarreauacef5e22022-04-25 20:32:15 +0200362 if (errno == EAGAIN || errno == EWOULDBLOCK) {
Emeric Brund26a6232021-01-04 13:32:20 +0100363 fd_cant_send(fd);
364 goto out;
365 }
366 ns->counters->snd_error++;
367 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100368 fd = dgram->t.sock.fd = -1;
369 goto out;
370 }
371 ns->counters->sent++;
372
373 ofs += cnt + len;
374 }
375
376 /* we don't want/need to be waked up any more for sending
377 * because all ring content is sent */
378 fd_stop_send(fd);
379
380out:
381
Willy Tarreau4781b152021-04-06 13:53:36 +0200382 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100383 ofs += ring->ofs;
384 ns->dgram->ofs_req = ofs;
385 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
Emeric Brun314e6ec2022-05-10 11:35:48 +0200386 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100387
388}
389
Emeric Brunc9437992021-02-12 19:42:55 +0100390/* proto_udp callback functions for a DNS resolution */
391struct dgram_data_cb dns_dgram_cb = {
392 .recv = dns_resolve_recv,
393 .send = dns_resolve_send,
394};
Baptiste Assmann325137d2015-04-13 23:40:55 +0200395
Emeric Brunc9437992021-02-12 19:42:55 +0100396int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk)
Baptiste Assmann325137d2015-04-13 23:40:55 +0200397{
Emeric Brunc9437992021-02-12 19:42:55 +0100398 struct dns_dgram_server *dgram;
Baptiste Assmann201c07f2017-05-22 15:17:15 +0200399
Emeric Brunc9437992021-02-12 19:42:55 +0100400 if ((dgram = calloc(1, sizeof(*dgram))) == NULL)
Christopher Faulet67957bd2017-09-27 11:00:59 +0200401 return -1;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200402
Emeric Brunc9437992021-02-12 19:42:55 +0100403 /* Leave dgram partially initialized, no FD attached for
404 * now. */
405 dgram->conn.owner = ns;
406 dgram->conn.data = &dns_dgram_cb;
407 dgram->conn.t.sock.fd = -1;
408 dgram->conn.addr.to = *sk;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200409 HA_SPIN_INIT(&dgram->conn.lock);
Emeric Brunc9437992021-02-12 19:42:55 +0100410 ns->dgram = dgram;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200411
Emeric Brunc9437992021-02-12 19:42:55 +0100412 dgram->ofs_req = ~0; /* init ring offset */
413 dgram->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
414 if (!dgram->ring_req) {
415 ha_alert("memory allocation error initializing the ring for nameserver.\n");
416 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200417 }
418
Emeric Brunc9437992021-02-12 19:42:55 +0100419 /* attach the task as reader */
420 if (!ring_attach(dgram->ring_req)) {
421 /* mark server attached to the ring */
422 ha_alert("nameserver sets too many watchers > 255 on ring. This is a bug and should not happen.\n");
423 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200424 }
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200425 return 0;
Emeric Brunc9437992021-02-12 19:42:55 +0100426out:
427 if (dgram->ring_req)
428 ring_free(dgram->ring_req);
Christopher Fauletd6c6b5f2020-09-08 10:27:24 +0200429
Emeric Brunc9437992021-02-12 19:42:55 +0100430 free(dgram);
Olivier Houchard2ec2db92018-01-08 16:28:57 +0100431
Emeric Brunfd647d52021-02-12 20:03:38 +0100432 return -1;
433}
434
435/*
436 * IO Handler to handle message push to dns tcp server
Willy Tarreau0d626a52022-05-04 20:41:19 +0200437 * It takes its context from appctx->svcctx.
Emeric Brunfd647d52021-02-12 20:03:38 +0100438 */
439static void dns_session_io_handler(struct appctx *appctx)
440{
Christopher Faulet908628c2022-03-25 16:43:49 +0100441 struct conn_stream *cs = appctx->owner;
Willy Tarreau0d626a52022-05-04 20:41:19 +0200442 struct dns_session *ds = appctx->svcctx;
Emeric Brunfd647d52021-02-12 20:03:38 +0100443 struct ring *ring = &ds->ring;
444 struct buffer *buf = &ring->buf;
445 uint64_t msg_len;
446 int available_room;
447 size_t len, cnt, ofs;
448 int ret = 0;
449
450 /* if stopping was requested, close immediately */
451 if (unlikely(stopping))
452 goto close;
453
454 /* we want to be sure to not miss that we have been awaked for a shutdown */
455 __ha_barrier_load();
456
457 /* that means the connection was requested to shutdown
458 * for instance idle expire */
459 if (ds->shutdown)
460 goto close;
461
462 /* an error was detected */
Christopher Faulet908628c2022-03-25 16:43:49 +0100463 if (unlikely(cs_ic(cs)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
Emeric Brunfd647d52021-02-12 20:03:38 +0100464 goto close;
465
466 /* con closed by server side, we will skip data write and drain data from channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100467 if ((cs_oc(cs)->flags & CF_SHUTW)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100468 goto read;
469 }
470
471 /* if the connection is not established, inform the stream that we want
472 * to be notified whenever the connection completes.
473 */
Christopher Faulet62e75742022-03-31 09:16:34 +0200474 if (cs_opposite(cs)->state < CS_ST_EST) {
Christopher Fauleta0bdec32022-04-04 07:51:21 +0200475 cs_cant_get(cs);
476 cs_rx_conn_blk(cs);
477 cs_rx_endp_more(cs);
Emeric Brunfd647d52021-02-12 20:03:38 +0100478 return;
479 }
480
481
482 ofs = ds->ofs;
483
484 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
485 LIST_DEL_INIT(&appctx->wait_entry);
486 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
487
488 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
489
490 /* explanation for the initialization below: it would be better to do
491 * this in the parsing function but this would occasionally result in
492 * dropped events because we'd take a reference on the oldest message
493 * and keep it while being scheduled. Thus instead let's take it the
494 * first time we enter here so that we have a chance to pass many
495 * existing messages before grabbing a reference to a location. This
496 * value cannot be produced after initialization.
497 */
498 if (unlikely(ofs == ~0)) {
499 ofs = 0;
500
Willy Tarreau4781b152021-04-06 13:53:36 +0200501 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100502 ofs += ring->ofs;
503 }
504
505 /* in this loop, ofs always points to the counter byte that precedes
506 * the message so that we can take our reference there if we have to
507 * stop before the end (ret=0).
508 */
Christopher Faulet62e75742022-03-31 09:16:34 +0200509 if (cs_opposite(cs)->state == CS_ST_EST) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100510 /* we were already there, adjust the offset to be relative to
511 * the buffer's head and remove us from the counter.
512 */
513 ofs -= ring->ofs;
514 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200515 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100516
517 ret = 1;
518 while (ofs + 1 < b_data(buf)) {
519 struct dns_query *query;
520 uint16_t original_qid;
521 uint16_t new_qid;
522
523 cnt = 1;
524 len = b_peek_varint(buf, ofs + cnt, &msg_len);
525 if (!len)
526 break;
527 cnt += len;
528 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
529
530 /* retrieve available room on output channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100531 available_room = channel_recv_max(cs_ic(cs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100532
533 /* tx_msg_offset null means we are at the start of a new message */
534 if (!ds->tx_msg_offset) {
535 uint16_t slen;
536
537 /* check if there is enough room to put message len and query id */
538 if (available_room < sizeof(slen) + sizeof(new_qid)) {
Christopher Fauleta0bdec32022-04-04 07:51:21 +0200539 cs_rx_room_blk(cs);
Emeric Brunfd647d52021-02-12 20:03:38 +0100540 ret = 0;
541 break;
542 }
543
544 /* put msg len into then channel */
545 slen = (uint16_t)msg_len;
546 slen = htons(slen);
Christopher Faulet908628c2022-03-25 16:43:49 +0100547 ci_putblk(cs_ic(cs), (char *)&slen, sizeof(slen));
Emeric Brunfd647d52021-02-12 20:03:38 +0100548 available_room -= sizeof(slen);
549
550 /* backup original query id */
551 len = b_getblk(buf, (char *)&original_qid, sizeof(original_qid), ofs + cnt);
Emeric Brun538bb042021-02-15 13:58:06 +0100552 if (!len) {
553 /* should never happen since messages are atomically
554 * written into ring
555 */
556 ret = 0;
557 break;
558 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100559
560 /* generates new query id */
561 new_qid = ++ds->query_counter;
562 new_qid = htons(new_qid);
563
564 /* put new query id into the channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100565 ci_putblk(cs_ic(cs), (char *)&new_qid, sizeof(new_qid));
Emeric Brunfd647d52021-02-12 20:03:38 +0100566 available_room -= sizeof(new_qid);
567
568 /* keep query id mapping */
569
570 query = pool_alloc(dns_query_pool);
571 if (query) {
572 query->qid.key = new_qid;
573 query->original_qid = original_qid;
574 query->expire = tick_add(now_ms, 5000);
575 LIST_INIT(&query->list);
576 if (LIST_ISEMPTY(&ds->queries)) {
577 /* enable task to handle expire */
578 ds->task_exp->expire = query->expire;
579 /* ensure this will be executed by the same
580 * thread than ds_session_release
581 * to ensure session_release is free
582 * to destroy the task */
583 task_queue(ds->task_exp);
584 }
Willy Tarreau2b718102021-04-21 07:32:39 +0200585 LIST_APPEND(&ds->queries, &query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100586 eb32_insert(&ds->query_ids, &query->qid);
587 ds->onfly_queries++;
588 }
589
590 /* update the tx_offset to handle output in 16k streams */
591 ds->tx_msg_offset = sizeof(original_qid);
592
593 }
594
595 /* check if it remains available room on output chan */
596 if (unlikely(!available_room)) {
Christopher Fauleta0bdec32022-04-04 07:51:21 +0200597 cs_rx_room_blk(cs);
Emeric Brunfd647d52021-02-12 20:03:38 +0100598 ret = 0;
599 break;
600 }
601
602 chunk_reset(&trash);
603 if ((msg_len - ds->tx_msg_offset) > available_room) {
604 /* remaining msg data is too large to be written in output channel at one time */
605
606 len = b_getblk(buf, trash.area, available_room, ofs + cnt + ds->tx_msg_offset);
607
608 /* update offset to complete mesg forwarding later */
609 ds->tx_msg_offset += len;
610 }
611 else {
612 /* remaining msg data can be written in output channel at one time */
613 len = b_getblk(buf, trash.area, msg_len - ds->tx_msg_offset, ofs + cnt + ds->tx_msg_offset);
614
615 /* reset tx_msg_offset to mark forward fully processed */
616 ds->tx_msg_offset = 0;
617 }
618 trash.data += len;
619
Christopher Faulet908628c2022-03-25 16:43:49 +0100620 if (ci_putchk(cs_ic(cs), &trash) == -1) {
Emeric Brun743afee2021-02-15 14:12:06 +0100621 /* should never happen since we
622 * check available_room is large
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500623 * enough here.
Emeric Brun743afee2021-02-15 14:12:06 +0100624 */
Christopher Fauleta0bdec32022-04-04 07:51:21 +0200625 cs_rx_room_blk(cs);
Emeric Brun743afee2021-02-15 14:12:06 +0100626 ret = 0;
627 break;
628 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100629
630 if (ds->tx_msg_offset) {
631 /* msg was not fully processed, we must be awake to drain pending data */
632
Christopher Fauleta0bdec32022-04-04 07:51:21 +0200633 cs_rx_room_blk(cs);
Emeric Brunfd647d52021-02-12 20:03:38 +0100634 ret = 0;
635 break;
636 }
637 /* switch to next message */
638 ofs += cnt + msg_len;
639 }
640
Willy Tarreau4781b152021-04-06 13:53:36 +0200641 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100642 ofs += ring->ofs;
643 ds->ofs = ofs;
644 }
645 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
646
647 if (ret) {
648 /* let's be woken up once new request to write arrived */
649 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
Willy Tarreau62e467c2021-10-20 11:02:13 +0200650 BUG_ON(LIST_INLIST(&appctx->wait_entry));
Willy Tarreau2b718102021-04-21 07:32:39 +0200651 LIST_APPEND(&ring->waiters, &appctx->wait_entry);
Emeric Brunfd647d52021-02-12 20:03:38 +0100652 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
Christopher Fauleta0bdec32022-04-04 07:51:21 +0200653 cs_rx_endp_done(cs);
Emeric Brunfd647d52021-02-12 20:03:38 +0100654 }
655
656read:
657
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500658 /* if session is not a waiter it means there is no committed
Emeric Brunfd647d52021-02-12 20:03:38 +0100659 * message into rx_buf and we are free to use it
660 * Note: we need a load barrier here to not miss the
661 * delete from the list
662 */
Emeric Brun70455902021-10-20 10:49:53 +0200663
Willy Tarreaudde1b442021-10-21 14:33:38 +0200664 __ha_barrier_load();
665 if (!LIST_INLIST_ATOMIC(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100666 while (1) {
667 uint16_t query_id;
668 struct eb32_node *eb;
669 struct dns_query *query;
670
671 if (!ds->rx_msg.len) {
672 /* next message len is not fully available into the channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100673 if (co_data(cs_oc(cs)) < 2)
Emeric Brunfd647d52021-02-12 20:03:38 +0100674 break;
675
676 /* retrieve message len */
Christopher Faulet908628c2022-03-25 16:43:49 +0100677 co_getblk(cs_oc(cs), (char *)&msg_len, 2, 0);
Emeric Brunfd647d52021-02-12 20:03:38 +0100678
679 /* mark as consumed */
Christopher Faulet908628c2022-03-25 16:43:49 +0100680 co_skip(cs_oc(cs), 2);
Emeric Brunfd647d52021-02-12 20:03:38 +0100681
682 /* store message len */
683 ds->rx_msg.len = ntohs(msg_len);
684 }
685
Christopher Faulet908628c2022-03-25 16:43:49 +0100686 if (!co_data(cs_oc(cs))) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100687 /* we need more data but nothing is available */
688 break;
689 }
690
Christopher Faulet908628c2022-03-25 16:43:49 +0100691 if (co_data(cs_oc(cs)) + ds->rx_msg.offset < ds->rx_msg.len) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100692 /* message only partially available */
693
694 /* read available data */
Christopher Faulet908628c2022-03-25 16:43:49 +0100695 co_getblk(cs_oc(cs), ds->rx_msg.area + ds->rx_msg.offset, co_data(cs_oc(cs)), 0);
Emeric Brunfd647d52021-02-12 20:03:38 +0100696
697 /* update message offset */
Christopher Faulet908628c2022-03-25 16:43:49 +0100698 ds->rx_msg.offset += co_data(cs_oc(cs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100699
700 /* consume all pending data from the channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100701 co_skip(cs_oc(cs), co_data(cs_oc(cs)));
Emeric Brunfd647d52021-02-12 20:03:38 +0100702
703 /* we need to wait for more data */
704 break;
705 }
706
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500707 /* enough data is available into the channel to read the message until the end */
Emeric Brunfd647d52021-02-12 20:03:38 +0100708
709 /* read from the channel until the end of the message */
Christopher Faulet908628c2022-03-25 16:43:49 +0100710 co_getblk(cs_oc(cs), ds->rx_msg.area + ds->rx_msg.offset, ds->rx_msg.len - ds->rx_msg.offset, 0);
Emeric Brunfd647d52021-02-12 20:03:38 +0100711
712 /* consume all data until the end of the message from the channel */
Christopher Faulet908628c2022-03-25 16:43:49 +0100713 co_skip(cs_oc(cs), ds->rx_msg.len - ds->rx_msg.offset);
Emeric Brunfd647d52021-02-12 20:03:38 +0100714
715 /* reset reader offset to 0 for next message reand */
716 ds->rx_msg.offset = 0;
717
718 /* try remap query id to original */
719 memcpy(&query_id, ds->rx_msg.area, sizeof(query_id));
720 eb = eb32_lookup(&ds->query_ids, query_id);
721 if (!eb) {
722 /* query id not found means we have an unknown corresponding
723 * request, perhaps server's bug or or the query reached
724 * timeout
725 */
726 ds->rx_msg.len = 0;
727 continue;
728 }
729
730 /* re-map the original query id set by the requester */
731 query = eb32_entry(eb, struct dns_query, qid);
732 memcpy(ds->rx_msg.area, &query->original_qid, sizeof(query->original_qid));
733
734 /* remove query ids mapping from pending queries list/tree */
735 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200736 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100737 pool_free(dns_query_pool, query);
738 ds->onfly_queries--;
739
Emeric Brunfd647d52021-02-12 20:03:38 +0100740 /* the dns_session is also added in queue of the
741 * wait_sess list where the task processing
742 * response will pop available responses
743 */
Willy Tarreaudde1b442021-10-21 14:33:38 +0200744 HA_SPIN_LOCK(DNS_LOCK, &ds->dss->lock);
745
Willy Tarreau62e467c2021-10-20 11:02:13 +0200746 BUG_ON(LIST_INLIST(&ds->waiter));
Willy Tarreau2b718102021-04-21 07:32:39 +0200747 LIST_APPEND(&ds->dss->wait_sess, &ds->waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100748
Willy Tarreaudde1b442021-10-21 14:33:38 +0200749 HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
750
Emeric Brunfd647d52021-02-12 20:03:38 +0100751 /* awake the task processing the responses */
752 task_wakeup(ds->dss->task_rsp, TASK_WOKEN_INIT);
753
754 break;
755 }
756
Willy Tarreau2b718102021-04-21 07:32:39 +0200757 if (!LIST_INLIST(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100758 /* there is no more pending data to read and the con was closed by the server side */
Christopher Faulet908628c2022-03-25 16:43:49 +0100759 if (!co_data(cs_oc(cs)) && (cs_oc(cs)->flags & CF_SHUTW)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100760 goto close;
761 }
762 }
763
764 }
765
Emeric Brunfd647d52021-02-12 20:03:38 +0100766 return;
767close:
Christopher Fauletda098e62022-03-31 17:44:45 +0200768 cs_shutw(cs);
769 cs_shutr(cs);
Christopher Faulet908628c2022-03-25 16:43:49 +0100770 cs_ic(cs)->flags |= CF_READ_NULL;
Emeric Brunfd647d52021-02-12 20:03:38 +0100771}
772
773void dns_queries_flush(struct dns_session *ds)
774{
775 struct dns_query *query, *queryb;
776
777 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
778 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200779 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100780 pool_free(dns_query_pool, query);
781 }
782}
783
784void dns_session_free(struct dns_session *ds)
785{
786 if (ds->rx_msg.area)
787 pool_free(dns_msg_buf, ds->rx_msg.area);
788 if (ds->tx_ring_area)
789 pool_free(dns_msg_buf, ds->tx_ring_area);
790 if (ds->task_exp)
791 task_destroy(ds->task_exp);
792
793 dns_queries_flush(ds);
794
Emeric Brund20dc212021-10-19 15:40:10 +0200795 /* Ensure to remove this session from external lists
796 * Note: we are under the lock of dns_stream_server
797 * which own the heads of those lists.
798 */
799 LIST_DEL_INIT(&ds->waiter);
800 LIST_DEL_INIT(&ds->list);
801
Emeric Brunfd647d52021-02-12 20:03:38 +0100802 ds->dss->cur_conns--;
803 /* Note: this is useless to update
804 * max_active_conns here because
805 * we decrease the value
806 */
Willy Tarreau62e467c2021-10-20 11:02:13 +0200807
808 BUG_ON(!LIST_ISEMPTY(&ds->list));
809 BUG_ON(!LIST_ISEMPTY(&ds->waiter));
810 BUG_ON(!LIST_ISEMPTY(&ds->queries));
811 BUG_ON(!LIST_ISEMPTY(&ds->ring.waiters));
812 BUG_ON(!eb_is_empty(&ds->query_ids));
Emeric Brunfd647d52021-02-12 20:03:38 +0100813 pool_free(dns_session_pool, ds);
814}
815
816static struct appctx *dns_session_create(struct dns_session *ds);
817
818/*
819 * Function to release a DNS tcp session
820 */
821static void dns_session_release(struct appctx *appctx)
822{
Willy Tarreau0d626a52022-05-04 20:41:19 +0200823 struct dns_session *ds = appctx->svcctx;
Willy Tarreaue3e648c2021-02-24 17:38:46 +0100824 struct dns_stream_server *dss __maybe_unused;
Emeric Brunfd647d52021-02-12 20:03:38 +0100825
826 if (!ds)
827 return;
828
Willy Tarreaub56a8782021-10-20 14:38:43 +0200829 /* We do not call ring_appctx_detach here
830 * because we want to keep readers counters
831 * to retry a conn with a different appctx.
832 */
833 HA_RWLOCK_WRLOCK(DNS_LOCK, &ds->ring.lock);
834 LIST_DEL_INIT(&appctx->wait_entry);
835 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ds->ring.lock);
836
Emeric Brunfd647d52021-02-12 20:03:38 +0100837 dss = ds->dss;
838
839 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
840 LIST_DEL_INIT(&ds->list);
841
842 if (stopping) {
843 dns_session_free(ds);
844 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
845 return;
846 }
847
848 if (!ds->nb_queries) {
849 /* this is an idle session */
850 /* Note: this is useless to update max_active_sess
851 * here because we decrease idle_conns but
852 * dns_session_free decrease curconns
853 */
854
855 ds->dss->idle_conns--;
856 dns_session_free(ds);
857 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
858 return;
859 }
860
861 if (ds->onfly_queries == ds->nb_queries) {
862 /* the session can be released because
863 * it means that all queries AND
864 * responses are in fly */
865 dns_session_free(ds);
866 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
867 return;
868 }
869
Emeric Brunfd647d52021-02-12 20:03:38 +0100870 /* if there is no pending complete response
871 * message, ensure to reset
872 * message offsets if the session
873 * was closed with an incomplete pending response
874 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200875 if (!LIST_INLIST(&ds->waiter))
Emeric Brunfd647d52021-02-12 20:03:38 +0100876 ds->rx_msg.len = ds->rx_msg.offset = 0;
877
878 /* we flush pending sent queries because we never
879 * have responses
880 */
881 ds->nb_queries -= ds->onfly_queries;
882 dns_queries_flush(ds);
883
884 /* reset offset to be sure to start from message start */
885 ds->tx_msg_offset = 0;
886
887 /* here the ofs and the attached counter
888 * are kept unchanged
889 */
890
891 /* Create a new appctx, We hope we can
892 * create from the release callback! */
893 ds->appctx = dns_session_create(ds);
894 if (!ds->appctx) {
895 dns_session_free(ds);
896 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
897 return;
898 }
899
900 if (ds->nb_queries < DNS_STREAM_MAX_PIPELINED_REQ)
Willy Tarreau2b718102021-04-21 07:32:39 +0200901 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100902
903 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
904}
905
/* DNS tcp session applet: dns_session_io_handler() is installed as the I/O
 * function and dns_session_release() as the release callback.
 */
static struct applet dns_session_applet = {
	.obj_type = OBJ_TYPE_APPLET,
	.name = "<STRMDNS>", /* used for logging */
	.fct = dns_session_io_handler,
	.release = dns_session_release,
};
913
914/*
915 * Function used to create an appctx for a DNS session
Willy Tarreau0d626a52022-05-04 20:41:19 +0200916 * It sets its context into appctx->svcctx.
Emeric Brunfd647d52021-02-12 20:03:38 +0100917 */
918static struct appctx *dns_session_create(struct dns_session *ds)
919{
920 struct appctx *appctx;
921 struct session *sess;
Christopher Faulet13a35e52021-12-20 15:34:16 +0100922 struct conn_stream *cs;
Emeric Brunfd647d52021-02-12 20:03:38 +0100923 struct stream *s;
924 struct applet *applet = &dns_session_applet;
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100925 struct sockaddr_storage *addr = NULL;
Emeric Brunfd647d52021-02-12 20:03:38 +0100926
Christopher Faulet9ec2f4d2022-03-23 15:15:29 +0100927 appctx = appctx_new(applet, NULL);
Christopher Faulet2479e5f2022-01-19 14:50:11 +0100928 if (!appctx)
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100929 goto out_close;
Willy Tarreau0d626a52022-05-04 20:41:19 +0200930 appctx->svcctx = (void *)ds;
Emeric Brunfd647d52021-02-12 20:03:38 +0100931
932 sess = session_new(ds->dss->srv->proxy, NULL, &appctx->obj_type);
933 if (!sess) {
Christopher Faulet13a35e52021-12-20 15:34:16 +0100934 ha_alert("out of memory in dns_session_create().\n");
Emeric Brunfd647d52021-02-12 20:03:38 +0100935 goto out_free_appctx;
936 }
937
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100938 if (!sockaddr_alloc(&addr, &ds->dss->srv->addr, sizeof(ds->dss->srv->addr)))
Christopher Faulet2479e5f2022-01-19 14:50:11 +0100939 goto out_free_sess;
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100940
Christopher Faulet9ec2f4d2022-03-23 15:15:29 +0100941 cs = cs_new_from_applet(appctx->endp, sess, &BUF_NULL);
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100942 if (!cs) {
943 ha_alert("Failed to initialize stream in dns_session_create().\n");
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100944 goto out_free_addr;
Christopher Faulet13a35e52021-12-20 15:34:16 +0100945 }
946
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100947 s = DISGUISE(cs_strm(cs));
Christopher Faulet8da67aa2022-03-29 17:53:09 +0200948 s->csb->dst = addr;
Christopher Faulet8abe7122022-03-30 15:10:18 +0200949 s->csb->flags |= CS_FL_NOLINGER;
Emeric Brunfd647d52021-02-12 20:03:38 +0100950 s->target = &ds->dss->srv->obj_type;
Willy Tarreau03bd3952022-05-02 16:36:47 +0200951 s->flags = SF_ASSIGNED;
Emeric Brunfd647d52021-02-12 20:03:38 +0100952
953 s->do_log = NULL;
954 s->uniq_id = 0;
955
956 s->res.flags |= CF_READ_DONTWAIT;
957 /* for rto and rex to eternity to not expire on idle recv:
958 * We are using a syslog server.
959 */
960 s->res.rto = TICK_ETERNITY;
961 s->res.rex = TICK_ETERNITY;
962 ds->appctx = appctx;
Emeric Brunfd647d52021-02-12 20:03:38 +0100963 return appctx;
964
965 /* Error unrolling */
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100966 out_free_addr:
967 sockaddr_free(&addr);
Emeric Brunfd647d52021-02-12 20:03:38 +0100968 out_free_sess:
969 session_free(sess);
970 out_free_appctx:
971 appctx_free(appctx);
972 out_close:
973 return NULL;
974}
975
976/* Task processing expiration of unresponded queries, this one is supposed
977 * to be stuck on the same thread than the appctx handler
978 */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100979static struct task *dns_process_query_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100980{
981 struct dns_session *ds = (struct dns_session *)context;
982 struct dns_query *query, *queryb;
983
984 t->expire = TICK_ETERNITY;
985
986 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
987 if (tick_is_expired(query->expire, now_ms)) {
988 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200989 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100990 pool_free(dns_query_pool, query);
991 ds->onfly_queries--;
992 }
993 else {
994 t->expire = query->expire;
995 break;
996 }
997 }
998
999 return t;
1000}
1001
1002/* Task processing expiration of idle sessions */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001003static struct task *dns_process_idle_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001004{
1005 struct dns_stream_server *dss = (struct dns_stream_server *)context;
1006 struct dns_session *ds, *dsb;
1007 int target = 0;
1008 int cur_active_conns;
1009
1010 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
1011
1012
1013 cur_active_conns = dss->cur_conns - dss->idle_conns;
1014 if (cur_active_conns > dss->max_active_conns)
1015 dss->max_active_conns = cur_active_conns;
1016
1017 target = (dss->max_active_conns - cur_active_conns) / 2;
1018 list_for_each_entry_safe(ds, dsb, &dss->idle_sess, list) {
1019 if (!target)
1020 break;
1021
1022 /* remove conn to pending list to ensure it won't be reused */
1023 LIST_DEL_INIT(&ds->list);
1024
1025 /* force session shutdown */
1026 ds->shutdown = 1;
1027
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001028 /* to be sure that the appctx won't miss shutdown */
Emeric Brunfd647d52021-02-12 20:03:38 +01001029 __ha_barrier_store();
1030
1031 /* wake appctx to perform the shutdown */
1032 appctx_wakeup(ds->appctx);
1033 }
1034
1035 /* reset max to current active conns */
1036 dss->max_active_conns = cur_active_conns;
1037
1038 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
1039
1040 t->expire = tick_add(now_ms, 5000);
1041
1042 return t;
1043}
1044
1045struct dns_session *dns_session_new(struct dns_stream_server *dss)
1046{
1047 struct dns_session *ds;
1048
1049 if (dss->maxconn && (dss->maxconn <= dss->cur_conns))
1050 return NULL;
1051
1052 ds = pool_alloc(dns_session_pool);
1053 if (!ds)
1054 return NULL;
1055
1056 ds->ofs = ~0;
1057 ds->dss = dss;
1058 LIST_INIT(&ds->list);
1059 LIST_INIT(&ds->queries);
1060 LIST_INIT(&ds->waiter);
1061 ds->rx_msg.offset = ds->rx_msg.len = 0;
1062 ds->rx_msg.area = NULL;
1063 ds->tx_ring_area = NULL;
1064 ds->task_exp = NULL;
1065 ds->appctx = NULL;
1066 ds->shutdown = 0;
1067 ds->nb_queries = 0;
1068 ds->query_ids = EB_ROOT_UNIQUE;
1069 ds->rx_msg.area = pool_alloc(dns_msg_buf);
1070 if (!ds->rx_msg.area)
1071 goto error;
1072
1073 ds->tx_ring_area = pool_alloc(dns_msg_buf);
1074 if (!ds->tx_ring_area)
1075 goto error;
1076
1077 ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE);
Christopher Faulet1a1b6742021-03-04 16:53:27 +01001078 /* never fail because it is the first watcher attached to the ring */
1079 DISGUISE(ring_attach(&ds->ring));
Emeric Brunfd647d52021-02-12 20:03:38 +01001080
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001081 if ((ds->task_exp = task_new_here()) == NULL)
Emeric Brunfd647d52021-02-12 20:03:38 +01001082 goto error;
1083
1084 ds->task_exp->process = dns_process_query_exp;
1085 ds->task_exp->context = ds;
1086
1087 ds->appctx = dns_session_create(ds);
1088 if (!ds->appctx)
1089 goto error;
1090
1091 dss->cur_conns++;
1092
1093 return ds;
1094
1095error:
1096 if (ds->task_exp)
1097 task_destroy(ds->task_exp);
1098 if (ds->rx_msg.area)
1099 pool_free(dns_msg_buf, ds->rx_msg.area);
1100 if (ds->tx_ring_area)
1101 pool_free(dns_msg_buf, ds->tx_ring_area);
1102
1103 pool_free(dns_session_pool, ds);
1104
1105 return NULL;
1106}
1107
1108/*
1109 * Task used to consume pending messages from nameserver ring
1110 * and forward them to dns_session ring.
1111 * Note: If no slot found a new dns_session is allocated
1112 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001113static struct task *dns_process_req(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001114{
1115 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1116 struct dns_stream_server *dss = ns->stream;
1117 struct ring *ring = dss->ring_req;
1118 struct buffer *buf = &ring->buf;
1119 uint64_t msg_len;
1120 size_t len, cnt, ofs;
1121 struct dns_session *ds, *ads;
1122 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
1123
1124 ofs = dss->ofs_req;
1125
1126 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
1127
1128 /* explanation for the initialization below: it would be better to do
1129 * this in the parsing function but this would occasionally result in
1130 * dropped events because we'd take a reference on the oldest message
1131 * and keep it while being scheduled. Thus instead let's take it the
1132 * first time we enter here so that we have a chance to pass many
1133 * existing messages before grabbing a reference to a location. This
1134 * value cannot be produced after initialization.
1135 */
1136 if (unlikely(ofs == ~0)) {
1137 ofs = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +02001138 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001139 ofs += ring->ofs;
1140 }
1141
1142 /* we were already there, adjust the offset to be relative to
1143 * the buffer's head and remove us from the counter.
1144 */
1145 ofs -= ring->ofs;
1146 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +02001147 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001148
1149 while (ofs + 1 < b_data(buf)) {
1150 struct ist myist;
1151
1152 cnt = 1;
1153 len = b_peek_varint(buf, ofs + cnt, &msg_len);
1154 if (!len)
1155 break;
1156 cnt += len;
1157 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
1158 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
1159 /* too large a message to ever fit, let's skip it */
1160 ofs += cnt + msg_len;
1161 continue;
1162 }
1163
1164 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
1165
Tim Duesterhus92c696e2021-02-28 16:11:36 +01001166 myist = ist2(dns_msg_trash, len);
Emeric Brunfd647d52021-02-12 20:03:38 +01001167
1168 ads = NULL;
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001169 /* try to push request into active sess with free slot */
Emeric Brunfd647d52021-02-12 20:03:38 +01001170 if (!LIST_ISEMPTY(&dss->free_sess)) {
1171 ds = LIST_NEXT(&dss->free_sess, struct dns_session *, list);
1172
1173 if (ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) {
1174 ds->nb_queries++;
1175 if (ds->nb_queries >= DNS_STREAM_MAX_PIPELINED_REQ)
1176 LIST_DEL_INIT(&ds->list);
1177 ads = ds;
1178 }
1179 else {
1180 /* it means we were unable to put a request in this slot,
1181 * it may be close to be full so we put it at the end
1182 * of free conn list */
1183 LIST_DEL_INIT(&ds->list);
Willy Tarreau2b718102021-04-21 07:32:39 +02001184 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001185 }
1186 }
1187
1188 if (!ads) {
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001189 /* try to push request into idle, this one should have enough free space */
Emeric Brunfd647d52021-02-12 20:03:38 +01001190 if (!LIST_ISEMPTY(&dss->idle_sess)) {
1191 ds = LIST_NEXT(&dss->idle_sess, struct dns_session *, list);
1192
1193 /* ring is empty so this ring_write should never fail */
1194 ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1195 ds->nb_queries++;
1196 LIST_DEL_INIT(&ds->list);
1197
1198 ds->dss->idle_conns--;
1199
1200 /* we may have to update the max_active_conns */
1201 if (ds->dss->max_active_conns < ds->dss->cur_conns - ds->dss->idle_conns)
1202 ds->dss->max_active_conns = ds->dss->cur_conns - ds->dss->idle_conns;
1203
1204 /* since we may unable to find a free list to handle
1205 * this request, this request may be large and fill
1206 * the ring buffer so we prefer to put at the end of free
1207 * list. */
Willy Tarreau2b718102021-04-21 07:32:39 +02001208 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001209 ads = ds;
1210 }
1211 }
1212
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001213 /* we didn't find a session available with large enough room */
Emeric Brunfd647d52021-02-12 20:03:38 +01001214 if (!ads) {
1215 /* allocate a new session */
1216 ads = dns_session_new(dss);
1217 if (ads) {
1218 /* ring is empty so this ring_write should never fail */
1219 ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1220 ads->nb_queries++;
Willy Tarreau2b718102021-04-21 07:32:39 +02001221 LIST_INSERT(&dss->free_sess, &ads->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001222 }
1223 else
1224 ns->counters->snd_error++;
1225 }
1226
1227 if (ads)
1228 ns->counters->sent++;
1229
1230 ofs += cnt + len;
1231 }
1232
Willy Tarreau4781b152021-04-06 13:53:36 +02001233 HA_ATOMIC_INC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001234 ofs += ring->ofs;
1235 dss->ofs_req = ofs;
1236 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
1237
1238
1239 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
1240 return t;
1241}
1242
1243/*
1244 * Task used to consume response
1245 * Note: upper layer callback is called
1246 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001247static struct task *dns_process_rsp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001248{
1249 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1250
1251 ns->process_responses(ns);
1252
1253 return t;
1254}
1255
1256/* Function used to initialize an TCP nameserver */
1257int dns_stream_init(struct dns_nameserver *ns, struct server *srv)
1258{
1259 struct dns_stream_server *dss = NULL;
1260
1261 dss = calloc(1, sizeof(*dss));
1262 if (!dss) {
1263 ha_alert("memory allocation error initializing dns tcp server '%s'.\n", srv->id);
1264 goto out;
1265 }
1266
1267 dss->srv = srv;
1268 dss->maxconn = srv->maxconn;
1269
1270 dss->ofs_req = ~0; /* init ring offset */
1271 dss->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
1272 if (!dss->ring_req) {
1273 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1274 goto out;
1275 }
1276 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001277 if ((dss->task_req = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001278 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1279 goto out;
1280 }
1281
1282 /* Update task's parameters */
1283 dss->task_req->process = dns_process_req;
1284 dss->task_req->context = ns;
1285
1286 /* attach the task as reader */
1287 if (!ring_attach(dss->ring_req)) {
1288 /* mark server attached to the ring */
1289 ha_alert("server '%s': too many watchers for ring. this should never happen.\n", srv->id);
1290 goto out;
1291 }
1292
1293 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001294 if ((dss->task_rsp = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001295 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1296 goto out;
1297 }
1298
1299 /* Update task's parameters */
1300 dss->task_rsp->process = dns_process_rsp;
1301 dss->task_rsp->context = ns;
1302
1303 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001304 if ((dss->task_idle = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001305 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1306 goto out;
1307 }
1308
1309 /* Update task's parameters */
1310 dss->task_idle->process = dns_process_idle_exp;
1311 dss->task_idle->context = dss;
1312 dss->task_idle->expire = tick_add(now_ms, 5000);
1313
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001314 /* let start the task to free idle conns immediately */
Emeric Brunfd647d52021-02-12 20:03:38 +01001315 task_queue(dss->task_idle);
1316
1317 LIST_INIT(&dss->free_sess);
1318 LIST_INIT(&dss->idle_sess);
1319 LIST_INIT(&dss->wait_sess);
1320 HA_SPIN_INIT(&dss->lock);
1321 ns->stream = dss;
1322 return 0;
1323out:
1324 if (dss && dss->task_rsp)
1325 task_destroy(dss->task_rsp);
1326 if (dss && dss->task_req)
1327 task_destroy(dss->task_req);
1328 if (dss && dss->ring_req)
1329 ring_free(dss->ring_req);
1330
1331 free(dss);
Emeric Brunc9437992021-02-12 19:42:55 +01001332 return -1;
Christopher Faulet67957bd2017-09-27 11:00:59 +02001333}
1334
Emeric Brunc9437992021-02-12 19:42:55 +01001335int init_dns_buffers()
Baptiste Assmann325137d2015-04-13 23:40:55 +02001336{
Emeric Brunc9437992021-02-12 19:42:55 +01001337 dns_msg_trash = malloc(DNS_TCP_MSG_MAX_SIZE);
1338 if (!dns_msg_trash)
1339 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +02001340
Emeric Brunc9437992021-02-12 19:42:55 +01001341 return 1;
1342}
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +02001343
/* Releases the dns_msg_trash scratch area allocated by init_dns_buffers() */
void deinit_dns_buffers()
{
	ha_free(&dns_msg_trash);
}
Emeric Brund26a6232021-01-04 13:32:20 +01001348
/* dns_msg_trash is set up and torn down through the per-thread alloc/free
 * registration hooks (presumably run once per thread - TODO confirm against
 * the macros' definition).
 */
REGISTER_PER_THREAD_ALLOC(init_dns_buffers);
REGISTER_PER_THREAD_FREE(deinit_dns_buffers);