blob: e5ddee71f6e910e4e45a3b2729aa697ef1f7a821 [file] [log] [blame]
Baptiste Assmann325137d2015-04-13 23:40:55 +02001/*
2 * Name server resolution
3 *
Willy Tarreau714f3452021-05-09 06:47:26 +02004 * Copyright 2020 HAProxy Technologies
Baptiste Assmann325137d2015-04-13 23:40:55 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
Baptiste Assmann325137d2015-04-13 23:40:55 +020014#include <stdio.h>
15#include <stdlib.h>
16#include <string.h>
17#include <unistd.h>
18
19#include <sys/types.h>
20
Willy Tarreau122eba92020-06-04 10:15:32 +020021#include <haproxy/action.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020022#include <haproxy/api.h>
Christopher Faulet6b0a0fb2022-04-04 11:29:28 +020023#include <haproxy/applet.h>
Willy Tarreau6be78492020-06-05 00:00:29 +020024#include <haproxy/cfgparse.h>
Willy Tarreauf1d32c42020-06-04 21:07:02 +020025#include <haproxy/channel.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020026#include <haproxy/check.h>
Willy Tarreau83487a82020-06-04 20:19:54 +020027#include <haproxy/cli.h>
Willy Tarreau7c18b542020-06-11 09:23:02 +020028#include <haproxy/dgram.h>
Willy Tarreaueb92deb2020-06-04 10:53:16 +020029#include <haproxy/dns.h>
Willy Tarreau8d366972020-05-27 16:10:29 +020030#include <haproxy/errors.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020031#include <haproxy/fd.h>
Willy Tarreauaeed4a82020-06-04 22:01:04 +020032#include <haproxy/log.h>
Emeric Brund26a6232021-01-04 13:32:20 +010033#include <haproxy/ring.h>
Willy Tarreau5edca2f2022-05-27 09:25:10 +020034#include <haproxy/sc_strm.h>
Willy Tarreaucb086c62022-05-27 09:47:12 +020035#include <haproxy/stconn.h>
Emeric Brunfd647d52021-02-12 20:03:38 +010036#include <haproxy/stream.h>
Willy Tarreau9f9e9fc2021-05-08 13:09:46 +020037#include <haproxy/tools.h>
Baptiste Assmann325137d2015-04-13 23:40:55 +020038
/* Per-thread scratch area: dns_resolve_send() copies each queued message
 * into it before handing it to send().
 * NOTE(review): where this buffer is allocated is not visible in this part
 * of the file -- confirm it is set up before the send callback can run.
 */
Emeric Brund26a6232021-01-04 13:32:20 +010039static THREAD_LOCAL char *dns_msg_trash;
Baptiste Assmann325137d2015-04-13 23:40:55 +020040
/* Memory pools, one per object size: a struct dns_session, a struct
 * dns_query, and a DNS_TCP_MSG_RING_MAX_SIZE bytes area.
 */
Emeric Brunfd647d52021-02-12 20:03:38 +010041DECLARE_STATIC_POOL(dns_session_pool, "dns_session", sizeof(struct dns_session));
42DECLARE_STATIC_POOL(dns_query_pool, "dns_query", sizeof(struct dns_query));
43DECLARE_STATIC_POOL(dns_msg_buf, "dns_msg_buf", DNS_TCP_MSG_RING_MAX_SIZE);
44
Christopher Faulet67957bd2017-09-27 11:00:59 +020045/* Opens an UDP socket on the namesaver's IP/Port, if required. Returns 0 on
Christopher Faulet1e711be2021-03-04 16:58:35 +010046 * success, -1 otherwise. ns->dgram must be defined.
Baptiste Assmann325137d2015-04-13 23:40:55 +020047 */
Emeric Brund26a6232021-01-04 13:32:20 +010048static int dns_connect_nameserver(struct dns_nameserver *ns)
Baptiste Assmann325137d2015-04-13 23:40:55 +020049{
Christopher Faulet1e711be2021-03-04 16:58:35 +010050 struct dgram_conn *dgram = &ns->dgram->conn;
51 int fd;
Baptiste Assmann325137d2015-04-13 23:40:55 +020052
Christopher Faulet1e711be2021-03-04 16:58:35 +010053 /* Already connected */
54 if (dgram->t.sock.fd != -1)
Emeric Brun526b7922021-02-15 14:28:27 +010055 return 0;
Christopher Faulet1e711be2021-03-04 16:58:35 +010056
57 /* Create an UDP socket and connect it on the nameserver's IP/Port */
58 if ((fd = socket(dgram->addr.to.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
59 send_log(NULL, LOG_WARNING,
60 "DNS : section '%s': can't create socket for nameserver '%s'.\n",
61 ns->counters->pid, ns->id);
62 return -1;
63 }
64 if (connect(fd, (struct sockaddr*)&dgram->addr.to, get_addr_len(&dgram->addr.to)) == -1) {
65 send_log(NULL, LOG_WARNING,
66 "DNS : section '%s': can't connect socket for nameserver '%s'.\n",
67 ns->counters->id, ns->id);
68 close(fd);
69 return -1;
Emeric Brunc9437992021-02-12 19:42:55 +010070 }
Emeric Brun526b7922021-02-15 14:28:27 +010071
Christopher Faulet1e711be2021-03-04 16:58:35 +010072 /* Make the socket non blocking */
Willy Tarreau38247432022-04-26 10:24:14 +020073 fd_set_nonblock(fd);
Christopher Faulet1e711be2021-03-04 16:58:35 +010074
75 /* Add the fd in the fd list and update its parameters */
76 dgram->t.sock.fd = fd;
Willy Tarreau27a32452022-07-07 08:29:00 +020077 fd_insert(fd, dgram, dgram_fd_handler, tgid, tg->threads_enabled);
Christopher Faulet1e711be2021-03-04 16:58:35 +010078 fd_want_recv(fd);
79 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +020080}
81
Emeric Brund26a6232021-01-04 13:32:20 +010082/* Sends a message to a name server
83 * It returns message length on success
84 * or -1 in error case
85 * 0 is returned in case of output ring buffer is full
86 */
87int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len)
88{
89 int ret = -1;
90
91 if (ns->dgram) {
92 struct dgram_conn *dgram = &ns->dgram->conn;
Emeric Brun314e6ec2022-05-10 11:35:48 +020093 int fd;
Emeric Brund26a6232021-01-04 13:32:20 +010094
Emeric Brun314e6ec2022-05-10 11:35:48 +020095 HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
96 fd = dgram->t.sock.fd;
97 if (fd == -1) {
98 if (dns_connect_nameserver(ns) == -1) {
99 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100100 return -1;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200101 }
Emeric Brund26a6232021-01-04 13:32:20 +0100102 fd = dgram->t.sock.fd;
103 }
104
105 ret = send(fd, buf, len, 0);
106 if (ret < 0) {
Willy Tarreauacef5e22022-04-25 20:32:15 +0200107 if (errno == EAGAIN || errno == EWOULDBLOCK) {
Emeric Brund26a6232021-01-04 13:32:20 +0100108 struct ist myist;
109
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100110 myist = ist2(buf, len);
Emeric Brund26a6232021-01-04 13:32:20 +0100111 ret = ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
112 if (!ret) {
113 ns->counters->snd_error++;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200114 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100115 return -1;
116 }
117 fd_cant_send(fd);
Emeric Brun314e6ec2022-05-10 11:35:48 +0200118 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100119 return ret;
120 }
121 ns->counters->snd_error++;
122 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100123 dgram->t.sock.fd = -1;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200124 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100125 return -1;
126 }
127 ns->counters->sent++;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200128 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100129 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100130 else if (ns->stream) {
131 struct ist myist;
132
Tim Duesterhus92c696e2021-02-28 16:11:36 +0100133 myist = ist2(buf, len);
Emeric Brunfd647d52021-02-12 20:03:38 +0100134 ret = ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
135 if (!ret) {
136 ns->counters->snd_error++;
137 return -1;
138 }
139 task_wakeup(ns->stream->task_req, TASK_WOKEN_MSG);
140 return ret;
141 }
Emeric Brund26a6232021-01-04 13:32:20 +0100142
143 return ret;
144}
145
/* Forward declaration: dns_recv_nameserver() below releases a session through
 * this function once the session no longer has an appctx.
 */
Emeric Brunfd647d52021-02-12 20:03:38 +0100146void dns_session_free(struct dns_session *);
147
Emeric Brund26a6232021-01-04 13:32:20 +0100148/* Receives a dns message
149 * Returns message length
150 * 0 is returned if no more message available
151 * -1 in error case
152 */
153ssize_t dns_recv_nameserver(struct dns_nameserver *ns, void *data, size_t size)
154{
155 ssize_t ret = -1;
156
157 if (ns->dgram) {
158 struct dgram_conn *dgram = &ns->dgram->conn;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200159 int fd;
Emeric Brund26a6232021-01-04 13:32:20 +0100160
Emeric Brun314e6ec2022-05-10 11:35:48 +0200161 HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
162 fd = dgram->t.sock.fd;
163 if (fd == -1) {
164 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100165 return -1;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200166 }
Emeric Brund26a6232021-01-04 13:32:20 +0100167
168 if ((ret = recv(fd, data, size, 0)) < 0) {
Willy Tarreauacef5e22022-04-25 20:32:15 +0200169 if (errno == EAGAIN || errno == EWOULDBLOCK) {
Emeric Brund26a6232021-01-04 13:32:20 +0100170 fd_cant_recv(fd);
Emeric Brun314e6ec2022-05-10 11:35:48 +0200171 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100172 return 0;
173 }
174 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100175 dgram->t.sock.fd = -1;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200176 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100177 return -1;
178 }
Emeric Brun314e6ec2022-05-10 11:35:48 +0200179 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100180 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100181 else if (ns->stream) {
182 struct dns_stream_server *dss = ns->stream;
183 struct dns_session *ds;
184
185 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
186
187 if (!LIST_ISEMPTY(&dss->wait_sess)) {
188 ds = LIST_NEXT(&dss->wait_sess, struct dns_session *, waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100189 ret = ds->rx_msg.len < size ? ds->rx_msg.len : size;
190 memcpy(data, ds->rx_msg.area, ret);
191
192 ds->rx_msg.len = 0;
193
Willy Tarreaudde1b442021-10-21 14:33:38 +0200194 /* This barrier is here to ensure that all data is
195 * stored if the appctx detect the elem is out of the
196 * list.
197 */
198 __ha_barrier_store();
199
Emeric Brunfd647d52021-02-12 20:03:38 +0100200 LIST_DEL_INIT(&ds->waiter);
201
202 if (ds->appctx) {
Willy Tarreaudde1b442021-10-21 14:33:38 +0200203 /* This second barrier is here to ensure that
204 * the waked up appctx won't miss that the elem
205 * is removed from the list.
206 */
207 __ha_barrier_store();
208
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500209 /* awake appctx because it may have other
Emeric Brunfd647d52021-02-12 20:03:38 +0100210 * message to receive
211 */
212 appctx_wakeup(ds->appctx);
213
214 /* dns_session could already be into free_sess list
215 * so we firstly remove it */
216 LIST_DEL_INIT(&ds->list);
217
218 /* decrease nb_queries to free a slot for a new query on that sess */
219 ds->nb_queries--;
220 if (ds->nb_queries) {
221 /* it remains pipelined unanswered request
222 * into this session but we just decrease
223 * the counter so the session
224 * can not be full of pipelined requests
225 * so we can add if to free_sess list
226 * to receive a new request
227 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200228 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100229 }
230 else {
231 /* there is no more pipelined requests
232 * into this session, so we move it
233 * to idle_sess list */
Willy Tarreau2b718102021-04-21 07:32:39 +0200234 LIST_INSERT(&ds->dss->idle_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100235
236 /* update the counter of idle sessions */
237 ds->dss->idle_conns++;
238
239 /* Note: this is useless there to update
240 * the max_active_conns since we increase
241 * the idle count */
242 }
243 }
244 else {
245 /* there is no more appctx for this session
246 * it means it is ready to die
247 */
248 dns_session_free(ds);
249 }
250
251
252 }
253
254 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
255 }
Emeric Brund26a6232021-01-04 13:32:20 +0100256
257 return ret;
258}
259
260static void dns_resolve_recv(struct dgram_conn *dgram)
261{
262 struct dns_nameserver *ns;
263 int fd;
264
Emeric Brun314e6ec2022-05-10 11:35:48 +0200265 HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
266
Emeric Brund26a6232021-01-04 13:32:20 +0100267 fd = dgram->t.sock.fd;
268
269 /* check if ready for reading */
Emeric Brun314e6ec2022-05-10 11:35:48 +0200270 if ((fd == -1) || !fd_recv_ready(fd)) {
271 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100272 return;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200273 }
Emeric Brund26a6232021-01-04 13:32:20 +0100274
275 /* no need to go further if we can't retrieve the nameserver */
276 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200277 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100278 fd_stop_recv(fd);
Emeric Brun314e6ec2022-05-10 11:35:48 +0200279 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100280 return;
281 }
282
Emeric Brun314e6ec2022-05-10 11:35:48 +0200283 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
284
Emeric Brund26a6232021-01-04 13:32:20 +0100285 ns->process_responses(ns);
286}
287
288/* Called when a dns network socket is ready to send data */
289static void dns_resolve_send(struct dgram_conn *dgram)
290{
291 int fd;
292 struct dns_nameserver *ns;
293 struct ring *ring;
294 struct buffer *buf;
295 uint64_t msg_len;
296 size_t len, cnt, ofs;
297
Emeric Brun314e6ec2022-05-10 11:35:48 +0200298 HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
299
Emeric Brund26a6232021-01-04 13:32:20 +0100300 fd = dgram->t.sock.fd;
301
302 /* check if ready for sending */
Emeric Brun314e6ec2022-05-10 11:35:48 +0200303 if ((fd == -1) || !fd_send_ready(fd)) {
304 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100305 return;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200306 }
Emeric Brund26a6232021-01-04 13:32:20 +0100307
308 /* no need to go further if we can't retrieve the nameserver */
309 if ((ns = dgram->owner) == NULL) {
Willy Tarreauf5090652021-04-06 17:23:40 +0200310 _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
Emeric Brund26a6232021-01-04 13:32:20 +0100311 fd_stop_send(fd);
Emeric Brun314e6ec2022-05-10 11:35:48 +0200312 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100313 return;
314 }
315
316 ring = ns->dgram->ring_req;
317 buf = &ring->buf;
318
319 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100320
321 /* explanation for the initialization below: it would be better to do
322 * this in the parsing function but this would occasionally result in
323 * dropped events because we'd take a reference on the oldest message
324 * and keep it while being scheduled. Thus instead let's take it the
325 * first time we enter here so that we have a chance to pass many
326 * existing messages before grabbing a reference to a location. This
327 * value cannot be produced after initialization.
328 */
Aurelien DARRAGONbce0c0c2023-03-07 18:01:34 +0100329 if (unlikely(ns->dgram->ofs_req == ~0)) {
330 ns->dgram->ofs_req = b_peek_ofs(buf, 0);
331 HA_ATOMIC_INC(b_orig(buf) + ns->dgram->ofs_req);
Emeric Brund26a6232021-01-04 13:32:20 +0100332 }
333
334 /* we were already there, adjust the offset to be relative to
335 * the buffer's head and remove us from the counter.
336 */
Aurelien DARRAGONbce0c0c2023-03-07 18:01:34 +0100337 ofs = ns->dgram->ofs_req - b_head_ofs(buf);
338 if (ns->dgram->ofs_req < b_head_ofs(buf))
339 ofs += b_size(buf);
Emeric Brund26a6232021-01-04 13:32:20 +0100340 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +0200341 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brund26a6232021-01-04 13:32:20 +0100342
343 while (ofs + 1 < b_data(buf)) {
344 int ret;
345
346 cnt = 1;
347 len = b_peek_varint(buf, ofs + cnt, &msg_len);
348 if (!len)
349 break;
350 cnt += len;
351 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
352 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
353 /* too large a message to ever fit, let's skip it */
354 ofs += cnt + msg_len;
355 continue;
356 }
357
358 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
359
360 ret = send(fd, dns_msg_trash, len, 0);
361 if (ret < 0) {
Willy Tarreauacef5e22022-04-25 20:32:15 +0200362 if (errno == EAGAIN || errno == EWOULDBLOCK) {
Emeric Brund26a6232021-01-04 13:32:20 +0100363 fd_cant_send(fd);
364 goto out;
365 }
366 ns->counters->snd_error++;
367 fd_delete(fd);
Emeric Brund26a6232021-01-04 13:32:20 +0100368 fd = dgram->t.sock.fd = -1;
369 goto out;
370 }
371 ns->counters->sent++;
372
373 ofs += cnt + len;
374 }
375
376 /* we don't want/need to be waked up any more for sending
377 * because all ring content is sent */
378 fd_stop_send(fd);
379
380out:
Willy Tarreau4781b152021-04-06 13:53:36 +0200381 HA_ATOMIC_INC(b_peek(buf, ofs));
Aurelien DARRAGONbce0c0c2023-03-07 18:01:34 +0100382 ns->dgram->ofs_req = b_peek_ofs(buf, ofs);
Emeric Brund26a6232021-01-04 13:32:20 +0100383 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
Emeric Brun314e6ec2022-05-10 11:35:48 +0200384 HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
Emeric Brund26a6232021-01-04 13:32:20 +0100385}
386
Emeric Brunc9437992021-02-12 19:42:55 +0100387/* proto_udp callback functions for a DNS resolution */
388struct dgram_data_cb dns_dgram_cb = {
389 .recv = dns_resolve_recv, /* hands ready sockets over to the nameserver's process_responses() */
390 .send = dns_resolve_send, /* flushes the messages queued in the nameserver's request ring */
391};
Baptiste Assmann325137d2015-04-13 23:40:55 +0200392
Emeric Brunc9437992021-02-12 19:42:55 +0100393int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk)
Baptiste Assmann325137d2015-04-13 23:40:55 +0200394{
Emeric Brunc9437992021-02-12 19:42:55 +0100395 struct dns_dgram_server *dgram;
Baptiste Assmann201c07f2017-05-22 15:17:15 +0200396
Emeric Brunc9437992021-02-12 19:42:55 +0100397 if ((dgram = calloc(1, sizeof(*dgram))) == NULL)
Christopher Faulet67957bd2017-09-27 11:00:59 +0200398 return -1;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200399
Emeric Brunc9437992021-02-12 19:42:55 +0100400 /* Leave dgram partially initialized, no FD attached for
401 * now. */
402 dgram->conn.owner = ns;
403 dgram->conn.data = &dns_dgram_cb;
404 dgram->conn.t.sock.fd = -1;
405 dgram->conn.addr.to = *sk;
Emeric Brun314e6ec2022-05-10 11:35:48 +0200406 HA_SPIN_INIT(&dgram->conn.lock);
Emeric Brunc9437992021-02-12 19:42:55 +0100407 ns->dgram = dgram;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200408
Emeric Brunc9437992021-02-12 19:42:55 +0100409 dgram->ofs_req = ~0; /* init ring offset */
410 dgram->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
411 if (!dgram->ring_req) {
412 ha_alert("memory allocation error initializing the ring for nameserver.\n");
413 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200414 }
415
Emeric Brunc9437992021-02-12 19:42:55 +0100416 /* attach the task as reader */
417 if (!ring_attach(dgram->ring_req)) {
418 /* mark server attached to the ring */
419 ha_alert("nameserver sets too many watchers > 255 on ring. This is a bug and should not happen.\n");
420 goto out;
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200421 }
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +0200422 return 0;
Emeric Brunc9437992021-02-12 19:42:55 +0100423out:
Tim Duesterhus1307cd42023-04-22 17:47:35 +0200424 ring_free(dgram->ring_req);
Christopher Fauletd6c6b5f2020-09-08 10:27:24 +0200425
Emeric Brunc9437992021-02-12 19:42:55 +0100426 free(dgram);
Olivier Houchard2ec2db92018-01-08 16:28:57 +0100427
Emeric Brunfd647d52021-02-12 20:03:38 +0100428 return -1;
429}
430
431/*
432 * IO Handler to handle message push to dns tcp server
Willy Tarreau0d626a52022-05-04 20:41:19 +0200433 * It takes its context from appctx->svcctx.
Emeric Brunfd647d52021-02-12 20:03:38 +0100434 */
435static void dns_session_io_handler(struct appctx *appctx)
436{
Willy Tarreauc12b3212022-05-27 11:08:15 +0200437 struct stconn *sc = appctx_sc(appctx);
Willy Tarreau0d626a52022-05-04 20:41:19 +0200438 struct dns_session *ds = appctx->svcctx;
Emeric Brunfd647d52021-02-12 20:03:38 +0100439 struct ring *ring = &ds->ring;
440 struct buffer *buf = &ring->buf;
441 uint64_t msg_len;
442 int available_room;
443 size_t len, cnt, ofs;
444 int ret = 0;
445
Christopher Faulet26769b02023-03-31 10:48:03 +0200446 if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) {
447 co_skip(sc_oc(sc), co_data(sc_oc(sc)));
448 goto out;
449 }
450
Emeric Brunfd647d52021-02-12 20:03:38 +0100451 /* if stopping was requested, close immediately */
452 if (unlikely(stopping))
453 goto close;
454
455 /* we want to be sure to not miss that we have been awaked for a shutdown */
456 __ha_barrier_load();
457
458 /* that means the connection was requested to shutdown
459 * for instance idle expire */
460 if (ds->shutdown)
461 goto close;
462
Emeric Brunfd647d52021-02-12 20:03:38 +0100463 /* if the connection is not established, inform the stream that we want
464 * to be notified whenever the connection completes.
465 */
Willy Tarreaud7950ad2022-05-27 10:33:42 +0200466 if (sc_opposite(sc)->state < SC_ST_EST) {
Willy Tarreau90e8b452022-05-25 18:21:43 +0200467 applet_need_more_data(appctx);
Willy Tarreaub23edc82022-05-24 16:49:03 +0200468 se_need_remote_conn(appctx->sedesc);
Willy Tarreau4164eb92022-05-25 15:42:03 +0200469 applet_have_more_data(appctx);
Christopher Faulet26769b02023-03-31 10:48:03 +0200470 goto out;
Emeric Brunfd647d52021-02-12 20:03:38 +0100471 }
472
Emeric Brunfd647d52021-02-12 20:03:38 +0100473 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
474 LIST_DEL_INIT(&appctx->wait_entry);
475 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
476
477 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
478
479 /* explanation for the initialization below: it would be better to do
480 * this in the parsing function but this would occasionally result in
481 * dropped events because we'd take a reference on the oldest message
482 * and keep it while being scheduled. Thus instead let's take it the
483 * first time we enter here so that we have a chance to pass many
484 * existing messages before grabbing a reference to a location. This
485 * value cannot be produced after initialization.
486 */
Amaury Denoyelle737d10f2023-03-07 11:18:27 +0100487 if (unlikely(ds->ofs == ~0)) {
Aurelien DARRAGON5a43db22023-03-07 17:45:02 +0100488 ds->ofs = b_peek_ofs(buf, 0);
489 HA_ATOMIC_INC(b_orig(buf) + ds->ofs);
Emeric Brunfd647d52021-02-12 20:03:38 +0100490 }
491
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200492 /* we were already there, adjust the offset to be relative to
493 * the buffer's head and remove us from the counter.
Emeric Brunfd647d52021-02-12 20:03:38 +0100494 */
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200495 ofs = ds->ofs - b_head_ofs(buf);
496 if (ds->ofs < b_head_ofs(buf))
497 ofs += b_size(buf);
Amaury Denoyelle737d10f2023-03-07 11:18:27 +0100498
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200499 BUG_ON(ofs >= buf->size);
500 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +0100501
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200502 /* in following loop, ofs always points to the counter byte that
503 * precedes the message so that we can take our reference there if we
504 * have to stop before the end (ret=0).
505 */
506 ret = 1;
507 while (ofs + 1 < b_data(buf)) {
508 struct dns_query *query;
509 uint16_t original_qid;
510 uint16_t new_qid;
Emeric Brunfd647d52021-02-12 20:03:38 +0100511
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200512 cnt = 1;
513 len = b_peek_varint(buf, ofs + cnt, &msg_len);
514 if (!len)
515 break;
516 cnt += len;
517 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
Emeric Brunfd647d52021-02-12 20:03:38 +0100518
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200519 /* retrieve available room on output channel */
520 available_room = channel_recv_max(sc_ic(sc));
Emeric Brunfd647d52021-02-12 20:03:38 +0100521
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200522 /* tx_msg_offset null means we are at the start of a new message */
523 if (!ds->tx_msg_offset) {
524 uint16_t slen;
Emeric Brunfd647d52021-02-12 20:03:38 +0100525
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200526 /* check if there is enough room to put message len and query id */
527 if (available_room < sizeof(slen) + sizeof(new_qid)) {
528 sc_need_room(sc);
529 ret = 0;
530 break;
531 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100532
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200533 /* put msg len into then channel */
534 slen = (uint16_t)msg_len;
535 slen = htons(slen);
536 applet_putblk(appctx, (char *)&slen, sizeof(slen));
537 available_room -= sizeof(slen);
Emeric Brunfd647d52021-02-12 20:03:38 +0100538
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200539 /* backup original query id */
540 len = b_getblk(buf, (char *)&original_qid, sizeof(original_qid), ofs + cnt);
541 if (!len) {
542 /* should never happen since messages are atomically
543 * written into ring
544 */
545 ret = 0;
546 break;
547 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100548
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200549 /* generates new query id */
550 new_qid = ++ds->query_counter;
551 new_qid = htons(new_qid);
Emeric Brunfd647d52021-02-12 20:03:38 +0100552
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200553 /* put new query id into the channel */
554 applet_putblk(appctx, (char *)&new_qid, sizeof(new_qid));
555 available_room -= sizeof(new_qid);
Emeric Brunfd647d52021-02-12 20:03:38 +0100556
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200557 /* keep query id mapping */
Emeric Brunfd647d52021-02-12 20:03:38 +0100558
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200559 query = pool_alloc(dns_query_pool);
560 if (query) {
561 query->qid.key = new_qid;
562 query->original_qid = original_qid;
563 query->expire = tick_add(now_ms, 5000);
564 LIST_INIT(&query->list);
565 if (LIST_ISEMPTY(&ds->queries)) {
566 /* enable task to handle expire */
567 ds->task_exp->expire = query->expire;
568 /* ensure this will be executed by the same
569 * thread than ds_session_release
570 * to ensure session_release is free
571 * to destroy the task */
572 task_queue(ds->task_exp);
Emeric Brunfd647d52021-02-12 20:03:38 +0100573 }
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200574 LIST_APPEND(&ds->queries, &query->list);
575 eb32_insert(&ds->query_ids, &query->qid);
576 ds->onfly_queries++;
Emeric Brunfd647d52021-02-12 20:03:38 +0100577 }
578
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200579 /* update the tx_offset to handle output in 16k streams */
580 ds->tx_msg_offset = sizeof(original_qid);
Emeric Brunfd647d52021-02-12 20:03:38 +0100581
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200582 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100583
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200584 /* check if it remains available room on output chan */
585 if (unlikely(!available_room)) {
586 sc_need_room(sc);
587 ret = 0;
588 break;
589 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100590
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200591 chunk_reset(&trash);
592 if ((msg_len - ds->tx_msg_offset) > available_room) {
593 /* remaining msg data is too large to be written in output channel at one time */
Emeric Brunfd647d52021-02-12 20:03:38 +0100594
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200595 len = b_getblk(buf, trash.area, available_room, ofs + cnt + ds->tx_msg_offset);
Emeric Brunfd647d52021-02-12 20:03:38 +0100596
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200597 /* update offset to complete mesg forwarding later */
598 ds->tx_msg_offset += len;
599 }
600 else {
601 /* remaining msg data can be written in output channel at one time */
602 len = b_getblk(buf, trash.area, msg_len - ds->tx_msg_offset, ofs + cnt + ds->tx_msg_offset);
Emeric Brunfd647d52021-02-12 20:03:38 +0100603
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200604 /* reset tx_msg_offset to mark forward fully processed */
605 ds->tx_msg_offset = 0;
606 }
607 trash.data += len;
Emeric Brunfd647d52021-02-12 20:03:38 +0100608
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200609 if (applet_putchk(appctx, &trash) == -1) {
610 /* should never happen since we
611 * check available_room is large
612 * enough here.
613 */
614 ret = 0;
615 break;
Emeric Brunfd647d52021-02-12 20:03:38 +0100616 }
617
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200618 if (ds->tx_msg_offset) {
619 /* msg was not fully processed, we must be awake to drain pending data */
620
621 sc_need_room(sc);
622 ret = 0;
623 break;
624 }
625 /* switch to next message */
626 ofs += cnt + msg_len;
Emeric Brunfd647d52021-02-12 20:03:38 +0100627 }
Christopher Faulet4d3283f2023-03-31 10:42:22 +0200628
629 HA_ATOMIC_INC(b_peek(buf, ofs));
630 ds->ofs = b_peek_ofs(buf, ofs);
631
Emeric Brunfd647d52021-02-12 20:03:38 +0100632 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
633
634 if (ret) {
635 /* let's be woken up once new request to write arrived */
636 HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
Willy Tarreau62e467c2021-10-20 11:02:13 +0200637 BUG_ON(LIST_INLIST(&appctx->wait_entry));
Willy Tarreau2b718102021-04-21 07:32:39 +0200638 LIST_APPEND(&ring->waiters, &appctx->wait_entry);
Emeric Brunfd647d52021-02-12 20:03:38 +0100639 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
Willy Tarreau4164eb92022-05-25 15:42:03 +0200640 applet_have_no_more_data(appctx);
Emeric Brunfd647d52021-02-12 20:03:38 +0100641 }
642
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500643 /* if session is not a waiter it means there is no committed
Emeric Brunfd647d52021-02-12 20:03:38 +0100644 * message into rx_buf and we are free to use it
645 * Note: we need a load barrier here to not miss the
646 * delete from the list
647 */
Willy Tarreaudde1b442021-10-21 14:33:38 +0200648 __ha_barrier_load();
649 if (!LIST_INLIST_ATOMIC(&ds->waiter)) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100650 while (1) {
651 uint16_t query_id;
652 struct eb32_node *eb;
653 struct dns_query *query;
654
655 if (!ds->rx_msg.len) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100656 /* retrieve message len */
Christopher Faulet28975e12023-03-30 15:49:30 +0200657 ret = co_getblk(sc_oc(sc), (char *)&msg_len, 2, 0);
658 if (ret <= 0) {
659 if (ret == -1)
Christopher Faulet26769b02023-03-31 10:48:03 +0200660 goto error;
Christopher Faulet28975e12023-03-30 15:49:30 +0200661 applet_need_more_data(appctx);
662 break;
663 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100664
665 /* mark as consumed */
Willy Tarreaud7950ad2022-05-27 10:33:42 +0200666 co_skip(sc_oc(sc), 2);
Emeric Brunfd647d52021-02-12 20:03:38 +0100667
668 /* store message len */
669 ds->rx_msg.len = ntohs(msg_len);
Christopher Faulet28975e12023-03-30 15:49:30 +0200670 if (!ds->rx_msg.len)
671 continue;
Emeric Brunfd647d52021-02-12 20:03:38 +0100672 }
673
Willy Tarreaud7950ad2022-05-27 10:33:42 +0200674 if (co_data(sc_oc(sc)) + ds->rx_msg.offset < ds->rx_msg.len) {
Emeric Brunfd647d52021-02-12 20:03:38 +0100675 /* message only partially available */
676
677 /* read available data */
Christopher Faulet28975e12023-03-30 15:49:30 +0200678 ret = co_getblk(sc_oc(sc), ds->rx_msg.area + ds->rx_msg.offset, co_data(sc_oc(sc)), 0);
679 if (ret <= 0) {
680 if (ret == -1)
Christopher Faulet26769b02023-03-31 10:48:03 +0200681 goto error;
Christopher Faulet28975e12023-03-30 15:49:30 +0200682 applet_need_more_data(appctx);
683 break;
684 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100685
686 /* update message offset */
Willy Tarreaud7950ad2022-05-27 10:33:42 +0200687 ds->rx_msg.offset += co_data(sc_oc(sc));
Emeric Brunfd647d52021-02-12 20:03:38 +0100688
689 /* consume all pending data from the channel */
Willy Tarreaud7950ad2022-05-27 10:33:42 +0200690 co_skip(sc_oc(sc), co_data(sc_oc(sc)));
Emeric Brunfd647d52021-02-12 20:03:38 +0100691
692 /* we need to wait for more data */
Christopher Faulet28975e12023-03-30 15:49:30 +0200693 applet_need_more_data(appctx);
Emeric Brunfd647d52021-02-12 20:03:38 +0100694 break;
695 }
696
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +0500697 /* enough data is available into the channel to read the message until the end */
Emeric Brunfd647d52021-02-12 20:03:38 +0100698
699 /* read from the channel until the end of the message */
Christopher Faulet28975e12023-03-30 15:49:30 +0200700 ret = co_getblk(sc_oc(sc), ds->rx_msg.area + ds->rx_msg.offset, ds->rx_msg.len - ds->rx_msg.offset, 0);
701 if (ret <= 0) {
702 if (ret == -1)
Christopher Faulet26769b02023-03-31 10:48:03 +0200703 goto error;
Christopher Faulet28975e12023-03-30 15:49:30 +0200704 applet_need_more_data(appctx);
705 break;
706 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100707
708 /* consume all data until the end of the message from the channel */
Willy Tarreaud7950ad2022-05-27 10:33:42 +0200709 co_skip(sc_oc(sc), ds->rx_msg.len - ds->rx_msg.offset);
Emeric Brunfd647d52021-02-12 20:03:38 +0100710
711 /* reset reader offset to 0 for the next message read */
712 ds->rx_msg.offset = 0;
713
714 /* try remap query id to original */
715 memcpy(&query_id, ds->rx_msg.area, sizeof(query_id));
716 eb = eb32_lookup(&ds->query_ids, query_id);
717 if (!eb) {
718 /* query id not found means we have an unknown corresponding
719 * request, perhaps a server bug, or the query reached
720 * timeout
721 */
722 ds->rx_msg.len = 0;
723 continue;
724 }
725
726 /* re-map the original query id set by the requester */
727 query = eb32_entry(eb, struct dns_query, qid);
728 memcpy(ds->rx_msg.area, &query->original_qid, sizeof(query->original_qid));
729
730 /* remove query ids mapping from pending queries list/tree */
731 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200732 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100733 pool_free(dns_query_pool, query);
734 ds->onfly_queries--;
735
Emeric Brunfd647d52021-02-12 20:03:38 +0100736 /* the dns_session is also added in queue of the
737 * wait_sess list where the task processing
738 * response will pop available responses
739 */
Willy Tarreaudde1b442021-10-21 14:33:38 +0200740 HA_SPIN_LOCK(DNS_LOCK, &ds->dss->lock);
741
Willy Tarreau62e467c2021-10-20 11:02:13 +0200742 BUG_ON(LIST_INLIST(&ds->waiter));
Willy Tarreau2b718102021-04-21 07:32:39 +0200743 LIST_APPEND(&ds->dss->wait_sess, &ds->waiter);
Emeric Brunfd647d52021-02-12 20:03:38 +0100744
Willy Tarreaudde1b442021-10-21 14:33:38 +0200745 HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
746
Emeric Brunfd647d52021-02-12 20:03:38 +0100747 /* awake the task processing the responses */
748 task_wakeup(ds->dss->task_rsp, TASK_WOKEN_INIT);
749
750 break;
751 }
Emeric Brunfd647d52021-02-12 20:03:38 +0100752 }
753
Christopher Faulet26769b02023-03-31 10:48:03 +0200754out:
Emeric Brunfd647d52021-02-12 20:03:38 +0100755 return;
Christopher Faulet26769b02023-03-31 10:48:03 +0200756
Emeric Brunfd647d52021-02-12 20:03:38 +0100757close:
Christopher Faulet26769b02023-03-31 10:48:03 +0200758 se_fl_set(appctx->sedesc, SE_FL_EOS|SE_FL_EOI);
759 goto out;
760
761error:
762 se_fl_set(appctx->sedesc, SE_FL_ERROR);
763 goto out;
Emeric Brunfd647d52021-02-12 20:03:38 +0100764}
765
766void dns_queries_flush(struct dns_session *ds)
767{
768 struct dns_query *query, *queryb;
769
770 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
771 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200772 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100773 pool_free(dns_query_pool, query);
774 }
775}
776
777void dns_session_free(struct dns_session *ds)
778{
Tim Duesterhusc18e2442023-04-22 17:47:33 +0200779 pool_free(dns_msg_buf, ds->rx_msg.area);
780 pool_free(dns_msg_buf, ds->tx_ring_area);
Tim Duesterhusfe83f582023-04-22 17:47:34 +0200781 task_destroy(ds->task_exp);
Emeric Brunfd647d52021-02-12 20:03:38 +0100782
783 dns_queries_flush(ds);
784
Emeric Brund20dc212021-10-19 15:40:10 +0200785 /* Ensure to remove this session from external lists
786 * Note: we are under the lock of dns_stream_server
787 * which own the heads of those lists.
788 */
789 LIST_DEL_INIT(&ds->waiter);
790 LIST_DEL_INIT(&ds->list);
791
Emeric Brunfd647d52021-02-12 20:03:38 +0100792 ds->dss->cur_conns--;
793 /* Note: this is useless to update
794 * max_active_conns here because
795 * we decrease the value
796 */
Willy Tarreau62e467c2021-10-20 11:02:13 +0200797
798 BUG_ON(!LIST_ISEMPTY(&ds->list));
799 BUG_ON(!LIST_ISEMPTY(&ds->waiter));
800 BUG_ON(!LIST_ISEMPTY(&ds->queries));
801 BUG_ON(!LIST_ISEMPTY(&ds->ring.waiters));
802 BUG_ON(!eb_is_empty(&ds->query_ids));
Emeric Brunfd647d52021-02-12 20:03:38 +0100803 pool_free(dns_session_pool, ds);
804}
805
806static struct appctx *dns_session_create(struct dns_session *ds);
807
Christopher Faulet92238512022-05-12 15:24:46 +0200808static int dns_session_init(struct appctx *appctx)
809{
810 struct dns_session *ds = appctx->svcctx;
811 struct stream *s;
812 struct sockaddr_storage *addr = NULL;
813
814 if (!sockaddr_alloc(&addr, &ds->dss->srv->addr, sizeof(ds->dss->srv->addr)))
815 goto error;
816
817 if (appctx_finalize_startup(appctx, ds->dss->srv->proxy, &BUF_NULL) == -1)
818 goto error;
819
820 s = appctx_strm(appctx);
Willy Tarreau7cb9e6c2022-05-17 19:40:40 +0200821 s->scb->dst = addr;
Christopher Faulet9a790f62023-03-16 14:40:03 +0100822 s->scb->flags |= (SC_FL_RCV_ONCE|SC_FL_NOLINGER);
Christopher Faulet92238512022-05-12 15:24:46 +0200823 s->target = &ds->dss->srv->obj_type;
824 s->flags = SF_ASSIGNED;
825
826 s->do_log = NULL;
827 s->uniq_id = 0;
828
Christopher Faulet2ca4cc12023-02-22 14:22:56 +0100829 applet_expect_no_data(appctx);
Christopher Faulet92238512022-05-12 15:24:46 +0200830 ds->appctx = appctx;
831 return 0;
832
833 error:
834 return -1;
835}
836
Emeric Brunfd647d52021-02-12 20:03:38 +0100837/*
838 * Function to release a DNS tcp session
839 */
840static void dns_session_release(struct appctx *appctx)
841{
Willy Tarreau0d626a52022-05-04 20:41:19 +0200842 struct dns_session *ds = appctx->svcctx;
Willy Tarreaue3e648c2021-02-24 17:38:46 +0100843 struct dns_stream_server *dss __maybe_unused;
Emeric Brunfd647d52021-02-12 20:03:38 +0100844
845 if (!ds)
846 return;
847
Willy Tarreaub56a8782021-10-20 14:38:43 +0200848 /* We do not call ring_appctx_detach here
849 * because we want to keep readers counters
850 * to retry a conn with a different appctx.
851 */
852 HA_RWLOCK_WRLOCK(DNS_LOCK, &ds->ring.lock);
853 LIST_DEL_INIT(&appctx->wait_entry);
854 HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ds->ring.lock);
855
Emeric Brunfd647d52021-02-12 20:03:38 +0100856 dss = ds->dss;
857
858 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
859 LIST_DEL_INIT(&ds->list);
860
861 if (stopping) {
862 dns_session_free(ds);
863 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
864 return;
865 }
866
867 if (!ds->nb_queries) {
868 /* this is an idle session */
869 /* Note: this is useless to update max_active_sess
870 * here because we decrease idle_conns but
871 * dns_session_free decrease curconns
872 */
873
874 ds->dss->idle_conns--;
875 dns_session_free(ds);
876 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
877 return;
878 }
879
880 if (ds->onfly_queries == ds->nb_queries) {
881 /* the session can be released because
882 * it means that all queries AND
883 * responses are in fly */
884 dns_session_free(ds);
885 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
886 return;
887 }
888
Emeric Brunfd647d52021-02-12 20:03:38 +0100889 /* if there is no pending complete response
890 * message, ensure to reset
891 * message offsets if the session
892 * was closed with an incomplete pending response
893 */
Willy Tarreau2b718102021-04-21 07:32:39 +0200894 if (!LIST_INLIST(&ds->waiter))
Emeric Brunfd647d52021-02-12 20:03:38 +0100895 ds->rx_msg.len = ds->rx_msg.offset = 0;
896
897 /* we flush pending sent queries because we never
898 * have responses
899 */
900 ds->nb_queries -= ds->onfly_queries;
901 dns_queries_flush(ds);
902
903 /* reset offset to be sure to start from message start */
904 ds->tx_msg_offset = 0;
905
906 /* here the ofs and the attached counter
907 * are kept unchanged
908 */
909
910 /* Create a new appctx, We hope we can
911 * create from the release callback! */
912 ds->appctx = dns_session_create(ds);
913 if (!ds->appctx) {
914 dns_session_free(ds);
915 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
916 return;
917 }
918
919 if (ds->nb_queries < DNS_STREAM_MAX_PIPELINED_REQ)
Willy Tarreau2b718102021-04-21 07:32:39 +0200920 LIST_INSERT(&ds->dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100921
922 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
923}
924
925/* DNS tcp session applet */
926static struct applet dns_session_applet = {
927 .obj_type = OBJ_TYPE_APPLET,
928 .name = "<STRMDNS>", /* used for logging */
929 .fct = dns_session_io_handler,
Christopher Faulet92238512022-05-12 15:24:46 +0200930 .init = dns_session_init,
Emeric Brunfd647d52021-02-12 20:03:38 +0100931 .release = dns_session_release,
932};
933
934/*
935 * Function used to create an appctx for a DNS session
Willy Tarreau0d626a52022-05-04 20:41:19 +0200936 * It sets its context into appctx->svcctx.
Emeric Brunfd647d52021-02-12 20:03:38 +0100937 */
938static struct appctx *dns_session_create(struct dns_session *ds)
939{
940 struct appctx *appctx;
Emeric Brunfd647d52021-02-12 20:03:38 +0100941
Christopher Faulet6095d572022-05-16 17:09:48 +0200942 appctx = appctx_new_here(&dns_session_applet, NULL);
Christopher Faulet2479e5f2022-01-19 14:50:11 +0100943 if (!appctx)
Christopher Fauleta9e8b392022-03-23 11:01:09 +0100944 goto out_close;
Willy Tarreau0d626a52022-05-04 20:41:19 +0200945 appctx->svcctx = (void *)ds;
Emeric Brunfd647d52021-02-12 20:03:38 +0100946
Christopher Faulet92238512022-05-12 15:24:46 +0200947 if (appctx_init(appctx) == -1) {
Christopher Faulet13a35e52021-12-20 15:34:16 +0100948 ha_alert("out of memory in dns_session_create().\n");
Emeric Brunfd647d52021-02-12 20:03:38 +0100949 goto out_free_appctx;
Christopher Faulet13a35e52021-12-20 15:34:16 +0100950 }
951
Emeric Brunfd647d52021-02-12 20:03:38 +0100952 return appctx;
953
954 /* Error unrolling */
Emeric Brunfd647d52021-02-12 20:03:38 +0100955 out_free_appctx:
Christopher Faulet92238512022-05-12 15:24:46 +0200956 appctx_free_on_early_error(appctx);
Emeric Brunfd647d52021-02-12 20:03:38 +0100957 out_close:
958 return NULL;
959}
960
961/* Task processing expiration of unresponded queries, this one is supposed
962 * to be stuck on the same thread than the appctx handler
963 */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100964static struct task *dns_process_query_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100965{
966 struct dns_session *ds = (struct dns_session *)context;
967 struct dns_query *query, *queryb;
968
969 t->expire = TICK_ETERNITY;
970
971 list_for_each_entry_safe(query, queryb, &ds->queries, list) {
972 if (tick_is_expired(query->expire, now_ms)) {
973 eb32_delete(&query->qid);
Willy Tarreau2b718102021-04-21 07:32:39 +0200974 LIST_DELETE(&query->list);
Emeric Brunfd647d52021-02-12 20:03:38 +0100975 pool_free(dns_query_pool, query);
976 ds->onfly_queries--;
977 }
978 else {
979 t->expire = query->expire;
980 break;
981 }
982 }
983
984 return t;
985}
986
987/* Task processing expiration of idle sessions */
Willy Tarreau144f84a2021-03-02 16:09:26 +0100988static struct task *dns_process_idle_exp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +0100989{
990 struct dns_stream_server *dss = (struct dns_stream_server *)context;
991 struct dns_session *ds, *dsb;
992 int target = 0;
993 int cur_active_conns;
994
995 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
996
997
998 cur_active_conns = dss->cur_conns - dss->idle_conns;
999 if (cur_active_conns > dss->max_active_conns)
1000 dss->max_active_conns = cur_active_conns;
1001
1002 target = (dss->max_active_conns - cur_active_conns) / 2;
1003 list_for_each_entry_safe(ds, dsb, &dss->idle_sess, list) {
Christopher Faulete0f47172023-04-11 07:44:34 +02001004 if (!stopping && !target)
Emeric Brunfd647d52021-02-12 20:03:38 +01001005 break;
1006
1007 /* remove conn to pending list to ensure it won't be reused */
1008 LIST_DEL_INIT(&ds->list);
1009
1010 /* force session shutdown */
1011 ds->shutdown = 1;
1012
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001013 /* to be sure that the appctx won't miss shutdown */
Emeric Brunfd647d52021-02-12 20:03:38 +01001014 __ha_barrier_store();
1015
1016 /* wake appctx to perform the shutdown */
1017 appctx_wakeup(ds->appctx);
1018 }
1019
1020 /* reset max to current active conns */
1021 dss->max_active_conns = cur_active_conns;
1022
1023 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
1024
1025 t->expire = tick_add(now_ms, 5000);
1026
1027 return t;
1028}
1029
1030struct dns_session *dns_session_new(struct dns_stream_server *dss)
1031{
1032 struct dns_session *ds;
1033
1034 if (dss->maxconn && (dss->maxconn <= dss->cur_conns))
1035 return NULL;
1036
Christopher Faulet14a60d42022-08-03 10:30:06 +02001037 ds = pool_zalloc(dns_session_pool);
Emeric Brunfd647d52021-02-12 20:03:38 +01001038 if (!ds)
1039 return NULL;
1040
1041 ds->ofs = ~0;
1042 ds->dss = dss;
1043 LIST_INIT(&ds->list);
1044 LIST_INIT(&ds->queries);
1045 LIST_INIT(&ds->waiter);
1046 ds->rx_msg.offset = ds->rx_msg.len = 0;
1047 ds->rx_msg.area = NULL;
1048 ds->tx_ring_area = NULL;
1049 ds->task_exp = NULL;
1050 ds->appctx = NULL;
1051 ds->shutdown = 0;
1052 ds->nb_queries = 0;
1053 ds->query_ids = EB_ROOT_UNIQUE;
1054 ds->rx_msg.area = pool_alloc(dns_msg_buf);
1055 if (!ds->rx_msg.area)
1056 goto error;
1057
1058 ds->tx_ring_area = pool_alloc(dns_msg_buf);
1059 if (!ds->tx_ring_area)
1060 goto error;
1061
1062 ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE);
Christopher Faulet1a1b6742021-03-04 16:53:27 +01001063 /* never fail because it is the first watcher attached to the ring */
1064 DISGUISE(ring_attach(&ds->ring));
Emeric Brunfd647d52021-02-12 20:03:38 +01001065
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001066 if ((ds->task_exp = task_new_here()) == NULL)
Emeric Brunfd647d52021-02-12 20:03:38 +01001067 goto error;
1068
1069 ds->task_exp->process = dns_process_query_exp;
1070 ds->task_exp->context = ds;
1071
1072 ds->appctx = dns_session_create(ds);
1073 if (!ds->appctx)
1074 goto error;
1075
1076 dss->cur_conns++;
1077
1078 return ds;
1079
1080error:
Tim Duesterhusfe83f582023-04-22 17:47:34 +02001081 task_destroy(ds->task_exp);
Tim Duesterhusc18e2442023-04-22 17:47:33 +02001082 pool_free(dns_msg_buf, ds->rx_msg.area);
1083 pool_free(dns_msg_buf, ds->tx_ring_area);
Emeric Brunfd647d52021-02-12 20:03:38 +01001084
1085 pool_free(dns_session_pool, ds);
1086
1087 return NULL;
1088}
1089
1090/*
1091 * Task used to consume pending messages from nameserver ring
1092 * and forward them to dns_session ring.
1093 * Note: If no slot found a new dns_session is allocated
1094 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001095static struct task *dns_process_req(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001096{
1097 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1098 struct dns_stream_server *dss = ns->stream;
1099 struct ring *ring = dss->ring_req;
1100 struct buffer *buf = &ring->buf;
1101 uint64_t msg_len;
1102 size_t len, cnt, ofs;
1103 struct dns_session *ds, *ads;
1104 HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
1105
Emeric Brunfd647d52021-02-12 20:03:38 +01001106 HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
1107
1108 /* explanation for the initialization below: it would be better to do
1109 * this in the parsing function but this would occasionally result in
1110 * dropped events because we'd take a reference on the oldest message
1111 * and keep it while being scheduled. Thus instead let's take it the
1112 * first time we enter here so that we have a chance to pass many
1113 * existing messages before grabbing a reference to a location. This
1114 * value cannot be produced after initialization.
1115 */
Amaury Denoyelle737d10f2023-03-07 11:18:27 +01001116 if (unlikely(dss->ofs_req == ~0)) {
Aurelien DARRAGON5a43db22023-03-07 17:45:02 +01001117 dss->ofs_req = b_peek_ofs(buf, 0);
1118 HA_ATOMIC_INC(b_orig(buf) + dss->ofs_req);
Emeric Brunfd647d52021-02-12 20:03:38 +01001119 }
1120
1121 /* we were already there, adjust the offset to be relative to
1122 * the buffer's head and remove us from the counter.
1123 */
Amaury Denoyelle737d10f2023-03-07 11:18:27 +01001124 ofs = dss->ofs_req - b_head_ofs(buf);
1125 if (dss->ofs_req < b_head_ofs(buf))
1126 ofs += b_size(buf);
1127
Emeric Brunfd647d52021-02-12 20:03:38 +01001128 BUG_ON(ofs >= buf->size);
Willy Tarreau4781b152021-04-06 13:53:36 +02001129 HA_ATOMIC_DEC(b_peek(buf, ofs));
Emeric Brunfd647d52021-02-12 20:03:38 +01001130
1131 while (ofs + 1 < b_data(buf)) {
1132 struct ist myist;
1133
1134 cnt = 1;
1135 len = b_peek_varint(buf, ofs + cnt, &msg_len);
1136 if (!len)
1137 break;
1138 cnt += len;
1139 BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
1140 if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
1141 /* too large a message to ever fit, let's skip it */
1142 ofs += cnt + msg_len;
1143 continue;
1144 }
1145
1146 len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
1147
Tim Duesterhus92c696e2021-02-28 16:11:36 +01001148 myist = ist2(dns_msg_trash, len);
Emeric Brunfd647d52021-02-12 20:03:38 +01001149
1150 ads = NULL;
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001151 /* try to push request into active sess with free slot */
Emeric Brunfd647d52021-02-12 20:03:38 +01001152 if (!LIST_ISEMPTY(&dss->free_sess)) {
1153 ds = LIST_NEXT(&dss->free_sess, struct dns_session *, list);
1154
1155 if (ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) {
1156 ds->nb_queries++;
1157 if (ds->nb_queries >= DNS_STREAM_MAX_PIPELINED_REQ)
1158 LIST_DEL_INIT(&ds->list);
1159 ads = ds;
1160 }
1161 else {
1162 /* it means we were unable to put a request in this slot,
1163 * it may be close to be full so we put it at the end
1164 * of free conn list */
1165 LIST_DEL_INIT(&ds->list);
Willy Tarreau2b718102021-04-21 07:32:39 +02001166 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001167 }
1168 }
1169
1170 if (!ads) {
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001171 /* try to push request into idle, this one should have enough free space */
Emeric Brunfd647d52021-02-12 20:03:38 +01001172 if (!LIST_ISEMPTY(&dss->idle_sess)) {
1173 ds = LIST_NEXT(&dss->idle_sess, struct dns_session *, list);
1174
1175 /* ring is empty so this ring_write should never fail */
1176 ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1177 ds->nb_queries++;
1178 LIST_DEL_INIT(&ds->list);
1179
1180 ds->dss->idle_conns--;
1181
1182 /* we may have to update the max_active_conns */
1183 if (ds->dss->max_active_conns < ds->dss->cur_conns - ds->dss->idle_conns)
1184 ds->dss->max_active_conns = ds->dss->cur_conns - ds->dss->idle_conns;
1185
1186 /* since we may unable to find a free list to handle
1187 * this request, this request may be large and fill
1188 * the ring buffer so we prefer to put at the end of free
1189 * list. */
Willy Tarreau2b718102021-04-21 07:32:39 +02001190 LIST_APPEND(&dss->free_sess, &ds->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001191 ads = ds;
1192 }
1193 }
1194
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001195 /* we didn't find a session available with large enough room */
Emeric Brunfd647d52021-02-12 20:03:38 +01001196 if (!ads) {
1197 /* allocate a new session */
1198 ads = dns_session_new(dss);
1199 if (ads) {
1200 /* ring is empty so this ring_write should never fail */
1201 ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
1202 ads->nb_queries++;
Willy Tarreau2b718102021-04-21 07:32:39 +02001203 LIST_INSERT(&dss->free_sess, &ads->list);
Emeric Brunfd647d52021-02-12 20:03:38 +01001204 }
1205 else
1206 ns->counters->snd_error++;
1207 }
1208
1209 if (ads)
1210 ns->counters->sent++;
1211
1212 ofs += cnt + len;
1213 }
1214
Willy Tarreau4781b152021-04-06 13:53:36 +02001215 HA_ATOMIC_INC(b_peek(buf, ofs));
Amaury Denoyelle737d10f2023-03-07 11:18:27 +01001216 dss->ofs_req = b_peek_ofs(buf, ofs);
Emeric Brunfd647d52021-02-12 20:03:38 +01001217 HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
1218
1219
1220 HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
1221 return t;
1222}
1223
1224/*
1225 * Task used to consume response
1226 * Note: upper layer callback is called
1227 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001228static struct task *dns_process_rsp(struct task *t, void *context, unsigned int state)
Emeric Brunfd647d52021-02-12 20:03:38 +01001229{
1230 struct dns_nameserver *ns = (struct dns_nameserver *)context;
1231
1232 ns->process_responses(ns);
1233
1234 return t;
1235}
1236
1237/* Function used to initialize an TCP nameserver */
1238int dns_stream_init(struct dns_nameserver *ns, struct server *srv)
1239{
1240 struct dns_stream_server *dss = NULL;
1241
1242 dss = calloc(1, sizeof(*dss));
1243 if (!dss) {
1244 ha_alert("memory allocation error initializing dns tcp server '%s'.\n", srv->id);
1245 goto out;
1246 }
1247
1248 dss->srv = srv;
1249 dss->maxconn = srv->maxconn;
1250
1251 dss->ofs_req = ~0; /* init ring offset */
1252 dss->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
1253 if (!dss->ring_req) {
1254 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1255 goto out;
1256 }
1257 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001258 if ((dss->task_req = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001259 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1260 goto out;
1261 }
1262
1263 /* Update task's parameters */
1264 dss->task_req->process = dns_process_req;
1265 dss->task_req->context = ns;
1266
1267 /* attach the task as reader */
1268 if (!ring_attach(dss->ring_req)) {
1269 /* mark server attached to the ring */
1270 ha_alert("server '%s': too many watchers for ring. this should never happen.\n", srv->id);
1271 goto out;
1272 }
1273
1274 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001275 if ((dss->task_rsp = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001276 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1277 goto out;
1278 }
1279
1280 /* Update task's parameters */
1281 dss->task_rsp->process = dns_process_rsp;
1282 dss->task_rsp->context = ns;
1283
1284 /* Create the task associated to the resolver target handling conns */
Willy Tarreaubeeabf52021-10-01 18:23:30 +02001285 if ((dss->task_idle = task_new_anywhere()) == NULL) {
Emeric Brunfd647d52021-02-12 20:03:38 +01001286 ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
1287 goto out;
1288 }
1289
1290 /* Update task's parameters */
1291 dss->task_idle->process = dns_process_idle_exp;
1292 dss->task_idle->context = dss;
1293 dss->task_idle->expire = tick_add(now_ms, 5000);
1294
Ilya Shipitsin0de36ad2021-02-20 00:23:36 +05001295 /* let start the task to free idle conns immediately */
Emeric Brunfd647d52021-02-12 20:03:38 +01001296 task_queue(dss->task_idle);
1297
1298 LIST_INIT(&dss->free_sess);
1299 LIST_INIT(&dss->idle_sess);
1300 LIST_INIT(&dss->wait_sess);
1301 HA_SPIN_INIT(&dss->lock);
1302 ns->stream = dss;
1303 return 0;
1304out:
1305 if (dss && dss->task_rsp)
1306 task_destroy(dss->task_rsp);
1307 if (dss && dss->task_req)
1308 task_destroy(dss->task_req);
1309 if (dss && dss->ring_req)
1310 ring_free(dss->ring_req);
1311
1312 free(dss);
Emeric Brunc9437992021-02-12 19:42:55 +01001313 return -1;
Christopher Faulet67957bd2017-09-27 11:00:59 +02001314}
1315
Emeric Brunc9437992021-02-12 19:42:55 +01001316int init_dns_buffers()
Baptiste Assmann325137d2015-04-13 23:40:55 +02001317{
Emeric Brunc9437992021-02-12 19:42:55 +01001318 dns_msg_trash = malloc(DNS_TCP_MSG_MAX_SIZE);
1319 if (!dns_msg_trash)
1320 return 0;
Baptiste Assmann325137d2015-04-13 23:40:55 +02001321
Emeric Brunc9437992021-02-12 19:42:55 +01001322 return 1;
1323}
Baptiste Assmannc1ce5f32016-05-14 11:26:22 +02001324
Emeric Brunc9437992021-02-12 19:42:55 +01001325void deinit_dns_buffers()
1326{
Willy Tarreau61cfdf42021-02-20 10:46:51 +01001327 ha_free(&dns_msg_trash);
Emeric Brunc9437992021-02-12 19:42:55 +01001328}
Emeric Brund26a6232021-01-04 13:32:20 +01001329
/* hook the allocation and release of dns_msg_trash into the per-thread
 * alloc/free registration macros
 */
REGISTER_PER_THREAD_ALLOC(init_dns_buffers);
REGISTER_PER_THREAD_FREE(deinit_dns_buffers);