blob: cc7e50c0e8480f3890f953f6a30f0cea65648f8b [file] [log] [blame]
Willy Tarreaue6b98942007-10-29 01:09:36 +01001/*
2 * AF_INET/AF_INET6 SOCK_STREAM protocol layer (tcp)
3 *
Willy Tarreaue8c66af2008-01-13 18:40:14 +01004 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
Willy Tarreaue6b98942007-10-29 01:09:36 +01005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <ctype.h>
14#include <errno.h>
15#include <fcntl.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <time.h>
20
21#include <sys/param.h>
22#include <sys/socket.h>
23#include <sys/stat.h>
24#include <sys/types.h>
25#include <sys/un.h>
26
Willy Tarreaub6866442008-07-14 23:54:42 +020027#include <common/cfgparse.h>
Willy Tarreaue6b98942007-10-29 01:09:36 +010028#include <common/compat.h>
29#include <common/config.h>
30#include <common/debug.h>
31#include <common/errors.h>
32#include <common/memory.h>
33#include <common/mini-clist.h>
34#include <common/standard.h>
35#include <common/time.h>
36#include <common/version.h>
37
38#include <types/acl.h>
Willy Tarreaue6b98942007-10-29 01:09:36 +010039#include <types/global.h>
40#include <types/polling.h>
41#include <types/proxy.h>
42#include <types/server.h>
43
44#include <proto/acl.h>
45#include <proto/backend.h>
46#include <proto/buffers.h>
47#include <proto/fd.h>
48#include <proto/protocols.h>
49#include <proto/proto_tcp.h>
Willy Tarreaub6866442008-07-14 23:54:42 +020050#include <proto/proxy.h>
Willy Tarreaue6b98942007-10-29 01:09:36 +010051#include <proto/queue.h>
52#include <proto/senddata.h>
53#include <proto/session.h>
54#include <proto/stream_sock.h>
55#include <proto/task.h>
56
Willy Tarreaue8c66af2008-01-13 18:40:14 +010057#ifdef CONFIG_HAP_CTTPROXY
58#include <import/ip_tproxy.h>
59#endif
60
Willy Tarreaue6b98942007-10-29 01:09:36 +010061static int tcp_bind_listeners(struct protocol *proto);
62
63/* Note: must not be declared <const> as its list will be overwritten */
64static struct protocol proto_tcpv4 = {
65 .name = "tcpv4",
66 .sock_domain = AF_INET,
67 .sock_type = SOCK_STREAM,
68 .sock_prot = IPPROTO_TCP,
69 .sock_family = AF_INET,
70 .sock_addrlen = sizeof(struct sockaddr_in),
71 .l3_addrlen = 32/8,
72 .read = &stream_sock_read,
73 .write = &stream_sock_write,
74 .bind_all = tcp_bind_listeners,
75 .unbind_all = unbind_all_listeners,
76 .enable_all = enable_all_listeners,
77 .listeners = LIST_HEAD_INIT(proto_tcpv4.listeners),
78 .nb_listeners = 0,
79};
80
81/* Note: must not be declared <const> as its list will be overwritten */
82static struct protocol proto_tcpv6 = {
83 .name = "tcpv6",
84 .sock_domain = AF_INET6,
85 .sock_type = SOCK_STREAM,
86 .sock_prot = IPPROTO_TCP,
87 .sock_family = AF_INET6,
88 .sock_addrlen = sizeof(struct sockaddr_in6),
89 .l3_addrlen = 128/8,
90 .read = &stream_sock_read,
91 .write = &stream_sock_write,
92 .bind_all = tcp_bind_listeners,
93 .unbind_all = unbind_all_listeners,
94 .enable_all = enable_all_listeners,
95 .listeners = LIST_HEAD_INIT(proto_tcpv6.listeners),
96 .nb_listeners = 0,
97};
98
Willy Tarreaue8c66af2008-01-13 18:40:14 +010099
100/* Binds ipv4 address <local> to socket <fd>, unless <flags> is set, in which
101 * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
102 * - 0 : ignore remote address (may even be a NULL pointer)
103 * - 1 : use provided address
104 * - 2 : use provided port
105 * - 3 : use both
106 *
107 * The function supports multiple foreign binding methods :
108 * - linux_tproxy: we directly bind to the foreign address
109 * - cttproxy: we bind to a local address then nat.
110 * The second one can be used as a fallback for the first one.
111 * This function returns 0 when everything's OK, 1 if it could not bind, to the
112 * local address, 2 if it could not bind to the foreign address.
113 */
114int tcpv4_bind_socket(int fd, int flags, struct sockaddr_in *local, struct sockaddr_in *remote)
115{
116 struct sockaddr_in bind_addr;
117 int foreign_ok = 0;
118 int ret;
119
120#ifdef CONFIG_HAP_LINUX_TPROXY
121 static int ip_transp_working = 1;
122 if (flags && ip_transp_working) {
123 if (setsockopt(fd, SOL_IP, IP_TRANSPARENT, (char *) &one, sizeof(one)) == 0
124 || setsockopt(fd, SOL_IP, IP_FREEBIND, (char *) &one, sizeof(one)) == 0)
125 foreign_ok = 1;
126 else
127 ip_transp_working = 0;
128 }
129#endif
130 if (flags) {
131 memset(&bind_addr, 0, sizeof(bind_addr));
132 if (flags & 1)
133 bind_addr.sin_addr = remote->sin_addr;
134 if (flags & 2)
135 bind_addr.sin_port = remote->sin_port;
136 }
137
138 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one));
139 if (foreign_ok) {
140 ret = bind(fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr));
141 if (ret < 0)
142 return 2;
143 }
144 else {
145 ret = bind(fd, (struct sockaddr *)local, sizeof(*local));
146 if (ret < 0)
147 return 1;
148 }
149
150 if (!flags)
151 return 0;
152
153#ifdef CONFIG_HAP_CTTPROXY
154 if (!foreign_ok) {
155 struct in_tproxy itp1, itp2;
156 memset(&itp1, 0, sizeof(itp1));
157
158 itp1.op = TPROXY_ASSIGN;
159 itp1.v.addr.faddr = bind_addr.sin_addr;
160 itp1.v.addr.fport = bind_addr.sin_port;
161
162 /* set connect flag on socket */
163 itp2.op = TPROXY_FLAGS;
164 itp2.v.flags = ITP_CONNECT | ITP_ONCE;
165
166 if (setsockopt(fd, SOL_IP, IP_TPROXY, &itp1, sizeof(itp1)) != -1 &&
167 setsockopt(fd, SOL_IP, IP_TPROXY, &itp2, sizeof(itp2)) != -1) {
168 foreign_ok = 1;
169 }
170 }
171#endif
172 if (!foreign_ok)
173 /* we could not bind to a foreign address */
174 return 2;
175
176 return 0;
177}
Willy Tarreaue6b98942007-10-29 01:09:36 +0100178
179/* This function tries to bind a TCPv4/v6 listener. It may return a warning or
180 * an error message in <err> if the message is at most <errlen> bytes long
181 * (including '\0'). The return value is composed from ERR_ABORT, ERR_WARN,
182 * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
183 * was alright and that no message was returned. ERR_RETRYABLE means that an
184 * error occurred but that it may vanish after a retry (eg: port in use), and
185 * ERR_FATAL indicates a non-fixable error.ERR_WARN and ERR_ALERT do not alter
186 * the meaning of the error, but just indicate that a message is present which
187 * should be displayed with the respective level. Last, ERR_ABORT indicates
188 * that it's pointless to try to start other listeners. No error message is
189 * returned if errlen is NULL.
190 */
191int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen)
192{
193 __label__ tcp_return, tcp_close_return;
194 int fd, err;
195 const char *msg = NULL;
196
197 /* ensure we never return garbage */
198 if (errmsg && errlen)
199 *errmsg = 0;
200
201 if (listener->state != LI_ASSIGNED)
202 return ERR_NONE; /* already bound */
203
204 err = ERR_NONE;
205
206 if ((fd = socket(listener->addr.ss_family, SOCK_STREAM, IPPROTO_TCP)) == -1) {
207 err |= ERR_RETRYABLE | ERR_ALERT;
208 msg = "cannot create listening socket";
209 goto tcp_return;
210 }
211
212 if (fd >= global.maxsock) {
213 err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
214 msg = "not enough free sockets (raise '-n' parameter)";
215 goto tcp_close_return;
216 }
217
218 if ((fcntl(fd, F_SETFL, O_NONBLOCK) == -1) ||
219 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
220 (char *) &one, sizeof(one)) == -1)) {
221 err |= ERR_FATAL | ERR_ALERT;
222 msg = "cannot make socket non-blocking";
223 goto tcp_close_return;
224 }
225
226 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one)) == -1) {
227 /* not fatal but should be reported */
228 msg = "cannot do so_reuseaddr";
229 err |= ERR_ALERT;
230 }
231
232 if (listener->options & LI_O_NOLINGER)
233 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
234
235#ifdef SO_REUSEPORT
236 /* OpenBSD supports this. As it's present in old libc versions of Linux,
237 * it might return an error that we will silently ignore.
238 */
239 setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, (char *) &one, sizeof(one));
240#endif
Willy Tarreaub1e52e82008-01-13 14:49:51 +0100241#ifdef CONFIG_HAP_LINUX_TPROXY
242 if ((listener->options & LI_O_FOREIGN)
Willy Tarreau0a459892008-01-13 17:37:16 +0100243 && (setsockopt(fd, SOL_IP, IP_TRANSPARENT, (char *) &one, sizeof(one)) == -1)
244 && (setsockopt(fd, SOL_IP, IP_FREEBIND, (char *) &one, sizeof(one)) == -1)) {
Willy Tarreaub1e52e82008-01-13 14:49:51 +0100245 msg = "cannot make listening socket transparent";
246 err |= ERR_ALERT;
247 }
248#endif
Willy Tarreaue6b98942007-10-29 01:09:36 +0100249 if (bind(fd, (struct sockaddr *)&listener->addr, listener->proto->sock_addrlen) == -1) {
250 err |= ERR_RETRYABLE | ERR_ALERT;
251 msg = "cannot bind socket";
252 goto tcp_close_return;
253 }
254
Willy Tarreauc73ce2b2008-01-06 10:55:10 +0100255 if (listen(fd, listener->backlog ? listener->backlog : listener->maxconn) == -1) {
Willy Tarreaue6b98942007-10-29 01:09:36 +0100256 err |= ERR_RETRYABLE | ERR_ALERT;
257 msg = "cannot listen to socket";
258 goto tcp_close_return;
259 }
260
261 /* the socket is ready */
262 listener->fd = fd;
263 listener->state = LI_LISTEN;
264
265 /* the function for the accept() event */
266 fd_insert(fd);
267 fdtab[fd].cb[DIR_RD].f = listener->accept;
268 fdtab[fd].cb[DIR_WR].f = NULL; /* never called */
269 fdtab[fd].cb[DIR_RD].b = fdtab[fd].cb[DIR_WR].b = NULL;
270 fdtab[fd].owner = (struct task *)listener; /* reference the listener instead of a task */
271 fdtab[fd].state = FD_STLISTEN;
272 fdtab[fd].peeraddr = NULL;
273 fdtab[fd].peerlen = 0;
274 fdtab[fd].listener = NULL;
Willy Tarreaue6b98942007-10-29 01:09:36 +0100275 tcp_return:
276 if (msg && errlen)
277 strlcpy2(errmsg, msg, errlen);
278 return err;
279
280 tcp_close_return:
281 close(fd);
282 goto tcp_return;
283}
284
285/* This function creates all TCP sockets bound to the protocol entry <proto>.
286 * It is intended to be used as the protocol's bind_all() function.
287 * The sockets will be registered but not added to any fd_set, in order not to
288 * loose them across the fork(). A call to enable_all_listeners() is needed
289 * to complete initialization. The return value is composed from ERR_*.
290 */
291static int tcp_bind_listeners(struct protocol *proto)
292{
293 struct listener *listener;
294 int err = ERR_NONE;
295
296 list_for_each_entry(listener, &proto->listeners, proto_list) {
297 err |= tcp_bind_listener(listener, NULL, 0);
298 if ((err & ERR_CODE) == ERR_ABORT)
299 break;
300 }
301
302 return err;
303}
304
305/* Add listener to the list of tcpv4 listeners. The listener's state
306 * is automatically updated from LI_INIT to LI_ASSIGNED. The number of
307 * listeners is updated. This is the function to use to add a new listener.
308 */
309void tcpv4_add_listener(struct listener *listener)
310{
311 if (listener->state != LI_INIT)
312 return;
313 listener->state = LI_ASSIGNED;
314 listener->proto = &proto_tcpv4;
315 LIST_ADDQ(&proto_tcpv4.listeners, &listener->proto_list);
316 proto_tcpv4.nb_listeners++;
317}
318
319/* Add listener to the list of tcpv4 listeners. The listener's state
320 * is automatically updated from LI_INIT to LI_ASSIGNED. The number of
321 * listeners is updated. This is the function to use to add a new listener.
322 */
323void tcpv6_add_listener(struct listener *listener)
324{
325 if (listener->state != LI_INIT)
326 return;
327 listener->state = LI_ASSIGNED;
328 listener->proto = &proto_tcpv6;
329 LIST_ADDQ(&proto_tcpv6.listeners, &listener->proto_list);
330 proto_tcpv6.nb_listeners++;
331}
332
Willy Tarreaub6866442008-07-14 23:54:42 +0200333/* This function should be called to parse a line starting with the "tcp-request"
334 * keyword.
335 */
336static int tcp_parse_tcp_req(char **args, int section_type, struct proxy *curpx,
337 struct proxy *defpx, char *err, int errlen)
338{
339 const char *ptr = NULL;
340 int val;
341 int retlen;
342
343 if (!*args[1]) {
344 snprintf(err, errlen, "missing argument for '%s' in %s '%s'",
345 args[0], proxy_type_str(proxy), curpx->id);
346 return -1;
347 }
348
349 if (!strcmp(args[1], "inspect-delay")) {
350 if (curpx == defpx) {
351 snprintf(err, errlen, "%s %s is not allowed in 'defaults' sections",
352 args[0], args[1]);
353 return -1;
354 }
355
356 if (!(curpx->cap & PR_CAP_FE)) {
357 snprintf(err, errlen, "%s %s will be ignored because %s '%s' has no %s capability",
358 args[0], args[1], proxy_type_str(proxy), curpx->id,
359 "frontend");
360 return 1;
361 }
362
363 if (!*args[2] || (ptr = parse_time_err(args[2], &val, TIME_UNIT_MS))) {
364 retlen = snprintf(err, errlen,
365 "'%s %s' expects a positive delay in milliseconds, in %s '%s'",
366 args[0], args[1], proxy_type_str(proxy), curpx->id);
367 if (ptr && retlen < errlen)
368 retlen += snprintf(err+retlen, errlen - retlen,
369 " (unexpected character '%c')", *ptr);
370 return -1;
371 }
372
373 if (curpx->tcp_req.inspect_delay) {
374 snprintf(err, errlen, "ignoring %s %s (was already defined) in %s '%s'",
375 args[0], args[1], proxy_type_str(proxy), curpx->id);
376 return 1;
377 }
378 curpx->tcp_req.inspect_delay = val;
379 return 0;
380 }
381
382 if (!strcmp(args[1], "content")) {
383 int action;
384 int pol = ACL_COND_NONE;
385 struct acl_cond *cond;
386 struct tcp_rule *rule;
387
388 if (curpx == defpx) {
389 snprintf(err, errlen, "%s %s is not allowed in 'defaults' sections",
390 args[0], args[1]);
391 return -1;
392 }
393
394 if (!strcmp(args[2], "accept"))
395 action = TCP_ACT_ACCEPT;
396 else if (!strcmp(args[2], "reject"))
397 action = TCP_ACT_REJECT;
398 else {
399 retlen = snprintf(err, errlen,
400 "'%s %s' expects 'accept' or 'reject', in %s '%s' (was '%s')",
401 args[0], args[1], proxy_type_str(curpx), curpx->id, args[2]);
402 return -1;
403 }
404
405 pol = ACL_COND_NONE;
406 cond = NULL;
407
408 if (!strcmp(args[3], "if"))
409 pol = ACL_COND_IF;
410 else if (!strcmp(args[3], "unless"))
411 pol = ACL_COND_UNLESS;
412
413 /* Note: we consider "if TRUE" when there is no condition */
414 if (pol != ACL_COND_NONE &&
415 (cond = parse_acl_cond((const char **)args+4, &curpx->acl, pol)) == NULL) {
416 retlen = snprintf(err, errlen,
417 "Error detected in %s '%s' while parsing '%s' condition",
418 proxy_type_str(curpx), curpx->id, args[3]);
419 return -1;
420 }
421
422 rule = (struct tcp_rule *)calloc(1, sizeof(*rule));
423 rule->cond = cond;
424 rule->action = action;
425 LIST_INIT(&rule->list);
426 LIST_ADDQ(&curpx->tcp_req.inspect_rules, &rule->list);
427 return 0;
428 }
429
430 snprintf(err, errlen, "unknown argument '%s' after '%s' in %s '%s'",
431 args[1], args[0], proxy_type_str(proxy), curpx->id);
432 return -1;
433}
434
435/* return the number of bytes in the request buffer */
436static int
437acl_fetch_req_len(struct proxy *px, struct session *l4, void *l7, int dir,
438 struct acl_expr *expr, struct acl_test *test)
439{
440 if (!l4 || !l4->req)
441 return 0;
442
443 test->i = l4->req->l;
444 test->flags = ACL_TEST_F_VOLATILE | ACL_TEST_F_MAY_CHANGE;
445 return 1;
446}
447
Willy Tarreau655e26a2008-07-15 18:58:05 +0200448/* Return the version of the SSL protocol in the request. It supports both
449 * SSLv3 (TLSv1) header format for any message, and SSLv2 header format for
450 * the hello message. The SSLv3 format is described in RFC 2246 p49, and the
451 * SSLv2 format is described here, and completed p67 of RFC 2246 :
452 * http://wp.netscape.com/eng/security/SSL_2.html
453 *
454 * Note: this decoder only works with non-wrapping data.
455 */
456static int
457acl_fetch_req_ssl_ver(struct proxy *px, struct session *l4, void *l7, int dir,
458 struct acl_expr *expr, struct acl_test *test)
459{
460 int version, bleft, msg_len;
461 const unsigned char *data;
462
463 if (!l4 || !l4->req)
464 return 0;
465
466 msg_len = 0;
467 bleft = l4->req->l;
468 if (!bleft)
469 goto too_short;
470
471 data = l4->req->w;
472 if ((*data >= 0x14 && *data <= 0x17) || (*data == 0xFF)) {
473 /* SSLv3 header format */
474 if (bleft < 5)
475 goto too_short;
476
477 version = (data[1] << 16) + data[2]; /* version: major, minor */
478 msg_len = (data[3] << 8) + data[4]; /* record length */
479
480 /* format introduced with SSLv3 */
481 if (version < 0x00030000)
482 goto not_ssl;
483
484 /* message length between 1 and 2^14 + 2048 */
485 if (msg_len < 1 || msg_len > ((1<<14) + 2048))
486 goto not_ssl;
487
488 bleft -= 5; data += 5;
489 } else {
490 /* SSLv2 header format, only supported for hello (msg type 1) */
491 int rlen, plen, cilen, silen, chlen;
492
493 if (*data & 0x80) {
494 if (bleft < 3)
495 goto too_short;
496 /* short header format : 15 bits for length */
497 rlen = ((data[0] & 0x7F) << 8) | data[1];
498 plen = 0;
499 bleft -= 2; data += 2;
500 } else {
501 if (bleft < 4)
502 goto too_short;
503 /* long header format : 14 bits for length + pad length */
504 rlen = ((data[0] & 0x3F) << 8) | data[1];
505 plen = data[2];
506 bleft -= 3; data += 2;
507 }
508
509 if (*data != 0x01)
510 goto not_ssl;
511 bleft--; data++;
512
513 if (bleft < 8)
514 goto too_short;
515 version = (data[0] << 16) + data[1]; /* version: major, minor */
516 cilen = (data[2] << 8) + data[3]; /* cipher len, multiple of 3 */
517 silen = (data[4] << 8) + data[5]; /* session_id_len: 0 or 16 */
518 chlen = (data[6] << 8) + data[7]; /* 16<=challenge length<=32 */
519
520 bleft -= 8; data += 8;
521 if (cilen % 3 != 0)
522 goto not_ssl;
523 if (silen && silen != 16)
524 goto not_ssl;
525 if (chlen < 16 || chlen > 32)
526 goto not_ssl;
527 if (rlen != 9 + cilen + silen + chlen)
528 goto not_ssl;
529
530 /* focus on the remaining data length */
531 msg_len = cilen + silen + chlen + plen;
532 }
533 /* We could recursively check that the buffer ends exactly on an SSL
534 * fragment boundary and that a possible next segment is still SSL,
535 * but that's a bit pointless. However, we could still check that
536 * all the part of the request which fits in a buffer is already
537 * there.
538 */
539 if (msg_len > l4->req->rlim - l4->req->w)
540 msg_len = l4->req->rlim - l4->req->w;
541
542 if (bleft < msg_len)
543 goto too_short;
544
545 /* OK that's enough. We have at least the whole message, and we have
546 * the protocol version.
547 */
548 test->i = version;
549 test->flags = ACL_TEST_F_VOLATILE;
550 return 1;
551
552 too_short:
553 test->flags = ACL_TEST_F_MAY_CHANGE;
554 not_ssl:
555 return 0;
556}
557
Willy Tarreaub6866442008-07-14 23:54:42 +0200558
559static struct cfg_kw_list cfg_kws = {{ },{
560 { CFG_LISTEN, "tcp-request", tcp_parse_tcp_req },
561 { 0, NULL, NULL },
562}};
563
564static struct acl_kw_list acl_kws = {{ },{
Willy Tarreau655e26a2008-07-15 18:58:05 +0200565 { "req_len", acl_parse_int, acl_fetch_req_len, acl_match_int },
566 { "req_ssl_ver", acl_parse_dotted_ver, acl_fetch_req_ssl_ver, acl_match_int },
Willy Tarreaub6866442008-07-14 23:54:42 +0200567 { NULL, NULL, NULL, NULL },
568}};
569
Willy Tarreaue6b98942007-10-29 01:09:36 +0100570__attribute__((constructor))
571static void __tcp_protocol_init(void)
572{
573 protocol_register(&proto_tcpv4);
574 protocol_register(&proto_tcpv6);
Willy Tarreaub6866442008-07-14 23:54:42 +0200575 cfg_register_keywords(&cfg_kws);
576 acl_register_keywords(&acl_kws);
Willy Tarreaue6b98942007-10-29 01:09:36 +0100577}
578
579
580/*
581 * Local variables:
582 * c-indent-level: 8
583 * c-basic-offset: 8
584 * End:
585 */