blob: d122a03a135c7d295805566cf8a2b49800695986 [file] [log] [blame]
/*
 * AF_INET/AF_INET6 SOCK_STREAM protocol layer (tcp)
 *
 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <ctype.h>
14#include <errno.h>
15#include <fcntl.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <time.h>
20
21#include <sys/param.h>
22#include <sys/socket.h>
23#include <sys/stat.h>
24#include <sys/types.h>
25#include <sys/un.h>
26
27#include <common/compat.h>
28#include <common/config.h>
29#include <common/debug.h>
30#include <common/errors.h>
31#include <common/memory.h>
32#include <common/mini-clist.h>
33#include <common/standard.h>
34#include <common/time.h>
35#include <common/version.h>
36
37#include <types/acl.h>
38#include <types/client.h>
39#include <types/global.h>
40#include <types/polling.h>
41#include <types/proxy.h>
42#include <types/server.h>
43
44#include <proto/acl.h>
45#include <proto/backend.h>
46#include <proto/buffers.h>
47#include <proto/fd.h>
48#include <proto/protocols.h>
49#include <proto/proto_tcp.h>
50#include <proto/queue.h>
51#include <proto/senddata.h>
52#include <proto/session.h>
53#include <proto/stream_sock.h>
54#include <proto/task.h>
55
Willy Tarreaue8c66af2008-01-13 18:40:14 +010056#ifdef CONFIG_HAP_CTTPROXY
57#include <import/ip_tproxy.h>
58#endif
59
Willy Tarreaue6b98942007-10-29 01:09:36 +010060static int tcp_bind_listeners(struct protocol *proto);
61
62/* Note: must not be declared <const> as its list will be overwritten */
63static struct protocol proto_tcpv4 = {
64 .name = "tcpv4",
65 .sock_domain = AF_INET,
66 .sock_type = SOCK_STREAM,
67 .sock_prot = IPPROTO_TCP,
68 .sock_family = AF_INET,
69 .sock_addrlen = sizeof(struct sockaddr_in),
70 .l3_addrlen = 32/8,
71 .read = &stream_sock_read,
72 .write = &stream_sock_write,
73 .bind_all = tcp_bind_listeners,
74 .unbind_all = unbind_all_listeners,
75 .enable_all = enable_all_listeners,
76 .listeners = LIST_HEAD_INIT(proto_tcpv4.listeners),
77 .nb_listeners = 0,
78};
79
80/* Note: must not be declared <const> as its list will be overwritten */
81static struct protocol proto_tcpv6 = {
82 .name = "tcpv6",
83 .sock_domain = AF_INET6,
84 .sock_type = SOCK_STREAM,
85 .sock_prot = IPPROTO_TCP,
86 .sock_family = AF_INET6,
87 .sock_addrlen = sizeof(struct sockaddr_in6),
88 .l3_addrlen = 128/8,
89 .read = &stream_sock_read,
90 .write = &stream_sock_write,
91 .bind_all = tcp_bind_listeners,
92 .unbind_all = unbind_all_listeners,
93 .enable_all = enable_all_listeners,
94 .listeners = LIST_HEAD_INIT(proto_tcpv6.listeners),
95 .nb_listeners = 0,
96};
97
Willy Tarreaue8c66af2008-01-13 18:40:14 +010098
99/* Binds ipv4 address <local> to socket <fd>, unless <flags> is set, in which
100 * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
101 * - 0 : ignore remote address (may even be a NULL pointer)
102 * - 1 : use provided address
103 * - 2 : use provided port
104 * - 3 : use both
105 *
106 * The function supports multiple foreign binding methods :
107 * - linux_tproxy: we directly bind to the foreign address
108 * - cttproxy: we bind to a local address then nat.
109 * The second one can be used as a fallback for the first one.
110 * This function returns 0 when everything's OK, 1 if it could not bind, to the
111 * local address, 2 if it could not bind to the foreign address.
112 */
113int tcpv4_bind_socket(int fd, int flags, struct sockaddr_in *local, struct sockaddr_in *remote)
114{
115 struct sockaddr_in bind_addr;
116 int foreign_ok = 0;
117 int ret;
118
119#ifdef CONFIG_HAP_LINUX_TPROXY
120 static int ip_transp_working = 1;
121 if (flags && ip_transp_working) {
122 if (setsockopt(fd, SOL_IP, IP_TRANSPARENT, (char *) &one, sizeof(one)) == 0
123 || setsockopt(fd, SOL_IP, IP_FREEBIND, (char *) &one, sizeof(one)) == 0)
124 foreign_ok = 1;
125 else
126 ip_transp_working = 0;
127 }
128#endif
129 if (flags) {
130 memset(&bind_addr, 0, sizeof(bind_addr));
131 if (flags & 1)
132 bind_addr.sin_addr = remote->sin_addr;
133 if (flags & 2)
134 bind_addr.sin_port = remote->sin_port;
135 }
136
137 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one));
138 if (foreign_ok) {
139 ret = bind(fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr));
140 if (ret < 0)
141 return 2;
142 }
143 else {
144 ret = bind(fd, (struct sockaddr *)local, sizeof(*local));
145 if (ret < 0)
146 return 1;
147 }
148
149 if (!flags)
150 return 0;
151
152#ifdef CONFIG_HAP_CTTPROXY
153 if (!foreign_ok) {
154 struct in_tproxy itp1, itp2;
155 memset(&itp1, 0, sizeof(itp1));
156
157 itp1.op = TPROXY_ASSIGN;
158 itp1.v.addr.faddr = bind_addr.sin_addr;
159 itp1.v.addr.fport = bind_addr.sin_port;
160
161 /* set connect flag on socket */
162 itp2.op = TPROXY_FLAGS;
163 itp2.v.flags = ITP_CONNECT | ITP_ONCE;
164
165 if (setsockopt(fd, SOL_IP, IP_TPROXY, &itp1, sizeof(itp1)) != -1 &&
166 setsockopt(fd, SOL_IP, IP_TPROXY, &itp2, sizeof(itp2)) != -1) {
167 foreign_ok = 1;
168 }
169 }
170#endif
171 if (!foreign_ok)
172 /* we could not bind to a foreign address */
173 return 2;
174
175 return 0;
176}
Willy Tarreaue6b98942007-10-29 01:09:36 +0100177
178/* This function tries to bind a TCPv4/v6 listener. It may return a warning or
179 * an error message in <err> if the message is at most <errlen> bytes long
180 * (including '\0'). The return value is composed from ERR_ABORT, ERR_WARN,
181 * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
182 * was alright and that no message was returned. ERR_RETRYABLE means that an
183 * error occurred but that it may vanish after a retry (eg: port in use), and
184 * ERR_FATAL indicates a non-fixable error.ERR_WARN and ERR_ALERT do not alter
185 * the meaning of the error, but just indicate that a message is present which
186 * should be displayed with the respective level. Last, ERR_ABORT indicates
187 * that it's pointless to try to start other listeners. No error message is
188 * returned if errlen is NULL.
189 */
190int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen)
191{
192 __label__ tcp_return, tcp_close_return;
193 int fd, err;
194 const char *msg = NULL;
195
196 /* ensure we never return garbage */
197 if (errmsg && errlen)
198 *errmsg = 0;
199
200 if (listener->state != LI_ASSIGNED)
201 return ERR_NONE; /* already bound */
202
203 err = ERR_NONE;
204
205 if ((fd = socket(listener->addr.ss_family, SOCK_STREAM, IPPROTO_TCP)) == -1) {
206 err |= ERR_RETRYABLE | ERR_ALERT;
207 msg = "cannot create listening socket";
208 goto tcp_return;
209 }
210
211 if (fd >= global.maxsock) {
212 err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
213 msg = "not enough free sockets (raise '-n' parameter)";
214 goto tcp_close_return;
215 }
216
217 if ((fcntl(fd, F_SETFL, O_NONBLOCK) == -1) ||
218 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
219 (char *) &one, sizeof(one)) == -1)) {
220 err |= ERR_FATAL | ERR_ALERT;
221 msg = "cannot make socket non-blocking";
222 goto tcp_close_return;
223 }
224
225 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one)) == -1) {
226 /* not fatal but should be reported */
227 msg = "cannot do so_reuseaddr";
228 err |= ERR_ALERT;
229 }
230
231 if (listener->options & LI_O_NOLINGER)
232 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
233
234#ifdef SO_REUSEPORT
235 /* OpenBSD supports this. As it's present in old libc versions of Linux,
236 * it might return an error that we will silently ignore.
237 */
238 setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, (char *) &one, sizeof(one));
239#endif
Willy Tarreaub1e52e82008-01-13 14:49:51 +0100240#ifdef CONFIG_HAP_LINUX_TPROXY
241 if ((listener->options & LI_O_FOREIGN)
Willy Tarreau0a459892008-01-13 17:37:16 +0100242 && (setsockopt(fd, SOL_IP, IP_TRANSPARENT, (char *) &one, sizeof(one)) == -1)
243 && (setsockopt(fd, SOL_IP, IP_FREEBIND, (char *) &one, sizeof(one)) == -1)) {
Willy Tarreaub1e52e82008-01-13 14:49:51 +0100244 msg = "cannot make listening socket transparent";
245 err |= ERR_ALERT;
246 }
247#endif
Willy Tarreaue6b98942007-10-29 01:09:36 +0100248 if (bind(fd, (struct sockaddr *)&listener->addr, listener->proto->sock_addrlen) == -1) {
249 err |= ERR_RETRYABLE | ERR_ALERT;
250 msg = "cannot bind socket";
251 goto tcp_close_return;
252 }
253
Willy Tarreauc73ce2b2008-01-06 10:55:10 +0100254 if (listen(fd, listener->backlog ? listener->backlog : listener->maxconn) == -1) {
Willy Tarreaue6b98942007-10-29 01:09:36 +0100255 err |= ERR_RETRYABLE | ERR_ALERT;
256 msg = "cannot listen to socket";
257 goto tcp_close_return;
258 }
259
260 /* the socket is ready */
261 listener->fd = fd;
262 listener->state = LI_LISTEN;
263
264 /* the function for the accept() event */
265 fd_insert(fd);
266 fdtab[fd].cb[DIR_RD].f = listener->accept;
267 fdtab[fd].cb[DIR_WR].f = NULL; /* never called */
268 fdtab[fd].cb[DIR_RD].b = fdtab[fd].cb[DIR_WR].b = NULL;
269 fdtab[fd].owner = (struct task *)listener; /* reference the listener instead of a task */
270 fdtab[fd].state = FD_STLISTEN;
271 fdtab[fd].peeraddr = NULL;
272 fdtab[fd].peerlen = 0;
273 fdtab[fd].listener = NULL;
Willy Tarreaue6b98942007-10-29 01:09:36 +0100274 tcp_return:
275 if (msg && errlen)
276 strlcpy2(errmsg, msg, errlen);
277 return err;
278
279 tcp_close_return:
280 close(fd);
281 goto tcp_return;
282}
283
284/* This function creates all TCP sockets bound to the protocol entry <proto>.
285 * It is intended to be used as the protocol's bind_all() function.
286 * The sockets will be registered but not added to any fd_set, in order not to
287 * loose them across the fork(). A call to enable_all_listeners() is needed
288 * to complete initialization. The return value is composed from ERR_*.
289 */
290static int tcp_bind_listeners(struct protocol *proto)
291{
292 struct listener *listener;
293 int err = ERR_NONE;
294
295 list_for_each_entry(listener, &proto->listeners, proto_list) {
296 err |= tcp_bind_listener(listener, NULL, 0);
297 if ((err & ERR_CODE) == ERR_ABORT)
298 break;
299 }
300
301 return err;
302}
303
304/* Add listener to the list of tcpv4 listeners. The listener's state
305 * is automatically updated from LI_INIT to LI_ASSIGNED. The number of
306 * listeners is updated. This is the function to use to add a new listener.
307 */
308void tcpv4_add_listener(struct listener *listener)
309{
310 if (listener->state != LI_INIT)
311 return;
312 listener->state = LI_ASSIGNED;
313 listener->proto = &proto_tcpv4;
314 LIST_ADDQ(&proto_tcpv4.listeners, &listener->proto_list);
315 proto_tcpv4.nb_listeners++;
316}
317
318/* Add listener to the list of tcpv4 listeners. The listener's state
319 * is automatically updated from LI_INIT to LI_ASSIGNED. The number of
320 * listeners is updated. This is the function to use to add a new listener.
321 */
322void tcpv6_add_listener(struct listener *listener)
323{
324 if (listener->state != LI_INIT)
325 return;
326 listener->state = LI_ASSIGNED;
327 listener->proto = &proto_tcpv6;
328 LIST_ADDQ(&proto_tcpv6.listeners, &listener->proto_list);
329 proto_tcpv6.nb_listeners++;
330}
331
332__attribute__((constructor))
333static void __tcp_protocol_init(void)
334{
335 protocol_register(&proto_tcpv4);
336 protocol_register(&proto_tcpv6);
337}
338
339
340/*
341 * Local variables:
342 * c-indent-level: 8
343 * c-basic-offset: 8
344 * End:
345 */