| /* |
| * HA-Proxy : High Availability-enabled HTTP/TCP proxy |
| * 2000-2006 - Willy Tarreau - willy AT meta-x DOT org. |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License |
| * as published by the Free Software Foundation; either version |
| * 2 of the License, or (at your option) any later version. |
| * |
| * Please refer to RFC2068 or RFC2616 for information about the HTTP protocol, and |
| * RFC2965 for information about cookie usage. More generally, the IETF HTTP |
| * Working Group's web site should be consulted for protocol-related changes : |
| * |
| * http://ftp.ics.uci.edu/pub/ietf/http/ |
| * |
| * Pending bugs (may not be fixed because they were never reproduced) : |
| * - solaris only : sometimes, an HTTP proxy with only a dispatch address causes |
| * the proxy to terminate (no core) if the client breaks the connection during |
| * the response. Seen on 1.1.8pre4, but never reproduced. May not be related to |
| * the snprintf() bug since requests were simple (GET / HTTP/1.0), but may be |
| * related to missing setsid() (fixed in 1.1.15) |
| * - a proxy with an invalid config will prevent the startup even if disabled. |
| * |
| * ChangeLog has moved to the CHANGELOG file. |
| * |
| * TODO: |
| * - handle properly intermediate incomplete server headers. Done ? |
| * - handle hot-reconfiguration |
| * - fix client/server state transition when server is in connect or headers state |
| * and client suddenly disconnects. The server *should* switch to SHUT_WR, but |
| * still handle HTTP headers. |
| * - remove MAX_NEWHDR |
| * - cut this huge file into several ones |
| * |
| */ |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <unistd.h> |
| #include <string.h> |
| #include <ctype.h> |
| #include <sys/time.h> |
| #include <sys/types.h> |
| #include <sys/socket.h> |
| #include <netinet/tcp.h> |
| #include <netinet/in.h> |
| #include <arpa/inet.h> |
| #include <netdb.h> |
| #include <fcntl.h> |
| #include <errno.h> |
| #include <signal.h> |
| #include <stdarg.h> |
| #include <sys/resource.h> |
| #include <time.h> |
| #include <syslog.h> |
| |
| #ifdef USE_PCRE |
| #include <pcre.h> |
| #include <pcreposix.h> |
| #else |
| #include <regex.h> |
| #endif |
| |
| #if defined(TPROXY) && defined(NETFILTER) |
| #include <linux/netfilter_ipv4.h> |
| #endif |
| |
| #if defined(__dietlibc__) |
| #include <strings.h> |
| #endif |
| |
| #if defined(ENABLE_POLL) |
| #include <sys/poll.h> |
| #endif |
| |
| #if defined(ENABLE_EPOLL) |
| #if !defined(USE_MY_EPOLL) |
| #include <sys/epoll.h> |
| #else |
| #include "include/epoll.h" |
| #endif |
| #endif |
| |
| #ifdef DEBUG_FULL |
| #include <assert.h> |
| #endif |
| |
| #include "include/appsession.h" |
| #include "include/mini-clist.h" |
| |
/* Release identification, used only when the build did not define them. */
#if !defined(HAPROXY_VERSION)
#define HAPROXY_VERSION "1.2.12"
#endif

#if !defined(HAPROXY_DATE)
#define HAPROXY_DATE "2006/04/15"
#endif

/* Fallback for C libraries whose headers lack it (libc5 for example). */
#if !defined(TCP_NODELAY)
#define TCP_NODELAY 1
#endif

/* Fallbacks for the shutdown() direction constants when the system headers
 * do not provide them. */
#if !defined(SHUT_RD)
#define SHUT_RD 0
#endif

#if !defined(SHUT_WR)
#define SHUT_WR 1
#endif
| |
/*
 * BUFSIZE is the size of one read or write buffer, i.e. the maximum number
 * of bytes the proxy can hold for each session. While HTTP headers are being
 * read, part of the buffer must stay free so that headers can be added or
 * rewritten; the size of that reserve is MAXREWRITE. Headers longer than
 * BUFSIZE-MAXREWRITE bytes therefore cannot be processed. With the defaults
 * (BUFSIZE=16384, MAXREWRITE=BUFSIZE/2) the largest accepted header block is
 * 8192 bytes, which is in line with Apache's limits.
 */
#if !defined(BUFSIZE)
#define BUFSIZE 16384
#endif

/* buffer space kept in reserve for header rewriting */
#if !defined(MAXREWRITE)
#define MAXREWRITE (BUFSIZE / 2)
#endif
| |
/* sizes of the fixed-length areas used for the request URI and for captures */
#define REQURI_LEN      1024
#define CAPTURE_LEN     64

/* max # args on a configuration line */
#define MAX_LINE_ARGS   40

/* max # of added headers per request */
#define MAX_NEWHDR      10

/* max # of matches per regexp */
#define MAX_MATCH       10

/*
 * Cookie delimiter in "prefix" mode. This character is inserted between the
 * persistence cookie and the original value. The '~' is allowed by RFC2965,
 * and should not be too common in server names.
 */
#if !defined(COOKIE_DELIM)
#define COOKIE_DELIM    '~'
#endif

/* default number of connect retries */
#define CONN_RETRIES    3

/* health-check defaults (presumably: connect time and interval in ms, then
 * fall/rise counts, then the default HTTP check request -- confirm at the
 * places where they are used) */
#define CHK_CONNTIME    2000
#define DEF_CHKINTR     2000
#define DEF_FALLTIME    3
#define DEF_RISETIME    2
#define DEF_CHECK_REQ   "OPTIONS / HTTP/1.0\r\n\r\n"
| |
/*
 * Default connections limit.
 *
 * Defining SYSTEM_MAXCONN at build time sets an absolute limit accepted by
 * the system, so that haproxy is not used beyond reasonable system limits.
 * A configuration asking for more is capped to SYSTEM_MAXCONN and a warning
 * is emitted; only the command-line '-n' argument can override the cap.
 * When SYSTEM_MAXCONN is not defined, the default limit is 2000.
 */
#ifdef SYSTEM_MAXCONN
#define DEFAULT_MAXCONN SYSTEM_MAXCONN
#else
#define DEFAULT_MAXCONN 2000
#endif
| |
/* how many bits are needed to code the size of an int (eg: 32bits -> 5) */
#define INTBITS 5

/* show stats every <STATTIME> milliseconds, 0 to disable */
#ifndef STATTIME
#define STATTIME 2000
#endif

/* This reduces the number of calls to select() by choosing an appropriate
 * scheduler precision in milliseconds. It should be near the minimum
 * time that is needed by select() to collect all events. All timeouts
 * are rounded up by adding this value prior to passing them to select().
 */
#define SCHEDULER_RESOLUTION 9

/* "no timeout" marker; parenthesized so that the negative literal cannot
 * combine with neighbouring operators after expansion. */
#define TIME_ETERNITY (-1)

/* Returns the lowest delay amongst <old> and <new>, and respects
 * TIME_ETERNITY (any negative value means "never"). Both arguments may be
 * evaluated more than once, so they must not have side effects. */
#define MINTIME(old, new) (((new)<0)?(old):(((old)<0||(new)<(old))?(new):(old)))

/* Copies the variable <now>, as visible at the point of use, into the object
 * pointed to by <a>. The argument is parenthesized so that any pointer
 * expression (eg: ptr + 1) is dereferenced as a whole. */
#define SETNOW(a) (*(a) = now)
| |
| /****** string-specific macros and functions ******/ |
/*
 * UBOUND(a, max) : if <a> is greater than <max>, <a> is set to <max>.
 * LBOUND(a, min) : if <a> is lower than <min>, <a> is set to <min>.
 *
 * Both macros return the (possibly updated) value of <a>. <a> must be a
 * modifiable lvalue and is evaluated several times; the bound is evaluated
 * once and converted to the type of <a>.
 *
 * The temporary uses a name which is unlikely to exist in callers: with a
 * short name such as "b", calling the macro on a variable also named "b"
 * made the temporary shadow the caller's variable, and nothing was bounded.
 * __typeof__ is used instead of typeof because it remains available when
 * compiling in strict ISO C modes.
 */
#define UBOUND(a, max) ({ __typeof__(a) ubound_limit_ = (max); if ((a) > ubound_limit_) (a) = ubound_limit_; (a); })

#define LBOUND(a, min) ({ __typeof__(a) lbound_limit_ = (min); if ((a) < lbound_limit_) (a) = lbound_limit_; (a); })
| |
/* Evaluates to 1 when at most one bit is set in <x> (so <x> is either zero
 * or a power of 2), and to 0 otherwise. <x> is evaluated twice. */
#define POWEROF2(x) (!((x) & ((x) - 1)))
/*
 * Copies at most <size-1> chars from <src> to <dst> and always terminates
 * <dst> with a zero, unless <size> is zero or negative, in which case <dst>
 * is left untouched. Returns the number of chars copied, excluding the
 * terminating zero.
 *
 * A negative <size> used to pass the "non-zero" test and turned the call into
 * a copy bounded only by the length of <src>; it is now treated like zero.
 */
int strlcpy2(char *dst, const char *src, int size)
{
    char *start = dst;

    if (size > 0) {
        /* --size reserves the last byte for the terminating zero */
        while (--size && (*dst = *src)) {
            src++;
            dst++;
        }
        *dst = 0;
    }
    return (int)(dst - start);
}
| |
/*
 * Returns a pointer to an area of <__len> bytes, either taken from the head
 * of the free list <__pool>, or obtained from malloc() when the list is
 * empty. When an area is taken from the list, <__pool> is updated to point
 * to the next free element. A freshly allocated area is never smaller than
 * one pointer. <__pool> must be an lvalue.
 */
#define pool_alloc_from(__pool, __len) ({                                       \
    void *__cell = (__pool);                                                    \
    if (__cell != NULL)                                                         \
        __pool = *(void **)__cell;                                              \
    else                                                                        \
        __cell = malloc(((__len) < sizeof(void *)) ? sizeof(void *) : (__len)); \
    __cell;                                                                     \
})
| |
/*
 * Puts the memory area <__ptr> back at the head of the free list <__pool>.
 * Items are chained directly through a pointer written at the beginning of
 * the memory area, so there is no need for any carrier cell. This implies
 * that each memory area is at least as big as one pointer.
 *
 * <__pool> must be an lvalue. <__ptr> is evaluated exactly once, so an
 * argument with side effects (eg: tab[i++]) is released correctly.
 * The macro evaluates to the new list head.
 */
#define pool_free_to(__pool, __ptr) ({          \
    void *__item = (__ptr);                     \
    *(void **)__item = (void *)(__pool);        \
    __pool = __item;                            \
})
| |
| |
/*
 * Typed pool helpers. For a pool called <type>, they rely on a free-list head
 * named pool_<type> and on a size macro named sizeof_<type>.
 *
 * With MEM_OPTIM (forced just below), freed areas are kept in a per-type
 * list and reused; note that areas handed out this way are not zeroed.
 * Without it, every request goes straight to calloc()/free().
 */
#define MEM_OPTIM
#ifdef MEM_OPTIM
/*
 * Returns a pointer to an area for type <type>, taken from the list
 * pool_<type> or dynamically allocated when the list is empty. In the first
 * case, pool_<type> is updated to point to the next element in the list.
 */
#define pool_alloc(type) ({                             \
    void *__p;                                          \
    if ((__p = pool_##type) == NULL)                    \
        __p = malloc(sizeof_##type);                    \
    else {                                              \
        pool_##type = *(void **)pool_##type;            \
    }                                                   \
    __p;                                                \
})

/*
 * Puts a memory area back to the corresponding pool. Items are chained
 * directly through a pointer that is written in the beginning of the memory
 * area, so there's no need for any carrier cell. This implies that each
 * memory area is at least as big as one pointer.
 *
 * <ptr> is evaluated once and as a whole, so any pointer expression may be
 * passed (the cast used to bind only to its first operand).
 */
#define pool_free(type, ptr) ({                         \
    void *__item = (ptr);                               \
    *(void **)__item = (void *)pool_##type;             \
    pool_##type = __item;                               \
})

#else
/* No trailing semicolon here, so that both macros stay usable inside
 * expressions exactly like their MEM_OPTIM counterparts. */
#define pool_alloc(type) (calloc(1, sizeof_##type))
#define pool_free(type, ptr) (free(ptr))
#endif /* MEM_OPTIM */
| |
/* Element sizes of the typed pools : pool_alloc(<type>) expands to a request
 * for sizeof_<type> bytes. */
#define sizeof_task             (sizeof(struct task))
#define sizeof_session          (sizeof(struct session))
#define sizeof_pendconn         (sizeof(struct pendconn))
#define sizeof_buffer           (sizeof(struct buffer))
#define sizeof_fdtab            (sizeof(struct fdtab))
#define sizeof_requri           (REQURI_LEN)
#define sizeof_capture          (CAPTURE_LEN)
#define sizeof_curappsession    (CAPTURE_LEN)   /* current_session pool */
#define sizeof_appsess          (sizeof(struct appsessions))
| |
/* possible states of a socket */
#define FD_STCLOSE      0
#define FD_STLISTEN     1
#define FD_STCONN       2
#define FD_STREADY      3
#define FD_STERROR      4

/* possible values of task->state */
#define TASK_IDLE       0
#define TASK_RUNNING    1
| |
/* possible values of proxy->state */
#define PR_STNEW        0
#define PR_STIDLE       1
#define PR_STRUN        2
#define PR_STSTOPPED    3
#define PR_STPAUSED     4

/* possible values of proxy->mode */
#define PR_MODE_TCP     0
#define PR_MODE_HTTP    1
#define PR_MODE_HEALTH  2
| |
/* actions which can be requested from the *poll() loops */
#define POLL_LOOP_ACTION_INIT   0
#define POLL_LOOP_ACTION_RUN    1
#define POLL_LOOP_ACTION_CLEAN  2

/* available poll mechanisms, one bit each so that they can be combined */
#define POLL_USE_SELECT         0x01
#define POLL_USE_POLL           0x02
#define POLL_USE_EPOLL          0x04
| |
/* bits for proxy->options, one bit per option */
#define PR_O_REDISP     (1 << 0)    /* allow reconnection to dispatch in case of errors */
#define PR_O_TRANSP     (1 << 1)    /* transparent mode : use original DEST as dispatch */
#define PR_O_COOK_RW    (1 << 2)    /* rewrite all direct cookies with the right serverid */
#define PR_O_COOK_IND   (1 << 3)    /* keep only indirect cookies */
#define PR_O_COOK_INS   (1 << 4)    /* insert cookies when not accessing a server directly */
#define PR_O_COOK_PFX   (1 << 5)    /* rewrite all cookies by prefixing the right serverid */
#define PR_O_BALANCE_RR (1 << 6)    /* balance in round-robin mode */
#define PR_O_KEEPALIVE  (1 << 7)    /* follow keep-alive sessions */
#define PR_O_FWDFOR     (1 << 8)    /* insert x-forwarded-for with client address */
#define PR_O_BIND_SRC   (1 << 9)    /* bind to a specific source address when connect()ing */
#define PR_O_NULLNOLOG  (1 << 10)   /* a connect without request will not be logged */
#define PR_O_COOK_NOC   (1 << 11)   /* add a 'Cache-control' header with the cookie */
#define PR_O_COOK_POST  (1 << 12)   /* don't insert cookies for requests other than a POST */
#define PR_O_HTTP_CHK   (1 << 13)   /* use HTTP 'OPTIONS' method to check server health */
#define PR_O_PERSIST    (1 << 14)   /* server persistence stays effective even when server is down */
#define PR_O_LOGASAP    (1 << 15)   /* log as soon as possible, without waiting for the session to complete */
#define PR_O_HTTP_CLOSE (1 << 16)   /* force 'connection: close' in both directions */
#define PR_O_CHK_CACHE  (1 << 17)   /* require examination of cacheability of the 'set-cookie' field */
#define PR_O_TCP_CLI_KA (1 << 18)   /* enable TCP keep-alive on client-side sessions */
#define PR_O_TCP_SRV_KA (1 << 19)   /* enable TCP keep-alive on server-side sessions */
#define PR_O_USE_ALL_BK (1 << 20)   /* load-balance between backup servers */
#define PR_O_FORCE_CLO  (1 << 21)   /* enforce the connection close immediately after server response */
#define PR_O_BALANCE_SH (1 << 22)   /* balance on source IP hash */

/* masks grouping all the cookie modes and all the balance algorithms */
#define PR_O_COOK_ANY   (PR_O_COOK_RW | PR_O_COOK_IND | PR_O_COOK_INS | PR_O_COOK_PFX)
#define PR_O_BALANCE    (PR_O_BALANCE_RR | PR_O_BALANCE_SH)
| |
/* various session flags, one bit each : values 0x01 to 0x20 */
#define SN_DIRECT       (1 << 0)    /* connection made on the server matching the client cookie */
#define SN_CLDENY       (1 << 1)    /* a client header matches a deny regex */
#define SN_CLALLOW      (1 << 2)    /* a client header matches an allow regex */
#define SN_SVDENY       (1 << 3)    /* a server header matches a deny regex */
#define SN_SVALLOW      (1 << 4)    /* a server header matches an allow regex */
#define SN_POST         (1 << 5)    /* the request was an HTTP POST */

/* client cookie status : a 0-3 value stored at bit 6 (0x40, 0x80) */
#define SN_CK_SHIFT     6                       /* bit shift */
#define SN_CK_NONE      (0 << SN_CK_SHIFT)      /* this session had no cookie */
#define SN_CK_INVALID   (1 << SN_CK_SHIFT)      /* this session had a cookie which matches no server */
#define SN_CK_DOWN      (2 << SN_CK_SHIFT)      /* this session had cookie matching a down server */
#define SN_CK_VALID     (3 << SN_CK_SHIFT)      /* this session had cookie matching a valid server */
#define SN_CK_MASK      (3 << SN_CK_SHIFT)      /* mask to get this session's cookie flags */

/* session termination condition : a 0-7 value stored at bit 8 (0x100 to 0x700) */
#define SN_ERR_SHIFT    8                       /* bit shift */
#define SN_ERR_NONE     (0 << SN_ERR_SHIFT)
#define SN_ERR_CLITO    (1 << SN_ERR_SHIFT)     /* client time-out */
#define SN_ERR_CLICL    (2 << SN_ERR_SHIFT)     /* client closed (read/write error) */
#define SN_ERR_SRVTO    (3 << SN_ERR_SHIFT)     /* server time-out, connect time-out */
#define SN_ERR_SRVCL    (4 << SN_ERR_SHIFT)     /* server closed (connect/read/write error) */
#define SN_ERR_PRXCOND  (5 << SN_ERR_SHIFT)     /* the proxy decided to close (deny...) */
#define SN_ERR_RESOURCE (6 << SN_ERR_SHIFT)     /* the proxy encountered a lack of a local resources (fd, mem, ...) */
#define SN_ERR_INTERNAL (7 << SN_ERR_SHIFT)     /* the proxy encountered an internal error */
#define SN_ERR_MASK     (7 << SN_ERR_SHIFT)     /* mask to get only session error flags */

/* session state at termination : a 0-7 value stored at bit 12 (0x1000 to 0x7000) */
#define SN_FINST_SHIFT  12                      /* bit shift */
#define SN_FINST_R      (1 << SN_FINST_SHIFT)   /* session ended during client request */
#define SN_FINST_C      (2 << SN_FINST_SHIFT)   /* session ended during server connect */
#define SN_FINST_H      (3 << SN_FINST_SHIFT)   /* session ended during server headers */
#define SN_FINST_D      (4 << SN_FINST_SHIFT)   /* session ended during data phase */
#define SN_FINST_L      (5 << SN_FINST_SHIFT)   /* session ended while pushing last data to client */
#define SN_FINST_MASK   (7 << SN_FINST_SHIFT)   /* mask to get only final session state flags */

/* server cookie information, stored at bit 16 (0x10000 to 0x80000) */
#define SN_SCK_SHIFT    16                      /* bit shift */
#define SN_SCK_NONE     (0 << SN_SCK_SHIFT)     /* no set-cookie seen for the server cookie */
#define SN_SCK_DELETED  (1 << SN_SCK_SHIFT)     /* existing set-cookie deleted or changed */
#define SN_SCK_INSERTED (2 << SN_SCK_SHIFT)     /* new set-cookie inserted or changed existing one */
#define SN_SCK_SEEN     (4 << SN_SCK_SHIFT)     /* set-cookie seen for the server cookie */
#define SN_SCK_MASK     (7 << SN_SCK_SHIFT)     /* mask to get the set-cookie field */
#define SN_SCK_ANY      (8 << SN_SCK_SHIFT)     /* at least one set-cookie seen (not to be counted) */

/* cacheability management, stored at bit 20 (0x100000 to 0x300000) */
#define SN_CACHE_SHIFT  20                      /* bit shift */
#define SN_CACHEABLE    (1 << SN_CACHE_SHIFT)   /* at least part of the response is cacheable */
#define SN_CACHE_COOK   (2 << SN_CACHE_SHIFT)   /* a cookie in the response is cacheable */

/* various other session flags, one bit each : values 0x400000 and above */
#define SN_MONITOR      (1 << 22)   /* this session comes from a monitoring system */
#define SN_ASSIGNED     (1 << 23)   /* no need to assign a server to this session */
#define SN_ADDR_SET     (1 << 24)   /* this session's server address has been set */
| |
| |
/* possible states of the client side of a session */
#define CL_STHEADERS    0
#define CL_STDATA       1
#define CL_STSHUTR      2
#define CL_STSHUTW      3
#define CL_STCLOSE      4

/* possible states of the server side of a session */
#define SV_STIDLE       0
#define SV_STCPEND      1
#define SV_STCONN       2
#define SV_STHEADERS    3
#define SV_STDATA       4
#define SV_STSHUTR      5
#define SV_STSHUTW      6
#define SV_STCLOSE      7

/* outcome of an I/O event */
#define RES_SILENT      0   /* didn't happen */
#define RES_DATA        1   /* data were sent or received */
#define RES_NULL        2   /* result is 0 (read == 0), or connect without need for writing */
#define RES_ERROR       3   /* result -1 or error on the socket (eg: connect()) */
| |
/* modes of operation (global.mode), one bit each */
#define MODE_DEBUG      (1 << 0)
#define MODE_STATS      (1 << 1)
#define MODE_LOG        (1 << 2)
#define MODE_DAEMON     (1 << 3)
#define MODE_QUIET      (1 << 4)
#define MODE_CHECK      (1 << 5)
#define MODE_VERBOSE    (1 << 6)
#define MODE_STARTING   (1 << 7)
#define MODE_FOREGROUND (1 << 8)
| |
/* server flags, one bit each */
#define SRV_RUNNING     (1 << 0)    /* the server is UP */
#define SRV_BACKUP      (1 << 1)    /* this server is a backup server */
#define SRV_MAPPORTS    (1 << 2)    /* this server uses mapped ports */
#define SRV_BIND_SRC    (1 << 3)    /* this server uses a specific source address */
#define SRV_CHECKED     (1 << 4)    /* this server needs to be checked */

/* status codes returned by the functions which act on servers */
#define SRV_STATUS_OK       0   /* everything is OK. */
#define SRV_STATUS_INTERNAL 1   /* other unrecoverable errors. */
#define SRV_STATUS_NOSRV    2   /* no server is available */
#define SRV_STATUS_FULL     3   /* the/all server(s) are saturated */
#define SRV_STATUS_QUEUED   4   /* the/all server(s) are saturated but the connection was queued */
| |
/* action to take when a header matches a regex */
#define ACT_ALLOW       0   /* allow the request */
#define ACT_REPLACE     1   /* replace the matching header */
#define ACT_REMOVE      2   /* remove the matching header */
#define ACT_DENY        3   /* deny the request */
#define ACT_PASS        4   /* pass this header without allowing or denying the request */

/* sections of the configuration */
#define CFG_NONE        0
#define CFG_GLOBAL      1
#define CFG_LISTEN      2
| |
/* fields that need to be logged, one bit each. They appear as flags in
 * session->logs.logwait. */
#define LW_DATE     (1 << 0)    /* date */
#define LW_CLIP     (1 << 1)    /* CLient IP */
#define LW_SVIP     (1 << 2)    /* SerVer IP */
#define LW_SVID     (1 << 3)    /* server ID */
#define LW_REQ      (1 << 4)    /* http REQuest */
#define LW_RESP     (1 << 5)    /* http RESPonse */
#define LW_PXIP     (1 << 6)    /* proxy IP */
#define LW_PXID     (1 << 7)    /* proxy ID */
#define LW_BYTES    (1 << 8)    /* bytes read from server */
#define LW_COOKIE   (1 << 9)    /* captured cookie */
#define LW_REQHDR   (1 << 10)   /* request header(s) */
#define LW_RSPHDR   (1 << 11)   /* response header(s) */

/* error classes; the non-zero ones are distinct bits and may be cumulated */
#define ERR_NONE        0x0     /* no error */
#define ERR_RETRYABLE   0x1     /* retryable error, may be cumulated */
#define ERR_FATAL       0x2     /* fatal error, may be cumulated */
| |
| /*********************************************************************/ |
| |
/* yields the address of object <a> as an untyped pointer */
#define LIST_HEAD(a) ((void *)&(a))
| |
| /*********************************************************************/ |
| |
/* description of one header to be captured; elements are chained via <next> */
struct cap_hdr {
    struct cap_hdr *next;   /* next capture in the list */
    char           *name;   /* header name, case insensitive */
    int             namelen;/* length of the header name, to speed-up lookups */
    int             len;    /* capture length, not including terminal zero */
    int             index;  /* index in the output array */
    void           *pool;   /* pool of pre-allocated memory area of (len+1) bytes */
};
| |
/* one regex-based rule applied to headers; rules are chained via <next> */
struct hdr_exp {
    struct hdr_exp *next;       /* next rule in the list */
    regex_t        *preg;       /* expression to look for */
    int             action;     /* what to do on match : one of the ACT_* values */
    char           *replace;    /* expression to set instead */
};
| |
| struct buffer { |
| unsigned int l; /* data length */ |
| char *r, *w, *h, *lr; /* read ptr, write ptr, last header ptr, last read */ |
| char *rlim; /* read limit, used for header rewriting */ |
| unsigned long long total; /* total data read */ |
| char data[BUFSIZE]; |
| }; |
| |
| struct pendconn { |
| struct list list; /* chaining ... */ |
| struct session *sess; /* the session waiting for a connection */ |
| struct server *srv; /* the server we are waiting for */ |
| }; |
| |
| struct server { |
| struct server *next; |
| int state; /* server state (SRV_*) */ |
| int cklen; /* the len of the cookie, to speed up checks */ |
| char *cookie; /* the id set in the cookie */ |
| char *id; /* just for identification */ |
| struct list pendconns; /* pending connections */ |
| int nbpend; /* number of pending connections */ |
| struct sockaddr_in addr; /* the address to connect to */ |
| struct sockaddr_in source_addr; /* the address to which we want to bind for connect() */ |
| short check_port; /* the port to use for the health checks */ |
| int health; /* 0->rise-1 = bad; rise->rise+fall-1 = good */ |
| int rise, fall; /* time in iterations */ |
| int inter; /* time in milliseconds */ |
| int result; /* 0 = connect OK, -1 = connect KO */ |
| int curfd; /* file desc used for current test, or -1 if not in test */ |
| unsigned char uweight, eweight; /* user-specified weight-1, and effective weight-1 */ |
| unsigned int wscore; /* weight score, used during srv map computation */ |
| int cur_sess; /* number of currently active sessions (including syn_sent) */ |
| unsigned int cum_sess; /* cumulated number of sessions really sent to this server */ |
| unsigned int maxconn; /* max # of active sessions. 0 = unlimited. */ |
| struct proxy *proxy; /* the proxy this server belongs to */ |
| }; |
| |
/* The base for all tasks */
struct task {
    struct task *next;              /* chaining ... */
    struct task *prev;              /* chaining ... */
    struct task *rqnext;            /* chaining in run queue ... */
    struct task *wq;                /* the wait queue this task is in */
    int state;                      /* task state : IDLE or RUNNING */
    struct timeval expire;          /* next expiration time for this task, use only for fast sorting */
    int (*process)(struct task *t); /* the function which processes the task */
    void *context;                  /* the task's context */
};
| |
/*
 * One proxied session, with its client side, its server side and its logs.
 * WARNING: if new fields are added, they must be initialized in event_accept()
 */
struct session {
    struct task *task;                  /* the task associated with this session */

    /* application specific below */
    struct timeval crexpire;            /* expiration date for a client read */
    struct timeval cwexpire;            /* expiration date for a client write */
    struct timeval srexpire;            /* expiration date for a server read */
    struct timeval swexpire;            /* expiration date for a server write */
    struct timeval cnexpire;            /* expiration date for a connect */

    /* results of some events */
    char res_cr;
    char res_cw;
    char res_sr;
    char res_sw;

    struct proxy *proxy;                /* the proxy this socket belongs to */
    int cli_fd;                         /* the client side fd */
    int srv_fd;                         /* the server side fd */
    int cli_state;                      /* state of the client side */
    int srv_state;                      /* state of the server side */
    int conn_retries;                   /* number of connect retries left */
    int flags;                          /* some flags describing the session */
    struct buffer *req;                 /* request buffer */
    struct buffer *rep;                 /* response buffer */
    struct sockaddr_storage cli_addr;   /* the client address */
    struct sockaddr_in srv_addr;        /* the address to connect to */
    struct server *srv;                 /* the server being used */
    struct pendconn *pend_pos;          /* if not NULL, points to the position in the pending queue */
    char **req_cap;                     /* array of captured request headers (may be NULL) */
    char **rsp_cap;                     /* array of captured response headers (may be NULL) */

    /* everything which is collected for the logs */
    struct {
        int logwait;                    /* log fields waiting to be collected : LW_* */
        struct timeval tv_accept;       /* date of the accept() (beginning of the session) */
        long t_request;                 /* delay before the end of the request arrives, -1 if never occurs */
        long t_queue;                   /* delay before the session gets out of the connect queue, -1 if never occurs */
        long t_connect;                 /* delay before the connect() to the server succeeds, -1 if never occurs */
        long t_data;                    /* delay before the first data byte from the server ... */
        unsigned long t_close;          /* total session duration */
        unsigned long queue_size;       /* overall number of sessions waiting for a connect slot on this instance at accept() time */
        char *uri;                      /* first line if log needed, NULL otherwise */
        char *cli_cookie;               /* cookie presented by the client, in capture mode */
        char *srv_cookie;               /* cookie presented by the server, in capture mode */
        int status;                     /* HTTP status from the server, negative if from proxy */
        long long bytes;                /* number of bytes transferred from the server */
    } logs;

    unsigned int uniq_id;               /* unique ID used for the traces */
};
| |
/* one listening address of a proxy; listeners are chained via <next> */
struct listener {
    int                     fd;     /* the listen socket */
    struct sockaddr_storage addr;   /* the address we listen to */
    struct listener        *next;   /* next address or NULL */
};
| |
| struct proxy { |
| struct listener *listen; /* the listen addresses and sockets */ |
| struct in_addr mon_net, mon_mask; /* don't forward connections from this net (network order) FIXME: should support IPv6 */ |
| int state; /* proxy state */ |
| struct sockaddr_in dispatch_addr; /* the default address to connect to */ |
| struct server *srv; /* known servers */ |
| int srv_act, srv_bck; /* # of running servers */ |
| int tot_wact, tot_wbck; /* total weights of active and backup servers */ |
| struct server **srv_map; /* the server map used to apply weights */ |
| int srv_map_sz; /* the size of the effective server map */ |
| int srv_rr_idx; /* next server to be elected in round robin mode */ |
| char *cookie_name; /* name of the cookie to look for */ |
| int cookie_len; /* strlen(cookie_name), computed only once */ |
| char *appsession_name; /* name of the cookie to look for */ |
| int appsession_name_len; /* strlen(appsession_name), computed only once */ |
| int appsession_len; /* length of the appsession cookie value to be used */ |
| int appsession_timeout; |
| CHTbl htbl_proxy; /* Per Proxy hashtable */ |
| char *capture_name; /* beginning of the name of the cookie to capture */ |
| int capture_namelen; /* length of the cookie name to match */ |
| int capture_len; /* length of the string to be captured */ |
| int clitimeout; /* client I/O timeout (in milliseconds) */ |
| int srvtimeout; /* server I/O timeout (in milliseconds) */ |
| int contimeout; /* connect timeout (in milliseconds) */ |
| char *id; /* proxy id */ |
| struct list pendconns; /* pending connections with no server assigned yet */ |
| int nbpend; /* number of pending connections with no server assigned yet */ |
| int totpend; /* total number of pending connections on this instance (for stats) */ |
| int nbconn; /* # of active sessions */ |
| unsigned int cum_conn; /* cumulated number of processed sessions */ |
| int maxconn; /* max # of active sessions */ |
| int conn_retries; /* maximum number of connect retries */ |
| int options; /* PR_O_REDISP, PR_O_TRANSP, ... */ |
| int mode; /* mode = PR_MODE_TCP, PR_MODE_HTTP or PR_MODE_HEALTH */ |
| struct sockaddr_in source_addr; /* the address to which we want to bind for connect() */ |
| struct proxy *next; |
| struct sockaddr_in logsrv1, logsrv2; /* 2 syslog servers */ |
| signed char logfac1, logfac2; /* log facility for both servers. -1 = disabled */ |
| int loglev1, loglev2; /* log level for each server, 7 by default */ |
| int to_log; /* things to be logged (LW_*) */ |
| struct timeval stop_time; /* date to stop listening, when stopping != 0 */ |
| int nb_reqadd, nb_rspadd; |
| struct hdr_exp *req_exp; /* regular expressions for request headers */ |
| struct hdr_exp *rsp_exp; /* regular expressions for response headers */ |
| int nb_req_cap, nb_rsp_cap; /* # of headers to be captured */ |
| struct cap_hdr *req_cap; /* chained list of request headers to be captured */ |
| struct cap_hdr *rsp_cap; /* chained list of response headers to be captured */ |
| void *req_cap_pool, *rsp_cap_pool; /* pools of pre-allocated char ** used to build the sessions */ |
| char *req_add[MAX_NEWHDR], *rsp_add[MAX_NEWHDR]; /* headers to be added */ |
| int grace; /* grace time after stop request */ |
| char *check_req; /* HTTP request to use if PR_O_HTTP_CHK is set, else NULL */ |
| int check_len; /* Length of the HTTP request */ |
| struct { |
| char *msg400; /* message for error 400 */ |
| int len400; /* message length for error 400 */ |
| char *msg403; /* message for error 403 */ |
| int len403; /* message length for error 403 */ |
| char *msg408; /* message for error 408 */ |
| int len408; /* message length for error 408 */ |
| char *msg500; /* message for error 500 */ |
| int len500; /* message length for error 500 */ |
| char *msg502; /* message for error 502 */ |
| int len502; /* message length for error 502 */ |
| char *msg503; /* message for error 503 */ |
| int len503; /* message length for error 503 */ |
| char *msg504; /* message for error 504 */ |
| int len504; /* message length for error 504 */ |
| } errmsg; |
| }; |
| |
/* what is known about one given fd */
struct fdtab {
    int        (*read)(int fd);     /* read function */
    int        (*write)(int fd);    /* write function */
    struct task *owner;             /* the session (or proxy) associated with this fd */
    int          state;             /* the state of this fd */
};
| |
| /*********************************************************************/ |
| |
| int cfg_maxpconn = DEFAULT_MAXCONN; /* # of simultaneous connections per proxy (-N) */ |
| int cfg_maxconn = 0; /* # of simultaneous connections, (-n) */ |
| char *cfg_cfgfile = NULL; /* configuration file */ |
| char *progname = NULL; /* program name */ |
| int pid; /* current process id */ |
| |
/*
 * Global options. Only the log facilities and log levels get explicit initial
 * values; every other field starts as zero / NULL.
 *
 * The initializer uses standard C99 designated initializers (.field = value)
 * instead of the obsolete GNU "field : value" form.
 */
static struct {
    int uid;
    int gid;
    int nbproc;
    int maxconn;
    int maxsock;                /* max # of sockets */
    int rlimit_nofile;          /* default ulimit-n value : 0=unset */
    int rlimit_memmax;          /* default ulimit-d in megs value : 0=unset */
    int mode;                   /* modes of operation : MODE_* */
    char *chroot;
    char *pidfile;
    int logfac1, logfac2;       /* log facilities; initialized to -1 */
    int loglev1, loglev2;       /* log levels; initialized to 7 */
    struct sockaddr_in logsrv1, logsrv2;
} global = {
    .logfac1 = -1,
    .logfac2 = -1,
    .loglev1 = 7,               /* max syslog level : debug */
    .loglev2 = 7,
    /* others NULL OK */
};
| |
| /*********************************************************************/ |
| |
/* fd sets of the events to monitor, for reads and for writes */
fd_set *StaticReadEvent;
fd_set *StaticWriteEvent;

/* POLL_USE_{SELECT|POLL|EPOLL} */
int cfg_polling_mechanism = 0;
| |
/* heads of the free lists used by pool_alloc()/pool_free(), one per type;
 * all of them start empty */
void **pool_session  = NULL;
void **pool_pendconn = NULL;
void **pool_buffer   = NULL;
void **pool_fdtab    = NULL;
void **pool_requri   = NULL;
void **pool_task     = NULL;
void **pool_capture  = NULL;
void **pool_appsess  = NULL;
| |
| struct proxy *proxy = NULL; /* list of all existing proxies */ |
| struct fdtab *fdtab = NULL; /* array of all the file descriptors */ |
| struct task *rq = NULL; /* global run queue */ |
| struct task wait_queue[2] = { /* global wait queue */ |
| { |
| prev:LIST_HEAD(wait_queue[0]), /* expirable tasks */ |
| next:LIST_HEAD(wait_queue[0]), |
| }, |
| { |
| prev:LIST_HEAD(wait_queue[1]), /* non-expirable tasks */ |
| next:LIST_HEAD(wait_queue[1]), |
| }, |
| }; |
| |
| static int totalconn = 0; /* total # of terminated sessions */ |
| static int actconn = 0; /* # of active sessions */ |
| static int maxfd = 0; /* # of the highest fd + 1 */ |
| static int listeners = 0; /* # of listeners */ |
| static int stopping = 0; /* non zero means stopping in progress */ |
| static struct timeval now = {0,0}; /* the current date at any moment */ |
| static struct proxy defproxy; /* fake proxy used to assign default values on all instances */ |
| |
| /* Here we store informations about the pids of the processes we may pause |
| * or kill. We will send them a signal every 10 ms until we can bind to all |
| * our ports. With 200 retries, that's about 2 seconds. |
| */ |
| #define MAX_START_RETRIES 200 |
| static int nb_oldpids = 0; |
| static int *oldpids = NULL; |
| static int oldpids_sig; /* use USR1 or TERM */ |
| |
| #if defined(ENABLE_EPOLL) |
| /* FIXME: this is dirty, but at the moment, there's no other solution to remove |
| * the old FDs from outside the loop. Perhaps we should export a global 'poll' |
| * structure with pointers to functions such as init_fd() and close_fd(), plus |
| * a private structure with several pointers to places such as below. |
| */ |
| |
| static fd_set *PrevReadEvent = NULL, *PrevWriteEvent = NULL; |
| #endif |
| |
| static regmatch_t pmatch[MAX_MATCH]; /* rm_so, rm_eo for regular expressions */ |
| /* this is used to drain data, and as a temporary buffer for sprintf()... */ |
| static char trash[BUFSIZE]; |
| |
| const int zero = 0; |
| const int one = 1; |
| |
/*
 * Syslog facilities and levels, as per RFC3164. The position of a name in
 * each table is its numeric code.
 */
#define MAX_SYSLOG_LEN          1024
#define NB_LOG_FACILITIES       24
const char *log_facilities[NB_LOG_FACILITIES] = {
    "kern",   "user",   "mail",   "daemon", "auth",   "syslog",
    "lpr",    "news",   "uucp",   "cron",   "auth2",  "ftp",
    "ntp",    "audit",  "alert",  "cron2",  "local0", "local1",
    "local2", "local3", "local4", "local5", "local6", "local7"
};

#define NB_LOG_LEVELS           8
const char *log_levels[NB_LOG_LEVELS] = {
    "emerg",
    "alert",
    "crit",
    "err",
    "warning",
    "notice",
    "info",
    "debug"
};

#define SYSLOG_PORT             514

/* abbreviated month names, January first */
const char *monthname[12] = {
    "Jan", "Feb", "Mar", "Apr",
    "May", "Jun", "Jul", "Aug",
    "Sep", "Oct", "Nov", "Dec"
};

/* One-letter session flags. These arrays are exactly as long as their
 * strings, so they are NOT zero-terminated.
 */

/* normal, CliTo, CliErr, SrvTo, SrvErr, PxErr, Resource, Internal */
const char sess_term_cond[8] = "-cCsSPRI";

/* cliRequest, srvConnect, srvHeader, Data, Last, unknown */
const char sess_fin_state[8] = "-RCHDL67";

/* No cookie, Invalid cookie, cookie for a Down server, Valid cookie */
const char sess_cookie[4] = "NIDV";

/* No set-cookie, unknown, Set-Cookie Inserted, unknown,
 * Set-cookie seen and left unchanged (passive), Set-cookie Deleted,
 * unknown, Set-cookie Rewritten
 */
const char sess_set_cookie[8] = "N1I3PD5R";

#define MAX_HOSTNAME_LEN        32
static char hostname[MAX_HOSTNAME_LEN] = "";
| |
/* Canned HTTP messages.
 *
 * The two redirects end with an unterminated "Location: " header because the
 * target URL is appended to them. All the other messages are complete
 * responses : status line, headers, empty line, then a small HTML body.
 */
const char *HTTP_302 =
    "HTTP/1.0 302 Found\r\n"
    "Cache-Control: no-cache\r\n"
    "Connection: close\r\n"
    "Location: "; /* not terminated since it will be concatenated with the URL */

/* same as 302 except that the browser MUST retry with the GET method */
const char *HTTP_303 =
    "HTTP/1.0 303 See Other\r\n"
    "Cache-Control: no-cache\r\n"
    "Connection: close\r\n"
    "Location: "; /* not terminated since it will be concatenated with the URL */

const char *HTTP_400 =
    "HTTP/1.0 400 Bad request\r\n"
    "Cache-Control: no-cache\r\n"
    "Connection: close\r\n"
    "\r\n"
    "<html><body><h1>400 Bad request</h1>\nYour browser sent an invalid request.\n</body></html>\n";

const char *HTTP_403 =
    "HTTP/1.0 403 Forbidden\r\n"
    "Cache-Control: no-cache\r\n"
    "Connection: close\r\n"
    "\r\n"
    "<html><body><h1>403 Forbidden</h1>\nRequest forbidden by administrative rules.\n</body></html>\n";

const char *HTTP_408 =
    "HTTP/1.0 408 Request Time-out\r\n"
    "Cache-Control: no-cache\r\n"
    "Connection: close\r\n"
    "\r\n"
    "<html><body><h1>408 Request Time-out</h1>\nYour browser didn't send a complete request in time.\n</body></html>\n";

/* the body used to read "occured" : spelling fixed */
const char *HTTP_500 =
    "HTTP/1.0 500 Server Error\r\n"
    "Cache-Control: no-cache\r\n"
    "Connection: close\r\n"
    "\r\n"
    "<html><body><h1>500 Server Error</h1>\nAn internal server error occurred.\n</body></html>\n";

const char *HTTP_502 =
    "HTTP/1.0 502 Bad Gateway\r\n"
    "Cache-Control: no-cache\r\n"
    "Connection: close\r\n"
    "\r\n"
    "<html><body><h1>502 Bad Gateway</h1>\nThe server returned an invalid or incomplete response.\n</body></html>\n";

const char *HTTP_503 =
    "HTTP/1.0 503 Service Unavailable\r\n"
    "Cache-Control: no-cache\r\n"
    "Connection: close\r\n"
    "\r\n"
    "<html><body><h1>503 Service Unavailable</h1>\nNo server is available to handle this request.\n</body></html>\n";

const char *HTTP_504 =
    "HTTP/1.0 504 Gateway Time-out\r\n"
    "Cache-Control: no-cache\r\n"
    "Connection: close\r\n"
    "\r\n"
    "<html><body><h1>504 Gateway Time-out</h1>\nThe server didn't respond in time.\n</body></html>\n";
| |
| /*********************************************************************/ |
| /* statistics ******************************************************/ |
| /*********************************************************************/ |
| |
#if STATTIME > 0
/* counters incremented by task_queue() when statistics are compiled in */
static int stats_tsk_lsrch;
static int stats_tsk_rsrch;
static int stats_tsk_good;
static int stats_tsk_right;
static int stats_tsk_left;
static int stats_tsk_new;
static int stats_tsk_nsrch;
#endif


/*********************************************************************/
/*  debugging  *******************************************************/
/*********************************************************************/
#ifdef DEBUG_FULL
/* short state names, only built with DEBUG_FULL */
static char *cli_stnames[5] = {
    "HDR", "DAT", "SHR", "SHW", "CLS"
};
static char *srv_stnames[8] = {
    "IDL", "PND", "CON", "HDR", "DAT", "SHR", "SHW", "CLS"
};
#endif
| |
| /*********************************************************************/ |
| /* function prototypes *********************************************/ |
| /*********************************************************************/ |
| |
/* Forward declarations. The definitions are not part of this section of the
 * file. NOTE(review): the event_* names suggest per-fd I/O callbacks and
 * process_session() a task handler -- confirm against the definitions.
 */
int event_accept(int fd);
int event_cli_read(int fd);
int event_cli_write(int fd);
int event_srv_read(int fd);
int event_srv_write(int fd);
int process_session(struct task *t);

/* application session helpers, private to this file */
static int appsession_task_init(void);
static int appsession_init(void);
static int appsession_refresh(struct task *t);
| |
| /*********************************************************************/ |
| /* general purpose functions ***************************************/ |
| /*********************************************************************/ |
| |
| void display_version() { |
| printf("HA-Proxy version " HAPROXY_VERSION " " HAPROXY_DATE"\n"); |
| printf("Copyright 2000-2006 Willy Tarreau <w@w.ods.org>\n\n"); |
| } |
| |
| /* |
| * This function prints the command line usage and exits |
| */ |
| void usage(char *name) { |
| display_version(); |
| fprintf(stderr, |
| "Usage : %s -f <cfgfile> [ -vdV" |
| #if STATTIME > 0 |
| "sl" |
| #endif |
| "D ] [ -n <maxconn> ] [ -N <maxpconn> ]\n" |
| " [ -p <pidfile> ] [ -m <max megs> ]\n" |
| " -v displays version\n" |
| " -d enters debug mode ; -db only disables background mode.\n" |
| " -V enters verbose mode (disables quiet mode)\n" |
| #if STATTIME > 0 |
| " -s enables statistics output\n" |
| " -l enables long statistics format\n" |
| #endif |
| " -D goes daemon ; implies -q\n" |
| " -q quiet mode : don't display messages\n" |
| " -c check mode : only check config file and exit\n" |
| " -n sets the maximum total # of connections (%d)\n" |
| " -m limits the usable amount of memory (in MB)\n" |
| " -N sets the default, per-proxy maximum # of connections (%d)\n" |
| " -p writes pids of all children to this file\n" |
| #if defined(ENABLE_EPOLL) |
| " -de disables epoll() usage even when available\n" |
| #endif |
| #if defined(ENABLE_POLL) |
| " -dp disables poll() usage even when available\n" |
| #endif |
| " -sf/-st [pid ]* finishes/terminates old pids. Must be last arguments.\n" |
| "\n", |
| name, DEFAULT_MAXCONN, cfg_maxpconn); |
| exit(1); |
| } |
| |
| |
| /* |
| * Displays the message on stderr with the date and pid. Overrides the quiet |
| * mode during startup. |
| */ |
| void Alert(char *fmt, ...) { |
| va_list argp; |
| struct timeval tv; |
| struct tm *tm; |
| |
| if (!(global.mode & MODE_QUIET) || (global.mode & (MODE_VERBOSE | MODE_STARTING))) { |
| va_start(argp, fmt); |
| |
| gettimeofday(&tv, NULL); |
| tm=localtime(&tv.tv_sec); |
| fprintf(stderr, "[ALERT] %03d/%02d%02d%02d (%d) : ", |
| tm->tm_yday, tm->tm_hour, tm->tm_min, tm->tm_sec, (int)getpid()); |
| vfprintf(stderr, fmt, argp); |
| fflush(stderr); |
| va_end(argp); |
| } |
| } |
| |
| |
| /* |
| * Displays the message on stderr with the date and pid. |
| */ |
| void Warning(char *fmt, ...) { |
| va_list argp; |
| struct timeval tv; |
| struct tm *tm; |
| |
| if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) { |
| va_start(argp, fmt); |
| |
| gettimeofday(&tv, NULL); |
| tm=localtime(&tv.tv_sec); |
| fprintf(stderr, "[WARNING] %03d/%02d%02d%02d (%d) : ", |
| tm->tm_yday, tm->tm_hour, tm->tm_min, tm->tm_sec, (int)getpid()); |
| vfprintf(stderr, fmt, argp); |
| fflush(stderr); |
| va_end(argp); |
| } |
| } |
| |
| /* |
| * Displays the message on <out> only if quiet mode is not set. |
| */ |
| void qfprintf(FILE *out, char *fmt, ...) { |
| va_list argp; |
| |
| if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) { |
| va_start(argp, fmt); |
| vfprintf(out, fmt, argp); |
| fflush(out); |
| va_end(argp); |
| } |
| } |
| |
| |
/*
 * Converts <str>, in the form "addr:port", to an IPv4 socket address.
 *  - "addr" may be empty or start with '*', meaning INADDR_ANY ; it may be
 *    a dotted address or a host name, which is then resolved ;
 *  - the port follows the LAST colon and is 0 when there is no colon.
 * When the name cannot be resolved, an alert is emitted and the address is
 * left to zero. The result is a pointer to a static structure which is
 * overwritten by the next call. <str> itself is not modified.
 */
struct sockaddr_in *str2sa(char *str) {
    static struct sockaddr_in sa;
    char *copy, *colon;
    int port = 0;

    memset(&sa, 0, sizeof(sa));
    copy = strdup(str);         /* private copy which we may cut */

    colon = strrchr(copy, ':');
    if (colon != NULL) {
        *colon = 0;
        port = atol(colon + 1);
    }

    if (*copy == '*' || *copy == '\0') {
        /* INADDR_ANY */
        sa.sin_addr.s_addr = INADDR_ANY;
    }
    else if (!inet_pton(AF_INET, copy, &sa.sin_addr)) {
        /* not a dotted address, so try it as a host name */
        struct hostent *he = gethostbyname(copy);

        if (he != NULL)
            sa.sin_addr = *(struct in_addr *) *(he->h_addr_list);
        else
            Alert("Invalid server name: '%s'\n", copy);
    }

    sa.sin_port = htons(port);
    sa.sin_family = AF_INET;

    free(copy);
    return &sa;
}
| |
/*
 * Converts <str> to a network address and a mask, stored into <addr> and
 * <mask>, which are provided by the caller and always cleared first.
 * The format is "addr[/mask]", where "addr" cannot be empty, and the mask is
 * optional and either in dotted notation or a prefix length from 0 to 32.
 * Without a mask, all mask bits are set. "addr" can also be a host name,
 * which is then resolved. <str> itself is not modified.
 * Returns 1 if OK, 0 on error (bad mask, unknown host or lack of memory).
 * The working copy of the string is released on every return path.
 */
int str2net(char *str, struct in_addr *addr, struct in_addr *mask) {
    char *copy, *slash;
    int ret = 0;

    memset(mask, 0, sizeof(*mask));
    memset(addr, 0, sizeof(*addr));

    copy = strdup(str);
    if (copy == NULL)
        return 0;

    slash = strrchr(copy, '/');
    if (slash != NULL) {
        char *spec = slash + 1;     /* points to the mask */

        *slash = '\0';
        if (strchr(spec, '.') != NULL) {    /* dotted notation */
            if (!inet_pton(AF_INET, spec, mask))
                goto out;
        }
        else {                              /* mask length */
            char *end;
            long bits = strtol(spec, &end, 10);

            /* reject empty, trailing garbage and out-of-range lengths */
            if (*spec == '\0' || *end != '\0' || bits < 0 || bits > 32)
                goto out;

            /* a shift by 32 must be avoided, hence the special case */
            if (bits)
                mask->s_addr = htonl((0xFFFFFFFFUL << (32 - bits)) & 0xFFFFFFFFUL);
            else
                mask->s_addr = 0;
        }
    }
    else {
        mask->s_addr = 0xFFFFFFFF;
    }

    if (!inet_pton(AF_INET, copy, addr)) {
        /* not a dotted address, so try it as a host name */
        struct hostent *he = gethostbyname(copy);

        if (he == NULL)
            goto out;
        *addr = *(struct in_addr *) *(he->h_addr_list);
    }
    ret = 1;
 out:
    free(copy);
    return ret;
}
| |
| |
| /* |
| * converts <str> to a list of listeners which are dynamically allocated. |
| * The format is "{addr|'*'}:port[-end][,{addr|'*'}:port[-end]]*", where : |
| * - <addr> can be empty or "*" to indicate INADDR_ANY ; |
| * - <port> is a numerical port from 1 to 65535 ; |
| * - <end> indicates to use the range from <port> to <end> instead (inclusive). |
| * This can be repeated as many times as necessary, separated by a coma. |
| * The <tail> argument is a pointer to a current list which should be appended |
| * to the tail of the new list. The pointer to the new list is returned. |
| */ |
| struct listener *str2listener(char *str, struct listener *tail) { |
| struct listener *l; |
| char *c, *next, *range, *dupstr; |
| int port, end; |
| |
| next = dupstr = strdup(str); |
| |
| while (next && *next) { |
| struct sockaddr_storage ss; |
| |
| str = next; |
| /* 1) look for the end of the first address */ |
| if ((next = strrchr(str, ',')) != NULL) { |
| *next++ = 0; |
| } |
| |
| /* 2) look for the addr/port delimiter, it's the last colon. */ |
| if ((range = strrchr(str, ':')) == NULL) { |
| Alert("Missing port number: '%s'\n", str); |
| goto fail; |
| } |
| |
| *range++ = 0; |
| |
| if (strrchr(str, ':') != NULL) { |
| /* IPv6 address contains ':' */ |
| memset(&ss, 0, sizeof(ss)); |
| ss.ss_family = AF_INET6; |
| |
| if (!inet_pton(ss.ss_family, str, &((struct sockaddr_in6 *)&ss)->sin6_addr)) { |
| Alert("Invalid server address: '%s'\n", str); |
| goto fail; |
| } |
| } |
| else { |
| memset(&ss, 0, sizeof(ss)); |
| ss.ss_family = AF_INET; |
| |
| if (*str == '*' || *str == '\0') { /* INADDR_ANY */ |
| ((struct sockaddr_in *)&ss)->sin_addr.s_addr = INADDR_ANY; |
| } |
| else if (!inet_pton(ss.ss_family, str, &((struct sockaddr_in *)&ss)->sin_addr)) { |
| struct hostent *he; |
| |
| if ((he = gethostbyname(str)) == NULL) { |
| Alert("Invalid server name: '%s'\n", str); |
| goto fail; |
| } |
| else |
| ((struct sockaddr_in *)&ss)->sin_addr = |
| *(struct in_addr *) *(he->h_addr_list); |
| } |
| } |
| |
| /* 3) look for the port-end delimiter */ |
| if ((c = strchr(range, '-')) != NULL) { |
| *c++ = 0; |
| end = atol(c); |
| } |
| else { |
| end = atol(range); |
| } |
| |
| port = atol(range); |
| |
| if (port < 1 || port > 65535) { |
| Alert("Invalid port '%d' specified for address '%s'.\n", port, str); |
| goto fail; |
| } |
| |
| if (end < 1 || end > 65535) { |
| Alert("Invalid port '%d' specified for address '%s'.\n", end, str); |
| goto fail; |
| } |
| |
| for (; port <= end; port++) { |
| l = (struct listener *)calloc(1, sizeof(struct listener)); |
| l->next = tail; |
| tail = l; |
| |
| l->fd = -1; |
| l->addr = ss; |
| if (ss.ss_family == AF_INET6) |
| ((struct sockaddr_in6 *)(&l->addr))->sin6_port = htons(port); |
| else |
| ((struct sockaddr_in *)(&l->addr))->sin_port = htons(port); |
| |
| } /* end for(port) */ |
| } /* end while(next) */ |
| free(dupstr); |
| return tail; |
| fail: |
| free(dupstr); |
| return NULL; |
| } |
| |
| |
#define FD_SETS_ARE_BITFIELDS
#ifdef FD_SETS_ARE_BITFIELDS
/*
 * These maps are used with all the FD_* macros to check whether a particular
 * bit is set or not. Each bit represents an ASCII code. FD_SET() sets those
 * bytes which should be encoded. When FD_ISSET() returns non-zero, it means
 * that the byte should be encoded. Be careful to always pass bytes from 0 to
 * 255 exclusively to the macros.
 *
 * Each map needs 256 bits, i.e. 256/8 bytes : the array holds a single fd_set
 * when one fd_set is larger than that, otherwise as many as fit in 256/8
 * bytes.
 */
fd_set hdr_encode_map[(sizeof(fd_set) > (256/8)) ? 1 : ((256/8) / sizeof(fd_set))];
fd_set url_encode_map[(sizeof(fd_set) > (256/8)) ? 1 : ((256/8) / sizeof(fd_set))];

#else
#error "Check if your OS uses bitfields for fd_sets"
#endif
| |
/* Tries to encode the string <string>, replacing all characters tagged in
 * <map> with the hexadecimal representation of their ASCII code (2 upper
 * case digits) prefixed by <escape>. The result is stored between <start>
 * (included) and <stop> (excluded), and is always terminated with a '\0'
 * before <stop>. The position of the '\0' is returned. If bytes are missing
 * between <start> and <stop>, then the conversion is incomplete and
 * truncated ; an escape sequence is never cut : it is either emitted
 * entirely (an exact fit is accepted) or the conversion stops before it.
 * If <stop> <= <start>, the '\0' cannot even be stored so <start> is
 * returned without writing anything.
 * The input string must also be zero-terminated.
 */
char hextab[16] = "0123456789ABCDEF";
char *encode_string(char *start, char *stop,
                    const char escape, const fd_set *map,
                    const char *string)
{
    if (start < stop) {
        stop--; /* reserve one byte for the final '\0' */
        while (start < stop && *string != '\0') {
            /* work on the unsigned value so that bytes above 127 index
             * the map and the hex table correctly.
             */
            unsigned char ch = (unsigned char)*string;

            if (!FD_ISSET(ch, map))
                *start++ = *string;
            else {
                /* 3 bytes are needed ; compare lengths rather than
                 * pointers to never build one beyond the buffer.
                 */
                if (stop - start < 3)
                    break;
                *start++ = escape;
                *start++ = hextab[ch >> 4];
                *start++ = hextab[ch & 15];
            }
            string++;
        }
        *start = '\0';
    }
    return start;
}
| |
| /* |
| * This function sends a syslog message to both log servers of a proxy, |
| * or to global log servers if the proxy is NULL. |
| * It also tries not to waste too much time computing the message header. |
| * It doesn't care about errors nor does it report them. |
| */ |
| void send_log(struct proxy *p, int level, char *message, ...) { |
| static int logfd = -1; /* syslog UDP socket */ |
| static long tvsec = -1; /* to force the string to be initialized */ |
| struct timeval tv; |
| va_list argp; |
| static char logmsg[MAX_SYSLOG_LEN]; |
| static char *dataptr = NULL; |
| int fac_level; |
| int hdr_len, data_len; |
| struct sockaddr_in *sa[2]; |
| int facilities[2], loglevel[2]; |
| int nbloggers = 0; |
| char *log_ptr; |
| |
| if (logfd < 0) { |
| if ((logfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0) |
| return; |
| } |
| |
| if (level < 0 || progname == NULL || message == NULL) |
| return; |
| |
| gettimeofday(&tv, NULL); |
| if (tv.tv_sec != tvsec || dataptr == NULL) { |
| /* this string is rebuild only once a second */ |
| struct tm *tm = localtime(&tv.tv_sec); |
| tvsec = tv.tv_sec; |
| |
| hdr_len = snprintf(logmsg, sizeof(logmsg), |
| "<<<<>%s %2d %02d:%02d:%02d %s[%d]: ", |
| monthname[tm->tm_mon], |
| tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec, |
| progname, pid); |
| /* WARNING: depending upon implementations, snprintf may return |
| * either -1 or the number of bytes that would be needed to store |
| * the total message. In both cases, we must adjust it. |
| */ |
| if (hdr_len < 0 || hdr_len > sizeof(logmsg)) |
| hdr_len = sizeof(logmsg); |
| |
| dataptr = logmsg + hdr_len; |
| } |
| |
| va_start(argp, message); |
| data_len = vsnprintf(dataptr, logmsg + sizeof(logmsg) - dataptr, message, argp); |
| if (data_len < 0 || data_len > (logmsg + sizeof(logmsg) - dataptr)) |
| data_len = logmsg + sizeof(logmsg) - dataptr; |
| va_end(argp); |
| dataptr[data_len - 1] = '\n'; /* force a break on ultra-long lines */ |
| |
| if (p == NULL) { |
| if (global.logfac1 >= 0) { |
| sa[nbloggers] = &global.logsrv1; |
| facilities[nbloggers] = global.logfac1; |
| loglevel[nbloggers] = global.loglev1; |
| nbloggers++; |
| } |
| if (global.logfac2 >= 0) { |
| sa[nbloggers] = &global.logsrv2; |
| facilities[nbloggers] = global.logfac2; |
| loglevel[nbloggers] = global.loglev2; |
| nbloggers++; |
| } |
| } else { |
| if (p->logfac1 >= 0) { |
| sa[nbloggers] = &p->logsrv1; |
| facilities[nbloggers] = p->logfac1; |
| loglevel[nbloggers] = p->loglev1; |
| nbloggers++; |
| } |
| if (p->logfac2 >= 0) { |
| sa[nbloggers] = &p->logsrv2; |
| facilities[nbloggers] = p->logfac2; |
| loglevel[nbloggers] = p->loglev2; |
| nbloggers++; |
| } |
| } |
| |
| while (nbloggers-- > 0) { |
| /* we can filter the level of the messages that are sent to each logger */ |
| if (level > loglevel[nbloggers]) |
| continue; |
| |
| /* For each target, we may have a different facility. |
| * We can also have a different log level for each message. |
| * This induces variations in the message header length. |
| * Since we don't want to recompute it each time, nor copy it every |
| * time, we only change the facility in the pre-computed header, |
| * and we change the pointer to the header accordingly. |
| */ |
| fac_level = (facilities[nbloggers] << 3) + level; |
| log_ptr = logmsg + 3; /* last digit of the log level */ |
| do { |
| *log_ptr = '0' + fac_level % 10; |
| fac_level /= 10; |
| log_ptr--; |
| } while (fac_level && log_ptr > logmsg); |
| *log_ptr = '<'; |
| |
| /* the total syslog message now starts at logptr, for dataptr+data_len-logptr */ |
| |
| #ifndef MSG_NOSIGNAL |
| sendto(logfd, log_ptr, dataptr + data_len - log_ptr, MSG_DONTWAIT, |
| (struct sockaddr *)sa[nbloggers], sizeof(**sa)); |
| #else |
| sendto(logfd, log_ptr, dataptr + data_len - log_ptr, MSG_DONTWAIT | MSG_NOSIGNAL, |
| (struct sockaddr *)sa[nbloggers], sizeof(**sa)); |
| #endif |
| } |
| } |
| |
| |
/* Stores the current date into <tv> and returns <tv>. A NULL pointer is
 * accepted and is returned untouched.
 */
static inline struct timeval *tv_now(struct timeval *tv) {
    if (tv == NULL)
        return NULL;

    gettimeofday(tv, NULL);
    return tv;
}
| |
/*
 * Sets <tv> to <from> + <ms> milliseconds, with the microseconds brought
 * back below one second, and returns <tv>. Returns NULL if either pointer
 * is NULL.
 */
static inline struct timeval *tv_delayfrom(struct timeval *tv, struct timeval *from, int ms) {
    if (tv == NULL || from == NULL)
        return NULL;

    tv->tv_usec = from->tv_usec + (ms % 1000) * 1000;
    tv->tv_sec  = from->tv_sec + (ms / 1000);

    /* carry the excess microseconds over to the seconds */
    for (; tv->tv_usec >= 1000000; tv->tv_sec++)
        tv->tv_usec -= 1000000;

    return tv;
}
| |
/*
 * Compares <tv1> and <tv2> : returns 0 if equal, -1 if tv1 < tv2, 1 if
 * tv1 > tv2. Seconds are compared first, then microseconds.
 * Must not be used when either argument is eternity. Use tv_cmp2() for that.
 */
static inline int tv_cmp(struct timeval *tv1, struct timeval *tv2) {
    if (tv1->tv_sec != tv2->tv_sec)
        return (tv1->tv_sec < tv2->tv_sec) ? -1 : 1;

    if (tv1->tv_usec != tv2->tv_usec)
        return (tv1->tv_usec < tv2->tv_usec) ? -1 : 1;

    return 0;
}
| |
/*
 * Returns the absolute difference, in ms, between <tv1> and <tv2>,
 * whatever their order.
 * Must not be used when either argument is eternity.
 */
unsigned long tv_delta(struct timeval *tv1, struct timeval *tv2) {
    struct timeval *later = tv1;
    struct timeval *earlier = tv2;
    unsigned long ms;
    int order;

    order = tv_cmp(tv1, tv2);
    if (order == 0)
        return 0; /* same dates, null diff */

    if (order < 0) {
        /* tv1 is the oldest one */
        later = tv2;
        earlier = tv1;
    }

    ms = (later->tv_sec - earlier->tv_sec) * 1000;
    if (later->tv_usec > earlier->tv_usec)
        ms += (later->tv_usec - earlier->tv_usec) / 1000;
    else
        ms -= (earlier->tv_usec - later->tv_usec) / 1000;
    return ms;
}
| |
/*
 * Returns the time elapsed from <tv1> to <tv2>, in ms. The result is only
 * meaningful when <tv2> is not before <tv1> since it is unsigned.
 * Must not be used when either argument is eternity.
 */
static inline unsigned long tv_diff(struct timeval *tv1, struct timeval *tv2) {
    unsigned long ms = (tv2->tv_sec - tv1->tv_sec) * 1000;

    /* each direction is divided separately so that the rounding is
     * always performed on a positive amount of microseconds.
     */
    if (tv2->tv_usec > tv1->tv_usec)
        ms += (tv2->tv_usec - tv1->tv_usec) / 1000;
    else
        ms -= (tv1->tv_usec - tv2->tv_usec) / 1000;

    return ms;
}
| |
/*
 * Compares <tv1> and <tv2> with a 1 ms tolerance : returns 0 if they are
 * less than 1 ms apart, -1 if tv1 < tv2, 1 if tv1 > tv2.
 * Must not be used when either argument is eternity. Use tv_cmp2_ms() for that.
 */
static inline int tv_cmp_ms(struct timeval *tv1, struct timeval *tv2) {
    if (tv1->tv_sec == tv2->tv_sec) {
        /* same second : only the microseconds matter */
        if (tv2->tv_usec >= tv1->tv_usec + 1000)
            return -1;
        if (tv1->tv_usec >= tv2->tv_usec + 1000)
            return 1;
        return 0;
    }

    /* tv2 is later by more than one second, or by one second and the
     * microseconds still leave at least 1 ms between both dates.
     */
    if (tv2->tv_sec > tv1->tv_sec + 1)
        return -1;
    if ((tv2->tv_sec == tv1->tv_sec + 1) &&
        (tv2->tv_usec + 1000000 >= tv1->tv_usec + 1000))
        return -1;

    /* same checks the other way around */
    if (tv1->tv_sec > tv2->tv_sec + 1)
        return 1;
    if ((tv1->tv_sec == tv2->tv_sec + 1) &&
        (tv1->tv_usec + 1000000 >= tv2->tv_usec + 1000))
        return 1;

    return 0;
}
| |
/*
 * Returns the time remaining, in ms, between <tv1> (now) and the event
 * <tv2>. If the event is already passed, or less than 1 ms ahead according
 * to tv_cmp_ms(), 0 is returned.
 * Must not be used when either argument is eternity.
 */
static inline unsigned long tv_remain(struct timeval *tv1, struct timeval *tv2) {
    unsigned long ms;

    if (tv_cmp_ms(tv1, tv2) >= 0)
        return 0; /* event elapsed */

    ms = (tv2->tv_sec - tv1->tv_sec) * 1000;
    if (tv2->tv_usec > tv1->tv_usec)
        ms += (tv2->tv_usec - tv1->tv_usec) / 1000;
    else
        ms -= (tv1->tv_usec - tv2->tv_usec) / 1000;

    return ms;
}
| |
| |
/*
 * Sets <tv> to eternity, which is represented by both fields being zero.
 * Returns <tv>.
 */
static inline struct timeval *tv_eternity(struct timeval *tv) {
    tv->tv_usec = 0;
    tv->tv_sec = 0;
    return tv;
}
| |
/*
 * Returns 1 if <tv> is eternity (both fields are zero), else 0.
 */
static inline int tv_iseternity(struct timeval *tv) {
    return (tv->tv_sec == 0 && tv->tv_usec == 0);
}
| |
/*
 * Compares <tv1> and <tv2> : returns 0 if equal, -1 if tv1 < tv2, 1 if
 * tv1 > tv2, considering that 0 is the eternity, which is later than any
 * other date.
 */
static inline int tv_cmp2(struct timeval *tv1, struct timeval *tv2) {
    if (tv_iseternity(tv1)) {
        /* both eternal : same ; otherwise tv1 is later than tv2 */
        return tv_iseternity(tv2) ? 0 : 1;
    }

    if (tv_iseternity(tv2))
        return -1; /* tv2 later than tv1 */

    if (tv1->tv_sec != tv2->tv_sec)
        return (tv1->tv_sec > tv2->tv_sec) ? 1 : -1;

    if (tv1->tv_usec != tv2->tv_usec)
        return (tv1->tv_usec > tv2->tv_usec) ? 1 : -1;

    return 0;
}
| |
/*
 * Compares <tv1> and <tv2> with a 1 ms tolerance : returns 0 if they are
 * less than 1 ms apart, -1 if tv1 < tv2, 1 if tv1 > tv2, considering that
 * 0 is the eternity, which is later than any other date.
 */
static inline int tv_cmp2_ms(struct timeval *tv1, struct timeval *tv2) {
    if (tv_iseternity(tv1)) {
        /* both eternal : same ; otherwise tv1 is later than tv2 */
        return tv_iseternity(tv2) ? 0 : 1;
    }

    if (tv_iseternity(tv2))
        return -1; /* tv2 later than tv1 */

    if (tv1->tv_sec == tv2->tv_sec) {
        /* same second : only the microseconds matter */
        if (tv1->tv_usec >= tv2->tv_usec + 1000)
            return 1;
        if (tv2->tv_usec >= tv1->tv_usec + 1000)
            return -1;
        return 0;
    }

    /* tv1 is later by more than one second, or by one second and the
     * microseconds still leave at least 1 ms between both dates.
     */
    if (tv1->tv_sec > tv2->tv_sec + 1)
        return 1;
    if ((tv1->tv_sec == tv2->tv_sec + 1) &&
        (tv1->tv_usec + 1000000 >= tv2->tv_usec + 1000))
        return 1;

    /* same checks the other way around */
    if (tv2->tv_sec > tv1->tv_sec + 1)
        return -1;
    if ((tv2->tv_sec == tv1->tv_sec + 1) &&
        (tv2->tv_usec + 1000000 >= tv1->tv_usec + 1000))
        return -1;

    return 0;
}
| |
| /* |
| * returns the remaining time between tv1=now and event=tv2 |
| * if tv2 is passed, 0 is returned. |
| * Returns TIME_ETERNITY if tv2 is eternity. |
| */ |
| static inline unsigned long tv_remain2(struct timeval *tv1, struct timeval *tv2) { |
| unsigned long ret; |
| |
| if (tv_iseternity(tv2)) |
| return TIME_ETERNITY; |
| |
| if (tv_cmp_ms(tv1, tv2) >= 0) |
| return 0; /* event elapsed */ |
| |
| ret = (tv2->tv_sec - tv1->tv_sec) * 1000; |
| if (tv2->tv_usec > tv1->tv_usec) |
| ret += (tv2->tv_usec - tv1->tv_usec) / 1000; |
| else |
| ret -= (tv1->tv_usec - tv2->tv_usec) / 1000; |
| return (unsigned long) ret; |
| } |
| |
/*
 * Copies the first event between <tv1> and <tv2> into <tvmin>, and returns
 * <tvmin>. The comparison is made by tv_cmp2(), so a zero tv (eternity) is
 * only selected when both are zero. <tv1> is selected when both are equal.
 */
static inline struct timeval *tv_min(struct timeval *tvmin,
                                     struct timeval *tv1, struct timeval *tv2) {
    *tvmin = (tv_cmp2(tv1, tv2) <= 0) ? *tv1 : *tv2;
    return tvmin;
}
| |
| |
| |
| /***********************************************************/ |
| /* fd management ***************************************/ |
| /***********************************************************/ |
| |
| |
| |
| /* Deletes an FD from the fdsets, and recomputes the maxfd limit. |
| * The file descriptor is also closed. |
| */ |
| static inline void fd_delete(int fd) { |
| FD_CLR(fd, StaticReadEvent); |
| FD_CLR(fd, StaticWriteEvent); |
| #if defined(ENABLE_EPOLL) |
| if (PrevReadEvent) { |
| FD_CLR(fd, PrevReadEvent); |
| FD_CLR(fd, PrevWriteEvent); |
| } |
| #endif |
| |
| close(fd); |
| fdtab[fd].state = FD_STCLOSE; |
| |
| while ((maxfd-1 >= 0) && (fdtab[maxfd-1].state == FD_STCLOSE)) |
| maxfd--; |
| } |
| |
| /* recomputes the maxfd limit from the fd */ |
| static inline void fd_insert(int fd) { |
| if (fd+1 > maxfd) |
| maxfd = fd+1; |
| } |
| |
| /*************************************************************/ |
| /* task management ***************************************/ |
| /*************************************************************/ |
| |
| /* puts the task <t> in run queue <q>, and returns <t> */ |
| static inline struct task *task_wakeup(struct task **q, struct task *t) { |
| if (t->state == TASK_RUNNING) |
| return t; |
| else { |
| t->rqnext = *q; |
| t->state = TASK_RUNNING; |
| return *q = t; |
| } |
| } |
| |
| /* removes the task <t> from the queue <q> |
| * <s> MUST be <q>'s first task. |
| * set the run queue to point to the next one, and return it |
| */ |
| static inline struct task *task_sleep(struct task **q, struct task *t) { |
| if (t->state == TASK_RUNNING) { |
| *q = t->rqnext; |
| t->state = TASK_IDLE; /* tell that s has left the run queue */ |
| } |
| return *q; /* return next running task */ |
| } |
| |
| /* |
| * removes the task <t> from its wait queue. It must have already been removed |
| * from the run queue. A pointer to the task itself is returned. |
| */ |
| static inline struct task *task_delete(struct task *t) { |
| t->prev->next = t->next; |
| t->next->prev = t->prev; |
| return t; |
| } |
| |
/*
 * Frees a task by handing it over to pool_free() for the "task" pool.
 * Its context must have been freed since it will be lost.
 */
static inline void task_free(struct task *t) {
    pool_free(task, t);
}
| |
/* Inserts <task> into its assigned wait queue, where it may already be. In
 * this case, it may be only moved or left where it was, depending on its
 * timing requirements. <task> is returned.
 *
 * Tasks whose expiration date is eternity go to wait_queue[1] ; a task found
 * there with a real date is moved to wait_queue[0]. A NULL <prev> pointer
 * means that the task is not linked in any queue. The position is searched
 * with tv_cmp2() so that the task is linked after the last entry which does
 * not expire after it.
 */
struct task *task_queue(struct task *task) {
    struct task *list = task->wq;
    struct task *start_from; /* the entry after which <task> will be linked */

    /* This is a very dirty hack to queue non-expirable tasks in another queue
     * in order to avoid polluting the tail of the standard queue. This will go
     * away with the new O(log(n)) scheduler anyway.
     */
    if (tv_iseternity(&task->expire)) {
        /* if the task was queued in the standard wait queue, we must dequeue it */
        if (task->prev) {
            /* already in the eternity queue : nothing to do */
            if (task->wq == LIST_HEAD(wait_queue[1]))
                return task;
            else {
                task_delete(task);
                task->prev = NULL;
            }
        }
        list = task->wq = LIST_HEAD(wait_queue[1]);
    } else {
        /* if the task was queued in the eternity queue, we must dequeue it */
        if (task->prev && (task->wq == LIST_HEAD(wait_queue[1]))) {
            task_delete(task);
            task->prev = NULL;
            list = task->wq = LIST_HEAD(wait_queue[0]);
        }
    }

    /* next, test if the task was already in a list */
    if (task->prev == NULL) {
        //	start_from = list;
        start_from = list->prev;
#if STATTIME > 0
        stats_tsk_new++;
#endif
        /* insert the unlinked <task> into the list, searching back from the last entry */
        while (start_from != list && tv_cmp2(&task->expire, &start_from->expire) < 0) {
            start_from = start_from->prev;
#if STATTIME > 0
            stats_tsk_nsrch++;
#endif
        }

        //	  while (start_from->next != list && tv_cmp2(&task->expire, &start_from->next->expire) > 0) {
        //	      start_from = start_from->next;
        //	      stats_tsk_nsrch++;
        //	  }
    }
    else if (task->prev == list ||
             tv_cmp2(&task->expire, &task->prev->expire) >= 0) { /* walk right */
        /* the task is first, or does not expire before its predecessor */
        start_from = task->next;
        if (start_from == list || tv_cmp2(&task->expire, &start_from->expire) <= 0) {
#if STATTIME > 0
            stats_tsk_good++;
#endif
            return task; /* it's already in the right place */
        }

#if STATTIME > 0
        stats_tsk_right++;
#endif

        /* if the task is not at the right place, there's little chance that
         * it has only shifted a bit, and it will nearly always be queued
         * at the end of the list because of constant timeouts
         * (observed in real case).
         */
#ifndef WE_REALLY_THINK_THAT_THIS_TASK_MAY_HAVE_SHIFTED
        start_from = list->prev; /* assume we'll queue to the end of the list */
        while (start_from != list && tv_cmp2(&task->expire, &start_from->expire) < 0) {
            start_from = start_from->prev;
#if STATTIME > 0
            stats_tsk_lsrch++;
#endif
        }
#else /* WE_REALLY_... */
        /* insert the unlinked <task> into the list, searching after position <start_from> */
        while (start_from->next != list && tv_cmp2(&task->expire, &start_from->next->expire) > 0) {
            start_from = start_from->next;
#if STATTIME > 0
            stats_tsk_rsrch++;
#endif
        }
#endif /* WE_REALLY_... */

        /* we need to unlink it now */
        task_delete(task);
    }
    else { /* walk left. */
        /* the task now expires before its predecessor */
#if STATTIME > 0
        stats_tsk_left++;
#endif
#ifdef LEFT_TO_TOP /* not very good */
        start_from = list;
        while (start_from->next != list && tv_cmp2(&task->expire, &start_from->next->expire) > 0) {
            start_from = start_from->next;
#if STATTIME > 0
            stats_tsk_lsrch++;
#endif
        }
#else
        start_from = task->prev->prev; /* valid because of the previous test above */
        while (start_from != list && tv_cmp2(&task->expire, &start_from->expire) < 0) {
            start_from = start_from->prev;
#if STATTIME > 0
            stats_tsk_lsrch++;
#endif
        }
#endif
        /* we need to unlink it now */
        task_delete(task);
    }

    /* link <task> right after <start_from> */
    task->prev = start_from;
    task->next = start_from->next;
    task->next->prev = task;
    start_from->next = task;
    return task;
}
| |
| |
| /*********************************************************************/ |
| /* pending connections queues **************************************/ |
| /*********************************************************************/ |
| |
| /* |
| * Detaches pending connection <p>, decreases the pending count, and frees |
| * the pending connection. The connection might have been queued to a specific |
| * server as well as to the proxy. The session also gets marked unqueued. |
| */ |
| static void pendconn_free(struct pendconn *p) { |
| LIST_DEL(&p->list); |
| p->sess->pend_pos = NULL; |
| if (p->srv) |
| p->srv->nbpend--; |
| else |
| p->sess->proxy->nbpend--; |
| p->sess->proxy->totpend--; |
| pool_free(pendconn, p); |
| } |
| |
| /* Returns the first pending connection for server <s>, which may be NULL if |
| * nothing is pending. |
| */ |
| static inline struct pendconn *pendconn_from_srv(struct server *s) { |
| if (!s->nbpend) |
| return NULL; |
| |
| return LIST_ELEM(s->pendconns.n, struct pendconn *, list); |
| } |
| |
| /* Returns the first pending connection for proxy <px>, which may be NULL if |
| * nothing is pending. |
| */ |
| static inline struct pendconn *pendconn_from_px(struct proxy *px) { |
| if (!px->nbpend) |
| return NULL; |
| |
| return LIST_ELEM(px->pendconns.n, struct pendconn *, list); |
| } |
| |
| /* Detaches the next pending connection for either current session's server or |
| * current session's proxy, and returns its associated session. If no pending |
| * connection is found, NULL is returned. Note that cur->srv cannot be NULL. |
| */ |
| static struct session *pendconn_get_next_sess(struct session *cur) { |
| struct pendconn *p; |
| struct session *sess; |
| |
| p = pendconn_from_srv(cur->srv); |
| if (!p) { |
| p = pendconn_from_px(cur->proxy); |
| if (!p) |
| return NULL; |
| p->sess->srv = cur->srv; |
| } |
| sess = p->sess; |
| pendconn_free(p); |
| return sess; |
| } |
| |
| /* Checks if other sessions are waiting for the same server, and wakes the |
| * first one up. Note that cur->srv cannot be NULL. |
| */ |
| void offer_connection_slot(struct session *cur) { |
| struct session *sess; |
| |
| sess = pendconn_get_next_sess(cur); |
| if (sess == NULL) |
| return; |
| task_wakeup(&rq, sess->task); |
| } |
| |
| /* Adds the session <sess> to the pending connection list of server <sess>->srv |
| * or to the one of <sess>->proxy if srv is NULL. All counters and back pointers |
| * are updated accordingly. Returns NULL if no memory is available, otherwise the |
| * pendconn itself. |
| */ |
| static struct pendconn *pendconn_add(struct session *sess) { |
| struct pendconn *p; |
| |
| p = pool_alloc(pendconn); |
| if (!p) |
| return NULL; |
| |
| sess->pend_pos = p; |
| p->sess = sess; |
| p->srv = sess->srv; |
| if (sess->srv) { |
| LIST_ADDQ(&sess->srv->pendconns, &p->list); |
| sess->srv->nbpend++; |
| } else { |
| LIST_ADDQ(&sess->proxy->pendconns, &p->list); |
| sess->proxy->nbpend++; |
| } |
| sess->proxy->totpend++; |
| return p; |
| } |
| |
| /*********************************************************************/ |
| /* more specific functions ***************************************/ |
| /*********************************************************************/ |
| |
| /* some prototypes */ |
| static int maintain_proxies(void); |
| |
/* Retrieves either the original destination address or the socket name of
 * <fd> into <sa>/<salen>, depending on build options :
 *   - TPROXY and SO_ORIGINAL_DST defined : getsockopt(SO_ORIGINAL_DST)
 *   - TPROXY and USE_GETSOCKNAME defined : getsockname()
 *   - anything else                      : unsupported, -1 is returned
 * The return value is the one of the underlying call.
 * Code inspired from Patrick Schaaf's example of nf_getsockname()
 * implementation.
 */
static int get_original_dst(int fd, struct sockaddr_in *sa, socklen_t *salen) {
#if !defined(TPROXY)
    return -1;
#elif defined(SO_ORIGINAL_DST)
    return getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, (void *)sa, salen);
#elif defined(USE_GETSOCKNAME)
    return getsockname(fd, (struct sockaddr *)sa, salen);
#else
    return -1;
#endif
}
| |
| /* |
| * frees the context associated to a session. It must have been removed first. |
| */ |
| static void session_free(struct session *s) { |
| if (s->pend_pos) |
| pendconn_free(s->pend_pos); |
| if (s->req) |
| pool_free(buffer, s->req); |
| if (s->rep) |
| pool_free(buffer, s->rep); |
| |
| if (s->rsp_cap != NULL) { |
| struct cap_hdr *h; |
| for (h = s->proxy->rsp_cap; h; h = h->next) { |
| if (s->rsp_cap[h->index] != NULL) |
| pool_free_to(h->pool, s->rsp_cap[h->index]); |
| } |
| pool_free_to(s->proxy->rsp_cap_pool, s->rsp_cap); |
| } |
| if (s->req_cap != NULL) { |
| struct cap_hdr *h; |
| for (h = s->proxy->req_cap; h; h = h->next) { |
| if (s->req_cap[h->index] != NULL) |
| pool_free_to(h->pool, s->req_cap[h->index]); |
| } |
| pool_free_to(s->proxy->req_cap_pool, s->req_cap); |
| } |
| |
| if (s->logs.uri) |
| pool_free(requri, s->logs.uri); |
| if (s->logs.cli_cookie) |
| pool_free(capture, s->logs.cli_cookie); |
| if (s->logs.srv_cookie) |
| pool_free(capture, s->logs.srv_cookie); |
| |
| pool_free(session, s); |
| } |
| |
| |
| /* |
| * This function recounts the number of usable active and backup servers for |
| * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck. |
| * This function also recomputes the total active and backup weights. |
| */ |
| static void recount_servers(struct proxy *px) { |
| struct server *srv; |
| |
| px->srv_act = 0; px->srv_bck = px->tot_wact = px->tot_wbck = 0; |
| for (srv = px->srv; srv != NULL; srv = srv->next) { |
| if (srv->state & SRV_RUNNING) { |
| if (srv->state & SRV_BACKUP) { |
| px->srv_bck++; |
| px->tot_wbck += srv->eweight + 1; |
| } else { |
| px->srv_act++; |
| px->tot_wact += srv->eweight + 1; |
| } |
| } |
| } |
| } |
| |
| /* This function recomputes the server map for proxy px. It |
| * relies on px->tot_wact and px->tot_wbck, so it must be |
| * called after recount_servers(). It also expects px->srv_map |
| * to be initialized to the largest value needed. |
| */ |
| static void recalc_server_map(struct proxy *px) { |
| int o, tot, flag; |
| struct server *cur, *best; |
| |
| if (px->srv_act) { |
| flag = SRV_RUNNING; |
| tot = px->tot_wact; |
| } else if (px->srv_bck) { |
| flag = SRV_RUNNING | SRV_BACKUP; |
| if (px->options & PR_O_USE_ALL_BK) |
| tot = px->tot_wbck; |
| else |
| tot = 1; /* the first server is enough */ |
| } else { |
| px->srv_map_sz = 0; |
| return; |
| } |
| |
| /* this algorithm gives priority to the first server, which means that |
| * it will respect the declaration order for equivalent weights, and |
| * that whatever the weights, the first server called will always be |
| * the first declard. This is an important asumption for the backup |
| * case, where we want the first server only. |
| */ |
| for (cur = px->srv; cur; cur = cur->next) |
| cur->wscore = 0; |
| |
| for (o = 0; o < tot; o++) { |
| int max = 0; |
| best = NULL; |
| for (cur = px->srv; cur; cur = cur->next) { |
| if ((cur->state & (SRV_RUNNING | SRV_BACKUP)) == flag) { |
| int v; |
| |
| /* If we are forced to return only one server, we don't want to |
| * go further, because we would return the wrong one due to |
| * divide overflow. |
| */ |
| if (tot == 1) { |
| best = cur; |
| break; |
| } |
| |
| cur->wscore += cur->eweight + 1; |
| v = (cur->wscore + tot) / tot; /* result between 0 and 3 */ |
| if (best == NULL || v > max) { |
| max = v; |
| best = cur; |
| } |
| } |
| } |
| px->srv_map[o] = best; |
| best->wscore -= tot; |
| } |
| px->srv_map_sz = tot; |
| } |
| |
| /* |
| * This function tries to find a running server with free connection slots for |
| * the proxy <px> following the round-robin method. |
| * If any server is found, it will be returned and px->srv_rr_idx will be updated |
| * to point to the next server. If no valid server is found, NULL is returned. |
| */ |
| static inline struct server *get_server_rr_with_conns(struct proxy *px) { |
| int newidx; |
| struct server *srv; |
| |
| if (px->srv_map_sz == 0) |
| return NULL; |
| |
| if (px->srv_rr_idx < 0 || px->srv_rr_idx >= px->srv_map_sz) |
| px->srv_rr_idx = 0; |
| newidx = px->srv_rr_idx; |
| |
| do { |
| srv = px->srv_map[newidx++]; |
| if (!srv->maxconn || srv->cur_sess < srv->maxconn) { |
| px->srv_rr_idx = newidx; |
| return srv; |
| } |
| if (newidx == px->srv_map_sz) |
| newidx = 0; |
| } while (newidx != px->srv_rr_idx); |
| |
| return NULL; |
| } |
| |
| |
| /* |
| * This function tries to find a running server for the proxy <px> following |
| * the round-robin method. |
| * If any server is found, it will be returned and px->srv_rr_idx will be updated |
| * to point to the next server. If no valid server is found, NULL is returned. |
| */ |
| static inline struct server *get_server_rr(struct proxy *px) { |
| if (px->srv_map_sz == 0) |
| return NULL; |
| |
| if (px->srv_rr_idx < 0 || px->srv_rr_idx >= px->srv_map_sz) |
| px->srv_rr_idx = 0; |
| return px->srv_map[px->srv_rr_idx++]; |
| } |
| |
| |
| /* |
| * This function tries to find a running server for the proxy <px> following |
| * the source hash method. Depending on the number of active/backup servers, |
| * it will either look for active servers, or for backup servers. |
| * If any server is found, it will be returned. If no valid server is found, |
| * NULL is returned. |
| */ |
| static inline struct server *get_server_sh(struct proxy *px, char *addr, int len) { |
| unsigned int h, l; |
| |
| if (px->srv_map_sz == 0) |
| return NULL; |
| |
| l = h = 0; |
| if (px->srv_act > 1 || (px->srv_act == 0 && px->srv_bck > 1)) { |
| while ((l + sizeof (int)) <= len) { |
| h ^= ntohl(*(unsigned int *)(&addr[l])); |
| l += sizeof (int); |
| } |
| h %= px->srv_map_sz; |
| } |
| return px->srv_map[h]; |
| } |
| |
| |
| /* |
| * This function marks the session as 'assigned' in direct or dispatch modes, |
| * or tries to assign one in balance mode, according to the algorithm. It does |
| * nothing if the session had already been assigned a server. |
| * |
| * It may return : |
| * SRV_STATUS_OK if everything is OK. |
| * SRV_STATUS_NOSRV if no server is available |
| * SRV_STATUS_FULL if all servers are saturated |
| * SRV_STATUS_INTERNAL for other unrecoverable errors. |
| * |
| * Upon successful return, the session flag SN_ASSIGNED to indicate that it does |
| * not need to be called anymore. This usually means that s->srv can be trusted |
| * in balance and direct modes. This flag is not cleared, so it's to the caller |
| * to clear it if required (eg: redispatch). |
| * |
| */ |
| |
| int assign_server(struct session *s) { |
| #ifdef DEBUG_FULL |
| fprintf(stderr,"assign_server : s=%p\n",s); |
| #endif |
| |
| if (s->pend_pos) |
| return SRV_STATUS_INTERNAL; |
| |
| if (!(s->flags & SN_ASSIGNED)) { |
| if ((s->proxy->options & PR_O_BALANCE) && !(s->flags & SN_DIRECT)) { |
| if (!s->proxy->srv_act && !s->proxy->srv_bck) |
| return SRV_STATUS_NOSRV; |
| |
| if (s->proxy->options & PR_O_BALANCE_RR) { |
| s->srv = get_server_rr_with_conns(s->proxy); |
| if (!s->srv) |
| return SRV_STATUS_FULL; |
| } |
| else if (s->proxy->options & PR_O_BALANCE_SH) { |
| int len; |
| |
| if (s->cli_addr.ss_family == AF_INET) |
| len = 4; |
| else if (s->cli_addr.ss_family == AF_INET6) |
| len = 16; |
| else /* unknown IP family */ |
| return SRV_STATUS_INTERNAL; |
| |
| s->srv = get_server_sh(s->proxy, |
| (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr, |
| len); |
| } |
| else /* unknown balancing algorithm */ |
| return SRV_STATUS_INTERNAL; |
| } |
| s->flags |= SN_ASSIGNED; |
| } |
| return SRV_STATUS_OK; |
| } |
| |
| /* |
| * This function assigns a server address to a session, and sets SN_ADDR_SET. |
| * The address is taken from the currently assigned server, or from the |
| * dispatch or transparent address. |
| * |
| * It may return : |
| * SRV_STATUS_OK if everything is OK. |
| * SRV_STATUS_INTERNAL for other unrecoverable errors. |
| * |
| * Upon successful return, the session flag SN_ADDR_SET is set. This flag is |
| * not cleared, so it's to the caller to clear it if required. |
| * |
| */ |
| int assign_server_address(struct session *s) { |
| #ifdef DEBUG_FULL |
| fprintf(stderr,"assign_server_address : s=%p\n",s); |
| #endif |
| |
| if (s->flags & SN_DIRECT || s->proxy->options & PR_O_BALANCE) { |
| /* A server is necessarily known for this session */ |
| if (!(s->flags & SN_ASSIGNED)) |
| return SRV_STATUS_INTERNAL; |
| |
| s->srv_addr = s->srv->addr; |
| |
| /* if this server remaps proxied ports, we'll use |
| * the port the client connected to with an offset. */ |
| if (s->srv->state & SRV_MAPPORTS) { |
| struct sockaddr_in sockname; |
| socklen_t namelen = sizeof(sockname); |
| |
| if (!(s->proxy->options & PR_O_TRANSP) || |
| get_original_dst(s->cli_fd, (struct sockaddr_in *)&sockname, &namelen) == -1) |
| getsockname(s->cli_fd, (struct sockaddr *)&sockname, &namelen); |
| s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) + ntohs(sockname.sin_port)); |
| } |
| } |
| else if (*(int *)&s->proxy->dispatch_addr.sin_addr) { |
| /* connect to the defined dispatch addr */ |
| s->srv_addr = s->proxy->dispatch_addr; |
| } |
| else if (s->proxy->options & PR_O_TRANSP) { |
| /* in transparent mode, use the original dest addr if no dispatch specified */ |
| socklen_t salen = sizeof(s->srv_addr); |
| |
| if (get_original_dst(s->cli_fd, &s->srv_addr, &salen) == -1) { |
| qfprintf(stderr, "Cannot get original server address.\n"); |
| return SRV_STATUS_INTERNAL; |
| } |
| } |
| |
| s->flags |= SN_ADDR_SET; |
| return SRV_STATUS_OK; |
| } |
| |
| /* This function assigns a server to session <s> if required, and can add the |
| * connection to either the assigned server's queue or to the proxy's queue. |
| * |
| * Returns : |
| * |
| * SRV_STATUS_OK if everything is OK. |
| * SRV_STATUS_NOSRV if no server is available |
| * SRV_STATUS_QUEUED if the connection has been queued. |
| * SRV_STATUS_FULL if the server(s) is/are saturated and the |
| * connection could not be queued. |
| * SRV_STATUS_INTERNAL for other unrecoverable errors. |
| * |
| */ |
| int assign_server_and_queue(struct session *s) { |
| struct pendconn *p; |
| int err; |
| |
| if (s->pend_pos) |
| return SRV_STATUS_INTERNAL; |
| |
| if (s->flags & SN_ASSIGNED) { |
| /* a server does not need to be assigned, perhaps because we're in |
| * direct mode, or in dispatch or transparent modes where the server |
| * is not needed. |
| */ |
| if (s->srv && |
| s->srv->maxconn && s->srv->cur_sess >= s->srv->maxconn) { |
| p = pendconn_add(s); |
| if (p) |
| return SRV_STATUS_QUEUED; |
| else |
| return SRV_STATUS_FULL; |
| } |
| return SRV_STATUS_OK; |
| } |
| |
| /* a server needs to be assigned */ |
| err = assign_server(s); |
| switch (err) { |
| case SRV_STATUS_OK: |
| /* in balance mode, we might have servers with connection limits */ |
| if (s->srv != NULL && |
| s->srv->maxconn && s->srv->cur_sess >= s->srv->maxconn) { |
| p = pendconn_add(s); |
| if (p) |
| return SRV_STATUS_QUEUED; |
| else |
| return SRV_STATUS_FULL; |
| } |
| return SRV_STATUS_OK; |
| |
| case SRV_STATUS_FULL: |
| /* queue this session into the proxy's queue */ |
| p = pendconn_add(s); |
| if (p) |
| return SRV_STATUS_QUEUED; |
| else |
| return SRV_STATUS_FULL; |
| |
| case SRV_STATUS_NOSRV: |
| case SRV_STATUS_INTERNAL: |
| return err; |
| default: |
| return SRV_STATUS_INTERNAL; |
| } |
| } |
| |
| |
| /* |
| * This function initiates a connection to the server assigned to this session |
| * (s->srv, s->srv_addr). It will assign a server if none is assigned yet. |
| * It can return one of : |
| * - SN_ERR_NONE if everything's OK |
| * - SN_ERR_SRVTO if there are no more servers |
| * - SN_ERR_SRVCL if the connection was refused by the server |
| * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn) |
| * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...) |
| * - SN_ERR_INTERNAL for any other purely internal errors |
| * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted. |
| */ |
| int connect_server(struct session *s) { |
| int fd, err; |
| |
| if (!(s->flags & SN_ADDR_SET)) { |
| err = assign_server_address(s); |
| if (err != SRV_STATUS_OK) |
| return SN_ERR_INTERNAL; |
| } |
| |
| if ((fd = s->srv_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) { |
| qfprintf(stderr, "Cannot get a server socket.\n"); |
| |
| if (errno == ENFILE) |
| send_log(s->proxy, LOG_EMERG, |
| "Proxy %s reached system FD limit at %d. Please check system tunables.\n", |
| s->proxy->id, maxfd); |
| else if (errno == EMFILE) |
| send_log(s->proxy, LOG_EMERG, |
| "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n", |
| s->proxy->id, maxfd); |
| else if (errno == ENOBUFS || errno == ENOMEM) |
| send_log(s->proxy, LOG_EMERG, |
| "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n", |
| s->proxy->id, maxfd); |
| /* this is a resource error */ |
| return SN_ERR_RESOURCE; |
| } |
| |
| if (fd >= global.maxsock) { |
| /* do not log anything there, it's a normal condition when this option |
| * is used to serialize connections to a server ! |
| */ |
| Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n"); |
| close(fd); |
| return SN_ERR_PRXCOND; /* it is a configuration limit */ |
| } |
| |
| if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) || |
| (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) { |
| qfprintf(stderr,"Cannot set client socket to non blocking mode.\n"); |
| close(fd); |
| return SN_ERR_INTERNAL; |
| } |
| |
| if (s->proxy->options & PR_O_TCP_SRV_KA) |
| setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one)); |
| |
| /* allow specific binding : |
| * - server-specific at first |
| * - proxy-specific next |
| */ |
| if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) { |
| setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one)); |
| if (bind(fd, (struct sockaddr *)&s->srv->source_addr, sizeof(s->srv->source_addr)) == -1) { |
| Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n", |
| s->proxy->id, s->srv->id); |
| close(fd); |
| send_log(s->proxy, LOG_EMERG, |
| "Cannot bind to source address before connect() for server %s/%s.\n", |
| s->proxy->id, s->srv->id); |
| return SN_ERR_RESOURCE; |
| } |
| } |
| else if (s->proxy->options & PR_O_BIND_SRC) { |
| setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one)); |
| if (bind(fd, (struct sockaddr *)&s->proxy->source_addr, sizeof(s->proxy->source_addr)) == -1) { |
| Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n", s->proxy->id); |
| close(fd); |
| send_log(s->proxy, LOG_EMERG, |
| "Cannot bind to source address before connect() for server %s/%s.\n", |
| s->proxy->id, s->srv->id); |
| return SN_ERR_RESOURCE; |
| } |
| } |
| |
| if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) && |
| (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) { |
| |
| if (errno == EAGAIN || errno == EADDRINUSE) { |
| char *msg; |
| if (errno == EAGAIN) /* no free ports left, try again later */ |
| msg = "no free ports"; |
| else |
| msg = "local address already in use"; |
| |
| qfprintf(stderr,"Cannot connect: %s.\n",msg); |
| close(fd); |
| send_log(s->proxy, LOG_EMERG, |
| "Connect() failed for server %s/%s: %s.\n", |
| s->proxy->id, s->srv->id, msg); |
| return SN_ERR_RESOURCE; |
| } else if (errno == ETIMEDOUT) { |
| //qfprintf(stderr,"Connect(): ETIMEDOUT"); |
| close(fd); |
| return SN_ERR_SRVTO; |
| } else { |
| // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM) |
| //qfprintf(stderr,"Connect(): %d", errno); |
| close(fd); |
| return SN_ERR_SRVCL; |
| } |
| } |
| |
| fdtab[fd].owner = s->task; |
| fdtab[fd].read = &event_srv_read; |
| fdtab[fd].write = &event_srv_write; |
| fdtab[fd].state = FD_STCONN; /* connection in progress */ |
| |
| FD_SET(fd, StaticWriteEvent); /* for connect status */ |
| #if defined(DEBUG_FULL) && defined(ENABLE_EPOLL) |
| if (PrevReadEvent) { |
| assert(!(FD_ISSET(fd, PrevReadEvent))); |
| assert(!(FD_ISSET(fd, PrevWriteEvent))); |
| } |
| #endif |
| |
| fd_insert(fd); |
| if (s->srv) |
| s->srv->cur_sess++; |
| |
| if (s->proxy->contimeout) |
| tv_delayfrom(&s->cnexpire, &now, s->proxy->contimeout); |
| else |
| tv_eternity(&s->cnexpire); |
| return SN_ERR_NONE; /* connection is OK */ |
| } |
| |
| /* |
| * this function is called on a read event from a client socket. |
| * It returns 0. |
| */ |
| int event_cli_read(int fd) { |
| struct task *t = fdtab[fd].owner; |
| struct session *s = t->context; |
| struct buffer *b = s->req; |
| int ret, max; |
| |
| #ifdef DEBUG_FULL |
| fprintf(stderr,"event_cli_read : fd=%d, s=%p\n", fd, s); |
| #endif |
| |
| if (fdtab[fd].state != FD_STERROR) { |
| #ifdef FILL_BUFFERS |
| while (1) |
| #else |
| do |
| #endif |
| { |
| if (b->l == 0) { /* let's realign the buffer to optimize I/O */ |
| b->r = b->w = b->h = b->lr = b->data; |
| max = b->rlim - b->data; |
| } |
| else if (b->r > b->w) { |
| max = b->rlim - b->r; |
| } |
| else { |
| max = b->w - b->r; |
| /* FIXME: theorically, if w>0, we shouldn't have rlim < data+size anymore |
| * since it means that the rewrite protection has been removed. This |
| * implies that the if statement can be removed. |
| */ |
| if (max > b->rlim - b->data) |
| max = b->rlim - b->data; |
| } |
| |
| if (max == 0) { /* not anymore room to store data */ |
| FD_CLR(fd, StaticReadEvent); |
| break; |
| } |
| |
| #ifndef MSG_NOSIGNAL |
| { |
| int skerr; |
| socklen_t lskerr = sizeof(skerr); |
| |
| getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr); |
| if (skerr) |
| ret = -1; |
| else |
| ret = recv(fd, b->r, max, 0); |
| } |
| #else |
| ret = recv(fd, b->r, max, MSG_NOSIGNAL); |
| #endif |
| if (ret > 0) { |
| b->r += ret; |
| b->l += ret; |
| s->res_cr = RES_DATA; |
| |
| if (b->r == b->data + BUFSIZE) { |
| b->r = b->data; /* wrap around the buffer */ |
| } |
| |
| b->total += ret; |
| /* we hope to read more data or to get a close on next round */ |
| continue; |
| } |
| else if (ret == 0) { |
| s->res_cr = RES_NULL; |
| break; |
| } |
| else if (errno == EAGAIN) {/* ignore EAGAIN */ |
| break; |
| } |
| else { |
| s->res_cr = RES_ERROR; |
| fdtab[fd].state = FD_STERROR; |
| break; |
| } |
| } /* while(1) */ |
| #ifndef FILL_BUFFERS |
| while (0); |
| #endif |
| } |
| else { |
| s->res_cr = RES_ERROR; |
| fdtab[fd].state = FD_STERROR; |
| } |
| |
| if (s->res_cr != RES_SILENT) { |
| if (s->proxy->clitimeout && FD_ISSET(fd, StaticReadEvent)) |
| tv_delayfrom(&s->crexpire, &now, s->proxy->clitimeout); |
| else |
| tv_eternity(&s->crexpire); |
| |
| task_wakeup(&rq, t); |
| } |
| |
| return 0; |
| } |
| |
| |
| /* |
| * this function is called on a read event from a server socket. |
| * It returns 0. |
| */ |
| int event_srv_read(int fd) { |
| struct task *t = fdtab[fd].owner; |
| struct session *s = t->context; |
| struct buffer *b = s->rep; |
| int ret, max; |
| |
| #ifdef DEBUG_FULL |
| fprintf(stderr,"event_srv_read : fd=%d, s=%p\n", fd, s); |
| #endif |
| |
| if (fdtab[fd].state != FD_STERROR) { |
| #ifdef FILL_BUFFERS |
| while (1) |
| #else |
| do |
| #endif |
| { |
| if (b->l == 0) { /* let's realign the buffer to optimize I/O */ |
| b->r = b->w = b->h = b->lr = b->data; |
| max = b->rlim - b->data; |
| } |
| else if (b->r > b->w) { |
| max = b->rlim - b->r; |
| } |
| else { |
| max = b->w - b->r; |
| /* FIXME: theorically, if w>0, we shouldn't have rlim < data+size anymore |
| * since it means that the rewrite protection has been removed. This |
| * implies that the if statement can be removed. |
| */ |
| if (max > b->rlim - b->data) |
| max = b->rlim - b->data; |
| } |
| |
| if (max == 0) { /* not anymore room to store data */ |
| FD_CLR(fd, StaticReadEvent); |
| break; |
| } |
| |
| #ifndef MSG_NOSIGNAL |
| { |
| int skerr; |
| socklen_t lskerr = sizeof(skerr); |
| |
| getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr); |
| if (skerr) |
| ret = -1; |
| else |
| ret = recv(fd, b->r, max, 0); |
| } |
| #else |
| ret = recv(fd, b->r, max, MSG_NOSIGNAL); |
| #endif |
| if (ret > 0) { |
| b->r += ret; |
| b->l += ret; |
| s->res_sr = RES_DATA; |
| |
| if (b->r == b->data + BUFSIZE) { |
| b->r = b->data; /* wrap around the buffer */ |
| } |
| |
| b->total += ret; |
| /* we hope to read more data or to get a close on next round */ |
| continue; |
| } |
| else if (ret == 0) { |
| s->res_sr = RES_NULL; |
| break; |
| } |
| else if (errno == EAGAIN) {/* ignore EAGAIN */ |
| break; |
| } |
| else { |
| s->res_sr = RES_ERROR; |
| fdtab[fd].state = FD_STERROR; |
| break; |
| } |
| } /* while(1) */ |
| #ifndef FILL_BUFFERS |
| while (0); |
| #endif |
| } |
| else { |
| s->res_sr = RES_ERROR; |
| fdtab[fd].state = FD_STERROR; |
| } |
| |
| if (s->res_sr != RES_SILENT) { |
| if (s->proxy->srvtimeout && FD_ISSET(fd, StaticReadEvent)) |
| tv_delayfrom(&s->srexpire, &now, s->proxy->srvtimeout); |
| else |
| tv_eternity(&s->srexpire); |
| |
| task_wakeup(&rq, t); |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * this function is called on a write event from a client socket. |
| * It returns 0. |
| */ |
| int event_cli_write(int fd) { |
| struct task *t = fdtab[fd].owner; |
| struct session *s = t->context; |
| struct buffer *b = s->rep; |
| int ret, max; |
| |
| #ifdef DEBUG_FULL |
| fprintf(stderr,"event_cli_write : fd=%d, s=%p\n", fd, s); |
| #endif |
| |
| if (b->l == 0) { /* let's realign the buffer to optimize I/O */ |
| b->r = b->w = b->h = b->lr = b->data; |
| // max = BUFSIZE; BUG !!!! |
| max = 0; |
| } |
| else if (b->r > b->w) { |
| max = b->r - b->w; |
| } |
| else |
| max = b->data + BUFSIZE - b->w; |
| |
| if (fdtab[fd].state != FD_STERROR) { |
| if (max == 0) { |
| s->res_cw = RES_NULL; |
| task_wakeup(&rq, t); |
| tv_eternity(&s->cwexpire); |
| FD_CLR(fd, StaticWriteEvent); |
| return 0; |
| } |
| |
| #ifndef MSG_NOSIGNAL |
| { |
| int skerr; |
| socklen_t lskerr = sizeof(skerr); |
| |
| getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr); |
| if (skerr) |
| ret = -1; |
| else |
| ret = send(fd, b->w, max, MSG_DONTWAIT); |
| } |
| #else |
| ret = send(fd, b->w, max, MSG_DONTWAIT | MSG_NOSIGNAL); |
| #endif |
| |
| if (ret > 0) { |
| b->l -= ret; |
| b->w += ret; |
| |
| s->res_cw = RES_DATA; |
| |
| if (b->w == b->data + BUFSIZE) { |
| b->w = b->data; /* wrap around the buffer */ |
| } |
| } |
| else if (ret == 0) { |
| /* nothing written, just make as if we were never called */ |
| // s->res_cw = RES_NULL; |
| return 0; |
| } |
| else if (errno == EAGAIN) /* ignore EAGAIN */ |
| return 0; |
| else { |
| s->res_cw = RES_ERROR; |
| fdtab[fd].state = FD_STERROR; |
| } |
| } |
| else { |
| s->res_cw = RES_ERROR; |
| fdtab[fd].state = FD_STERROR; |
| } |
| |
| if (s->proxy->clitimeout) { |
| tv_delayfrom(&s->cwexpire, &now, s->proxy->clitimeout); |
| /* FIXME: to prevent the client from expiring read timeouts during writes, |
| * we refresh it. A solution would be to merge read+write timeouts into a |
| * unique one, although that needs some study particularly on full-duplex |
| * TCP connections. */ |
| s->crexpire = s->cwexpire; |
| } |
| else |
| tv_eternity(&s->cwexpire); |
| |
| task_wakeup(&rq, t); |
| return 0; |
| } |
| |
| |
| /* |
| * this function is called on a write event from a server socket. |
| * It returns 0. |
| */ |
| int event_srv_write(int fd) { |
| struct task *t = fdtab[fd].owner; |
| struct session *s = t->context; |
| struct buffer *b = s->req; |
| int ret, max; |
| |
| #ifdef DEBUG_FULL |
| fprintf(stderr,"event_srv_write : fd=%d, s=%p\n", fd, s); |
| #endif |
| |
| if (b->l == 0) { /* let's realign the buffer to optimize I/O */ |
| b->r = b->w = b->h = b->lr = b->data; |
| // max = BUFSIZE; BUG !!!! |
| max = 0; |
| } |
| else if (b->r > b->w) { |
| max = b->r - b->w; |
| } |
| else |
| max = b->data + BUFSIZE - b->w; |
| |
| if (fdtab[fd].state != FD_STERROR) { |
| if (max == 0) { |
| /* may be we have received a connection acknowledgement in TCP mode without data */ |
| if (s->srv_state == SV_STCONN) { |
| int skerr; |
| socklen_t lskerr = sizeof(skerr); |
| getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr); |
| if (skerr) { |
| s->res_sw = RES_ERROR; |
| fdtab[fd].state = FD_STERROR; |
| task_wakeup(&rq, t); |
| tv_eternity(&s->swexpire); |
| FD_CLR(fd, StaticWriteEvent); |
| return 0; |
| } |
| } |
| |
| s->res_sw = RES_NULL; |
| task_wakeup(&rq, t); |
| fdtab[fd].state = FD_STREADY; |
| tv_eternity(&s->swexpire); |
| FD_CLR(fd, StaticWriteEvent); |
| return 0; |
| } |
| |
| #ifndef MSG_NOSIGNAL |
| { |
| int skerr; |
| socklen_t lskerr = sizeof(skerr); |
| getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr); |
| if (skerr) |
| ret = -1; |
| else |
| ret = send(fd, b->w, max, MSG_DONTWAIT); |
| } |
| #else |
| ret = send(fd, b->w, max, MSG_DONTWAIT | MSG_NOSIGNAL); |
| #endif |
| fdtab[fd].state = FD_STREADY; |
| if (ret > 0) { |
| b->l -= ret; |
| b->w += ret; |
| |
| s->res_sw = RES_DATA; |
| |
| if (b->w == b->data + BUFSIZE) { |
| b->w = b->data; /* wrap around the buffer */ |
| } |
| } |
| else if (ret == 0) { |
| /* nothing written, just make as if we were never called */ |
| // s->res_sw = RES_NULL; |
| return 0; |
| } |
| else if (errno == EAGAIN) /* ignore EAGAIN */ |
| return 0; |
| else { |
| s->res_sw = RES_ERROR; |
| fdtab[fd].state = FD_STERROR; |
| } |
| } |
| else { |
| s->res_sw = RES_ERROR; |
| fdtab[fd].state = FD_STERROR; |
| } |
| |
| /* We don't want to re-arm read/write timeouts if we're trying to connect, |
| * otherwise it could loop indefinitely ! |
| */ |
| if (s->srv_state != SV_STCONN) { |
| if (s->proxy->srvtimeout) { |
| tv_delayfrom(&s->swexpire, &now, s->proxy->srvtimeout); |
| /* FIXME: to prevent the server from expiring read timeouts during writes, |
| * we refresh it. A solution would be to merge read+write+connect timeouts |
| * into a unique one since we don't mind expiring on read or write, and none |
| * of them is enabled while waiting for connect(), although that needs some |
| * study particularly on full-duplex TCP connections. */ |
| s->srexpire = s->swexpire; |
| } |
| else |
| tv_eternity(&s->swexpire); |
| } |
| |
| task_wakeup(&rq, t); |
| return 0; |
| } |
| |
| |
| /* |
| * returns a message to the client ; the connection is shut down for read, |
| * and the request is cleared so that no server connection can be initiated. |
| * The client must be in a valid state for this (HEADER, DATA ...). |
| * Nothing is performed on the server side. |
| * The reply buffer doesn't need to be empty before this. |
| */ |
| void client_retnclose(struct session *s, int len, const char *msg) { |
| FD_CLR(s->cli_fd, StaticReadEvent); |
| FD_SET(s->cli_fd, StaticWriteEvent); |
| tv_eternity(&s->crexpire); |
| tv_delayfrom(&s->cwexpire, &now, s->proxy->clitimeout); |
| shutdown(s->cli_fd, SHUT_RD); |
| s->cli_state = CL_STSHUTR; |
| strcpy(s->rep->data, msg); |
| s->rep->l = len; |
| s->rep->r = s->rep->h = s->rep->lr = s->rep->w = s->rep->data; |
| s->rep->r += len; |
| s->req->l = 0; |
| } |
| |
| |
| /* |
| * returns a message into the rep buffer, and flushes the req buffer. |
| * The reply buffer doesn't need to be empty before this. |
| */ |
| void client_return(struct session *s, int len, const char *msg) { |
| strcpy(s->rep->data, msg); |
| s->rep->l = len; |
| s->rep->r = s->rep->h = s->rep->lr = s->rep->w = s->rep->data; |
| s->rep->r += len; |
| s->req->l = 0; |
| } |
| |
| /* |
| * send a log for the session when we have enough info about it |
| */ |
| void sess_log(struct session *s) { |
| char pn[INET6_ADDRSTRLEN + strlen(":65535")]; |
| struct proxy *p = s->proxy; |
| int log; |
| char *uri; |
| char *pxid; |
| char *srv; |
| struct tm *tm; |
| |
| /* This is a first attempt at a better logging system. |
| * For now, we rely on send_log() to provide the date, although it obviously |
| * is the date of the log and not of the request, and most fields are not |
| * computed. |
| */ |
| |
| log = p->to_log & ~s->logs.logwait; |
| |
| if (s->cli_addr.ss_family == AF_INET) |
| inet_ntop(AF_INET, |
| (const void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr, |
| pn, sizeof(pn)); |
| else |
| inet_ntop(AF_INET6, |
| (const void *)&((struct sockaddr_in6 *)(&s->cli_addr))->sin6_addr, |
| pn, sizeof(pn)); |
| |
| uri = (log & LW_REQ) ? s->logs.uri ? s->logs.uri : "<BADREQ>" : ""; |
| pxid = p->id; |
| srv = (p->to_log & LW_SVID) ? (s->srv != NULL) ? s->srv->id : "<NOSRV>" : "-"; |
| |
| tm = localtime(&s->logs.tv_accept.tv_sec); |
| if (p->to_log & LW_REQ) { |
| char tmpline[MAX_SYSLOG_LEN], *h; |
| int hdr; |
| |
| h = tmpline; |
| if (p->to_log & LW_REQHDR && (h < tmpline + sizeof(tmpline) - 10)) { |
| *(h++) = ' '; |
| *(h++) = '{'; |
| for (hdr = 0; hdr < p->nb_req_cap; hdr++) { |
| if (hdr) |
| *(h++) = '|'; |
| if (s->req_cap[hdr] != NULL) |
| h = encode_string(h, tmpline + sizeof(tmpline) - 7, '#', hdr_encode_map, s->req_cap[hdr]); |
| } |
| *(h++) = '}'; |
| } |
| |
| if (p->to_log & LW_RSPHDR && (h < tmpline + sizeof(tmpline) - 7)) { |
| *(h++) = ' '; |
| *(h++) = '{'; |
| for (hdr = 0; hdr < p->nb_rsp_cap; hdr++) { |
| if (hdr) |
| *(h++) = '|'; |
| if (s->rsp_cap[hdr] != NULL) |
| h = encode_string(h, tmpline + sizeof(tmpline) - 4, '#', hdr_encode_map, s->rsp_cap[hdr]); |
| } |
| *(h++) = '}'; |
| } |
| |
| if (h < tmpline + sizeof(tmpline) - 4) { |
| *(h++) = ' '; |
| *(h++) = '"'; |
| h = encode_string(h, tmpline + sizeof(tmpline) - 1, '#', url_encode_map, uri); |
| *(h++) = '"'; |
| } |
| *h = '\0'; |
| |
| send_log(p, LOG_INFO, "%s:%d [%02d/%s/%04d:%02d:%02d:%02d] %s %s %d/%d/%d/%d/%s%d %d %s%lld %s %s %c%c%c%c %d/%d/%d/%d%s\n", |
| pn, |
| (s->cli_addr.ss_family == AF_INET) ? |
| ntohs(((struct sockaddr_in *)&s->cli_addr)->sin_port) : |
| ntohs(((struct sockaddr_in6 *)&s->cli_addr)->sin6_port), |
| tm->tm_mday, monthname[tm->tm_mon], tm->tm_year+1900, |
| tm->tm_hour, tm->tm_min, tm->tm_sec, |
| pxid, srv, |
| s->logs.t_request, |
| (s->logs.t_queue >= 0) ? s->logs.t_queue - s->logs.t_request
|