blob: 426786e33dc22e3e72830d18daf6fb222bd013b6 [file] [log] [blame]
/*
* HA-Proxy : High Availability-enabled HTTP/TCP proxy
* 2000-2004 - Willy Tarreau - willy AT meta-x DOT org.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Please refer to RFC2068 or RFC2616 for informations about HTTP protocol, and
* RFC2965 for informations about cookies usage. More generally, the IETF HTTP
* Working Group's web site should be consulted for protocol related changes :
*
* http://ftp.ics.uci.edu/pub/ietf/http/
*
* Pending bugs (may be not fixed because never reproduced) :
* - solaris only : sometimes, an HTTP proxy with only a dispatch address causes
* the proxy to terminate (no core) if the client breaks the connection during
* the response. Seen on 1.1.8pre4, but never reproduced. May not be related to
* the snprintf() bug since requests were simple (GET / HTTP/1.0), but may be
* related to missing setsid() (fixed in 1.1.15)
* - a proxy with an invalid config will prevent the startup even if disabled.
*
* ChangeLog has moved to the CHANGELOG file.
*
* TODO:
* - handle properly intermediate incomplete server headers. Done ?
* - handle hot-reconfiguration
* - fix client/server state transition when server is in connect or headers state
* and client suddenly disconnects. The server *should* switch to SHUT_WR, but
* still handle HTTP headers.
* - remove MAX_NEWHDR
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/tcp.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <fcntl.h>
#include <errno.h>
#include <signal.h>
#include <stdarg.h>
#include <sys/resource.h>
#include <time.h>
#include <regex.h>
#include <syslog.h>
#if defined(TPROXY) && defined(NETFILTER)
#include <linux/netfilter_ipv4.h>
#endif
#define HAPROXY_VERSION "1.2.2"
#define HAPROXY_DATE "2004/10/18"
/* this is for libc5 for example */
#ifndef TCP_NODELAY
#define TCP_NODELAY 1
#endif
#ifndef SHUT_RD
#define SHUT_RD 0
#endif
#ifndef SHUT_WR
#define SHUT_WR 1
#endif
#define BUFSIZE 8192
// reserved buffer space for header rewriting
#define MAXREWRITE 4096
#define REQURI_LEN 1024
#define CAPTURE_LEN 64
// max # args on a configuration line
#define MAX_LINE_ARGS 40
// max # of added headers per request
#define MAX_NEWHDR 10
// max # of matches per regexp
#define MAX_MATCH 10
/* FIXME: serverid_len and cookiename_len are no longer checked in configuration file */
#define COOKIENAME_LEN 16
#define SERVERID_LEN 16
#define CONN_RETRIES 3
#define CHK_CONNTIME 2000
#define DEF_CHKINTR 2000
#define DEF_FALLTIME 3
#define DEF_RISETIME 2
#define DEF_CHECK_REQ "OPTIONS / HTTP/1.0\r\n\r\n"
/* default connections limit */
#define DEFAULT_MAXCONN 2000
/* how many bits are needed to code the size of an int (eg: 32bits -> 5) */
#define INTBITS 5
/* show stats this every millisecond, 0 to disable */
#ifndef STATTIME
#define STATTIME 2000
#endif
/* this reduces the number of calls to select() by choosing appropriate
* sheduler precision in milliseconds. It should be near the minimum
* time that is needed by select() to collect all events. All timeouts
* are rounded up by adding this value prior to pass it to select().
*/
#define SCHEDULER_RESOLUTION 9
#define MINTIME(old, new) (((new)<0)?(old):(((old)<0||(new)<(old))?(new):(old)))
#define SETNOW(a) (*a=now)
/****** string-specific macros and functions ******/
/* if a > max, then bound <a> to <max>. The macro returns the new <a> */
#define UBOUND(a, max) ({ typeof(a) b = (max); if ((a) > b) (a) = b; (a); })
/* if a < min, then bound <a> to <min>. The macro returns the new <a> */
#define LBOUND(a, min) ({ typeof(a) b = (min); if ((a) < b) (a) = b; (a); })
/*
* copies at most <size-1> chars from <src> to <dst>. Last char is always
* set to 0, unless <size> is 0. The number of chars copied is returned
* (excluding the terminating zero).
* This code has been optimized for size and speed : on x86, it's 45 bytes
* long, uses only registers, and consumes only 4 cycles per char.
*/
int strlcpy2(char *dst, const char *src, int size) {
char *orig = dst;
if (size) {
while (--size && (*dst = *src)) {
src++; dst++;
}
*dst = 0;
}
return dst - orig;
}
/*
* Returns a pointer to an area of <__len> bytes taken from the pool <pool> or
* dynamically allocated. In the first case, <__pool> is updated to point to
* the next element in the list.
*/
#define pool_alloc_from(__pool, __len) ({ \
void *__p; \
if ((__p = (__pool)) == NULL) \
__p = malloc(((__len) >= sizeof (void *)) ? (__len) : sizeof(void *)); \
else { \
__pool = *(void **)(__pool); \
} \
__p; \
})
/*
* Puts a memory area back to the corresponding pool.
* Items are chained directly through a pointer that
* is written in the beginning of the memory area, so
* there's no need for any carrier cell. This implies
* that each memory area is at least as big as one
* pointer.
*/
#define pool_free_to(__pool, __ptr) ({ \
*(void **)(__ptr) = (void *)(__pool); \
__pool = (void *)(__ptr); \
})
#define MEM_OPTIM
#ifdef MEM_OPTIM
/*
* Returns a pointer to type <type> taken from the
* pool <pool_type> or dynamically allocated. In the
* first case, <pool_type> is updated to point to the
* next element in the list.
*/
#define pool_alloc(type) ({ \
void *__p; \
if ((__p = pool_##type) == NULL) \
__p = malloc(sizeof_##type); \
else { \
pool_##type = *(void **)pool_##type; \
} \
__p; \
})
/*
* Puts a memory area back to the corresponding pool.
* Items are chained directly through a pointer that
* is written in the beginning of the memory area, so
* there's no need for any carrier cell. This implies
* that each memory area is at least as big as one
* pointer.
*/
#define pool_free(type, ptr) ({ \
*(void **)ptr = (void *)pool_##type; \
pool_##type = (void *)ptr; \
})
#else
#define pool_alloc(type) (calloc(1,sizeof_##type));
#define pool_free(type, ptr) (free(ptr));
#endif /* MEM_OPTIM */
#define sizeof_task sizeof(struct task)
#define sizeof_session sizeof(struct session)
#define sizeof_buffer sizeof(struct buffer)
#define sizeof_fdtab sizeof(struct fdtab)
#define sizeof_requri REQURI_LEN
#define sizeof_capture CAPTURE_LEN
/* different possible states for the sockets */
#define FD_STCLOSE 0
#define FD_STLISTEN 1
#define FD_STCONN 2
#define FD_STREADY 3
#define FD_STERROR 4
/* values for task->state */
#define TASK_IDLE 0
#define TASK_RUNNING 1
/* values for proxy->state */
#define PR_STNEW 0
#define PR_STIDLE 1
#define PR_STRUN 2
#define PR_STDISABLED 3
/* values for proxy->mode */
#define PR_MODE_TCP 0
#define PR_MODE_HTTP 1
#define PR_MODE_HEALTH 2
/* bits for proxy->options */
#define PR_O_REDISP 1 /* allow reconnection to dispatch in case of errors */
#define PR_O_TRANSP 2 /* transparent mode : use original DEST as dispatch */
#define PR_O_COOK_RW 4 /* rewrite all direct cookies with the right serverid */
#define PR_O_COOK_IND 8 /* keep only indirect cookies */
#define PR_O_COOK_INS 16 /* insert cookies when not accessing a server directly */
#define PR_O_COOK_ANY (PR_O_COOK_RW | PR_O_COOK_IND | PR_O_COOK_INS)
#define PR_O_BALANCE_RR 32 /* balance in round-robin mode */
#define PR_O_BALANCE (PR_O_BALANCE_RR)
#define PR_O_KEEPALIVE 64 /* follow keep-alive sessions */
#define PR_O_FWDFOR 128 /* insert x-forwarded-for with client address */
#define PR_O_BIND_SRC 256 /* bind to a specific source address when connect()ing */
#define PR_O_NULLNOLOG 512 /* a connect without request will not be logged */
#define PR_O_COOK_NOC 1024 /* add a 'Cache-control' header with the cookie */
#define PR_O_COOK_POST 2048 /* don't insert cookies for requests other than a POST */
#define PR_O_HTTP_CHK 4096 /* use HTTP 'OPTIONS' method to check server health */
#define PR_O_PERSIST 8192 /* server persistence stays effective even when server is down */
#define PR_O_LOGASAP 16384 /* log as soon as possible, without waiting for the session to complete */
#define PR_O_HTTP_CLOSE 32768 /* force 'connection: close' in both directions */
#define PR_O_CHK_CACHE 65536 /* require examination of cacheability of the 'set-cookie' field */
/* various session flags */
#define SN_DIRECT 0x00000001 /* connection made on the server matching the client cookie */
#define SN_CLDENY 0x00000002 /* a client header matches a deny regex */
#define SN_CLALLOW 0x00000004 /* a client header matches an allow regex */
#define SN_SVDENY 0x00000008 /* a server header matches a deny regex */
#define SN_SVALLOW 0x00000010 /* a server header matches an allow regex */
#define SN_POST 0x00000020 /* the request was an HTTP POST */
#define SN_CK_NONE 0x00000000 /* this session had no cookie */
#define SN_CK_INVALID 0x00000040 /* this session had a cookie which matches no server */
#define SN_CK_DOWN 0x00000080 /* this session had cookie matching a down server */
#define SN_CK_VALID 0x000000C0 /* this session had cookie matching a valid server */
#define SN_CK_MASK 0x000000C0 /* mask to get this session's cookie flags */
#define SN_CK_SHIFT 6 /* bit shift */
#define SN_ERR_CLITO 0x00000100 /* client time-out */
#define SN_ERR_CLICL 0x00000200 /* client closed (read/write error) */
#define SN_ERR_SRVTO 0x00000300 /* server time-out, connect time-out */
#define SN_ERR_SRVCL 0x00000400 /* server closed (connect/read/write error) */
#define SN_ERR_PRXCOND 0x00000500 /* the proxy decided to close (deny...) */
#define SN_ERR_MASK 0x00000700 /* mask to get only session error flags */
#define SN_ERR_SHIFT 8 /* bit shift */
#define SN_FINST_R 0x00001000 /* session ended during client request */
#define SN_FINST_C 0x00002000 /* session ended during server connect */
#define SN_FINST_H 0x00003000 /* session ended during server headers */
#define SN_FINST_D 0x00004000 /* session ended during data phase */
#define SN_FINST_L 0x00005000 /* session ended while pushing last data to client */
#define SN_FINST_MASK 0x00007000 /* mask to get only final session state flags */
#define SN_FINST_SHIFT 12 /* bit shift */
#define SN_SCK_NONE 0x00000000 /* no set-cookie seen for the server cookie */
#define SN_SCK_DELETED 0x00010000 /* existing set-cookie deleted or changed */
#define SN_SCK_INSERTED 0x00020000 /* new set-cookie inserted or changed existing one */
#define SN_SCK_SEEN 0x00040000 /* set-cookie seen for the server cookie */
#define SN_SCK_MASK 0x00070000 /* mask to get the set-cookie field */
#define SN_SCK_ANY 0x00080000 /* at least one set-cookie seen (not to be counted) */
#define SN_SCK_SHIFT 16 /* bit shift */
#define SN_CACHEABLE 0x00100000 /* at least part of the response is cacheable */
#define SN_CACHE_COOK 0x00200000 /* a cookie in the response is cacheable */
#define SN_CACHE_SHIFT 20 /* bit shift */
/* different possible states for the client side */
#define CL_STHEADERS 0
#define CL_STDATA 1
#define CL_STSHUTR 2
#define CL_STSHUTW 3
#define CL_STCLOSE 4
/* different possible states for the server side */
#define SV_STIDLE 0
#define SV_STCONN 1
#define SV_STHEADERS 2
#define SV_STDATA 3
#define SV_STSHUTR 4
#define SV_STSHUTW 5
#define SV_STCLOSE 6
/* result of an I/O event */
#define RES_SILENT 0 /* didn't happen */
#define RES_DATA 1 /* data were sent or received */
#define RES_NULL 2 /* result is 0 (read == 0), or connect without need for writing */
#define RES_ERROR 3 /* result -1 or error on the socket (eg: connect()) */
/* modes of operation (global.mode) */
#define MODE_DEBUG 1
#define MODE_STATS 2
#define MODE_LOG 4
#define MODE_DAEMON 8
#define MODE_QUIET 16
#define MODE_CHECK 32
#define MODE_VERBOSE 64
/* server flags */
#define SRV_RUNNING 1 /* the server is UP */
#define SRV_BACKUP 2 /* this server is a backup server */
#define SRV_MAPPORTS 4 /* this server uses mapped ports */
/* what to do when a header matches a regex */
#define ACT_ALLOW 0 /* allow the request */
#define ACT_REPLACE 1 /* replace the matching header */
#define ACT_REMOVE 2 /* remove the matching header */
#define ACT_DENY 3 /* deny the request */
#define ACT_PASS 4 /* pass this header without allowing or denying the request */
/* configuration sections */
#define CFG_NONE 0
#define CFG_GLOBAL 1
#define CFG_LISTEN 2
/* fields that need to be logged. They appear as flags in session->logs.logwait */
#define LW_DATE 1 /* date */
#define LW_CLIP 2 /* CLient IP */
#define LW_SVIP 4 /* SerVer IP */
#define LW_SVID 8 /* server ID */
#define LW_REQ 16 /* http REQuest */
#define LW_RESP 32 /* http RESPonse */
#define LW_PXIP 64 /* proxy IP */
#define LW_PXID 128 /* proxy ID */
#define LW_BYTES 256 /* bytes read from server */
#define LW_COOKIE 512 /* captured cookie */
#define LW_REQHDR 1024 /* request header(s) */
#define LW_RSPHDR 2048 /* response header(s) */
/*********************************************************************/
#define LIST_HEAD(a) ((void *)(&(a)))
/*********************************************************************/
struct cap_hdr {
struct cap_hdr *next;
char *name; /* header name, case insensitive */
int namelen; /* length of the header name, to speed-up lookups */
int len; /* capture length, not including terminal zero */
int index; /* index in the output array */
void *pool; /* pool of pre-allocated memory area of (len+1) bytes */
};
struct hdr_exp {
struct hdr_exp *next;
regex_t *preg; /* expression to look for */
int action; /* ACT_ALLOW, ACT_REPLACE, ACT_REMOVE, ACT_DENY */
char *replace; /* expression to set instead */
};
struct buffer {
unsigned int l; /* data length */
char *r, *w, *h, *lr; /* read ptr, write ptr, last header ptr, last read */
char *rlim; /* read limit, used for header rewriting */
unsigned long long total; /* total data read */
char data[BUFSIZE];
};
struct server {
struct server *next;
int state; /* server state (SRV_*) */
int cklen; /* the len of the cookie, to speed up checks */
char *cookie; /* the id set in the cookie */
char *id; /* just for identification */
struct sockaddr_in addr; /* the address to connect to */
short check_port; /* the port to use for the health checks */
int health; /* 0->rise-1 = bad; rise->rise+fall-1 = good */
int rise, fall; /* time in iterations */
int inter; /* time in milliseconds */
int result; /* 0 = connect OK, -1 = connect KO */
int curfd; /* file desc used for current test, or -1 if not in test */
struct proxy *proxy; /* the proxy this server belongs to */
};
/* The base for all tasks */
struct task {
struct task *next, *prev; /* chaining ... */
struct task *rqnext; /* chaining in run queue ... */
struct task *wq; /* the wait queue this task is in */
int state; /* task state : IDLE or RUNNING */
struct timeval expire; /* next expiration time for this task, use only for fast sorting */
int (*process)(struct task *t); /* the function which processes the task */
void *context; /* the task's context */
};
/* WARNING: if new fields are added, they must be initialized in event_accept() */
struct session {
struct task *task; /* the task associated with this session */
/* application specific below */
struct timeval crexpire; /* expiration date for a client read */
struct timeval cwexpire; /* expiration date for a client write */
struct timeval srexpire; /* expiration date for a server read */
struct timeval swexpire; /* expiration date for a server write */
struct timeval cnexpire; /* expiration date for a connect */
char res_cr, res_cw, res_sr, res_sw;/* results of some events */
struct proxy *proxy; /* the proxy this socket belongs to */
int cli_fd; /* the client side fd */
int srv_fd; /* the server side fd */
int cli_state; /* state of the client side */
int srv_state; /* state of the server side */
int conn_retries; /* number of connect retries left */
int flags; /* some flags describing the session */
struct buffer *req; /* request buffer */
struct buffer *rep; /* response buffer */
struct sockaddr_storage cli_addr; /* the client address */
struct sockaddr_in srv_addr; /* the address to connect to */
struct server *srv; /* the server being used */
char **req_cap; /* array of captured request headers (may be NULL) */
char **rsp_cap; /* array of captured response headers (may be NULL) */
struct {
int logwait; /* log fields waiting to be collected : LW_* */
struct timeval tv_accept; /* date of the accept() (beginning of the session) */
long t_request; /* delay before the end of the request arrives, -1 if never occurs */
long t_connect; /* delay before the connect() to the server succeeds, -1 if never occurs */
long t_data; /* delay before the first data byte from the server ... */
unsigned long t_close; /* total session duration */
char *uri; /* first line if log needed, NULL otherwise */
char *cli_cookie; /* cookie presented by the client, in capture mode */
char *srv_cookie; /* cookie presented by the server, in capture mode */
int status; /* HTTP status from the server, negative if from proxy */
long long bytes; /* number of bytes transferred from the server */
} logs;
unsigned int uniq_id; /* unique ID used for the traces */
};
struct listener {
int fd; /* the listen socket */
struct sockaddr_storage addr; /* the address we listen to */
struct listener *next; /* next address or NULL */
};
struct proxy {
struct listener *listen; /* the listen addresses and sockets */
int state; /* proxy state */
struct sockaddr_in dispatch_addr; /* the default address to connect to */
struct server *srv, *cursrv; /* known servers, current server */
int nbservers; /* # of servers */
char *cookie_name; /* name of the cookie to look for */
int cookie_len; /* strlen(cookie_len), computed only once */
char *capture_name; /* beginning of the name of the cookie to capture */
int capture_namelen; /* length of the cookie name to match */
int capture_len; /* length of the string to be captured */
int clitimeout; /* client I/O timeout (in milliseconds) */
int srvtimeout; /* server I/O timeout (in milliseconds) */
int contimeout; /* connect timeout (in milliseconds) */
char *id; /* proxy id */
int nbconn; /* # of active sessions */
int maxconn; /* max # of active sessions */
int conn_retries; /* maximum number of connect retries */
int options; /* PR_O_REDISP, PR_O_TRANSP */
int mode; /* mode = PR_MODE_TCP, PR_MODE_HTTP or PR_MODE_HEALTH */
struct sockaddr_in source_addr; /* the address to which we want to bind for connect() */
struct proxy *next;
struct sockaddr_in logsrv1, logsrv2; /* 2 syslog servers */
char logfac1, logfac2; /* log facility for both servers. -1 = disabled */
int loglev1, loglev2; /* log level for each server, 7 by default */
int to_log; /* things to be logged (LW_*) */
struct timeval stop_time; /* date to stop listening, when stopping != 0 */
int nb_reqadd, nb_rspadd;
struct hdr_exp *req_exp; /* regular expressions for request headers */
struct hdr_exp *rsp_exp; /* regular expressions for response headers */
int nb_req_cap, nb_rsp_cap; /* # of headers to be captured */
struct cap_hdr *req_cap; /* chained list of request headers to be captured */
struct cap_hdr *rsp_cap; /* chained list of response headers to be captured */
void *req_cap_pool, *rsp_cap_pool; /* pools of pre-allocated char ** used to build the sessions */
char *req_add[MAX_NEWHDR], *rsp_add[MAX_NEWHDR]; /* headers to be added */
int grace; /* grace time after stop request */
char *check_req; /* HTTP request to use if PR_O_HTTP_CHK is set, else NULL */
int check_len; /* Length of the HTTP request */
struct {
char *msg400; /* message for error 400 */
int len400; /* message length for error 400 */
char *msg403; /* message for error 403 */
int len403; /* message length for error 403 */
char *msg408; /* message for error 408 */
int len408; /* message length for error 408 */
char *msg500; /* message for error 500 */
int len500; /* message length for error 500 */
char *msg502; /* message for error 502 */
int len502; /* message length for error 502 */
char *msg503; /* message for error 503 */
int len503; /* message length for error 503 */
char *msg504; /* message for error 504 */
int len504; /* message length for error 504 */
} errmsg;
};
/* info about one given fd */
struct fdtab {
int (*read)(int fd); /* read function */
int (*write)(int fd); /* write function */
struct task *owner; /* the session (or proxy) associated with this fd */
int state; /* the state of this fd */
};
/*********************************************************************/
int cfg_maxpconn = 2000; /* # of simultaneous connections per proxy (-N) */
char *cfg_cfgfile = NULL; /* configuration file */
char *progname = NULL; /* program name */
int pid; /* current process id */
/* global options */
static struct {
int uid;
int gid;
int nbproc;
int maxconn;
int maxsock; /* max # of sockets */
int mode;
char *chroot;
char *pidfile;
int logfac1, logfac2;
int loglev1, loglev2;
struct sockaddr_in logsrv1, logsrv2;
} global = {
logfac1 : -1,
logfac2 : -1,
loglev1 : 7, /* max syslog level : debug */
loglev2 : 7,
/* others NULL OK */
};
/*********************************************************************/
fd_set *ReadEvent,
*WriteEvent,
*StaticReadEvent,
*StaticWriteEvent;
void **pool_session = NULL,
**pool_buffer = NULL,
**pool_fdtab = NULL,
**pool_requri = NULL,
**pool_task = NULL,
**pool_capture = NULL;
struct proxy *proxy = NULL; /* list of all existing proxies */
struct fdtab *fdtab = NULL; /* array of all the file descriptors */
struct task *rq = NULL; /* global run queue */
struct task wait_queue = { /* global wait queue */
prev:LIST_HEAD(wait_queue),
next:LIST_HEAD(wait_queue)
};
static int totalconn = 0; /* total # of terminated sessions */
static int actconn = 0; /* # of active sessions */
static int maxfd = 0; /* # of the highest fd + 1 */
static int listeners = 0; /* # of listeners */
static int stopping = 0; /* non zero means stopping in progress */
static struct timeval now = {0,0}; /* the current date at any moment */
static struct proxy defproxy; /* fake proxy used to assign default values on all instances */
static regmatch_t pmatch[MAX_MATCH]; /* rm_so, rm_eo for regular expressions */
/* this is used to drain data, and as a temporary buffer for sprintf()... */
static char trash[BUFSIZE];
const int zero = 0;
const int one = 1;
/*
* Syslog facilities and levels. Conforming to RFC3164.
*/
#define MAX_SYSLOG_LEN 1024
#define NB_LOG_FACILITIES 24
const char *log_facilities[NB_LOG_FACILITIES] = {
"kern", "user", "mail", "daemon",
"auth", "syslog", "lpr", "news",
"uucp", "cron", "auth2", "ftp",
"ntp", "audit", "alert", "cron2",
"local0", "local1", "local2", "local3",
"local4", "local5", "local6", "local7"
};
#define NB_LOG_LEVELS 8
const char *log_levels[NB_LOG_LEVELS] = {
"emerg", "alert", "crit", "err",
"warning", "notice", "info", "debug"
};
#define SYSLOG_PORT 514
const char *monthname[12] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
const char sess_term_cond[8] = "-cCsSP67"; /* normal, CliTo, CliErr, SrvTo, SrvErr, PxErr, unknown */
const char sess_fin_state[8] = "-RCHDL67"; /* cliRequest, srvConnect, srvHeader, Data, Last, unknown */
const char sess_cookie[4] = "NIDV"; /* No cookie, Invalid cookie, cookie for a Down server, Valid cookie */
const char sess_set_cookie[8] = "N1I3PD5R"; /* No set-cookie, unknown, Set-Cookie Inserted, unknown,
Set-cookie seen and left unchanged (passive), Set-cookie Deleted,
unknown, Set-cookie Rewritten */
#define MAX_HOSTNAME_LEN 32
static char hostname[MAX_HOSTNAME_LEN] = "";
const char *HTTP_302 =
"HTTP/1.0 302 Found\r\n"
"Cache-Control: no-cache\r\n"
"Connection: close\r\n"
"Location: "; /* not terminated since it will be concatenated with the URL */
const char *HTTP_400 =
"HTTP/1.0 400 Bad request\r\n"
"Cache-Control: no-cache\r\n"
"Connection: close\r\n"
"\r\n"
"<html><body><h1>400 Bad request</h1>\nYour browser sent an invalid request.\n</body></html>\n";
const char *HTTP_403 =
"HTTP/1.0 403 Forbidden\r\n"
"Cache-Control: no-cache\r\n"
"Connection: close\r\n"
"\r\n"
"<html><body><h1>403 Forbidden</h1>\nRequest forbidden by administrative rules.\n</body></html>\n";
const char *HTTP_408 =
"HTTP/1.0 408 Request Time-out\r\n"
"Cache-Control: no-cache\r\n"
"Connection: close\r\n"
"\r\n"
"<html><body><h1>408 Request Time-out</h1>\nYour browser didn't send a complete request in time.\n</body></html>\n";
const char *HTTP_500 =
"HTTP/1.0 500 Server Error\r\n"
"Cache-Control: no-cache\r\n"
"Connection: close\r\n"
"\r\n"
"<html><body><h1>500 Server Error</h1>\nAn internal server error occured.\n</body></html>\n";
const char *HTTP_502 =
"HTTP/1.0 502 Bad Gateway\r\n"
"Cache-Control: no-cache\r\n"
"Connection: close\r\n"
"\r\n"
"<html><body><h1>502 Bad Gateway</h1>\nThe server returned an invalid or incomplete response.\n</body></html>\n";
const char *HTTP_503 =
"HTTP/1.0 503 Service Unavailable\r\n"
"Cache-Control: no-cache\r\n"
"Connection: close\r\n"
"\r\n"
"<html><body><h1>503 Service Unavailable</h1>\nNo server is available to handle this request.\n</body></html>\n";
const char *HTTP_504 =
"HTTP/1.0 504 Gateway Time-out\r\n"
"Cache-Control: no-cache\r\n"
"Connection: close\r\n"
"\r\n"
"<html><body><h1>504 Gateway Time-out</h1>\nThe server didn't respond in time.\n</body></html>\n";
/*********************************************************************/
/* statistics ******************************************************/
/*********************************************************************/
#if STATTIME > 0
static int stats_tsk_lsrch, stats_tsk_rsrch,
stats_tsk_good, stats_tsk_right, stats_tsk_left,
stats_tsk_new, stats_tsk_nsrch;
#endif
/*********************************************************************/
/* debugging *******************************************************/
/*********************************************************************/
#ifdef DEBUG_FULL
static char *cli_stnames[5] = {"HDR", "DAT", "SHR", "SHW", "CLS" };
static char *srv_stnames[7] = {"IDL", "CON", "HDR", "DAT", "SHR", "SHW", "CLS" };
#endif
/*********************************************************************/
/* function prototypes *********************************************/
/*********************************************************************/
int event_accept(int fd);
int event_cli_read(int fd);
int event_cli_write(int fd);
int event_srv_read(int fd);
int event_srv_write(int fd);
int process_session(struct task *t);
/*********************************************************************/
/* general purpose functions ***************************************/
/*********************************************************************/
void display_version() {
printf("HA-Proxy version " HAPROXY_VERSION " " HAPROXY_DATE"\n");
printf("Copyright 2000-2004 Willy Tarreau <w@w.ods.org>\n\n");
}
/*
* This function prints the command line usage and exits
*/
void usage(char *name) {
display_version();
fprintf(stderr,
"Usage : %s -f <cfgfile> [ -vdV"
#if STATTIME > 0
"sl"
#endif
"D ] [ -n <maxconn> ] [ -N <maxpconn> ] [ -p <pidfile> ]\n"
" -v displays version\n"
" -d enters debug mode\n"
" -V enters verbose mode (disables quiet mode)\n"
#if STATTIME > 0
" -s enables statistics output\n"
" -l enables long statistics format\n"
#endif
" -D goes daemon ; implies -q\n"
" -q quiet mode : don't display messages\n"
" -c check mode : only check config file and exit\n"
" -n sets the maximum total # of connections (%d)\n"
" -N sets the default, per-proxy maximum # of connections (%d)\n"
" -p writes pids of all children to this file\n\n",
name, DEFAULT_MAXCONN, cfg_maxpconn);
exit(1);
}
/*
* Displays the message on stderr with the date and pid.
*/
void Alert(char *fmt, ...) {
va_list argp;
struct timeval tv;
struct tm *tm;
if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) {
va_start(argp, fmt);
gettimeofday(&tv, NULL);
tm=localtime(&tv.tv_sec);
fprintf(stderr, "[ALERT] %03d/%02d%02d%02d (%d) : ",
tm->tm_yday, tm->tm_hour, tm->tm_min, tm->tm_sec, (int)getpid());
vfprintf(stderr, fmt, argp);
fflush(stderr);
va_end(argp);
}
}
/*
* Displays the message on stderr with the date and pid.
*/
void Warning(char *fmt, ...) {
va_list argp;
struct timeval tv;
struct tm *tm;
if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) {
va_start(argp, fmt);
gettimeofday(&tv, NULL);
tm=localtime(&tv.tv_sec);
fprintf(stderr, "[WARNING] %03d/%02d%02d%02d (%d) : ",
tm->tm_yday, tm->tm_hour, tm->tm_min, tm->tm_sec, (int)getpid());
vfprintf(stderr, fmt, argp);
fflush(stderr);
va_end(argp);
}
}
/*
* Displays the message on <out> only if quiet mode is not set.
*/
void qfprintf(FILE *out, char *fmt, ...) {
va_list argp;
if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) {
va_start(argp, fmt);
vfprintf(out, fmt, argp);
fflush(out);
va_end(argp);
}
}
/*
* converts <str> to a struct sockaddr_in* which is locally allocated.
* The format is "addr:port", where "addr" can be empty or "*" to indicate
* INADDR_ANY.
*/
struct sockaddr_in *str2sa(char *str) {
static struct sockaddr_in sa;
char *c;
int port;
memset(&sa, 0, sizeof(sa));
str=strdup(str);
if ((c=strrchr(str,':')) != NULL) {
*c++=0;
port=atol(c);
}
else
port=0;
if (*str == '*' || *str == '\0') { /* INADDR_ANY */
sa.sin_addr.s_addr = INADDR_ANY;
}
else if (!inet_pton(AF_INET, str, &sa.sin_addr)) {
struct hostent *he;
if ((he = gethostbyname(str)) == NULL) {
Alert("Invalid server name: '%s'\n", str);
}
else
sa.sin_addr = *(struct in_addr *) *(he->h_addr_list);
}
sa.sin_port=htons(port);
sa.sin_family=AF_INET;
free(str);
return &sa;
}
/*
* converts <str> to a list of listeners which are dynamically allocated.
* The format is "{addr|'*'}:port[-end][,{addr|'*'}:port[-end]]*", where :
* - <addr> can be empty or "*" to indicate INADDR_ANY ;
* - <port> is a numerical port from 1 to 65535 ;
* - <end> indicates to use the range from <port> to <end> instead (inclusive).
* This can be repeated as many times as necessary, separated by a coma.
* The <tail> argument is a pointer to a current list which should be appended
* to the tail of the new list. The pointer to the new list is returned.
*/
struct listener *str2listener(char *str, struct listener *tail) {
struct listener *l;
char *c, *next, *range, *dupstr;
int port, end;
next = dupstr = strdup(str);
while (next && *next) {
struct sockaddr_storage ss;
str = next;
/* 1) look for the end of the first address */
if ((next = strrchr(str, ',')) != NULL) {
*next++ = 0;
}
/* 2) look for the addr/port delimiter, it's the last colon. */
if ((range = strrchr(str, ':')) == NULL) {
Alert("Missing port number: '%s'\n", str);
}
*range++ = 0;
if (strrchr(str, ':') != NULL) {
/* IPv6 address contains ':' */
memset(&ss, 0, sizeof(ss));
ss.ss_family = AF_INET6;
if (!inet_pton(ss.ss_family, str, &((struct sockaddr_in6 *)&ss)->sin6_addr)) {
Alert("Invalid server address: '%s'\n", str);
}
}
else {
memset(&ss, 0, sizeof(ss));
ss.ss_family = AF_INET;
if (*str == '*' || *str == '\0') { /* INADDR_ANY */
((struct sockaddr_in *)&ss)->sin_addr.s_addr = INADDR_ANY;
}
else if (!inet_pton(ss.ss_family, str, &((struct sockaddr_in *)&ss)->sin_addr)) {
struct hostent *he;
if ((he = gethostbyname(str)) == NULL) {
Alert("Invalid server name: '%s'\n", str);
}
else
((struct sockaddr_in *)&ss)->sin_addr =
*(struct in_addr *) *(he->h_addr_list);
}
}
/* 3) look for the port-end delimiter */
if ((c = strchr(range, '-')) != NULL) {
*c++ = 0;
end = atol(c);
}
else {
end = atol(range);
}
for (port = atol(range); port <= end; port++) {
l = (struct listener *)calloc(1, sizeof(struct listener));
l->next = tail;
tail = l;
l->addr = ss;
if (ss.ss_family == AF_INET6)
((struct sockaddr_in6 *)(&l->addr))->sin6_port = htons(port);
else
((struct sockaddr_in *)(&l->addr))->sin_port = htons(port);
} /* end for(port) */
} /* end while(next) */
free(dupstr);
return tail;
}
#define FD_SETS_ARE_BITFIELDS
#ifdef FD_SETS_ARE_BITFIELDS
/*
* This map is used with all the FD_* macros to check whether a particular bit
* is set or not. Each bit represents an ACSII code. FD_SET() sets those bytes
* which should be encoded. When FD_ISSET() returns non-zero, it means that the
* byte should be encoded. Be careful to always pass bytes from 0 to 255
* exclusively to the macros.
*/
fd_set hdr_encode_map[(sizeof(fd_set) > (256/8)) ? 1 : ((256/8) / sizeof(fd_set))];
fd_set url_encode_map[(sizeof(fd_set) > (256/8)) ? 1 : ((256/8) / sizeof(fd_set))];
#else
#error "Check if your OS uses bitfields for fd_sets"
#endif
/* will try to encode the string <string> replacing all characters tagged in
* <map> with the hexadecimal representation of their ASCII-code (2 digits)
* prefixed by <escape>, and will store the result between <start> (included
*) and <stop> (excluded), and will always terminate the string with a '\0'
* before <stop>. The position of the '\0' is returned if the conversion
* completes. If bytes are missing between <start> and <stop>, then the
* conversion will be incomplete and truncated. If <stop> <= <start>, the '\0'
* cannot even be stored so we return <start> without writing the 0.
* The input string must also be zero-terminated.
*/
char hextab[16] = "0123456789ABCDEF";
char *encode_string(char *start, char *stop,
const char escape, const fd_set *map,
const char *string)
{
if (start < stop) {
stop--; /* reserve one byte for the final '\0' */
while (start < stop && *string != 0) {
if (!FD_ISSET((unsigned char)(*string), map))
*start++ = *string;
else {
if (start + 3 >= stop)
break;
*start++ = escape;
*start++ = hextab[(*string >> 4) & 15];
*start++ = hextab[*string & 15];
}
string++;
}
*start = '\0';
}
return start;
}
/*
* This function sends a syslog message to both log servers of a proxy,
* or to global log servers if the proxy is NULL.
* It also tries not to waste too much time computing the message header.
* It doesn't care about errors nor does it report them.
*/
void send_log(struct proxy *p, int level, char *message, ...) {
static int logfd = -1; /* syslog UDP socket */
static long tvsec = -1; /* to force the string to be initialized */
struct timeval tv;
va_list argp;
static char logmsg[MAX_SYSLOG_LEN];
static char *dataptr = NULL;
int fac_level;
int hdr_len, data_len;
struct sockaddr_in *sa[2];
int facilities[2], loglevel[2];
int nbloggers = 0;
char *log_ptr;
if (logfd < 0) {
if ((logfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0)
return;
}
if (level < 0 || progname == NULL || message == NULL)
return;
gettimeofday(&tv, NULL);
if (tv.tv_sec != tvsec || dataptr == NULL) {
/* this string is rebuild only once a second */
struct tm *tm = localtime(&tv.tv_sec);
tvsec = tv.tv_sec;
hdr_len = snprintf(logmsg, sizeof(logmsg),
"<<<<>%s %2d %02d:%02d:%02d %s[%d]: ",
monthname[tm->tm_mon],
tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec,
progname, pid);
/* WARNING: depending upon implementations, snprintf may return
* either -1 or the number of bytes that would be needed to store
* the total message. In both cases, we must adjust it.
*/
if (hdr_len < 0 || hdr_len > sizeof(logmsg))
hdr_len = sizeof(logmsg);
dataptr = logmsg + hdr_len;
}
va_start(argp, message);
data_len = vsnprintf(dataptr, logmsg + sizeof(logmsg) - dataptr, message, argp);
if (data_len < 0 || data_len > (logmsg + sizeof(logmsg) - dataptr))
data_len = logmsg + sizeof(logmsg) - dataptr;
va_end(argp);
dataptr[data_len - 1] = '\n'; /* force a break on ultra-long lines */
if (p == NULL) {
if (global.logfac1 >= 0) {
sa[nbloggers] = &global.logsrv1;
facilities[nbloggers] = global.logfac1;
loglevel[nbloggers] = global.loglev1;
nbloggers++;
}
if (global.logfac2 >= 0) {
sa[nbloggers] = &global.logsrv2;
facilities[nbloggers] = global.logfac2;
loglevel[nbloggers] = global.loglev2;
nbloggers++;
}
} else {
if (p->logfac1 >= 0) {
sa[nbloggers] = &p->logsrv1;
facilities[nbloggers] = p->logfac1;
loglevel[nbloggers] = p->loglev1;
nbloggers++;
}
if (p->logfac2 >= 0) {
sa[nbloggers] = &p->logsrv2;
facilities[nbloggers] = p->logfac2;
loglevel[nbloggers] = p->loglev2;
nbloggers++;
}
}
while (nbloggers-- > 0) {
/* we can filter the level of the messages that are sent to each logger */
if (level > loglevel[nbloggers])
continue;
/* For each target, we may have a different facility.
* We can also have a different log level for each message.
* This induces variations in the message header length.
* Since we don't want to recompute it each time, nor copy it every
* time, we only change the facility in the pre-computed header,
* and we change the pointer to the header accordingly.
*/
fac_level = (facilities[nbloggers] << 3) + level;
log_ptr = logmsg + 3; /* last digit of the log level */
do {
*log_ptr = '0' + fac_level % 10;
fac_level /= 10;
log_ptr--;
} while (fac_level && log_ptr > logmsg);
*log_ptr = '<';
/* the total syslog message now starts at logptr, for dataptr+data_len-logptr */
#ifndef MSG_NOSIGNAL
sendto(logfd, log_ptr, dataptr + data_len - log_ptr, MSG_DONTWAIT,
(struct sockaddr *)sa[nbloggers], sizeof(**sa));
#else
sendto(logfd, log_ptr, dataptr + data_len - log_ptr, MSG_DONTWAIT | MSG_NOSIGNAL,
(struct sockaddr *)sa[nbloggers], sizeof(**sa));
#endif
}
}
/* sets <tv> to the current time */
static inline struct timeval *tv_now(struct timeval *tv) {
if (tv)
gettimeofday(tv, NULL);
return tv;
}
/*
* adds <ms> ms to <from>, set the result to <tv> and returns a pointer <tv>
*/
static inline struct timeval *tv_delayfrom(struct timeval *tv, struct timeval *from, int ms) {
if (!tv || !from)
return NULL;
tv->tv_usec = from->tv_usec + (ms%1000)*1000;
tv->tv_sec = from->tv_sec + (ms/1000);
while (tv->tv_usec >= 1000000) {
tv->tv_usec -= 1000000;
tv->tv_sec++;
}
return tv;
}
/*
* compares <tv1> and <tv2> : returns 0 if equal, -1 if tv1 < tv2, 1 if tv1 > tv2
*/
static inline int tv_cmp(struct timeval *tv1, struct timeval *tv2) {
if (tv1->tv_sec < tv2->tv_sec)
return -1;
else if (tv1->tv_sec > tv2->tv_sec)
return 1;
else if (tv1->tv_usec < tv2->tv_usec)
return -1;
else if (tv1->tv_usec > tv2->tv_usec)
return 1;
else
return 0;
}
/*
* returns the absolute difference, in ms, between tv1 and tv2
*/
unsigned long tv_delta(struct timeval *tv1, struct timeval *tv2) {
int cmp;
unsigned long ret;
cmp = tv_cmp(tv1, tv2);
if (!cmp)
return 0; /* same dates, null diff */
else if (cmp < 0) {
struct timeval *tmp = tv1;
tv1 = tv2;
tv2 = tmp;
}
ret = (tv1->tv_sec - tv2->tv_sec) * 1000;
if (tv1->tv_usec > tv2->tv_usec)
ret += (tv1->tv_usec - tv2->tv_usec) / 1000;
else
ret -= (tv2->tv_usec - tv1->tv_usec) / 1000;
return (unsigned long) ret;
}
/*
* returns the difference, in ms, between tv1 and tv2
*/
static inline unsigned long tv_diff(struct timeval *tv1, struct timeval *tv2) {
unsigned long ret;
ret = (tv2->tv_sec - tv1->tv_sec) * 1000;
if (tv2->tv_usec > tv1->tv_usec)
ret += (tv2->tv_usec - tv1->tv_usec) / 1000;
else
ret -= (tv1->tv_usec - tv2->tv_usec) / 1000;
return (unsigned long) ret;
}
/*
* compares <tv1> and <tv2> modulo 1ms: returns 0 if equal, -1 if tv1 < tv2, 1 if tv1 > tv2
*/
static inline int tv_cmp_ms(struct timeval *tv1, struct timeval *tv2) {
if (tv1->tv_sec == tv2->tv_sec) {
if (tv2->tv_usec > tv1->tv_usec + 1000)
return -1;
else if (tv1->tv_usec > tv2->tv_usec + 1000)
return 1;
else
return 0;
}
else if ((tv2->tv_sec > tv1->tv_sec + 1) ||
((tv2->tv_sec == tv1->tv_sec + 1) && (tv2->tv_usec + 1000000 > tv1->tv_usec + 1000)))
return -1;
else if ((tv1->tv_sec > tv2->tv_sec + 1) ||
((tv1->tv_sec == tv2->tv_sec + 1) && (tv1->tv_usec + 1000000 > tv2->tv_usec + 1000)))
return 1;
else
return 0;
}
/*
* returns the remaining time between tv1=now and event=tv2
* if tv2 is passed, 0 is returned.
*/
static inline unsigned long tv_remain(struct timeval *tv1, struct timeval *tv2) {
unsigned long ret;
if (tv_cmp_ms(tv1, tv2) >= 0)
return 0; /* event elapsed */
ret = (tv2->tv_sec - tv1->tv_sec) * 1000;
if (tv2->tv_usec > tv1->tv_usec)
ret += (tv2->tv_usec - tv1->tv_usec) / 1000;
else
ret -= (tv1->tv_usec - tv2->tv_usec) / 1000;
return (unsigned long) ret;
}
/*
* zeroes a struct timeval
*/
static inline struct timeval *tv_eternity(struct timeval *tv) {
tv->tv_sec = tv->tv_usec = 0;
return tv;
}
/*
* returns 1 if tv is null, else 0
*/
static inline int tv_iseternity(struct timeval *tv) {
if (tv->tv_sec == 0 && tv->tv_usec == 0)
return 1;
else
return 0;
}
/*
* compares <tv1> and <tv2> : returns 0 if equal, -1 if tv1 < tv2, 1 if tv1 > tv2,
* considering that 0 is the eternity.
*/
static inline int tv_cmp2(struct timeval *tv1, struct timeval *tv2) {
if (tv_iseternity(tv1))
if (tv_iseternity(tv2))
return 0; /* same */
else
return 1; /* tv1 later than tv2 */
else if (tv_iseternity(tv2))
return -1; /* tv2 later than tv1 */
if (tv1->tv_sec > tv2->tv_sec)
return 1;
else if (tv1->tv_sec < tv2->tv_sec)
return -1;
else if (tv1->tv_usec > tv2->tv_usec)
return 1;
else if (tv1->tv_usec < tv2->tv_usec)
return -1;
else
return 0;
}
/*
* compares <tv1> and <tv2> modulo 1 ms: returns 0 if equal, -1 if tv1 < tv2, 1 if tv1 > tv2,
* considering that 0 is the eternity.
*/
static inline int tv_cmp2_ms(struct timeval *tv1, struct timeval *tv2) {
if (tv_iseternity(tv1))
if (tv_iseternity(tv2))
return 0; /* same */
else
return 1; /* tv1 later than tv2 */
else if (tv_iseternity(tv2))
return -1; /* tv2 later than tv1 */
if (tv1->tv_sec == tv2->tv_sec) {
if (tv1->tv_usec > tv2->tv_usec + 1000)
return 1;
else if (tv2->tv_usec > tv1->tv_usec + 1000)
return -1;
else
return 0;
}
else if ((tv1->tv_sec > tv2->tv_sec + 1) ||
((tv1->tv_sec == tv2->tv_sec + 1) && (tv1->tv_usec + 1000000 > tv2->tv_usec + 1000)))
return 1;
else if ((tv2->tv_sec > tv1->tv_sec + 1) ||
((tv2->tv_sec == tv1->tv_sec + 1) && (tv2->tv_usec + 1000000 > tv1->tv_usec + 1000)))
return -1;
else
return 0;
}
/*
* returns the first event between tv1 and tv2 into tvmin.
* a zero tv is ignored. tvmin is returned.
*/
static inline struct timeval *tv_min(struct timeval *tvmin,
struct timeval *tv1, struct timeval *tv2) {
if (tv_cmp2(tv1, tv2) <= 0)
*tvmin = *tv1;
else
*tvmin = *tv2;
return tvmin;
}
/***********************************************************/
/* fd management ***************************************/
/***********************************************************/
/* Deletes an FD from the fdsets, and recomputes the maxfd limit.
* The file descriptor is also closed.
*/
static inline void fd_delete(int fd) {
FD_CLR(fd, StaticReadEvent);
FD_CLR(fd, StaticWriteEvent);
close(fd);
fdtab[fd].state = FD_STCLOSE;
while ((maxfd-1 >= 0) && (fdtab[maxfd-1].state == FD_STCLOSE))
maxfd--;
}
/* recomputes the maxfd limit from the fd */
static inline void fd_insert(int fd) {
if (fd+1 > maxfd)
maxfd = fd+1;
}
/*************************************************************/
/* task management ***************************************/
/*************************************************************/
/* puts the task <t> in run queue <q>, and returns <t> */
static inline struct task *task_wakeup(struct task **q, struct task *t) {
if (t->state == TASK_RUNNING)
return t;
else {
t->rqnext = *q;
t->state = TASK_RUNNING;
return *q = t;
}
}
/* removes the task <t> from the queue <q>
* <s> MUST be <q>'s first task.
* set the run queue to point to the next one, and return it
*/
static inline struct task *task_sleep(struct task **q, struct task *t) {
if (t->state == TASK_RUNNING) {
*q = t->rqnext;
t->state = TASK_IDLE; /* tell that s has left the run queue */
}
return *q; /* return next running task */
}
/*
* removes the task <t> from its wait queue. It must have already been removed
* from the run queue. A pointer to the task itself is returned.
*/
static inline struct task *task_delete(struct task *t) {
t->prev->next = t->next;
t->next->prev = t->prev;
return t;
}
/*
* frees a task. Its context must have been freed since it will be lost.
*/
static inline void task_free(struct task *t) {
pool_free(task, t);
}
/* inserts <task> into its assigned wait queue, where it may already be. In this case, it
* may be only moved or left where it was, depending on its timing requirements.
* <task> is returned.
*/
struct task *task_queue(struct task *task) {
struct task *list = task->wq;
struct task *start_from;
/* first, test if the task was already in a list */
if (task->prev == NULL) {
// start_from = list;
start_from = list->prev;
#if STATTIME > 0
stats_tsk_new++;
#endif
/* insert the unlinked <task> into the list, searching back from the last entry */
while (start_from != list && tv_cmp2(&task->expire, &start_from->expire) < 0) {
start_from = start_from->prev;
#if STATTIME > 0
stats_tsk_nsrch++;
#endif
}
// while (start_from->next != list && tv_cmp2(&task->expire, &start_from->next->expire) > 0) {
// start_from = start_from->next;
// stats_tsk_nsrch++;
// }
}
else if (task->prev == list ||
tv_cmp2(&task->expire, &task->prev->expire) >= 0) { /* walk right */
start_from = task->next;
if (start_from == list || tv_cmp2(&task->expire, &start_from->expire) <= 0) {
#if STATTIME > 0
stats_tsk_good++;
#endif
return task; /* it's already in the right place */
}
#if STATTIME > 0
stats_tsk_right++;
#endif
/* if the task is not at the right place, there's little chance that
* it has only shifted a bit, and it will nearly always be queued
* at the end of the list because of constant timeouts
* (observed in real case).
*/
#ifndef WE_REALLY_THINK_THAT_THIS_TASK_MAY_HAVE_SHIFTED
start_from = list->prev; /* assume we'll queue to the end of the list */
while (start_from != list && tv_cmp2(&task->expire, &start_from->expire) < 0) {
start_from = start_from->prev;
#if STATTIME > 0
stats_tsk_lsrch++;
#endif
}
#else /* WE_REALLY_... */
/* insert the unlinked <task> into the list, searching after position <start_from> */
while (start_from->next != list && tv_cmp2(&task->expire, &start_from->next->expire) > 0) {
start_from = start_from->next;
#if STATTIME > 0
stats_tsk_rsrch++;
#endif
}
#endif /* WE_REALLY_... */
/* we need to unlink it now */
task_delete(task);
}
else { /* walk left. */
#if STATTIME > 0
stats_tsk_left++;
#endif
#ifdef LEFT_TO_TOP /* not very good */
start_from = list;
while (start_from->next != list && tv_cmp2(&task->expire, &start_from->next->expire) > 0) {
start_from = start_from->next;
#if STATTIME > 0
stats_tsk_lsrch++;
#endif
}
#else
start_from = task->prev->prev; /* valid because of the previous test above */
while (start_from != list && tv_cmp2(&task->expire, &start_from->expire) < 0) {
start_from = start_from->prev;
#if STATTIME > 0
stats_tsk_lsrch++;
#endif
}
#endif
/* we need to unlink it now */
task_delete(task);
}
task->prev = start_from;
task->next = start_from->next;
task->next->prev = task;
start_from->next = task;
return task;
}
/*********************************************************************/
/* more specific functions ***************************************/
/*********************************************************************/
/* some prototypes */
static int maintain_proxies(void);
/* this either returns the sockname or the original destination address. Code
* inspired from Patrick Schaaf's example of nf_getsockname() implementation.
*/
static int get_original_dst(int fd, struct sockaddr_in *sa, int *salen) {
#if defined(TPROXY) && defined(SO_ORIGINAL_DST)
return getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, (void *)sa, salen);
#else
#if defined(TPROXY) && defined(USE_GETSOCKNAME)
return getsockname(fd, (struct sockaddr *)sa, salen);
#else
return -1;
#endif
#endif
}
/*
* frees the context associated to a session. It must have been removed first.
*/
static inline void session_free(struct session *s) {
if (s->req)
pool_free(buffer, s->req);
if (s->rep)
pool_free(buffer, s->rep);
if (s->rsp_cap != NULL) {
struct cap_hdr *h;
for (h = s->proxy->rsp_cap; h; h = h->next) {
if (s->rsp_cap[h->index] != NULL)
pool_free_to(h->pool, s->rsp_cap[h->index]);
}
pool_free_to(s->proxy->rsp_cap_pool, s->rsp_cap);
}
if (s->req_cap != NULL) {
struct cap_hdr *h;
for (h = s->proxy->req_cap; h; h = h->next) {
if (s->req_cap[h->index] != NULL)
pool_free_to(h->pool, s->req_cap[h->index]);
}
pool_free_to(s->proxy->req_cap_pool, s->req_cap);
}
if (s->logs.uri)
pool_free(requri, s->logs.uri);
if (s->logs.cli_cookie)
pool_free(capture, s->logs.cli_cookie);
if (s->logs.srv_cookie)
pool_free(capture, s->logs.srv_cookie);
pool_free(session, s);
}
/*
* This function tries to find a running server for the proxy <px>. A first
* pass looks for active servers, and if none is found, a second pass also
* looks for backup servers.
* If no valid server is found, NULL is returned and px->cursrv is left undefined.
*/
static inline struct server *find_server(struct proxy *px) {
struct server *srv = px->cursrv;
int ignore_backup = 1;
do {
do {
if (srv == NULL)
srv = px->srv;
if (srv->state & SRV_RUNNING
&& !((srv->state & SRV_BACKUP) && ignore_backup))
return srv;
srv = srv->next;
} while (srv != px->cursrv);
} while (ignore_backup--);
return NULL;
}
/*
* This function initiates a connection to the current server (s->srv) if (s->direct)
* is set, or to the dispatch server if (s->direct) is 0. It returns 0 if
* it's OK, -1 if it's impossible.
*/
int connect_server(struct session *s) {
int fd;
// fprintf(stderr,"connect_server : s=%p\n",s);
if (s->flags & SN_DIRECT) { /* srv cannot be null */
s->srv_addr = s->srv->addr;
}
else if (s->proxy->options & PR_O_BALANCE) {
if (s->proxy->options & PR_O_BALANCE_RR) {
struct server *srv;
srv = find_server(s->proxy);
if (srv == NULL) /* no server left */
return -1;
s->srv_addr = srv->addr;
s->srv = srv;
s->proxy->cursrv = srv->next;
}
else /* unknown balancing algorithm */
return -1;
}
else if (*(int *)&s->proxy->dispatch_addr.sin_addr) {
/* connect to the defined dispatch addr */
s->srv_addr = s->proxy->dispatch_addr;
}
else if (s->proxy->options & PR_O_TRANSP) {
/* in transparent mode, use the original dest addr if no dispatch specified */
int salen = sizeof(struct sockaddr_in);
if (get_original_dst(s->cli_fd, &s->srv_addr, &salen) == -1) {
qfprintf(stderr, "Cannot get original server address.\n");
return -1;
}
}
/* if this server remaps proxied ports, we'll use
* the port the client connected to with an offset. */
if (s->srv != NULL && s->srv->state & SRV_MAPPORTS) {
struct sockaddr_in sockname;
int namelen;
namelen = sizeof(sockname);
if (get_original_dst(s->cli_fd, (struct sockaddr_in *)&sockname, &namelen) == -1)
getsockname(s->cli_fd, (struct sockaddr *)&sockname, &namelen);
s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) + ntohs(sockname.sin_port));
}
if ((fd = s->srv_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
qfprintf(stderr, "Cannot get a server socket.\n");
return -1;
}
if (fd >= global.maxsock) {
Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
close(fd);
return -1;
}
if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
(setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
close(fd);
return -1;
}
/* allow specific binding */
if (s->proxy->options & PR_O_BIND_SRC &&
bind(fd, (struct sockaddr *)&s->proxy->source_addr, sizeof(s->proxy->source_addr)) == -1) {
Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n", s->proxy->id);
close(fd);
return -1;
}
if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) && (errno != EINPROGRESS)) {
if (errno == EAGAIN) { /* no free ports left, try again later */
qfprintf(stderr,"Cannot connect, no free ports.\n");
close(fd);
return -1;
}
else if (errno != EALREADY && errno != EISCONN) {
close(fd);
return -1;
}
}
fdtab[fd].owner = s->task;
fdtab[fd].read = &event_srv_read;
fdtab[fd].write = &event_srv_write;
fdtab[fd].state = FD_STCONN; /* connection in progress */
FD_SET(fd, StaticWriteEvent); /* for connect status */
fd_insert(fd);
if (s->proxy->contimeout)
tv_delayfrom(&s->cnexpire, &now, s->proxy->contimeout);
else
tv_eternity(&s->cnexpire);
return 0;
}
/*
* this function is called on a read event from a client socket.
* It returns 0.
*/
int event_cli_read(int fd) {
struct task *t = fdtab[fd].owner;
struct session *s = t->context;
struct buffer *b = s->req;
int ret, max;
// fprintf(stderr,"event_cli_read : fd=%d, s=%p\n", fd, s);
if (fdtab[fd].state != FD_STERROR) {
while (1) {
if (b->l == 0) { /* let's realign the buffer to optimize I/O */
b->r = b->w = b->h = b->lr = b->data;
max = b->rlim - b->data;
}
else if (b->r > b->w) {
max = b->rlim - b->r;
}
else {
max = b->w - b->r;
/* FIXME: theorically, if w>0, we shouldn't have rlim < data+size anymore
* since it means that the rewrite protection has been removed. This
* implies that the if statement can be removed.
*/
if (max > b->rlim - b->data)
max = b->rlim - b->data;
}
if (max == 0) { /* not anymore room to store data */
FD_CLR(fd, StaticReadEvent);
break;
}
#ifndef MSG_NOSIGNAL
{
int skerr, lskerr;
lskerr = sizeof(skerr);
getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
if (skerr)
ret = -1;
else
ret = recv(fd, b->r, max, 0);
}
#else
ret = recv(fd, b->r, max, MSG_NOSIGNAL);
#endif
if (ret > 0) {
b->r += ret;
b->l += ret;
s->res_cr = RES_DATA;
if (b->r == b->data + BUFSIZE) {
b->r = b->data; /* wrap around the buffer */
}
b->total += ret;
/* we hope to read more data or to get a close on next round */
continue;
}
else if (ret == 0) {
s->res_cr = RES_NULL;
break;
}
else if (errno == EAGAIN) {/* ignore EAGAIN */
break;
}
else {
s->res_cr = RES_ERROR;
fdtab[fd].state = FD_STERROR;
break;
}
} /* while(1) */
}
else {
s->res_cr = RES_ERROR;
fdtab[fd].state = FD_STERROR;
}
if (s->res_cr != RES_SILENT) {
if (s->proxy->clitimeout && FD_ISSET(fd, StaticReadEvent))
tv_delayfrom(&s->crexpire, &now, s->proxy->clitimeout);
else
tv_eternity(&s->crexpire);
task_wakeup(&rq, t);
}
return 0;
}
/*
* this function is called on a read event from a server socket.
* It returns 0.
*/
int event_srv_read(int fd) {
struct task *t = fdtab[fd].owner;
struct session *s = t->context;
struct buffer *b = s->rep;
int ret, max;
// fprintf(stderr,"event_srv_read : fd=%d, s=%p\n", fd, s);
if (fdtab[fd].state != FD_STERROR) {
while (1) {
if (b->l == 0) { /* let's realign the buffer to optimize I/O */
b->r = b->w = b->h = b->lr = b->data;
max = b->rlim - b->data;
}
else if (b->r > b->w) {
max = b->rlim - b->r;
}
else {
max = b->w - b->r;
/* FIXME: theorically, if w>0, we shouldn't have rlim < data+size anymore
* since it means that the rewrite protection has been removed. This
* implies that the if statement can be removed.
*/
if (max > b->rlim - b->data)
max = b->rlim - b->data;
}
if (max == 0) { /* not anymore room to store data */
FD_CLR(fd, StaticReadEvent);
break;
}
#ifndef MSG_NOSIGNAL
{
int skerr, lskerr;
lskerr = sizeof(skerr);
getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
if (skerr)
ret = -1;
else
ret = recv(fd, b->r, max, 0);
}
#else
ret = recv(fd, b->r, max, MSG_NOSIGNAL);
#endif
if (ret > 0) {
b->r += ret;
b->l += ret;
s->res_sr = RES_DATA;
if (b->r == b->data + BUFSIZE) {
b->r = b->data; /* wrap around the buffer */
}
b->total += ret;
/* we hope to read more data or to get a close on next round */
continue;
}
else if (ret == 0) {
s->res_sr = RES_NULL;
break;
}
else if (errno == EAGAIN) {/* ignore EAGAIN */
break;
}
else {
s->res_sr = RES_ERROR;
fdtab[fd].state = FD_STERROR;
break;
}
} /* while(1) */
}
else {
s->res_sr = RES_ERROR;
fdtab[fd].state = FD_STERROR;
}
if (s->res_sr != RES_SILENT) {
if (s->proxy->srvtimeout && FD_ISSET(fd, StaticReadEvent))
tv_delayfrom(&s->srexpire, &now, s->proxy->srvtimeout);
else
tv_eternity(&s->srexpire);
task_wakeup(&rq, t);
}
return 0;
}
/*
* this function is called on a write event from a client socket.
* It returns 0.
*/
int event_cli_write(int fd) {
struct task *t = fdtab[fd].owner;
struct session *s = t->context;
struct buffer *b = s->rep;
int ret, max;
// fprintf(stderr,"event_cli_write : fd=%d, s=%p\n", fd, s);
if (b->l == 0) { /* let's realign the buffer to optimize I/O */
b->r = b->w = b->h = b->lr = b->data;
// max = BUFSIZE; BUG !!!!
max = 0;
}
else if (b->r > b->w) {
max = b->r - b->w;
}
else
max = b->data + BUFSIZE - b->w;
if (fdtab[fd].state != FD_STERROR) {
#ifndef MSG_NOSIGNAL
int skerr, lskerr;
#endif
if (max == 0) {
s->res_cw = RES_NULL;
task_wakeup(&rq, t);
tv_eternity(&s->cwexpire);
FD_CLR(fd, StaticWriteEvent);
return 0;
}
#ifndef MSG_NOSIGNAL
lskerr=sizeof(skerr);
getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
if (skerr)
ret = -1;
else
ret = send(fd, b->w, max, MSG_DONTWAIT);
#else
ret = send(fd, b->w, max, MSG_DONTWAIT | MSG_NOSIGNAL);
#endif
if (ret > 0) {
b->l -= ret;
b->w += ret;
s->res_cw = RES_DATA;
if (b->w == b->data + BUFSIZE) {
b->w = b->data; /* wrap around the buffer */
}
}
else if (ret == 0) {
/* nothing written, just make as if we were never called */
// s->res_cw = RES_NULL;
return 0;
}
else if (errno == EAGAIN) /* ignore EAGAIN */
return 0;
else {
s->res_cw = RES_ERROR;
fdtab[fd].state = FD_STERROR;
}
}
else {
s->res_cw = RES_ERROR;
fdtab[fd].state = FD_STERROR;
}
if (s->proxy->clitimeout) {
tv_delayfrom(&s->cwexpire, &now, s->proxy->clitimeout);
/* FIXME: to avoid the client to read-time-out during writes, we refresh it */
s->crexpire = s->cwexpire;
}
else
tv_eternity(&s->cwexpire);
task_wakeup(&rq, t);
return 0;
}
/*
* this function is called on a write event from a server socket.
* It returns 0.
*/
int event_srv_write(int fd) {
struct task *t = fdtab[fd].owner;
struct session *s = t->context;
struct buffer *b = s->req;
int ret, max;
//fprintf(stderr,"event_srv_write : fd=%d, s=%p\n", fd, s);
if (b->l == 0) { /* let's realign the buffer to optimize I/O */
b->r = b->w = b->h = b->lr = b->data;
// max = BUFSIZE; BUG !!!!
max = 0;
}
else if (b->r > b->w) {
max = b->r - b->w;
}
else
max = b->data + BUFSIZE - b->w;
if (fdtab[fd].state != FD_STERROR) {
#ifndef MSG_NOSIGNAL
int skerr, lskerr;
#endif
if (max == 0) {
/* may be we have received a connection acknowledgement in TCP mode without data */
s->res_sw = RES_NULL;
task_wakeup(&rq, t);
fdtab[fd].state = FD_STREADY;
tv_eternity(&s->swexpire);
FD_CLR(fd, StaticWriteEvent);
return 0;
}
#ifndef MSG_NOSIGNAL
lskerr=sizeof(skerr);
getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
if (skerr)
ret = -1;
else
ret = send(fd, b->w, max, MSG_DONTWAIT);
#else
ret = send(fd, b->w, max, MSG_DONTWAIT | MSG_NOSIGNAL);
#endif
fdtab[fd].state = FD_STREADY;
if (ret > 0) {
b->l -= ret;
b->w += ret;
s->res_sw = RES_DATA;
if (b->w == b->data + BUFSIZE) {
b->w = b->data; /* wrap around the buffer */
}
}
else if (ret == 0) {
/* nothing written, just make as if we were never called */
// s->res_sw = RES_NULL;
return 0;
}
else if (errno == EAGAIN) /* ignore EAGAIN */
return 0;
else {
s->res_sw = RES_ERROR;
fdtab[fd].state = FD_STERROR;
}
}
else {
s->res_sw = RES_ERROR;
fdtab[fd].state = FD_STERROR;
}
if (s->proxy->srvtimeout) {
tv_delayfrom(&s->swexpire, &now, s->proxy->srvtimeout);
/* FIXME: to avoid the server to read-time-out during writes, we refresh it */
s->srexpire = s->swexpire;
}
else
tv_eternity(&s->swexpire);
task_wakeup(&rq, t);
return 0;
}
/*
* returns a message to the client ; the connection is shut down for read,
* and the request is cleared so that no server connection can be initiated.
* The client must be in a valid state for this (HEADER, DATA ...).
* Nothing is performed on the server side.
* The reply buffer doesn't need to be empty before this.
*/
void client_retnclose(struct session *s, int len, const char *msg) {
FD_CLR(s->cli_fd, StaticReadEvent);
FD_SET(s->cli_fd, StaticWriteEvent);
tv_eternity(&s->crexpire);
shutdown(s->cli_fd, SHUT_RD);
s->cli_state = CL_STSHUTR;
strcpy(s->rep->data, msg);
s->rep->l = len;
s->rep->r = s->rep->h = s->rep->lr = s->rep->w = s->rep->data;
s->rep->r += len;
s->req->l = 0;
}
/*
* returns a message into the rep buffer, and flushes the req buffer.
* The reply buffer doesn't need to be empty before this.
*/
void client_return(struct session *s, int len, const char *msg) {
strcpy(s->rep->data, msg);
s->rep->l = len;
s->rep->r = s->rep->h = s->rep->lr = s->rep->w = s->rep->data;
s->rep->r += len;
s->req->l = 0;
}
/*
* send a log for the session when we have enough info about it
*/
void sess_log(struct session *s) {
char pn[INET6_ADDRSTRLEN + strlen(":65535")];
struct proxy *p = s->proxy;
int log;
char *uri;
char *pxid;
char *srv;
struct tm *tm;
/* This is a first attempt at a better logging system.
* For now, we rely on send_log() to provide the date, although it obviously
* is the date of the log and not of the request, and most fields are not
* computed.
*/
log = p->to_log & ~s->logs.logwait;
if (s->cli_addr.ss_family == AF_INET)
inet_ntop(AF_INET,
(const void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
pn, sizeof(pn));
else
inet_ntop(AF_INET6,
(const void *)&((struct sockaddr_in6 *)(&s->cli_addr))->sin6_addr,
pn, sizeof(pn));
uri = (log & LW_REQ) ? s->logs.uri ? s->logs.uri : "<BADREQ>" : "";
pxid = p->id;
srv = (p->to_log & LW_SVID) ? (s->srv != NULL) ? s->srv->id : "<NOSRV>" : "-";
tm = localtime(&s->logs.tv_accept.tv_sec);
if (p->to_log & LW_REQ) {
char tmpline[MAX_SYSLOG_LEN], *h;
int hdr;
h = tmpline;
if (p->to_log & LW_REQHDR && (h < tmpline + sizeof(tmpline) - 10)) {
*(h++) = ' ';
*(h++) = '{';
for (hdr = 0; hdr < p->nb_req_cap; hdr++) {
if (hdr)
*(h++) = '|';
if (s->req_cap[hdr] != NULL)
h = encode_string(h, tmpline + sizeof(tmpline) - 7, '#', hdr_encode_map, s->req_cap[hdr]);
}
*(h++) = '}';
}
if (p->to_log & LW_RSPHDR && (h < tmpline + sizeof(tmpline) - 7)) {
*(h++) = ' ';
*(h++) = '{';
for (hdr = 0; hdr < p->nb_rsp_cap; hdr++) {
if (hdr)
*(h++) = '|';
if (s->rsp_cap[hdr] != NULL)
h = encode_string(h, tmpline + sizeof(tmpline) - 4, '#', hdr_encode_map, s->rsp_cap[hdr]);
}
*(h++) = '}';
}
if (h < tmpline + sizeof(tmpline) - 4) {
*(h++) = ' ';
*(h++) = '"';
h = encode_string(h, tmpline + sizeof(tmpline) - 1, '#', url_encode_map, uri);
*(h++) = '"';
}
*h = '\0';
send_log(p, LOG_INFO, "%s:%d [%02d/%s/%04d:%02d:%02d:%02d] %s %s %d/%d/%d/%s%d %d %s%lld %s %s %c%c%c%c%s\n",
pn,
(s->cli_addr.ss_family == AF_INET) ?
ntohs(((struct sockaddr_in *)&s->cli_addr)->sin_port) :
ntohs(((struct sockaddr_in6 *)&s->cli_addr)->sin6_port),
tm->tm_mday, monthname[tm->tm_mon], tm->tm_year+1900,
tm->tm_hour, tm->tm_min, tm->tm_sec,
pxid, srv,
s->logs.t_request,
(s->logs.t_connect >= 0) ? s->logs.t_connect - s->logs.t_request : -1,
(s->logs.t_data >= 0) ? s->logs.t_data - s->logs.t_connect : -1,
(p->to_log & LW_BYTES) ? "" : "+", s->logs.t_close,
s->logs.status,
(p->to_log & LW_BYTES) ? "" : "+", s->logs.bytes,
s->logs.cli_cookie ? s->logs.cli_cookie : "-",
s->logs.srv_cookie ? s->logs.srv_cookie : "-",
sess_term_cond[(s->flags & SN_ERR_MASK) >> SN_ERR_SHIFT],
sess_fin_state[(s->flags & SN_FINST_MASK) >> SN_FINST_SHIFT],
(p->options & PR_O_COOK_ANY) ? sess_cookie[(s->flags & SN_CK_MASK) >> SN_CK_SHIFT] : '-',
(p->options & PR_O_COOK_ANY) ? sess_set_cookie[(s->flags & SN_SCK_MASK) >> SN_SCK_SHIFT] : '-',
tmpline);
}
else {
send_log(p, LOG_INFO, "%s:%d [%02d/%s/%04d:%02d:%02d:%02d] %s %s %d/%s%d %s%lld %c%c\n",
pn,
(s->cli_addr.ss_family == AF_INET) ?
ntohs(((struct sockaddr_in *)&s->cli_addr)->sin_port) :
ntohs(((struct sockaddr_in6 *)&s->cli_addr)->sin6_port),
tm->tm_mday, monthname[tm->tm_mon], tm->tm_year+1900,
tm->tm_hour, tm->tm_min, tm->tm_sec,
pxid, srv,
(s->logs.t_connect >= 0) ? s->logs.t_connect : -1,
(p->to_log & LW_BYTES) ? "" : "+", s->logs.t_close,
(p->to_log & LW_BYTES) ? "" : "+", s->logs.bytes,
sess_term_cond[(s->flags & SN_ERR_MASK) >> SN_ERR_SHIFT],
sess_fin_state[(s->flags & SN_FINST_MASK) >> SN_FINST_SHIFT]);
}
s->logs.logwait = 0;
}
/*
* this function is called on a read event from a listen socket, corresponding
* to an accept. It tries to accept as many connections as possible.
* It returns 0.
*/
int event_accept(int fd) {
struct proxy *p = (struct proxy *)fdtab[fd].owner;
struct session *s;
struct task *t;
int cfd;
while (p->nbconn < p->maxconn) {
struct sockaddr_storage addr;
int laddr = sizeof(addr);
if ((cfd = accept(fd, (struct sockaddr *)&addr, &laddr)) == -1)
return 0; /* nothing more to accept */
if ((s = pool_alloc(session)) == NULL) { /* disable this proxy for a while */
Alert("out of memory in event_accept().\n");
FD_CLR(fd, StaticReadEvent);
p->state = PR_STIDLE;
close(cfd);
return 0;
}
if ((t = pool_alloc(task)) == NULL) { /* disable this proxy for a while */
Alert("out of memory in event_accept().\n");
FD_CLR(fd, StaticReadEvent);
p->state = PR_STIDLE;
close(cfd);
pool_free(session, s);
return 0;
}
s->cli_addr = addr;
if (cfd >= global.maxsock) {
Alert("accept(): not enough free sockets. Raise -n argument. Giving up.\n");
close(cfd);
pool_free(task, t);
pool_free(session, s);
return 0;
}
if ((fcntl(cfd, F_SETFL, O_NONBLOCK) == -1) ||
(setsockopt(cfd, IPPROTO_TCP, TCP_NODELAY,
(char *) &one, sizeof(one)) == -1)) {
Alert("accept(): cannot set the socket in non blocking mode. Giving up\n");
close(cfd);
pool_free(task, t);
pool_free(session, s);
return 0;
}
t->next = t->prev = t->rqnext = NULL; /* task not in run queue yet */
t->wq = LIST_HEAD(wait_queue); /* but already has a wait queue assigned */
t->state = TASK_IDLE;
t->process = process_session;
t->context = s;
s->task = t;
s->proxy = p;
s->cli_state = (p->mode == PR_MODE_HTTP) ? CL_STHEADERS : CL_STDATA; /* no HTTP headers for non-HTTP proxies */
s->srv_state = SV_STIDLE;
s->req = s->rep = NULL; /* will be allocated later */
s->flags = 0;
s->res_cr = s->res_cw = s->res_sr = s->res_sw = RES_SILENT;
s->cli_fd = cfd;
s->srv_fd = -1;
s->srv = NULL;
s->conn_retries = p->conn_retries;
s->logs.logwait = p->to_log;
s->logs.tv_accept = now;
s->logs.t_request = -1;
s->logs.t_connect = -1;
s->logs.t_data = -1;
s->logs.t_close = 0;
s->logs.uri = NULL;
s->logs.cli_cookie = NULL;
s->logs.srv_cookie = NULL;
s->logs.status = -1;
s->logs.bytes = 0;
s->uniq_id = totalconn;
if (p->nb_req_cap > 0) {
if ((s->req_cap =
pool_alloc_from(p->req_cap_pool, p->nb_req_cap*sizeof(char *)))
== NULL) { /* no memory */
close(cfd); /* nothing can be done for this fd without memory */
pool_free(task, t);
pool_free(session, s);
return 0;
}
memset(s->req_cap, 0, p->nb_req_cap*sizeof(char *));
}
else
s->req_cap = NULL;
if (p->nb_rsp_cap > 0) {
if ((s->rsp_cap =
pool_alloc_from(p->rsp_cap_pool, p->nb_rsp_cap*sizeof(char *)))
== NULL) { /* no memory */
if (s->req_cap != NULL)
pool_free_to(p->req_cap_pool, s->req_cap);
close(cfd); /* nothing can be done for this fd without memory */
pool_free(task, t);
pool_free(session, s);
return 0;
}
memset(s->rsp_cap, 0, p->nb_rsp_cap*sizeof(char *));
}
else
s->rsp_cap = NULL;
if ((p->mode == PR_MODE_TCP || p->mode == PR_MODE_HTTP)
&& (p->logfac1 >= 0 || p->logfac2 >= 0)) {
struct sockaddr_storage sockname;
int namelen;
namelen = sizeof(sockname);
if (addr.ss_family != AF_INET ||
get_original_dst(cfd, (struct sockaddr_in *)&sockname, &namelen) == -1)
getsockname(cfd, (struct sockaddr *)&sockname, &namelen);
if (p->to_log) {
/* we have the client ip */
if (s->logs.logwait & LW_CLIP)
if (!(s->logs.logwait &= ~LW_CLIP))
sess_log(s);
}
else if (s->cli_addr.ss_family == AF_INET) {
char pn[INET_ADDRSTRLEN], sn[INET_ADDRSTRLEN];
if (inet_ntop(AF_INET, (const void *)&((struct sockaddr_in *)&sockname)->sin_addr,
sn, sizeof(sn)) &&
inet_ntop(AF_INET, (const void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
pn, sizeof(pn))) {
send_log(p, LOG_INFO, "Connect from %s:%d to %s:%d (%s/%s)\n",
pn, ntohs(((struct sockaddr_in *)&s->cli_addr)->sin_port),
sn, ntohs(((struct sockaddr_in *)&sockname)->sin_port),
p->id, (p->mode == PR_MODE_HTTP) ? "HTTP" : "TCP");
}
}
else {
char pn[INET6_ADDRSTRLEN], sn[INET6_ADDRSTRLEN];
if (inet_ntop(AF_INET6, (const void *)&((struct sockaddr_in6 *)&sockname)->sin6_addr,
sn, sizeof(sn)) &&
inet_ntop(AF_INET6, (const void *)&((struct sockaddr_in6 *)&s->cli_addr)->sin6_addr,
pn, sizeof(pn))) {
send_log(p, LOG_INFO, "Connect from %s:%d to %s:%d (%s/%s)\n",
pn, ntohs(((struct sockaddr_in6 *)&s->cli_addr)->sin6_port),
sn, ntohs(((struct sockaddr_in6 *)&sockname)->sin6_port),
p->id, (p->mode == PR_MODE_HTTP) ? "HTTP" : "TCP");
}
}
}
if ((global.mode & MODE_DEBUG) && (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))) {
struct sockaddr_in sockname;
int namelen;
int len;
namelen = sizeof(sockname);
if (addr.ss_family != AF_INET ||
get_original_dst(cfd, (struct sockaddr_in *)&sockname, &namelen) == -1)
getsockname(cfd, (struct sockaddr *)&sockname, &namelen);
if (s->cli_addr.ss_family == AF_INET) {
char pn[INET_ADDRSTRLEN];
inet_ntop(AF_INET,
(const void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
pn, sizeof(pn));
len = sprintf(trash, "%08x:%s.accept(%04x)=%04x from [%s:%d]\n",
s->uniq_id, p->id, (unsigned short)fd, (unsigned short)cfd,
pn, ntohs(((struct sockaddr_in *)&s->cli_addr)->sin_port));
}
else {
char pn[INET6_ADDRSTRLEN];
inet_ntop(AF_INET6,
(const void *)&((struct sockaddr_in6 *)(&s->cli_addr))->sin6_addr,
pn, sizeof(pn));
len = sprintf(trash, "%08x:%s.accept(%04x)=%04x from [%s:%d]\n",
s->uniq_id, p->id, (unsigned short)fd, (unsigned short)cfd,
pn, ntohs(((struct sockaddr_in6 *)(&s->cli_addr))->sin6_port));
}
write(1, trash, len);
}
if ((s->req = pool_alloc(buffer)) == NULL) { /* no memory */
if (s->rsp_cap != NULL)
pool_free_to(p->rsp_cap_pool, s->rsp_cap);
if (s->req_cap != NULL)
pool_free_to(p->req_cap_pool, s->req_cap);
close(cfd); /* nothing can be done for this fd without memory */
pool_free(task, t);
pool_free(session, s);
return 0;
}
s->req->l = 0;
s->req->total = 0;
s->req->h = s->req->r = s->req->lr = s->req->w = s->req->data; /* r and w will be reset further */
s->req->rlim = s->req->data + BUFSIZE;
if (s->cli_state == CL_STHEADERS) /* reserve some space for header rewriting */
s->req->rlim -= MAXREWRITE;
if ((s->rep = pool_alloc(buffer)) == NULL) { /* no memory */
pool_free(buffer, s->req);
if (s->rsp_cap != NULL)
pool_free_to(p->rsp_cap_pool, s->rsp_cap);
if (s->req_cap != NULL)
pool_free_to(p->req_cap_pool, s->req_cap);
close(cfd); /* nothing can be done for this fd without memory */
pool_free(task, t);
pool_free(session, s);
return 0;
}
s->rep->l = 0;
s->rep->total = 0;
s->rep->h = s->rep->r = s->rep->lr = s->rep->w = s->rep->rlim = s->rep->data;
fdtab[cfd].read = &event_cli_read;
fdtab[cfd].write = &event_cli_write;
fdtab[cfd].owner = t;
fdtab[cfd].state = FD_STREADY;
if (p->mode == PR_MODE_HEALTH) { /* health check mode, no client reading */
if (p->options & PR_O_HTTP_CHK) /* "option httpchk" will make it speak HTTP */
client_retnclose(s, 19, "HTTP/1.0 200 OK\r\n\r\n"); /* forge a 200 response */
else
client_retnclose(s, 3, "OK\n"); /* forge an "OK" response */
}
else {
FD_SET(cfd, StaticReadEvent);
}
fd_insert(cfd);
tv_eternity(&s->cnexpire);
tv_eternity(&s->srexpire);
tv_eternity(&s->swexpire);
tv_eternity(&s->cwexpire);
if (s->proxy->clitimeout)
tv_delayfrom(&s->crexpire, &now, s->proxy->clitimeout);
else
tv_eternity(&s->crexpire);
t->expire = s->crexpire;
task_queue(t);
if (p->mode != PR_MODE_HEALTH)
task_wakeup(&rq, t);
p->nbconn++;
actconn++;
totalconn++;
// fprintf(stderr, "accepting from %p => %d conn, %d total\n", p, actconn, totalconn);
} /* end of while (p->nbconn < p->maxconn) */
return 0;
}
/*
* This function is used only for server health-checks. It handles
* the connection acknowledgement. If the proxy requires HTTP health-checks,
* it sends the request. In other cases, it returns 1 if the socket is OK,
* or -1 if an error occured.
*/
int event_srv_chk_w(int fd) {
struct task *t = fdtab[fd].owner;
struct server *s = t->context;
int skerr, lskerr;
lskerr = sizeof(skerr);
getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
/* in case of TCP only, this tells us if the connection succeeded */
if (skerr)
s->result = -1;
else {
if (s->proxy->options & PR_O_HTTP_CHK) {
int ret;
/* we want to check if this host replies to "OPTIONS / HTTP/1.0"
* so we'll send the request, and won't wake the checker up now.
*/
#ifndef MSG_NOSIGNAL
ret = send(fd, s->proxy->check_req, s->proxy->check_len, MSG_DONTWAIT);
#else
ret = send(fd, s->proxy->check_req, s->proxy->check_len, MSG_DONTWAIT | MSG_NOSIGNAL);
#endif
if (ret == s->proxy->check_len) {
FD_SET(fd, StaticReadEvent); /* prepare for reading reply */
FD_CLR(fd, StaticWriteEvent); /* nothing more to write */
return 0;
}
else
s->result = -1;
}
else {
/* good TCP connection is enough */
s->result = 1;
}
}
task_wakeup(&rq, t);
return 0;
}
/*
* This function is used only for server health-checks. It handles
* the server's reply to an HTTP request. It returns 1 if the server replies
* 2xx or 3xx (valid responses), or -1 in other cases.
*/
int event_srv_chk_r(int fd) {
char reply[64];
int len;
struct task *t = fdtab[fd].owner;
struct server *s = t->context;
int skerr, lskerr;
lskerr = sizeof(skerr);
s->result = len = -1;
#ifndef MSG_NOSIGNAL
getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
if (!skerr)
len = recv(fd, reply, sizeof(reply), 0);
#else
/* Warning! Linux returns EAGAIN on SO_ERROR if data are still available
* but the connection was closed on the remote end. Fortunately, recv still
* works correctly and we don't need to do the getsockopt() on linux.
*/
len = recv(fd, reply, sizeof(reply), MSG_NOSIGNAL);
#endif
if ((len >= sizeof("HTTP/1.0 000")) &&
!memcmp(reply, "HTTP/1.", 7) &&
(reply[9] == '2' || reply[9] == '3')) /* 2xx or 3xx */
s->result = 1;
FD_CLR(fd, StaticReadEvent);
task_wakeup(&rq, t);
return 0;
}
/*
* this function writes the string <str> at position <pos> which must be in buffer <b>,
* and moves <end> just after the end of <str>.
* <b>'s parameters (l, r, w, h, lr) are recomputed to be valid after the shift.
* the shift value (positive or negative) is returned.
* If there's no space left, the move is not done.
*
*/
int buffer_replace(struct buffer *b, char *pos, char *end, char *str) {
int delta;
int len;
len = strlen(str);
delta = len - (end - pos);
if (delta + b->r >= b->data + BUFSIZE)
return 0; /* no space left */
/* first, protect the end of the buffer */
memmove(end + delta, end, b->data + b->l - end);
/* now, copy str over pos */
memcpy(pos, str,len);
/* we only move data after the displaced zone */
if (b->r > pos) b->r += delta;
if (b->w > pos) b->w += delta;
if (b->h > pos) b->h += delta;
if (b->lr > pos) b->lr += delta;
b->l += delta;
return delta;
}
/* same except that the string length is given, which allows str to be NULL if
* len is 0.
*/
int buffer_replace2(struct buffer *b, char *pos, char *end, char *str, int len) {
int delta;
delta = len - (end - pos);
if (delta + b->r >= b->data + BUFSIZE)
return 0; /* no space left */
/* first, protect the end of the buffer */
memmove(end + delta, end, b->data + b->l - end);
/* now, copy str over pos */
if (len)
memcpy(pos, str, len);
/* we only move data after the displaced zone */
if (b->r > pos) b->r += delta;
if (b->w > pos) b->w += delta;
if (b->h > pos) b->h += delta;
if (b->lr > pos) b->lr += delta;
b->l += delta;
return delta;
}
int exp_replace(char *dst, char *src, char *str, regmatch_t *matches) {
char *old_dst = dst;
while (*str) {
if (*str == '\\') {
str++;
if (isdigit((int)*str)) {
int len, num;
num = *str - '0';
str++;
if (matches[num].rm_eo > -1 && matches[num].rm_so > -1) {
len = matches[num].rm_eo - matches[num].rm_so;
memcpy(dst, src + matches[num].rm_so, len);
dst += len;
}
}
else if (*str == 'x') {
unsigned char hex1, hex2;
str++;
hex1=toupper(*str++) - '0'; hex2=toupper(*str++) - '0';
if (hex1 > 9) hex1 -= 'A' - '9' - 1;
if (hex2 > 9) hex2 -= 'A' - '9' - 1;
*dst++ = (hex1<<4) + hex2;
}
else
*dst++ = *str++;
}
else
*dst++ = *str++;
}
*dst = 0;
return dst - old_dst;
}
/*
* manages the client FSM and its socket. BTW, it also tries to handle the
* cookie. It returns 1 if a state has changed (and a resync may be needed),
* 0 else.
*/
int process_cli(struct session *t) {
int s = t->srv_state;
int c = t->cli_state;
struct buffer *req = t->req;
struct buffer *rep = t->rep;
#ifdef DEBUG_FULL
fprintf(stderr,"process_cli: c=%s, s=%s\n", cli_stnames[c], srv_stnames[s]);
#endif
//fprintf(stderr,"process_cli: c=%d, s=%d, cr=%d, cw=%d, sr=%d, sw=%d\n", c, s,
//FD_ISSET(t->cli_fd, StaticReadEvent), FD_ISSET(t->cli_fd, StaticWriteEvent),
//FD_ISSET(t->srv_fd, StaticReadEvent), FD_ISSET(t->srv_fd, StaticWriteEvent)
//);
if (c == CL_STHEADERS) {
/* now parse the partial (or complete) headers */
while (req->lr < req->r) { /* this loop only sees one header at each iteration */
char *ptr;
int delete_header;
ptr = req->lr;
/* look for the end of the current header */
while (ptr < req->r && *ptr != '\n' && *ptr != '\r')
ptr++;
if (ptr == req->h) { /* empty line, end of headers */
int line, len;
/* we can only get here after an end of headers */
/* we'll have something else to do here : add new headers ... */
if (t->flags & SN_CLDENY) {
/* no need to go further */
t->logs.status = 403;
client_retnclose(t, t->proxy->errmsg.len403, t->proxy->errmsg.msg403);
if (!(t->flags & SN_ERR_MASK))
t->flags |= SN_ERR_PRXCOND;
if (!(t->flags & SN_FINST_MASK))
t->flags |= SN_FINST_R;
return 1;
}
for (line = 0; line < t->proxy->nb_reqadd; line++) {
len = sprintf(trash, "%s\r\n", t->proxy->req_add[line]);
buffer_replace2(req, req->h, req->h, trash, len);
}
if (t->proxy->options & PR_O_FWDFOR) {
if (t->cli_addr.ss_family == AF_INET) {
unsigned char *pn;
pn = (unsigned char *)&((struct sockaddr_in *)&t->cli_addr)->sin_addr;
len = sprintf(trash, "X-Forwarded-For: %d.%d.%d.%d\r\n",
pn[0], pn[1], pn[2], pn[3]);
buffer_replace2(req, req->h, req->h, trash, len);
}
else if (t->cli_addr.ss_family == AF_INET6) {
char pn[INET6_ADDRSTRLEN];
inet_ntop(AF_INET6,
(const void *)&((struct sockaddr_in6 *)(&t->cli_addr))->sin6_addr,
pn, sizeof(pn));
len = sprintf(trash, "X-Forwarded-For: %s\r\n", pn);
buffer_replace2(req, req->h, req->h, trash, len);
}
}
/* add a "connection: close" line if needed */
if (t->proxy->options & PR_O_HTTP_CLOSE)
buffer_replace2(req, req->h, req->h, "Connection: close\r\n", 19);
if (!memcmp(req->data, "POST ", 5)) {
/* this is a POST request, which is not cacheable by default */
t->flags |= SN_POST;
}
t->cli_state = CL_STDATA;
req->rlim = req->data + BUFSIZE; /* no more rewrite needed */
t->logs.t_request = tv_diff(&t->logs.tv_accept, &now);
/* FIXME: we'll set the client in a wait state while we try to
* connect to the server. Is this really needed ? wouldn't it be
* better to release the maximum of system buffers instead ? */
//FD_CLR(t->cli_fd, StaticReadEvent);
//tv_eternity(&t->crexpire);
/* FIXME: if we break here (as up to 1.1.23), having the client
* shutdown its connection can lead to an abort further.
* it's better to either return 1 or even jump directly to the
* data state which will save one schedule.
*/
//break;
if (!t->proxy->clitimeout ||
(t->srv_state < SV_STDATA && t->proxy->srvtimeout))
/* If the client has no timeout, or if the server is not ready yet,
* and we know for sure that it can expire, then it's cleaner to
* disable the timeout on the client side so that too low values
* cannot make the sessions abort too early.
*/
tv_eternity(&t->crexpire);
goto process_data;
}
/* to get a complete header line, we need the ending \r\n, \n\r, \r or \n too */
if (ptr > req->r - 2) {
/* this is a partial header, let's wait for more to come */
req->lr = ptr;
break;
}
/* now we know that *ptr is either \r or \n,
* and that there are at least 1 char after it.
*/
if ((ptr[0] == ptr[1]) || (ptr[1] != '\r' && ptr[1] != '\n'))
req->lr = ptr + 1; /* \r\r, \n\n, \r[^\n], \n[^\r] */
else
req->lr = ptr + 2; /* \r\n or \n\r */
/*
* now we know that we have a full header ; we can do whatever
* we want with these pointers :
* req->h = beginning of header
* ptr = end of header (first \r or \n)
* req->lr = beginning of next line (next rep->h)
* req->r = end of data (not used at this stage)
*/
if (t->logs.logwait & LW_REQ) {
/* we have a complete HTTP request that we must log */
int urilen;
if ((t->logs.uri = pool_alloc(requri)) == NULL) {
Alert("HTTP logging : out of memory.\n");
t->logs.status = 500;
client_retnclose(t, t->proxy->errmsg.len500, t->proxy->errmsg.msg500);
if (!(t->flags & SN_ERR_MASK))
t->flags |= SN_ERR_PRXCOND;
if (!(t->flags & SN_FINST_MASK))
t->flags |= SN_FINST_R;
return 1;
}
urilen = ptr - req->h;
if (urilen >= REQURI_LEN)
urilen = REQURI_LEN - 1;
memcpy(t->logs.uri, req->h, urilen);
t->logs.uri[urilen] = 0;
if (!(t->logs.logwait &= ~LW_REQ))
sess_log(t);
}
else if (t->logs.logwait & LW_REQHDR) {
struct cap_hdr *h;
int len;
for (h = t->proxy->req_cap; h; h = h->next) {
if ((h->namelen + 2 <= ptr - req->h) &&
(req->h[h->namelen] == ':') &&
(strncasecmp(req->h, h->name, h->namelen) == 0)) {
if (t->req_cap[h->index] == NULL)
t->req_cap[h->index] = pool_alloc_from(h->pool, h->len + 1);
len = ptr - (req->h + h->namelen + 2);
if (len > h->len)
len = h->len;
memcpy(t->req_cap[h->index], req->h + h->namelen + 2, len);
t->req_cap[h->index][len]=0;
}
}
}
delete_header = 0;
if ((global.mode & MODE_DEBUG) && (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))) {
int len, max;
len = sprintf(trash, "%08x:%s.clihdr[%04x:%04x]: ", t->uniq_id, t->proxy->id, (unsigned short)t->cli_fd, (unsigned short)t->srv_fd);
max = ptr - req->h;
UBOUND(max, sizeof(trash) - len - 1);
len += strlcpy2(trash + len, req->h, max + 1);
trash[len++] = '\n';
write(1, trash, len);
}
/* remove "connection: " if needed */
if (!delete_header && (t->proxy->options & PR_O_HTTP_CLOSE)
&& (strncasecmp(req->h, "Connection: ", 12) == 0)) {
delete_header = 1;
}
/* try headers regexps */
if (!delete_header && t->proxy->req_exp != NULL
&& !(t->flags & SN_CLDENY)) {
struct hdr_exp *exp;
char term;
term = *ptr;
*ptr = '\0';
exp = t->proxy->req_exp;
do {