[MEDIUM] Add stick table (persistence) management functions and types
diff --git a/Makefile b/Makefile
index 64ccd50..daead76 100644
--- a/Makefile
+++ b/Makefile
@@ -459,7 +459,7 @@
OBJS = src/haproxy.o src/sessionhash.o src/base64.o src/protocols.o \
src/uri_auth.o src/standard.o src/buffers.o src/log.o src/task.o \
src/time.o src/fd.o src/pipe.o src/regex.o src/cfgparse.o src/server.o \
- src/checks.o src/queue.o src/client.o src/proxy.o src/proto_uxst.o \
+ src/checks.o src/queue.o src/client.o src/proxy.o src/stick_table.o src/proto_uxst.o \
src/proto_http.o src/stream_sock.o src/appsession.o src/backend.o \
src/lb_chash.o src/lb_fwlc.o src/lb_fwrr.o src/lb_map.o \
src/stream_interface.o src/dumpstats.o src/proto_tcp.o \
diff --git a/include/proto/stick_table.h b/include/proto/stick_table.h
new file mode 100644
index 0000000..1cc4624
--- /dev/null
+++ b/include/proto/stick_table.h
@@ -0,0 +1,37 @@
+/*
+ * include/proto/stick_table.h
+ * Functions for stick tables management.
+ *
+ * Copyright (C) 2009-2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _PROTO_STICK_TABLE_H
+#define _PROTO_STICK_TABLE_H
+
+#include <types/stick_table.h>
+
+struct stksess *stksess_new(struct stktable *t, struct stktable_key *key); /* NULL if <t> is full and cannot be purged, or on allocation failure */
+void stksess_key(struct stktable *t, struct stksess *ts, struct stktable_key *key); /* copy <key> into <ts> */
+void stksess_free(struct stktable *t, struct stksess *ts); /* give <ts> back to the pool of <t> and decrement t->current */
+
+int stktable_init(struct stktable *t); /* 1 if OK, 0 on error */
+int stktable_parse_type(char **args, int *idx, unsigned long *type, size_t *key_size); /* 0 if OK, 1 on error */
+int stktable_store(struct stktable *t, struct stksess *ts, int sid); /* 0 if <ts> was inserted, 1 if its key already existed (only that entry's sid is updated) */
+struct stksess *stktable_lookup(struct stktable *t, struct stktable_key *key); /* matching session, or NULL if none */
+
+
+#endif /* _PROTO_STICK_TABLE_H */
diff --git a/include/types/stick_table.h b/include/types/stick_table.h
new file mode 100644
index 0000000..fd0c806
--- /dev/null
+++ b/include/types/stick_table.h
@@ -0,0 +1,89 @@
+/*
+ * include/types/stick_table.h
+ * Macros, variables and structures for stick tables management.
+ *
+ * Copyright (C) 2009-2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _TYPES_STICK_TABLE_H
+#define _TYPES_STICK_TABLE_H
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <ebtree.h>
+#include <ebmbtree.h>
+#include <eb32tree.h>
+#include <common/memory.h>
+
+/* stick table key types; each value is also the index of the type in stktable_types[] (src/stick_table.c) */
+#define STKTABLE_TYPE_IP 0 /* table key is ipv4 */
+#define STKTABLE_TYPE_INTEGER 1 /* table key is unsigned 32bit integer */
+#define STKTABLE_TYPE_STRING 2 /* table key is a null terminated string */
+
+#define STKTABLE_TYPES 3 /* number of key types: increase this value if you add a type, and add its stktable_types[] entry */
+
+/* stick table type flags */
+#define STKTABLE_TYPEFLAG_CUSTOMKEYSIZE 0x00000001 /* this table type maxsize is configurable ("len" keyword of the type) */
+
+/* stick table keyword type: describes one key type as known by stktable_parse_type() */
+struct stktable_type {
+ const char *kw; /* keyword string naming the type in the configuration */
+ int flags; /* type flags (STKTABLE_TYPEFLAG_*) */
+ size_t default_size; /* default key size in bytes, used unless a custom size is parsed */
+};
+
+/* stuck session: one table entry binding a key to a server id; the key bytes are accessed through keys.key */
+struct stksess {
+ int sid; /* id of server to use for session */
+ unsigned int expire; /* session expiration date; may differ from exps.key until the purge functions requeue the node */
+ struct eb32_node exps; /* ebtree node used to hold the session in expiration tree */
+ struct ebmb_node keys; /* ebtree node used to hold the session in table; presumably must stay last, entries being allocated as sizeof(struct stksess) + key_size - confirm */
+};
+
+
+/* stick table: sessions indexed by key (keys) and queued by expiration date (exps) */
+struct stktable {
+ struct eb_root keys; /* head of stuck session tree */
+ struct eb_root exps; /* head of stuck session expiration tree */
+ struct pool_head *pool; /* pool used to allocate stuck sessions */
+ struct task *exp_task; /* expiration task, only created by stktable_init() when <expire> is not 0 */
+ unsigned long type; /* type of table (determine key format), one of STKTABLE_TYPE_* */
+ size_t key_size; /* size of a key, maximum size in case of string (terminating 0 included) */
+ unsigned int size; /* maximum stuck session in table */
+ unsigned int current; /* number of stuck session in table */
+ int nopurge; /* 1 never purge stuck sessions: stksess_new() then fails once the table is full */
+ int exp_next; /* next expiration date */
+ int expire; /* duration before expiration of stuck session (converted with MS_TO_TICKS() when storing) */
+};
+
+/* stick table key data: one member per key type. NOTE(review): BUFSIZE is not defined by the headers included here - presumably provided by an earlier include; confirm */
+union stktable_key_data {
+ struct in_addr ip; /* used to store an ip key */
+ uint32_t integer; /* used to store an integer key */
+ char buf[BUFSIZE]; /* used to store a null terminated string key */
+};
+
+/* stick table key, as given to stksess_new(), stksess_key() and stktable_lookup() */
+struct stktable_key {
+ void *key; /* pointer on key buffer (presumably <data> below or caller memory - confirm against the callers) */
+ size_t key_len; /* data len to read in <key> in case of string, terminating 0 excluded; other types read the table's key_size */
+ union stktable_key_data data; /* storage available for the key value */
+};
+
+#endif /* _TYPES_STICK_TABLE_H */
+
diff --git a/src/stick_table.c b/src/stick_table.c
new file mode 100644
index 0000000..0d70e32
--- /dev/null
+++ b/src/stick_table.c
@@ -0,0 +1,334 @@
+/*
+ * Stick tables management functions.
+ *
+ * Copyright 2009-2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <string.h>
+
+#include <common/config.h>
+#include <common/memory.h>
+#include <common/mini-clist.h>
+#include <common/standard.h>
+#include <common/time.h>
+
+#include <ebmbtree.h>
+#include <ebsttree.h>
+
+#include <types/stick_table.h>
+
+#include <proto/proxy.h>
+#include <proto/session.h>
+#include <proto/task.h>
+
+
+/*
+ * Release the sticked session <ts>, giving it back to the pool of table <t>.
+ * The sticked session counter of <t> is decremented; trees are not touched.
+ */
+void stksess_free(struct stktable *t, struct stksess *ts)
+{
+	t->current -= 1;
+	pool_free2(t->pool, ts);
+}
+
+/*
+ * Copy <key> into sticked session <ts> of table <t>; string keys are cut to key_size-1 bytes and 0-terminated.
+ */
+void stksess_key(struct stktable *t, struct stksess *ts, struct stktable_key *key)
+{
+	/* fixed-size keys are copied whole, strings up to the room left before the terminator */
+	size_t len = (t->type == STKTABLE_TYPE_STRING) ? MIN(t->key_size - 1, key->key_len) : t->key_size;
+
+	memcpy(ts->keys.key, key->key, len);
+	if (t->type == STKTABLE_TYPE_STRING)
+		ts->keys.key[len] = 0;
+}
+
+
+/*
+ * Reset sticked session <ts> (both tree nodes unlinked, server id 0), load <key> into it and return <ts>; <ts>->expire is not set here.
+ */
+struct stksess *stksess_init(struct stktable *t, struct stksess * ts, struct stktable_key *key)
+{
+	ts->sid = 0;
+	ts->exps.node.leaf_p = NULL;
+	ts->keys.node.leaf_p = NULL;
+	stksess_key(t, ts, key);
+
+	return ts;
+}
+
+/*
+ * Trash up to <to_batch> of the oldest sticked sessions from table <t>, whether expired or not.
+ * Returns number of trashed sticked session (0 if <to_batch> is 0 or nothing is queued).
+ */
+static int stktable_trash_oldest(struct stktable *t, int to_batch)
+{
+ struct stksess *ts;
+ struct eb32_node *eb;
+ int batched = 0;
+
+ eb = eb32_lookup_ge(&t->exps, now_ms - TIMER_LOOK_BACK);
+
+ while (batched < to_batch) {
+
+ if (unlikely(!eb)) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now.
+ */
+ eb = eb32_first(&t->exps);
+ if (likely(!eb))
+ break;
+ }
+
+ /* take the next queued session and detach it from the expiration tree */
+ ts = eb32_entry(eb, struct stksess, exps);
+ eb = eb32_next(eb);
+
+ eb32_delete(&ts->exps);
+
+ if (ts->expire != ts->exps.key) { /* date changed since the session was queued: do not trash it */
+
+ if (!tick_isset(ts->expire))
+ continue; /* no date set: the session stays in the table but is no longer queued */
+
+ ts->exps.key = ts->expire; /* requeue it at its current date */
+
+ eb32_insert(&t->exps, &ts->exps);
+
+ if (!eb || eb->key > ts->exps.key)
+ eb = &ts->exps; /* it now sorts before the next candidate: visit it first */
+
+ continue;
+ }
+ /* the queued date is still the session's date: remove it from the table and free it */
+
+ ebmb_delete(&ts->keys);
+ stksess_free(t, ts);
+ batched++;
+ }
+
+ return batched;
+}
+
+/*
+ * Allocate and initialise a new sticked session for <key> in table <t>.
+ * Returns it, or NULL if the table is full and no entry could be purged
+ * (t->nopurge set, or nothing to trash), or in case of lack of memory.
+ * Sticked sessions should only be allocated this way, and must be freed using
+ * stksess_free(). Increases table <t> sticked session counter on success.
+ */
+struct stksess *stksess_new(struct stktable *t, struct stktable_key *key)
+{
+	struct stksess *ts;
+
+	if (unlikely(t->current == t->size)) {
+		if (t->nopurge)
+			return NULL;
+		/* purge at least one entry: "size >> 8" alone is 0 for tables under 256 entries */
+		if (!stktable_trash_oldest(t, (t->size >> 8) + 1))
+			return NULL;
+	}
+
+	ts = pool_alloc2(t->pool);
+	if (ts) {
+		t->current++;
+		stksess_init(t, ts, key);
+	}
+
+	return ts;
+}
+
+/*
+ * Lookup in table <t> for a sticked session identified by <key>.
+ * Returns pointer on requested sticked session or NULL if no one found.
+ */
+struct stksess *stktable_lookup(struct stktable *t, struct stktable_key *key)
+{
+	struct ebmb_node *eb;
+
+	/* string keys are stored cut to key_size-1 bytes by stksess_key(): look them up the same way */
+	if (t->type == STKTABLE_TYPE_STRING)
+		eb = ebst_lookup_len(&t->keys, key->key, MIN(t->key_size - 1, key->key_len));
+	else
+		eb = ebmb_lookup(&t->keys, key->key, t->key_size);
+
+	if (unlikely(!eb)) {
+		/* no session found */
+		return NULL;
+	}
+
+	/* existing session: return the entry embedding this node */
+	return ebmb_entry(eb, struct stksess, keys);
+}
+
+/*
+ * Store sticked session <tsess> in table <t> for server <sid> if its key is not present yet (returns 0).
+ * If already present, only the existing session's server id is updated and <tsess> is not inserted (returns 1).
+ */
+int stktable_store(struct stktable *t, struct stksess *tsess, int sid)
+{
+	struct ebmb_node *found;
+	struct stksess *known;
+
+	if (t->type != STKTABLE_TYPE_STRING)
+		found = ebmb_lookup(&t->keys, tsess->keys.key, t->key_size);
+	else
+		found = ebst_lookup(&t->keys, (char *)tsess->keys.key);
+
+	if (likely(found != NULL)) {
+		/* key already known: bind the existing entry to <sid>, its expiration date is not changed */
+		known = ebmb_entry(found, struct stksess, keys);
+		known->sid = sid;
+		return 1;
+	}
+
+	/* new key: index the session by key, then queue it by expiration date */
+	tsess->sid = sid;
+	ebmb_insert(&t->keys, &tsess->keys, t->key_size);
+
+	tsess->expire = tick_add(now_ms, MS_TO_TICKS(t->expire));
+	tsess->exps.key = tsess->expire;
+	eb32_insert(&t->exps, &tsess->exps);
+	if (t->expire) {
+		t->exp_next = tick_first(tsess->expire, t->exp_next);
+		t->exp_task->expire = t->exp_next;
+		task_queue(t->exp_task);
+	}
+	return 0;
+}
+
+/*
+ * Trash expired sticked sessions from table <t>; returns the next expiration date, also stored in t->exp_next (TICK_ETERNITY if nothing is left queued).
+ */
+static int stktable_trash_expired(struct stktable *t)
+{
+ struct stksess *ts;
+ struct eb32_node *eb;
+
+ eb = eb32_lookup_ge(&t->exps, now_ms - TIMER_LOOK_BACK);
+
+ while (1) {
+ if (unlikely(!eb)) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now.
+ */
+ eb = eb32_first(&t->exps);
+ if (likely(!eb))
+ break;
+ }
+
+ if (likely(tick_is_lt(now_ms, eb->key))) {
+ /* timer not expired yet, revisit it later */
+ t->exp_next = eb->key;
+ return t->exp_next;
+ }
+
+ /* timer looks expired, detach it from the queue */
+ ts = eb32_entry(eb, struct stksess, exps);
+ eb = eb32_next(eb);
+
+ eb32_delete(&ts->exps);
+
+ if (!tick_is_expired(ts->expire, now_ms)) { /* the queued key was outdated: the session itself is not expired */
+ if (!tick_isset(ts->expire))
+ continue; /* no date set: the session stays in the table but is no longer queued */
+
+ ts->exps.key = ts->expire; /* requeue it at its current date */
+ eb32_insert(&t->exps, &ts->exps);
+
+ if (!eb || eb->key > ts->exps.key)
+ eb = &ts->exps; /* it now sorts before the next candidate: visit it first */
+ continue;
+ }
+
+ /* session expired, trash it */
+ ebmb_delete(&ts->keys);
+ stksess_free(t, ts);
+ }
+
+ /* the expiration tree is empty: no date to wait for */
+ t->exp_next = TICK_ETERNITY;
+ return t->exp_next;
+}
+
+/*
+ * Task handler of a stick table: trashes the expired sticked sessions of the table found in <task>'s context, then sets the task's next date.
+ */
+static struct task *process_table_expire(struct task * task)
+{
+	struct stktable *table = task->context;
+	int next_date = stktable_trash_expired(table);
+	task->expire = next_date;
+	return task;
+}
+
+/* Perform minimal initializations of table <t>; report 0 in case of error (pool or expiration task allocation), 1 if OK or if t->size is 0 (nothing to do). */
+int stktable_init(struct stktable *t)
+{
+	if (t->size) {
+		memset(&t->keys, 0, sizeof(t->keys));
+		memset(&t->exps, 0, sizeof(t->exps));
+		t->pool = create_pool("sticktables", sizeof(struct stksess) + t->key_size, MEM_F_SHARED);
+		t->exp_next = TICK_ETERNITY;
+		if (t->expire) {
+			t->exp_task = task_new();
+			if (!t->exp_task)
+				return 0; /* do not dereference a failed task allocation */
+			t->exp_task->process = process_table_expire;
+			t->exp_task->expire = TICK_ETERNITY;
+			t->exp_task->context = (void *)t;
+		}
+		return t->pool != NULL;
+	}
+	return 1;
+}
+
+/*
+ * Configuration keywords of known table types, indexed by STKTABLE_TYPE_*: { keyword, flags, default key size }
+ */
+struct stktable_type stktable_types[STKTABLE_TYPES] = { { "ip", 0, 4 } ,
+ { "integer", 0, 4 },
+ { "string", STKTABLE_TYPEFLAG_CUSTOMKEYSIZE, 32 } };
+
+
+/*
+ * Parse table type configuration: the type keyword at args[*myidx], then "len <n>" if present for sizable types.
+ * Returns 0 on successful parsing (<type> and <key_size> are set), else 1 (unknown type, or <n> not a positive number).
+ * <myidx> is set at next configuration <args> index. Assumes <args> entries read here are never NULL - confirm with caller.
+ */
+int stktable_parse_type(char **args, int *myidx, unsigned long *type, size_t *key_size)
+{
+	for (*type = 0; *type < STKTABLE_TYPES; (*type)++) {
+		if (strcmp(args[*myidx], stktable_types[*type].kw) != 0)
+			continue;
+
+		*key_size = stktable_types[*type].default_size;
+		(*myidx)++;
+
+		if (stktable_types[*type].flags & STKTABLE_TYPEFLAG_CUSTOMKEYSIZE) {
+			if (strcmp("len", args[*myidx]) == 0) {
+				long len = atol(args[++(*myidx)]);
+
+				if (len <= 0) /* not a number, zero, or negative (which would wrap once stored in a size_t) */
+					break;
+				/* null terminated string needs +1 for '\0'. */
+				*key_size = (size_t)len + 1;
+				(*myidx)++;
+			}
+		}
+		return 0;
+	}
+	return 1;
+}
+
+