MINOR: add a new function call tracer for debugging purposes

This feature relies on GCC's ability to call helpers at function entry/exit
points. We define these helpers to quickly dump the minimum info into a trace
file that can be converted to a human readable format using a script in the
contrib/trace directory. This has only been implemented in the GNU makefile
for now on as it is unsure whether it's supported on all OSes.

The feature is enabled by building with "TRACE=1". The performance impact is
huge, so this feature should only be used when debugging. To limit the loss
of performance, fprintf() has been disabled and the output is hand-crafted
and emitted using fwrite(), resulting in doubling the performance. Using the
TSC instead of gettimeofday() also doubles the performance. Around 1200 conns/s
may be achieved on a Pentium-M 1.7 GHz which leads to around 50 MB/s of traces.

The entry and exits of all functions will be dumped into a file designated
by the HAPROXY_TRACE environment variable, or by default "trace.out". If the
trace file name is empty or "/dev/null", then traces are disabled. If
opening the trace file fails, then stderr is used. If HAPROXY_TRACE_FAST is
used, then the time is taken from the global <now> variable. Last, if
HAPROXY_TRACE_TSC is used, then the machine's TSC is used instead of the
real time (almost twice as fast).

The output format is :

  <sec.usec> <level> <caller_ptr> <dir> <callee_ptr>
or :
  <tsc> <level> <caller_ptr> <dir> <callee_ptr>

where <dir> is '>' when entering a function and '<' when leaving.

The awk script in contrib/trace provides a nicer indented output :

6f74989e6f8 ->->->   run_poll_loop > signal_process_queue [src/haproxy.c:1097:0x804bd69] > [include/proto/signal.h:32:0x8049cd0]
6f74989eb00          run_poll_loop < signal_process_queue [src/haproxy.c:1097:0x804bd69] < [include/proto/signal.h:32:0x8049cd0]
6f74989ef44 ->->->   run_poll_loop > wake_expired_tasks [src/haproxy.c:1100:0x804bd72] > [src/task.c:123:0x8055060]
6f74989f3a6 ->->->->   wake_expired_tasks > eb32_lookup_ge [src/task.c:128:0x8055091] > [ebtree/eb32tree.c:138:0x80a8c70]
6f74989f7e9            wake_expired_tasks < eb32_lookup_ge [src/task.c:128:0x8055091] < [ebtree/eb32tree.c:138:0x80a8c70]
6f74989fc0d ->->->->   wake_expired_tasks > eb32_first [src/task.c:134:0x80550d5] > [ebtree/eb32tree.h:55:0x8054ad0]
6f7498a003d ->->->->->   eb32_first > eb_first [ebtree/eb32tree.h:56:0x8054af1] > [ebtree/ebtree.h:520:0x8054a10]
6f7498a0436 ->->->->->->   eb_first > eb_walk_down [ebtree/ebtree.h:521:0x8054a33] > [ebtree/ebtree.h:442:0x80549a0]
6f7498a0843 ->->->->->->->   eb_walk_down > eb_gettag [ebtree/ebtree.h:445:0x80549d6] > [ebtree/ebtree.h:418:0x80548e0]
6f7498a0c2b                  eb_walk_down < eb_gettag [ebtree/ebtree.h:445:0x80549d6] < [ebtree/ebtree.h:418:0x80548e0]
6f7498a1042 ->->->->->->->   eb_walk_down > eb_untag [ebtree/ebtree.h:447:0x80549e2] > [ebtree/ebtree.h:412:0x80548a0]
6f7498a1498                  eb_walk_down < eb_untag [ebtree/ebtree.h:447:0x80549e2] < [ebtree/ebtree.h:412:0x80548a0]
6f7498a18c6 ->->->->->->->   eb_walk_down > eb_root_to_node [ebtree/ebtree.h:448:0x80549e7] > [ebtree/ebtree.h:432:0x8054960]
6f7498a1cd4                  eb_walk_down < eb_root_to_node [ebtree/ebtree.h:448:0x80549e7] < [ebtree/ebtree.h:432:0x8054960]
6f7498a20c4                eb_first < eb_walk_down [ebtree/ebtree.h:521:0x8054a33] < [ebtree/ebtree.h:442:0x80549a0]
6f7498a24b4              eb32_first < eb_first [ebtree/eb32tree.h:56:0x8054af1] < [ebtree/ebtree.h:520:0x8054a10]
6f7498a289c            wake_expired_tasks < eb32_first [src/task.c:134:0x80550d5] < [ebtree/eb32tree.h:55:0x8054ad0]
6f7498a2c8c          run_poll_loop < wake_expired_tasks [src/haproxy.c:1100:0x804bd72] < [src/task.c:123:0x8055060]
6f7498a3095 ->->->   run_poll_loop > process_runnable_tasks [src/haproxy.c:1103:0x804bd7a] > [src/task.c:190:0x8055150]

A nice improvement would possibly consist in trying to get the function's
arguments in the stack and to dump a few more infor for some well-known
functions (eg: the session's status for process_session).
diff --git a/Makefile b/Makefile
index a6c7962..c70dc94 100644
--- a/Makefile
+++ b/Makefile
@@ -122,6 +122,11 @@
 # use at all.
 DEBUG =
 
+#### Trace options
+# Use TRACE=1 to trace function calls to file "trace.out" or to stderr if not
+# possible.
+TRACE =
+
 #### Additional include and library dirs
 # Redefine this if you want to add some special PATH to include/libs
 ADDINC =
@@ -478,6 +483,12 @@
 COPTS += -DCONFIG_HAPROXY_DATE=\"$(VERDATE)\"
 endif
 
+ifneq ($(TRACE),)
+# if tracing is enabled, we want it to be as fast as possible
+TRACE_COPTS := $(filter-out -O0 -O1 -O2 -pg -finstrument-functions,$(COPTS)) -O3 -fomit-frame-pointer
+COPTS += -finstrument-functions
+endif
+
 #### Global link options
 # These options are added at the end of the "ld" command line. Use LDFLAGS to
 # add options at the beginning of the "ld" command line if needed.
@@ -523,6 +534,10 @@
               $(EBTREE_DIR)/ebmbtree.o $(EBTREE_DIR)/ebsttree.o \
               $(EBTREE_DIR)/ebimtree.o $(EBTREE_DIR)/ebistree.o
 
+ifneq ($(TRACE),)
+OBJS += src/trace.o
+endif
+
 # Not used right now
 LIB_EBTREE = $(EBTREE_DIR)/libebtree.a
 
@@ -538,6 +553,9 @@
 %.o:	%.c
 	$(CC) $(COPTS) -c -o $@ $<
 
+src/trace.o: src/trace.c
+	$(CC) $(TRACE_COPTS) -c -o $@ $<
+
 src/haproxy.o:	src/haproxy.c
 	$(CC) $(COPTS) \
 	      -DBUILD_TARGET='"$(strip $(TARGET))"' \