[MEDIUM] New option http_proxy

Hello,

Please find attached an updated version of the previously submitted patch.
It polishes some parts and extends the ACL engine to match the IP and PORT parsed
from the HTTP request (and takes care of the comments made by Willy ! ;))

Best regards,
Alexandre
diff --git a/doc/configuration.txt b/doc/configuration.txt
index 149f330..5ebe3cc 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -275,6 +275,7 @@
 option httplog              X          X         X         X
 option logasap              X          X         X         -
 option nolinger             X          X         X         X
+option http_proxy           X          X         X         X
 option persist              X          -         X         X
 option redispatch           X          -         X         X
 option smtpchk              X          -         X         X
@@ -539,6 +540,15 @@
   used any time, but it is important to remember that regex matching is slower
   than other methods. See also "path_reg" and all "url_" criteria.
 
+url_ip <ip_address>
+  Applies to the IP address parsed from the URL of the HTTP request. It can be
+  used to prevent access to certain resources such as the local network. It is
+  useful with option 'http_proxy'.
+
+url_port <integer>
+  Applies to the port parsed from the URL of the HTTP request. It can be used
+  to prevent access to certain resources. It is useful with option 'http_proxy'.
+
 hdr <string> 
 hdr(header) <string>
   Note: all the "hdr*" matching criteria either apply to all headers, or to a
diff --git a/examples/option-http_proxy.cfg b/examples/option-http_proxy.cfg
new file mode 100644
index 0000000..8f73c3b
--- /dev/null
+++ b/examples/option-http_proxy.cfg
@@ -0,0 +1,53 @@
+#
+# demo config for Proxy mode
+# 
+
+global
+        maxconn         20000
+	ulimit-n	16384
+        log             127.0.0.1 local0
+        uid             200
+        gid             200
+        chroot          /var/empty
+	nbproc		4
+        daemon
+
+frontend test-proxy
+	bind		192.168.200.10:8080
+        mode            http
+        log             global
+        option          httplog
+        option          dontlognull
+        option          httpclose
+        option          nolinger
+        option          http_proxy
+        maxconn         8000
+        clitimeout      30000
+
+	# layer3: Valid users
+	acl allow_host src 192.168.200.150/32
+	block if !allow_host
+
+	# layer7: prevent private network relaying
+	acl forbidden_dst url_ip 192.168.0.0/24
+	acl forbidden_dst url_ip 172.16.0.0/12
+	acl forbidden_dst url_ip 10.0.0.0/8
+	block if forbidden_dst
+
+	default_backend test-proxy-srv
+
+
+backend test-proxy-srv
+	mode            http
+	contimeout      5000
+	srvtimeout      5000
+	retries         2
+	option          nolinger
+	option          http_proxy
+
+	# layer7: Only GET method is valid
+	acl valid_method        method GET
+	block if !valid_method
+
+	# layer7: protect bad reply
+	rspdeny ^Content-Type:[\ ]*audio/mp3
diff --git a/include/common/standard.h b/include/common/standard.h
index 90ed618..80f6d2a 100644
--- a/include/common/standard.h
+++ b/include/common/standard.h
@@ -132,6 +132,11 @@
  */
 int str2net(const char *str, struct in_addr *addr, struct in_addr *mask);
 
+/*
+ * Resolve destination server from URL. Convert the <ulen>-byte <url> to a sockaddr_in*.
+ */
+int url2sa(const char *url, int ulen, struct sockaddr_in *addr);
+
 /* will try to encode the string <string> replacing all characters tagged in
  * <map> with the hexadecimal representation of their ASCII-code (2 digits)
  * prefixed by <escape>, and will store the result between <start> (included)
diff --git a/include/types/proxy.h b/include/types/proxy.h
index 6ffc264..2a75fad 100644
--- a/include/types/proxy.h
+++ b/include/types/proxy.h
@@ -101,6 +101,7 @@
 
 #define PR_O_TCPSPLICE	0x08000000      /* delegate data transfer to linux kernel's tcp_splice */
 #define PR_O_CONTSTATS	0x10000000	/* continous counters */
+#define PR_O_HTTP_PROXY 0x20000000	/* Enable session to use HTTP proxy operations */
 
 /* This structure is used to apply fast weighted round robin on a server group */
 struct fwrr_group {
diff --git a/src/backend.c b/src/backend.c
index 91027e8..a8676b3 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -936,6 +936,10 @@
 				return SRV_STATUS_INTERNAL;
 			}
 		}
+		else if (s->be->options & PR_O_HTTP_PROXY) {
+			if (!s->srv_addr.sin_addr.s_addr)
+				return SRV_STATUS_NOSRV;
+		}
 		else if (!*(int *)&s->be->dispatch_addr.sin_addr &&
 			 !(s->fe->options & PR_O_TRANSP)) {
 			return SRV_STATUS_NOSRV;
@@ -999,6 +1003,10 @@
 			return SRV_STATUS_INTERNAL;
 		}
 	}
+	else if (s->be->options & PR_O_HTTP_PROXY) {
+		/* If HTTP PROXY option is set, then server is already assigned
+		 * during incoming client request parsing. */
+	}
 	else {
 		/* no server and no LB algorithm ! */
 		return SRV_STATUS_INTERNAL;
diff --git a/src/cfgparse.c b/src/cfgparse.c
index 4753012..affb956 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -97,6 +97,7 @@
 	{ "keepalive",    PR_O_KEEPALIVE,  PR_CAP_NONE, 0 },
 	{ "httpclose",    PR_O_HTTP_CLOSE, PR_CAP_FE | PR_CAP_BE, 0 },
 	{ "nolinger",     PR_O_TCP_NOLING, PR_CAP_FE | PR_CAP_BE, 0 },
+	{ "http_proxy",	  PR_O_HTTP_PROXY, PR_CAP_FE | PR_CAP_BE, 0 },
 	{ "logasap",      PR_O_LOGASAP,    PR_CAP_FE, 0 },
 	{ "contstats",    PR_O_CONTSTATS,  PR_CAP_FE, 0 },
 	{ "abortonclose", PR_O_ABRT_CLOSE, PR_CAP_BE, 0 },
@@ -2473,7 +2474,7 @@
 		}
 		else if (curproxy->cap & PR_CAP_BE &&
 			 ((curproxy->mode != PR_MODE_HEALTH) &&
-			  !(curproxy->options & PR_O_TRANSP) &&
+			  !(curproxy->options & (PR_O_TRANSP | PR_O_HTTP_PROXY)) &&
 			  !(curproxy->lbprm.algo & BE_LB_ALGO) &&
 			  (*(int *)&curproxy->dispatch_addr.sin_addr == 0))) {
 			Alert("parsing %s : %s '%s' has no dispatch address and is not in transparent or balance mode.\n",
diff --git a/src/client.c b/src/client.c
index 2ee41a6..73c8895 100644
--- a/src/client.c
+++ b/src/client.c
@@ -522,6 +522,7 @@
 	return 1;
 }
 
+
 /* set test->i to the number of connexions to the proxy */
 static int
 acl_fetch_dconn(struct proxy *px, struct session *l4, void *l7, int dir,
@@ -534,14 +535,14 @@
 
 /* Note: must not be declared <const> as its list will be overwritten */
 static struct acl_kw_list acl_kws = {{ },{
-	{ "src_port",   acl_parse_int,   acl_fetch_sport,  acl_match_int },
-	{ "src",        acl_parse_ip,    acl_fetch_src,    acl_match_ip  },
-	{ "dst",        acl_parse_ip,    acl_fetch_dst,    acl_match_ip  },
-	{ "dst_port",   acl_parse_int,   acl_fetch_dport,  acl_match_int },
+	{ "src_port",   acl_parse_int,   acl_fetch_sport,    acl_match_int },
+	{ "src",        acl_parse_ip,    acl_fetch_src,      acl_match_ip  },
+	{ "dst",        acl_parse_ip,    acl_fetch_dst,      acl_match_ip  },
+	{ "dst_port",   acl_parse_int,   acl_fetch_dport,    acl_match_int },
 #if 0
-	{ "src_limit",  acl_parse_int,   acl_fetch_sconn,  acl_match_int },
+	{ "src_limit",  acl_parse_int,   acl_fetch_sconn,    acl_match_int },
 #endif
-	{ "dst_conn",   acl_parse_int,   acl_fetch_dconn,  acl_match_int },
+	{ "dst_conn",   acl_parse_int,   acl_fetch_dconn,    acl_match_int },
 	{ NULL, NULL, NULL, NULL },
 }};
 
diff --git a/src/proto_http.c b/src/proto_http.c
index 3727431..59b9055 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -1476,7 +1476,7 @@
 	msg->msg_state = HTTP_MSG_ERROR;
 	return;
 }
-    
+
 /*
  * manages the client FSM and its socket. BTW, it also tries to handle the
  * cookie. It returns 1 if a state has changed (and a resync may be needed),
@@ -1908,8 +1908,13 @@
 		 * may have separate values for ->fe, ->be.
 		 */
 
-
-
+		/*
+		 * If HTTP PROXY is set we simply get remote server address
+		 * parsing incoming request.
+		 */
+		if ((t->be->options & PR_O_HTTP_PROXY) && !(t->flags & SN_ADDR_SET)) {
+			url2sa(req->data + msg->sl.rq.u, msg->sl.rq.u_l, &t->srv_addr);
+		}
 
 		/*
 		 * 7: the appsession cookie was looked up very early in 1.2,
@@ -4950,6 +4955,57 @@
 	return 1;
 }
 
+static int
+acl_fetch_url_ip(struct proxy *px, struct session *l4, void *l7, int dir,
+		 struct acl_expr *expr, struct acl_test *test)
+{
+	struct http_txn *txn = l7;
+	/* ACL fetch for "url_ip": exposes the IPv4 address found in the request URI */
+	if (txn->req.msg_state != HTTP_MSG_BODY)
+		return 0;
+	if (txn->rsp.msg_state != HTTP_MSG_RPBEFORE)
+		/* ensure the indexes are not affected */
+		return 0;
+
+	/* Parse the request URI into l4->srv_addr. NOTE(review): the result of url2sa() is ignored, so a failed parse is tested as 0.0.0.0 */
+	url2sa(txn->req.sol + txn->req.sl.rq.u, txn->req.sl.rq.u_l, &l4->srv_addr);
+	test->ptr = (void *)&((struct sockaddr_in *)&l4->srv_addr)->sin_addr; /* points into the session, not a copy */
+	test->i = AF_INET;
+
+	/*
+	 * When this proxy has option http_proxy set, flag the address as set so
+	 * that the request processing stage does not parse the same URL again.
+	 */
+	if (px->options & PR_O_HTTP_PROXY)
+		l4->flags |= SN_ADDR_SET;
+
+	test->flags = ACL_TEST_F_READ_ONLY;
+	return 1;
+}
+
+static int
+acl_fetch_url_port(struct proxy *px, struct session *l4, void *l7, int dir,
+		   struct acl_expr *expr, struct acl_test *test)
+{
+	struct http_txn *txn = l7;
+	/* ACL fetch for "url_port": exposes the port found in the request URI as an integer */
+	if (txn->req.msg_state != HTTP_MSG_BODY)
+		return 0;
+	if (txn->rsp.msg_state != HTTP_MSG_RPBEFORE)
+		/* ensure the indexes are not affected */
+		return 0;
+
+	/* Parse the request URI into l4->srv_addr, as url_ip does. NOTE(review): the result of url2sa() is ignored, so a failed parse yields port 0 */
+	url2sa(txn->req.sol + txn->req.sl.rq.u, txn->req.sl.rq.u_l, &l4->srv_addr);
+	test->i = ntohs(((struct sockaddr_in *)&l4->srv_addr)->sin_port); /* converted to host byte order for the integer match */
+
+	if (px->options & PR_O_HTTP_PROXY)
+		l4->flags |= SN_ADDR_SET; /* same flag as url_ip sets: the address is already parsed */
+
+	test->flags = ACL_TEST_F_READ_ONLY;
+	return 1;
+}
+
 /* 5. Check on HTTP header. A pointer to the beginning of the value is returned.
  * This generic function is used by both acl_fetch_chdr() and acl_fetch_shdr().
  */
@@ -5186,13 +5242,15 @@
 	{ "resp_ver",   acl_parse_ver,   acl_fetch_stver,  acl_match_str  },
 	{ "status",     acl_parse_int,   acl_fetch_stcode, acl_match_int  },
 
-	{ "url",        acl_parse_str,   acl_fetch_url,    acl_match_str  },
-	{ "url_beg",    acl_parse_str,   acl_fetch_url,    acl_match_beg  },
-	{ "url_end",    acl_parse_str,   acl_fetch_url,    acl_match_end  },
-	{ "url_sub",    acl_parse_str,   acl_fetch_url,    acl_match_sub  },
-	{ "url_dir",    acl_parse_str,   acl_fetch_url,    acl_match_dir  },
-	{ "url_dom",    acl_parse_str,   acl_fetch_url,    acl_match_dom  },
-	{ "url_reg",    acl_parse_reg,   acl_fetch_url,    acl_match_reg  },
+	{ "url",        acl_parse_str,   acl_fetch_url,      acl_match_str  },
+	{ "url_beg",    acl_parse_str,   acl_fetch_url,      acl_match_beg  },
+	{ "url_end",    acl_parse_str,   acl_fetch_url,      acl_match_end  },
+	{ "url_sub",    acl_parse_str,   acl_fetch_url,      acl_match_sub  },
+	{ "url_dir",    acl_parse_str,   acl_fetch_url,      acl_match_dir  },
+	{ "url_dom",    acl_parse_str,   acl_fetch_url,      acl_match_dom  },
+	{ "url_reg",    acl_parse_reg,   acl_fetch_url,      acl_match_reg  },
+	{ "url_ip",     acl_parse_ip,    acl_fetch_url_ip,   acl_match_ip   },
+	{ "url_port",   acl_parse_int,   acl_fetch_url_port, acl_match_int  },
 
 	{ "hdr",        acl_parse_str,   acl_fetch_chdr,    acl_match_str },
 	{ "hdr_reg",    acl_parse_reg,   acl_fetch_chdr,    acl_match_reg },
diff --git a/src/standard.c b/src/standard.c
index 1e631301..d245949 100644
--- a/src/standard.c
+++ b/src/standard.c
@@ -202,6 +202,100 @@
 	goto out_free;
 }
 
+
+/* Parse the dotted-decimal IPv4 address at the start of <addr> and copy its 4
+ * bytes into <dst>, first octet first (network order). Returns 0 on error, or
+ * a positive character count on success (see the note on the final return). */
+static int url2ip(const char *addr, struct in_addr *dst)
+{
+	int saw_digit, octets, ch;
+	u_char tmp[4], *tp;
+	const char *cp = addr;
+
+	saw_digit = 0;
+	octets = 0;
+	*(tp = tmp) = 0;
+
+	while (*addr) {
+		unsigned char digit = (ch = *addr++) - '0'; /* wraps above 9 for anything that is not a decimal digit */
+		if (digit > 9 && ch != '.')
+			break; /* first character that is neither digit nor dot ends the address */
+		if (digit <= 9) {
+			u_int new = *tp * 10 + digit;
+			if (new > 255)
+				return 0; /* octet out of range */
+			*tp = new;
+			if (!saw_digit) {
+				if (++octets > 4)
+					return 0;
+				saw_digit = 1;
+			}
+		} else if (ch == '.' && saw_digit) {
+			if (octets == 4)
+				return 0; /* a dot after the fourth octet is rejected */
+			*++tp = 0;
+			saw_digit = 0;
+		} else
+			return 0; /* a dot that does not follow a digit */
+	}
+
+	if (octets < 4)
+		return 0;
+
+	memcpy(&dst->s_addr, tmp, 4);
+	return addr-cp-1; /* -1 undoes the ++ past the delimiter. NOTE(review): if the loop ended on the NUL terminator instead, this is one short */
+}
+
+/* Extract an IPv4 address and port from <url> (<ulen> bytes) into <addr>, whose
+ * address and port are reset first. Returns -1 if no "://" is found or the host
+ * is not a valid IPv4 address, otherwise 0 (even when the scheme is not http). */
+int url2sa(const char *url, int ulen, struct sockaddr_in *addr)
+{
+	const char *curr = url, *cp = url;
+	int ret, url_code = 0;
+	unsigned int http_code = 0;
+
+	/* Cleanup the room */
+	addr->sin_family = AF_INET;
+	addr->sin_addr.s_addr = 0;
+	addr->sin_port = 0;
+
+	/* Firstly, try to find :// pattern: url_code holds the last 3 bytes read, 0x3a2f2f being "://" */
+	while (curr < url+ulen && url_code != 0x3a2f2f) {
+		url_code = ((url_code & 0xffff) << 8);
+		url_code += (unsigned char)*curr++;
+	}
+
+	/* Secondly, if :// pattern is found, verify that the stuff parsed before
+	 * the pattern matches "http". If so, parse IP address and port in the URI.
+	 * NOTE(review): with a 32-bit unsigned int only the last 4 bytes before
+	 * "://" survive the shifts, so any scheme ending in "http" also matches.
+	 * WARNING: Current code doesn't support dynamic async dns resolver.
+	 */
+	if (url_code == 0x3a2f2f) {
+		while (cp < curr - 3)
+			http_code = (http_code << 8) + *cp++;
+		http_code |= 0x20202020;			/* Turn everything to lower case */
+		
+		/* HTTP url matching: 0x68747470 is "http" */
+		if (http_code == 0x68747470) {
+			/* We are looking for IP address. If you want to parse and
+			 * resolve hostname found in url, you can use str2sa(), but
+			 * be warned this can slow down global daemon performances
+			 * while handling lagging dns responses.
+			 */
+			ret = url2ip(curr, &addr->sin_addr); /* NOTE(review): not bounded by <ulen>; relies on a delimiter following the address */
+			if (!ret)
+				return -1;
+			curr += ret;
+			addr->sin_port = (*curr == ':') ? htons(str2uic(++curr)) : htons(80); /* explicit ":port" if present, else default to 80 */
+		}
+		return 0; /* NOTE(review): also reached with <addr> left zeroed when the scheme did not match */
+	}
+
+	return -1;
+}
+
 /* will try to encode the string <string> replacing all characters tagged in
  * <map> with the hexadecimal representation of their ASCII-code (2 digits)
  * prefixed by <escape>, and will store the result between <start> (included)