MEDIUM: regex: pcre2 support

This adds support for the newer pcre2 library, which is
more secure than its older sibling at the cost of a
more complex API.
It works much like the existing pcre code path, to keep
the overall change smooth, except that:

- the supported string class (code unit width) is defined at compile time.
- after matching, the ovec data is properly sized, although
we do not take advantage of it here.
- the lack of JIT support is treated less 'dramatically',
as pcre2_jit_compile is a 'no-op' in that case.
diff --git a/Makefile b/Makefile
index 9e39485..a87e1e2 100644
--- a/Makefile
+++ b/Makefile
@@ -14,11 +14,14 @@
 #   USE_NETFILTER        : enable netfilter on Linux. Automatic.
 #   USE_PCRE             : enable use of libpcre for regex. Recommended.
 #   USE_PCRE_JIT         : enable JIT for faster regex on libpcre >= 8.32
+#   USE_PCRE2            : enable use of libpcre2 for regex.
+#   USE_PCRE2_JIT        : enable JIT for faster regex on libpcre2
 #   USE_POLL             : enable poll(). Automatic.
 #   USE_PRIVATE_CACHE    : disable shared memory cache of ssl sessions.
 #   USE_PTHREAD_PSHARED  : enable pthread process shared mutex on sslcache.
 #   USE_REGPARM          : enable regparm optimization. Recommended on x86.
 #   USE_STATIC_PCRE      : enable static libpcre. Recommended.
+#   USE_STATIC_PCRE2     : enable static libpcre2.
 #   USE_TPROXY           : enable transparent proxy. Automatic.
 #   USE_LINUX_TPROXY     : enable full transparent proxy. Automatic.
 #   USE_LINUX_SPLICE     : enable kernel 2.6 splicing. Automatic.
@@ -671,6 +674,9 @@
 endif
 
 ifneq ($(USE_PCRE)$(USE_STATIC_PCRE)$(USE_PCRE_JIT),)
+ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),)
+$(error cannot compile both PCRE and PCRE2 support)
+endif
 # PCREDIR is used to automatically construct the PCRE_INC and PCRE_LIB paths,
 # by appending /include and /lib respectively. If your system does not use the
 # same sub-directories, simply force these variables instead of PCREDIR. It is
@@ -702,6 +708,54 @@
 endif
 endif
 
+ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),)
+PCRE2DIR	:= $(shell pcre2-config --prefix 2>/dev/null || echo /usr/local)
+ifneq ($(PCRE2DIR),)
+PCRE2_INC       := $(PCRE2DIR)/include
+PCRE2_LIB       := $(PCRE2DIR)/lib
+
+ifeq ($(PCRE2_WIDTH),)
+PCRE2_WIDTH	= 8
+endif
+
+ifneq ($(PCRE2_WIDTH),8)
+ifneq ($(PCRE2_WIDTH),16)
+ifneq ($(PCRE2_WIDTH),32)
+$(error PCRE2_WIDTH needs to be set to either 8,16 or 32)
+endif
+endif
+endif
+
+
+PCRE2_LDFLAGS	:= $(shell pcre2-config --libs$(PCRE2_WIDTH) 2>/dev/null || echo -L/usr/local/lib -lpcre2-$(PCRE2_WIDTH))
+
+ifeq ($(PCRE2_LDFLAGS),)
+$(error libpcre2-$(PCRE2_WIDTH) not found)
+else
+ifeq ($(PCRE2_WIDTH),8)
+PCRE2_LDFLAGS	+= -lpcre2-posix
+endif
+endif
+
+OPTIONS_CFLAGS	+= -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=$(PCRE2_WIDTH)
+OPTIONS_CFLAGS  += $(if $(PCRE2_INC), -I$(PCRE2_INC))
+
+ifneq ($(USE_STATIC_PCRE2),)
+OPTIONS_LDFLAGS += $(if $(PCRE2_LIB),-L$(PCRE2_LIB)) -Wl,-Bstatic -L$(PCRE2_LIB) $(PCRE2_LDFLAGS) -Wl,-Bdynamic
+BUILD_OPTIONS   += $(call ignore_implicit,USE_STATIC_PCRE2)
+else
+OPTIONS_LDFLAGS += $(if $(PCRE2_LIB),-L$(PCRE2_LIB)) -L$(PCRE2_LIB) $(PCRE2_LDFLAGS)
+BUILD_OPTIONS   += $(call ignore_implicit,USE_PCRE2)
+endif
+
+ifneq ($(USE_PCRE2_JIT),)
+OPTIONS_CFLAGS  += -DUSE_PCRE2_JIT
+BUILD_OPTIONS   += $(call ignore_implicit,USE_PCRE2_JIT)
+endif
+
+endif
+endif
+
 # TCP Fast Open
 ifneq ($(USE_TFO),)
 OPTIONS_CFLAGS  += -DUSE_TFO
diff --git a/include/common/regex.h b/include/common/regex.h
index 8a1703f..2f171b3 100644
--- a/include/common/regex.h
+++ b/include/common/regex.h
@@ -36,7 +36,11 @@
 #define PCRE_STUDY_JIT_COMPILE 0
 #endif
 
-#else /* no PCRE */
+#elif USE_PCRE2
+#include <pcre2.h>
+#include <pcre2posix.h>
+
+#else /* no PCRE, nor PCRE2 */
 #include <regex.h>
 #endif
 
@@ -49,6 +53,8 @@
 #error "The PCRE lib doesn't support JIT. Change your lib, or remove the option USE_PCRE_JIT."
 #endif
 #endif
+#elif USE_PCRE2
+	pcre2_code *reg;
 #else /* no PCRE */
 	regex_t regex;
 #endif
@@ -95,6 +101,17 @@
 	if (pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, 0, NULL, 0) < 0)
 		return 0;
 	return 1;
+#elif defined(USE_PCRE2)
+	pcre2_match_data *pm;
+	int ret;
+
+	pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+	ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject),
+		0, 0, pm, NULL);
+	pcre2_match_data_free(pm);
+	if (ret < 0)
+		return 0;
+	return 1;
 #else
 	int match;
 	match = regexec(&preg->regex, subject, 0, NULL, 0);
@@ -115,6 +132,17 @@
 	if (pcre_exec(preg->reg, preg->extra, subject, length, 0, 0, NULL, 0) < 0)
 		return 0;
 	return 1;
+#elif defined(USE_PCRE2)
+	pcre2_match_data *pm;
+	int ret;
+
+	pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+	ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length,
+		0, 0, pm, NULL);
+	pcre2_match_data_free(pm);
+	if (ret < 0)
+		return 0;
+	return 1;
 #else
 	int match;
 	char old_char = subject[length];
@@ -143,6 +171,8 @@
 #else /* PCRE_CONFIG_JIT */
 	pcre_free(preg->extra);
 #endif /* PCRE_CONFIG_JIT */
+#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
+	pcre2_code_free(preg->reg);
 #else
 	regfree(&preg->regex);
 #endif
diff --git a/src/regex.c b/src/regex.c
index dd77194..38d7132 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -158,9 +158,14 @@
  */
 int regex_exec_match(const struct my_regex *preg, const char *subject,
                      size_t nmatch, regmatch_t pmatch[], int flags) {
-#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
+#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
 	int ret;
+#ifdef USE_PCRE2
+	PCRE2_SIZE *matches;
+	pcre2_match_data *pm;
+#else
 	int matches[MAX_MATCH * 3];
+#endif
 	int enmatch;
 	int i;
 	int options;
@@ -169,15 +174,20 @@
 	 * match i the maximum value for match, in fact this
 	 * limit is not applyied.
 	 */
+
 	enmatch = nmatch;
 	if (enmatch > MAX_MATCH)
 		enmatch = MAX_MATCH;
 
 	options = 0;
 	if (flags & REG_NOTBOL)
+#ifdef USE_PCRE2
+		options |= PCRE2_NOTBOL;
+#else
 		options |= PCRE_NOTBOL;
+#endif
 
-	/* The value returned by pcre_exec() is one more than the highest numbered
+	/* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
 	 * pair that has been set. For example, if two substrings have been captured,
 	 * the returned value is 3. If there are no capturing subpatterns, the return
 	 * value from a successful match is 1, indicating that just the first pair of
@@ -186,9 +196,22 @@
 	 * It seems that this function returns 0 if it detect more matches than avalaible
 	 * space in the matches array.
 	 */
+#ifdef USE_PCRE2
+	pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+	ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), 0, options, pm, NULL);
+
+	if (ret < 0) {
+		pcre2_match_data_free(pm);
+		return 0;
+	}
+
+	matches = pcre2_get_ovector_pointer(pm);
+#else
 	ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
+
 	if (ret < 0)
 		return 0;
+#endif
 
 	if (ret == 0)
 		ret = enmatch;
@@ -204,6 +227,9 @@
 		pmatch[i].rm_so = -1;
 		pmatch[i].rm_eo = -1;
 	}
+#ifdef USE_PCRE2
+	pcre2_match_data_free(pm);
+#endif
 	return 1;
 #else
 	int match;
@@ -226,9 +252,14 @@
  */
 int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
                       size_t nmatch, regmatch_t pmatch[], int flags) {
-#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
+#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
 	int ret;
+#ifdef USE_PCRE2
+	PCRE2_SIZE *matches;
+	pcre2_match_data *pm;
+#else
 	int matches[MAX_MATCH * 3];
+#endif
 	int enmatch;
 	int i;
 	int options;
@@ -243,9 +274,13 @@
 
 	options = 0;
 	if (flags & REG_NOTBOL)
+#ifdef USE_PCRE2
+		options |= PCRE2_NOTBOL;
+#else
 		options |= PCRE_NOTBOL;
+#endif
 
-	/* The value returned by pcre_exec() is one more than the highest numbered
+	/* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
 	 * pair that has been set. For example, if two substrings have been captured,
 	 * the returned value is 3. If there are no capturing subpatterns, the return
 	 * value from a successful match is 1, indicating that just the first pair of
@@ -254,9 +289,21 @@
 	 * It seems that this function returns 0 if it detect more matches than avalaible
 	 * space in the matches array.
 	 */
+#ifdef USE_PCRE2
+	pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+	ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, 0, options, pm, NULL);
+
+	if (ret < 0) {
+		pcre2_match_data_free(pm);
+		return 0;
+	}
+
+	matches = pcre2_get_ovector_pointer(pm);
+#else
 	ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
 	if (ret < 0)
 		return 0;
+#endif
 
 	if (ret == 0)
 		ret = enmatch;
@@ -272,6 +319,9 @@
 		pmatch[i].rm_so = -1;
 		pmatch[i].rm_eo = -1;
 	}
+#ifdef USE_PCRE2
+	pcre2_match_data_free(pm);
+#endif
 	return 1;
 #else
 	char old_char = subject[length];
@@ -311,6 +361,40 @@
 		memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
 		return 0;
 	}
+#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
+	int flags = 0;
+	int errn;
+#if defined(USE_PCRE2_JIT)
+	int jit;
+#endif
+	PCRE2_UCHAR error[256];
+	PCRE2_SIZE erroffset;
+
+	if (!cs)
+		flags |= PCRE2_CASELESS;
+	if (!cap)
+		flags |= PCRE2_NO_AUTO_CAPTURE;
+
+	regex->reg = pcre2_compile((PCRE2_SPTR)str, PCRE2_ZERO_TERMINATED, flags, &errn, &erroffset, NULL);
+	if (!regex->reg) {
+		pcre2_get_error_message(errn, error, sizeof(error));
+		memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%zu)", str, error, erroffset);
+		return 0;
+	}
+
+#if defined(USE_PCRE2_JIT)
+	jit = pcre2_jit_compile(regex->reg, PCRE2_JIT_COMPLETE);
+	/*
+	 * We end if it is an error not related to lack of JIT support
+	 * in a case of JIT support missing pcre2_jit_compile is "no-op"
+	 */
+	if (jit < 0 && jit != PCRE2_ERROR_JIT_BADOPTION) {
+		pcre2_code_free(regex->reg);
+		memprintf(err, "regex '%s' jit compilation failed", str);
+		return 0;
+	}
+#endif
+
 #else
 	int flags = REG_EXTENDED;
 
@@ -349,8 +433,27 @@
 		  "no (USE_PCRE_JIT not set)"
 #endif
 		  );
+#endif /* USE_PCRE */
+
+#ifdef USE_PCRE2
+	memprintf(&ptr, "Built with PCRE2 version : %s", (HAP_XSTRING(Z PCRE2_PRERELEASE)[1] == 0) ?
+	          HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
+	          HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR) HAP_XSTRING(PCRE2_PRERELEASE PCRE2_DATE));
+	memprintf(&ptr, "%s\nPCRE2 library supports JIT : %s", ptr,
+#ifdef USE_PCRE2_JIT
+		  ({
+			  int r;
+			  pcre2_config(PCRE2_CONFIG_JIT, &r);
+			  r ? "yes" : "no (libpcre2 build without JIT?)";
+		  })
 #else
-	memprintf(&ptr, "Built without PCRE support (using libc's regex instead)");
+		  "no (USE_PCRE2_JIT not set)"
+#endif
+		  );
+#endif /* USE_PCRE2 */
+
+#if !defined(USE_PCRE) && !defined(USE_PCRE2)
+	memprintf(&ptr, "Built without PCRE or PCRE2 support (using libc's regex instead)");
 #endif
 	hap_register_build_opts(ptr, 1);
 }