MEDIUM: regex: pcre2 support
this adds a support of the newest pcre2 library,
more secure than its older sibling in a cost of a
more complex API.
It works pretty similarly to pcre's part to keep
the overall change smooth, except :
- we define the string class supported at compile time.
- after matching the ovec data is properly sized, althought
we do not take advantage of it here.
- the lack of jit support is treated less 'dramatically'
as pcre2_jit_compile in this case is 'no-op'.
diff --git a/Makefile b/Makefile
index 9e39485..a87e1e2 100644
--- a/Makefile
+++ b/Makefile
@@ -14,11 +14,14 @@
# USE_NETFILTER : enable netfilter on Linux. Automatic.
# USE_PCRE : enable use of libpcre for regex. Recommended.
# USE_PCRE_JIT : enable JIT for faster regex on libpcre >= 8.32
+# USE_PCRE2 : enable use of libpcre2 for regex.
+# USE_PCRE2_JIT : enable JIT for faster regex on libpcre2
# USE_POLL : enable poll(). Automatic.
# USE_PRIVATE_CACHE : disable shared memory cache of ssl sessions.
# USE_PTHREAD_PSHARED : enable pthread process shared mutex on sslcache.
# USE_REGPARM : enable regparm optimization. Recommended on x86.
# USE_STATIC_PCRE : enable static libpcre. Recommended.
+# USE_STATIC_PCRE2 : enable static libpcre2.
# USE_TPROXY : enable transparent proxy. Automatic.
# USE_LINUX_TPROXY : enable full transparent proxy. Automatic.
# USE_LINUX_SPLICE : enable kernel 2.6 splicing. Automatic.
@@ -671,6 +674,9 @@
endif
ifneq ($(USE_PCRE)$(USE_STATIC_PCRE)$(USE_PCRE_JIT),)
+ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),)
+$(error cannot compile both PCRE and PCRE2 support)
+endif
# PCREDIR is used to automatically construct the PCRE_INC and PCRE_LIB paths,
# by appending /include and /lib respectively. If your system does not use the
# same sub-directories, simply force these variables instead of PCREDIR. It is
@@ -702,6 +708,54 @@
endif
endif
+ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),)
+PCRE2DIR := $(shell pcre2-config --prefix 2>/dev/null || echo /usr/local)
+ifneq ($(PCRE2DIR),)
+PCRE2_INC := $(PCRE2DIR)/include
+PCRE2_LIB := $(PCRE2DIR)/lib
+
+ifeq ($(PCRE2_WIDTH),)
+PCRE2_WIDTH = 8
+endif
+
+ifneq ($(PCRE2_WIDTH),8)
+ifneq ($(PCRE2_WIDTH),16)
+ifneq ($(PCRE2_WIDTH),32)
+$(error PCRE2_WIDTH needs to be set to either 8,16 or 32)
+endif
+endif
+endif
+
+
+PCRE2_LDFLAGS := $(shell pcre2-config --libs$(PCRE2_WIDTH) 2>/dev/null || echo -L/usr/local/lib -lpcre2-$(PCRE2_WIDTH))
+
+ifeq ($(PCRE2_LDFLAGS),)
+$(error libpcre2-$(PCRE2_WIDTH) not found)
+else
+ifeq ($(PCRE2_WIDTH),8)
+PCRE2_LDFLAGS += -lpcre2-posix
+endif
+endif
+
+OPTIONS_CFLAGS += -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=$(PCRE2_WIDTH)
+OPTIONS_CFLAGS += $(if $(PCRE2_INC), -I$(PCRE2_INC))
+
+ifneq ($(USE_STATIC_PCRE2),)
+OPTIONS_LDFLAGS += $(if $(PCRE2_LIB),-L$(PCRE2_LIB)) -Wl,-Bstatic -L$(PCRE2_LIB) $(PCRE2_LDFLAGS) -Wl,-Bdynamic
+BUILD_OPTIONS += $(call ignore_implicit,USE_STATIC_PCRE2)
+else
+OPTIONS_LDFLAGS += $(if $(PCRE2_LIB),-L$(PCRE2_LIB)) -L$(PCRE2_LIB) $(PCRE2_LDFLAGS)
+BUILD_OPTIONS += $(call ignore_implicit,USE_PCRE2)
+endif
+
+ifneq ($(USE_PCRE2_JIT),)
+OPTIONS_CFLAGS += -DUSE_PCRE2_JIT
+BUILD_OPTIONS += $(call ignore_implicit,USE_PCRE2_JIT)
+endif
+
+endif
+endif
+
# TCP Fast Open
ifneq ($(USE_TFO),)
OPTIONS_CFLAGS += -DUSE_TFO
diff --git a/include/common/regex.h b/include/common/regex.h
index 8a1703f..2f171b3 100644
--- a/include/common/regex.h
+++ b/include/common/regex.h
@@ -36,7 +36,11 @@
#define PCRE_STUDY_JIT_COMPILE 0
#endif
-#else /* no PCRE */
+#elif USE_PCRE2
+#include <pcre2.h>
+#include <pcre2posix.h>
+
+#else /* no PCRE, nor PCRE2 */
#include <regex.h>
#endif
@@ -49,6 +53,8 @@
#error "The PCRE lib doesn't support JIT. Change your lib, or remove the option USE_PCRE_JIT."
#endif
#endif
+#elif USE_PCRE2
+ pcre2_code *reg;
#else /* no PCRE */
regex_t regex;
#endif
@@ -95,6 +101,17 @@
if (pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, 0, NULL, 0) < 0)
return 0;
return 1;
+#elif defined(USE_PCRE2)
+ pcre2_match_data *pm;
+ int ret;
+
+ pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+ ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject),
+ 0, 0, pm, NULL);
+ pcre2_match_data_free(pm);
+ if (ret < 0)
+ return 0;
+ return 1;
#else
int match;
match = regexec(&preg->regex, subject, 0, NULL, 0);
@@ -115,6 +132,17 @@
if (pcre_exec(preg->reg, preg->extra, subject, length, 0, 0, NULL, 0) < 0)
return 0;
return 1;
+#elif defined(USE_PCRE2)
+ pcre2_match_data *pm;
+ int ret;
+
+ pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+ ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length,
+ 0, 0, pm, NULL);
+ pcre2_match_data_free(pm);
+ if (ret < 0)
+ return 0;
+ return 1;
#else
int match;
char old_char = subject[length];
@@ -143,6 +171,8 @@
#else /* PCRE_CONFIG_JIT */
pcre_free(preg->extra);
#endif /* PCRE_CONFIG_JIT */
+#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
+ pcre2_code_free(preg->reg);
#else
regfree(&preg->regex);
#endif
diff --git a/src/regex.c b/src/regex.c
index dd77194..38d7132 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -158,9 +158,14 @@
*/
int regex_exec_match(const struct my_regex *preg, const char *subject,
size_t nmatch, regmatch_t pmatch[], int flags) {
-#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
+#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
int ret;
+#ifdef USE_PCRE2
+ PCRE2_SIZE *matches;
+ pcre2_match_data *pm;
+#else
int matches[MAX_MATCH * 3];
+#endif
int enmatch;
int i;
int options;
@@ -169,15 +174,20 @@
* match i the maximum value for match, in fact this
* limit is not applyied.
*/
+
enmatch = nmatch;
if (enmatch > MAX_MATCH)
enmatch = MAX_MATCH;
options = 0;
if (flags & REG_NOTBOL)
+#ifdef USE_PCRE2
+ options |= PCRE2_NOTBOL;
+#else
options |= PCRE_NOTBOL;
+#endif
- /* The value returned by pcre_exec() is one more than the highest numbered
+ /* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
* pair that has been set. For example, if two substrings have been captured,
* the returned value is 3. If there are no capturing subpatterns, the return
* value from a successful match is 1, indicating that just the first pair of
@@ -186,9 +196,22 @@
* It seems that this function returns 0 if it detect more matches than avalaible
* space in the matches array.
*/
+#ifdef USE_PCRE2
+ pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+ ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), 0, options, pm, NULL);
+
+ if (ret < 0) {
+ pcre2_match_data_free(pm);
+ return 0;
+ }
+
+ matches = pcre2_get_ovector_pointer(pm);
+#else
ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
+
if (ret < 0)
return 0;
+#endif
if (ret == 0)
ret = enmatch;
@@ -204,6 +227,9 @@
pmatch[i].rm_so = -1;
pmatch[i].rm_eo = -1;
}
+#ifdef USE_PCRE2
+ pcre2_match_data_free(pm);
+#endif
return 1;
#else
int match;
@@ -226,9 +252,14 @@
*/
int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
size_t nmatch, regmatch_t pmatch[], int flags) {
-#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
+#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
int ret;
+#ifdef USE_PCRE2
+ PCRE2_SIZE *matches;
+ pcre2_match_data *pm;
+#else
int matches[MAX_MATCH * 3];
+#endif
int enmatch;
int i;
int options;
@@ -243,9 +274,13 @@
options = 0;
if (flags & REG_NOTBOL)
+#ifdef USE_PCRE2
+ options |= PCRE2_NOTBOL;
+#else
options |= PCRE_NOTBOL;
+#endif
- /* The value returned by pcre_exec() is one more than the highest numbered
+ /* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
* pair that has been set. For example, if two substrings have been captured,
* the returned value is 3. If there are no capturing subpatterns, the return
* value from a successful match is 1, indicating that just the first pair of
@@ -254,9 +289,21 @@
* It seems that this function returns 0 if it detect more matches than avalaible
* space in the matches array.
*/
+#ifdef USE_PCRE2
+ pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+ ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, 0, options, pm, NULL);
+
+ if (ret < 0) {
+ pcre2_match_data_free(pm);
+ return 0;
+ }
+
+ matches = pcre2_get_ovector_pointer(pm);
+#else
ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
if (ret < 0)
return 0;
+#endif
if (ret == 0)
ret = enmatch;
@@ -272,6 +319,9 @@
pmatch[i].rm_so = -1;
pmatch[i].rm_eo = -1;
}
+#ifdef USE_PCRE2
+ pcre2_match_data_free(pm);
+#endif
return 1;
#else
char old_char = subject[length];
@@ -311,6 +361,40 @@
memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
return 0;
}
+#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
+ int flags = 0;
+ int errn;
+#if defined(USE_PCRE2_JIT)
+ int jit;
+#endif
+ PCRE2_UCHAR error[256];
+ PCRE2_SIZE erroffset;
+
+ if (!cs)
+ flags |= PCRE2_CASELESS;
+ if (!cap)
+ flags |= PCRE2_NO_AUTO_CAPTURE;
+
+ regex->reg = pcre2_compile((PCRE2_SPTR)str, PCRE2_ZERO_TERMINATED, flags, &errn, &erroffset, NULL);
+ if (!regex->reg) {
+ pcre2_get_error_message(errn, error, sizeof(error));
+ memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%zu)", str, error, erroffset);
+ return 0;
+ }
+
+#if defined(USE_PCRE2_JIT)
+ jit = pcre2_jit_compile(regex->reg, PCRE2_JIT_COMPLETE);
+ /*
+ * We end if it is an error not related to lack of JIT support
+ * in a case of JIT support missing pcre2_jit_compile is "no-op"
+ */
+ if (jit < 0 && jit != PCRE2_ERROR_JIT_BADOPTION) {
+ pcre2_code_free(regex->reg);
+ memprintf(err, "regex '%s' jit compilation failed", str);
+ return 0;
+ }
+#endif
+
#else
int flags = REG_EXTENDED;
@@ -349,8 +433,27 @@
"no (USE_PCRE_JIT not set)"
#endif
);
+#endif /* USE_PCRE */
+
+#ifdef USE_PCRE2
+ memprintf(&ptr, "Built with PCRE2 version : %s", (HAP_XSTRING(Z PCRE2_PRERELEASE)[1] == 0) ?
+ HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
+ HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR) HAP_XSTRING(PCRE2_PRERELEASE PCRE2_DATE));
+ memprintf(&ptr, "%s\nPCRE2 library supports JIT : %s", ptr,
+#ifdef USE_PCRE2_JIT
+ ({
+ int r;
+ pcre2_config(PCRE2_CONFIG_JIT, &r);
+ r ? "yes" : "no (libpcre2 build without JIT?)";
+ })
#else
- memprintf(&ptr, "Built without PCRE support (using libc's regex instead)");
+ "no (USE_PCRE2_JIT not set)"
+#endif
+ );
+#endif /* USE_PCRE2 */
+
+#if !defined(USE_PCRE) && !defined(USE_PCRE2)
+ memprintf(&ptr, "Built without PCRE or PCRE2 support (using libc's regex instead)");
#endif
hap_register_build_opts(ptr, 1);
}