blob: f3f74c3dce485dee83f1c42598f86bcd947291f0 [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Regex and string management functions.
3 *
Willy Tarreauf4f04122010-01-28 18:10:50 +01004 * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <ctype.h>
14#include <stdlib.h>
15#include <string.h>
16
Willy Tarreau7a9ac6d2016-12-21 19:13:14 +010017#include <types/global.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020018#include <common/config.h>
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +020019#include <common/defaults.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020020#include <common/regex.h>
21#include <common/standard.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020022#include <proto/log.h>
23
/* Regex trash buffer used by various regex tests: an array of MAX_MATCH
 * match slots, declared with the THREAD_LOCAL storage qualifier. Note that
 * the regex_exec_match*() functions take a parameter of the same name, which
 * hides this array inside their bodies.
 */
THREAD_LOCAL regmatch_t pmatch[MAX_MATCH]; /* rm_so, rm_eo for regular expressions */
Willy Tarreaubaaee002006-06-26 02:48:02 +020026
/* Builds into <dst> (capacity <dst_size> bytes, terminating NUL included) the
 * expansion of the replacement pattern <str> :
 *   - a backslash followed by one decimal digit N is replaced by capture
 *     group N, copied from <src> using the offsets found in <matches>[N] ;
 *     a group whose offsets are negative expands to nothing ;
 *   - a backslash followed by 'x' and two hex digits emits the byte having
 *     that value (digits are not validated here, check_replace_string() is
 *     expected to have been called on <str> beforehand) ;
 *   - a backslash followed by any other character emits that character ;
 *   - any other character is copied verbatim.
 * Since any digit from 0 to 9 may be referenced, <matches> must provide at
 * least 10 entries.
 * Returns the length of the resulting string (terminating NUL not counted),
 * or -1 if <str> ends in the middle of an escape sequence, if a group has
 * inconsistent offsets, or if the result does not fit into <dst>.
 */
int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches)
{
	char *old_dst = dst;
	char *dst_end = dst + dst_size;

	while (*str) {
		if (*str != '\\') {
			/* regular character, copied as-is */
			if (dst >= dst_end)
				return -1;

			*dst++ = *str++;
			continue;
		}

		/* skip the backslash, something has to follow it */
		str++;
		if (!*str)
			return -1;

		if (isdigit((unsigned char)*str)) {
			int num = *str++ - '0';

			if (matches[num].rm_eo > -1 && matches[num].rm_so > -1) {
				int len = matches[num].rm_eo - matches[num].rm_so;

				/* Compare against the remaining room instead of
				 * computing <dst + len>, which could point past
				 * the buffer. One byte is kept for the final NUL,
				 * and a negative length must never reach memcpy().
				 */
				if (len < 0 || len >= dst_end - dst)
					return -1;

				memcpy(dst, src + matches[num].rm_so, len);
				dst += len;
			}
		} else if (*str == 'x') {
			unsigned char hex1, hex2;

			str++;
			if (!*str)
				return -1;

			/* <ctype.h> functions require a value representable
			 * as an unsigned char, hence the casts.
			 */
			hex1 = toupper((unsigned char)*str++) - '0';

			if (!*str)
				return -1;

			hex2 = toupper((unsigned char)*str++) - '0';

			/* 'A'..'F' land above 9 once '0' is subtracted : bring
			 * them back to 10..15.
			 */
			if (hex1 > 9)
				hex1 -= 'A' - '9' - 1;
			if (hex2 > 9)
				hex2 -= 'A' - '9' - 1;

			if (dst >= dst_end)
				return -1;

			*dst++ = (hex1 << 4) + hex2;
		} else {
			/* escaped character, emitted without its backslash */
			if (dst >= dst_end)
				return -1;

			*dst++ = *str++;
		}
	}

	/* room is still needed for the terminating NUL */
	if (dst >= dst_end)
		return -1;

	*dst = '\0';
	return dst - old_dst;
}
94
/* Checks the escape sequences found in the replacement string <str>. Returns
 * NULL if <str> is valid, otherwise a pointer to the backslash which starts
 * the first invalid sequence, that is :
 *   - a backslash ending the string ;
 *   - a backslash followed by 'x' but not by two hex digits.
 * A backslash followed by a digit, by 'x' and two hex digits, or by another
 * backslash is silently accepted. A backslash in front of anything else is
 * accepted as well but triggers a deprecation warning.
 */
const char *check_replace_string(const char *str)
{
	while (*str) {
		const char *bslash;

		if (*str != '\\') {
			str++;
			continue;
		}

		/* remember where the sequence starts, it is what gets reported */
		bslash = str++;
		if (!*str)
			return bslash;

		if (*str == 'x') {
			/* the second digit is only looked at when the first one
			 * is valid, so the end of the string is never crossed.
			 */
			if (!ishex(str[1]) || !ishex(str[2]))
				return bslash;
			str += 2;
		}
		else if (!isdigit((unsigned char)*str) && *str != '\\') {
			/* an escaped backslash is a legitimate sequence (this is
			 * what the message below says), so it must not be
			 * reported as deprecated.
			 */
			ha_warning("'\\%c' : deprecated use of a backslash before something not '\\','x' or a digit.\n", *str);
		}

		/* skip the last character of the sequence */
		str++;
	}
	return NULL;
}
125
126
127/* returns the pointer to an error in the replacement string, or NULL if OK */
Thierry FOURNIER09af0d62014-06-18 11:35:54 +0200128const char *chain_regex(struct hdr_exp **head, struct my_regex *preg,
Willy Tarreauf4f04122010-01-28 18:10:50 +0100129 int action, const char *replace, void *cond)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200130{
131 struct hdr_exp *exp;
132
133 if (replace != NULL) {
Willy Tarreaub17916e2006-10-15 15:17:57 +0200134 const char *err;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200135 err = check_replace_string(replace);
136 if (err)
137 return err;
138 }
139
140 while (*head != NULL)
141 head = &(*head)->next;
142
Vincent Bernat02779b62016-04-03 13:48:43 +0200143 exp = calloc(1, sizeof(*exp));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200144
145 exp->preg = preg;
146 exp->replace = replace;
147 exp->action = action;
Willy Tarreauf4f04122010-01-28 18:10:50 +0100148 exp->cond = cond;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200149 *head = exp;
150
151 return NULL;
152}
153
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200154/* This function apply regex. It take const null terminated char as input.
155 * If the function doesn't match, it returns false, else it returns true.
156 * When it is compiled with JIT, this function execute strlen on the subject.
Willy Tarreau15a53a42015-01-21 13:39:42 +0100157 * Currently the only supported flag is REG_NOTBOL.
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200158 */
159int regex_exec_match(const struct my_regex *preg, const char *subject,
Willy Tarreau15a53a42015-01-21 13:39:42 +0100160 size_t nmatch, regmatch_t pmatch[], int flags) {
David Carlierf2592b22016-11-21 21:25:58 +0000161#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200162 int ret;
David Carlierf2592b22016-11-21 21:25:58 +0000163#ifdef USE_PCRE2
164 PCRE2_SIZE *matches;
165 pcre2_match_data *pm;
166#else
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200167 int matches[MAX_MATCH * 3];
David Carlierf2592b22016-11-21 21:25:58 +0000168#endif
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200169 int enmatch;
170 int i;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100171 int options;
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200172
173 /* Silently limit the number of allowed matches. max
174 * match i the maximum value for match, in fact this
175 * limit is not applyied.
176 */
David Carlierf2592b22016-11-21 21:25:58 +0000177
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200178 enmatch = nmatch;
179 if (enmatch > MAX_MATCH)
180 enmatch = MAX_MATCH;
181
Willy Tarreau15a53a42015-01-21 13:39:42 +0100182 options = 0;
183 if (flags & REG_NOTBOL)
David Carlierf2592b22016-11-21 21:25:58 +0000184#ifdef USE_PCRE2
185 options |= PCRE2_NOTBOL;
186#else
Willy Tarreau15a53a42015-01-21 13:39:42 +0100187 options |= PCRE_NOTBOL;
David Carlierf2592b22016-11-21 21:25:58 +0000188#endif
Willy Tarreau15a53a42015-01-21 13:39:42 +0100189
David Carlierf2592b22016-11-21 21:25:58 +0000190 /* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200191 * pair that has been set. For example, if two substrings have been captured,
192 * the returned value is 3. If there are no capturing subpatterns, the return
193 * value from a successful match is 1, indicating that just the first pair of
194 * offsets has been set.
195 *
Joseph Herlanteda75482018-11-15 14:46:29 -0800196 * It seems that this function returns 0 if it detects more matches than available
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200197 * space in the matches array.
198 */
David Carlierf2592b22016-11-21 21:25:58 +0000199#ifdef USE_PCRE2
200 pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
201 ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), 0, options, pm, NULL);
202
203 if (ret < 0) {
204 pcre2_match_data_free(pm);
205 return 0;
206 }
207
208 matches = pcre2_get_ovector_pointer(pm);
209#else
Willy Tarreau15a53a42015-01-21 13:39:42 +0100210 ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
David Carlierf2592b22016-11-21 21:25:58 +0000211
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200212 if (ret < 0)
213 return 0;
David Carlierf2592b22016-11-21 21:25:58 +0000214#endif
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200215
216 if (ret == 0)
217 ret = enmatch;
218
219 for (i=0; i<nmatch; i++) {
220 /* Copy offset. */
221 if (i < ret) {
222 pmatch[i].rm_so = matches[(i*2)];
223 pmatch[i].rm_eo = matches[(i*2)+1];
224 continue;
225 }
226 /* Set the unmatvh flag (-1). */
227 pmatch[i].rm_so = -1;
228 pmatch[i].rm_eo = -1;
229 }
David Carlierf2592b22016-11-21 21:25:58 +0000230#ifdef USE_PCRE2
231 pcre2_match_data_free(pm);
232#endif
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200233 return 1;
234#else
235 int match;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100236
237 flags &= REG_NOTBOL;
238 match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200239 if (match == REG_NOMATCH)
240 return 0;
241 return 1;
242#endif
243}
244
245/* This function apply regex. It take a "char *" ans length as input. The
246 * <subject> can be modified during the processing. If the function doesn't
247 * match, it returns false, else it returns true.
248 * When it is compiled with standard POSIX regex or PCRE, this function add
249 * a temporary null chracters at the end of the <subject>. The <subject> must
Willy Tarreau15a53a42015-01-21 13:39:42 +0100250 * have a real length of <length> + 1. Currently the only supported flag is
251 * REG_NOTBOL.
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200252 */
253int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
Willy Tarreau15a53a42015-01-21 13:39:42 +0100254 size_t nmatch, regmatch_t pmatch[], int flags) {
David Carlierf2592b22016-11-21 21:25:58 +0000255#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200256 int ret;
David Carlierf2592b22016-11-21 21:25:58 +0000257#ifdef USE_PCRE2
258 PCRE2_SIZE *matches;
259 pcre2_match_data *pm;
260#else
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200261 int matches[MAX_MATCH * 3];
David Carlierf2592b22016-11-21 21:25:58 +0000262#endif
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200263 int enmatch;
264 int i;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100265 int options;
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200266
267 /* Silently limit the number of allowed matches. max
268 * match i the maximum value for match, in fact this
269 * limit is not applyied.
270 */
271 enmatch = nmatch;
272 if (enmatch > MAX_MATCH)
273 enmatch = MAX_MATCH;
274
Willy Tarreau15a53a42015-01-21 13:39:42 +0100275 options = 0;
276 if (flags & REG_NOTBOL)
David Carlierf2592b22016-11-21 21:25:58 +0000277#ifdef USE_PCRE2
278 options |= PCRE2_NOTBOL;
279#else
Willy Tarreau15a53a42015-01-21 13:39:42 +0100280 options |= PCRE_NOTBOL;
David Carlierf2592b22016-11-21 21:25:58 +0000281#endif
Willy Tarreau15a53a42015-01-21 13:39:42 +0100282
David Carlierf2592b22016-11-21 21:25:58 +0000283 /* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200284 * pair that has been set. For example, if two substrings have been captured,
285 * the returned value is 3. If there are no capturing subpatterns, the return
286 * value from a successful match is 1, indicating that just the first pair of
287 * offsets has been set.
288 *
Joseph Herlanteda75482018-11-15 14:46:29 -0800289 * It seems that this function returns 0 if it detects more matches than available
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200290 * space in the matches array.
291 */
David Carlierf2592b22016-11-21 21:25:58 +0000292#ifdef USE_PCRE2
293 pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
294 ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, 0, options, pm, NULL);
295
296 if (ret < 0) {
297 pcre2_match_data_free(pm);
298 return 0;
299 }
300
301 matches = pcre2_get_ovector_pointer(pm);
302#else
Willy Tarreau15a53a42015-01-21 13:39:42 +0100303 ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200304 if (ret < 0)
305 return 0;
David Carlierf2592b22016-11-21 21:25:58 +0000306#endif
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200307
308 if (ret == 0)
309 ret = enmatch;
310
311 for (i=0; i<nmatch; i++) {
312 /* Copy offset. */
313 if (i < ret) {
314 pmatch[i].rm_so = matches[(i*2)];
315 pmatch[i].rm_eo = matches[(i*2)+1];
316 continue;
317 }
318 /* Set the unmatvh flag (-1). */
319 pmatch[i].rm_so = -1;
320 pmatch[i].rm_eo = -1;
321 }
David Carlierf2592b22016-11-21 21:25:58 +0000322#ifdef USE_PCRE2
323 pcre2_match_data_free(pm);
324#endif
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200325 return 1;
326#else
327 char old_char = subject[length];
328 int match;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100329
330 flags &= REG_NOTBOL;
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200331 subject[length] = 0;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100332 match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200333 subject[length] = old_char;
334 if (match == REG_NOMATCH)
335 return 0;
336 return 1;
337#endif
338}
339
Thierry FOURNIER799c0422013-12-06 20:36:20 +0100340int regex_comp(const char *str, struct my_regex *regex, int cs, int cap, char **err)
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200341{
Thierry FOURNIER26202762014-06-18 11:50:51 +0200342#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200343 int flags = 0;
344 const char *error;
345 int erroffset;
346
347 if (!cs)
348 flags |= PCRE_CASELESS;
349 if (!cap)
350 flags |= PCRE_NO_AUTO_CAPTURE;
351
352 regex->reg = pcre_compile(str, flags, &error, &erroffset, NULL);
353 if (!regex->reg) {
354 memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%d)", str, error, erroffset);
355 return 0;
356 }
357
358 regex->extra = pcre_study(regex->reg, PCRE_STUDY_JIT_COMPILE, &error);
Christian Ruppert955f4612014-10-29 17:05:53 +0100359 if (!regex->extra && error != NULL) {
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200360 pcre_free(regex->reg);
361 memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
362 return 0;
363 }
David Carlierf2592b22016-11-21 21:25:58 +0000364#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
365 int flags = 0;
366 int errn;
367#if defined(USE_PCRE2_JIT)
368 int jit;
369#endif
370 PCRE2_UCHAR error[256];
371 PCRE2_SIZE erroffset;
372
373 if (!cs)
374 flags |= PCRE2_CASELESS;
375 if (!cap)
376 flags |= PCRE2_NO_AUTO_CAPTURE;
377
378 regex->reg = pcre2_compile((PCRE2_SPTR)str, PCRE2_ZERO_TERMINATED, flags, &errn, &erroffset, NULL);
379 if (!regex->reg) {
380 pcre2_get_error_message(errn, error, sizeof(error));
381 memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%zu)", str, error, erroffset);
382 return 0;
383 }
384
385#if defined(USE_PCRE2_JIT)
386 jit = pcre2_jit_compile(regex->reg, PCRE2_JIT_COMPLETE);
387 /*
388 * We end if it is an error not related to lack of JIT support
389 * in a case of JIT support missing pcre2_jit_compile is "no-op"
390 */
391 if (jit < 0 && jit != PCRE2_ERROR_JIT_BADOPTION) {
392 pcre2_code_free(regex->reg);
393 memprintf(err, "regex '%s' jit compilation failed", str);
394 return 0;
395 }
396#endif
397
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200398#else
399 int flags = REG_EXTENDED;
400
401 if (!cs)
402 flags |= REG_ICASE;
403 if (!cap)
404 flags |= REG_NOSUB;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200405
Thierry FOURNIER799c0422013-12-06 20:36:20 +0100406 if (regcomp(&regex->regex, str, flags) != 0) {
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200407 memprintf(err, "regex '%s' is invalid", str);
408 return 0;
409 }
410#endif
411 return 1;
412}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200413
/* Declared as a constructor : builds a text describing the regex library
 * selected at build time (PCRE, PCRE2, or the libc's regex) along with its
 * JIT support, then hands it over to hap_register_build_opts().
 */
__attribute__((constructor))
static void __regex_init(void)
{
	char *report = NULL;

#ifdef USE_PCRE
	{
		const char *jit_state;

		/* presumably HAP_XSTRING() stringifies its argument, so the
		 * test tells whether PCRE_PRERELEASE is empty -- TODO confirm
		 */
		memprintf(&report, "Built with PCRE version : %s", (HAP_XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
			  HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
			  HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR) HAP_XSTRING(PCRE_PRERELEASE PCRE_DATE));
		memprintf(&report, "%s\nRunning on PCRE version : %s", report, pcre_version());

#ifdef USE_PCRE_JIT
		{
			int has_jit;

			pcre_config(PCRE_CONFIG_JIT, &has_jit);
			jit_state = has_jit ? "yes" : "no (libpcre build without JIT?)";
		}
#else
		jit_state = "no (USE_PCRE_JIT not set)";
#endif
		memprintf(&report, "%s\nPCRE library supports JIT : %s", report, jit_state);
	}
#endif /* USE_PCRE */

#ifdef USE_PCRE2
	{
		const char *jit_state;

		memprintf(&report, "Built with PCRE2 version : %s", (HAP_XSTRING(Z PCRE2_PRERELEASE)[1] == 0) ?
			  HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
			  HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR) HAP_XSTRING(PCRE2_PRERELEASE PCRE2_DATE));

#ifdef USE_PCRE2_JIT
		{
			int has_jit;

			pcre2_config(PCRE2_CONFIG_JIT, &has_jit);
			jit_state = has_jit ? "yes" : "no (libpcre2 build without JIT?)";
		}
#else
		jit_state = "no (USE_PCRE2_JIT not set)";
#endif
		memprintf(&report, "%s\nPCRE2 library supports JIT : %s", report, jit_state);
	}
#endif /* USE_PCRE2 */

#if !defined(USE_PCRE) && !defined(USE_PCRE2)
	memprintf(&report, "Built without PCRE or PCRE2 support (using libc's regex instead)");
#endif
	hap_register_build_opts(report, 1);
}
460
Willy Tarreaubaaee002006-06-26 02:48:02 +0200461/*
462 * Local variables:
463 * c-indent-level: 8
464 * c-basic-offset: 8
465 * End:
466 */