blob: dd7719405fdcb0d9baeb2d07ef4b96d940af0414 [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Regex and string management functions.
3 *
Willy Tarreauf4f04122010-01-28 18:10:50 +01004 * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <ctype.h>
14#include <stdlib.h>
15#include <string.h>
16
Willy Tarreau7a9ac6d2016-12-21 19:13:14 +010017#include <types/global.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020018#include <common/config.h>
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +020019#include <common/defaults.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020020#include <common/regex.h>
21#include <common/standard.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020022#include <proto/log.h>
23
24/* regex trash buffer used by various regex tests */
25regmatch_t pmatch[MAX_MATCH]; /* rm_so, rm_eo for regular expressions */
26
/* Returns the numeric value (0..15) of the hexadecimal digit <c>, or -1 if
 * <c> is not a hexadecimal digit.
 */
static int exp_replace_hex_value(unsigned char c)
{
	if (isdigit(c))
		return c - '0';
	if (isxdigit(c))
		return toupper(c) - 'A' + 10;
	return -1;
}

/* Expands the replacement pattern <str> into <dst>, which can hold <dst_size>
 * bytes including the trailing zero. The following escapes are understood :
 *   - "\N" (N = a single digit 0..9) is replaced with the part of <src>
 *     delimited by matches[N]. An unset group (rm_so or rm_eo equal to -1)
 *     expands to nothing. <matches> must therefore hold at least 10 entries.
 *   - "\xHH" is replaced with the byte whose hexadecimal code is HH.
 *   - "\C" for any other character C is replaced with C itself.
 * Every other character is copied as-is.
 *
 * Returns the length of the resulting string (not counting the trailing zero),
 * or -1 if the result does not fit into <dst>, if <str> ends in the middle of
 * an escape sequence, if "\x" is not followed by two hexadecimal digits, or if
 * a match has its end before its start.
 */
int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches)
{
	char *old_dst = dst;
	char *dst_end = dst + dst_size;

	while (*str) {
		if (*str != '\\') {
			/* regular character, copied verbatim */
			if (dst >= dst_end)
				return -1;

			*dst++ = *str++;
			continue;
		}

		/* skip the backslash; it must be followed by something */
		str++;
		if (!*str)
			return -1;

		if (isdigit((unsigned char)*str)) {
			int num = *str - '0';

			str++;

			if (matches[num].rm_eo > -1 && matches[num].rm_so > -1) {
				long len = (long)matches[num].rm_eo - (long)matches[num].rm_so;

				/* Compare lengths instead of building <dst + len>,
				 * which could point far outside of the buffer. One
				 * byte is always kept for the trailing zero.
				 */
				if (len < 0 || len >= dst_end - dst)
					return -1;

				memcpy(dst, src + matches[num].rm_so, (size_t)len);
				dst += len;
			}
		}
		else if (*str == 'x') {
			int hi, lo;

			str++;

			if (!*str)
				return -1;

			hi = exp_replace_hex_value((unsigned char)*str++);
			if (hi < 0)
				return -1;

			if (!*str)
				return -1;

			lo = exp_replace_hex_value((unsigned char)*str++);
			if (lo < 0)
				return -1;

			if (dst >= dst_end)
				return -1;

			*dst++ = (char)((hi << 4) + lo);
		}
		else {
			/* any other escaped character stands for itself */
			if (dst >= dst_end)
				return -1;

			*dst++ = *str++;
		}
	}

	/* room is still needed for the trailing zero */
	if (dst >= dst_end)
		return -1;

	*dst = '\0';
	return dst - old_dst;
}
94
/* Checks that the replacement string <str> only contains well-formed escape
 * sequences. Returns NULL if the string is valid, otherwise a pointer to the
 * backslash which starts the first invalid sequence.
 *
 * A sequence is invalid when the backslash is the last character of the
 * string, or when "\x" is not followed by two characters accepted by ishex().
 * A backslash followed by a digit, by 'x' plus two hex digits, or by another
 * backslash is accepted silently. A backslash followed by anything else is
 * accepted too, but a deprecation warning is emitted.
 */
const char *check_replace_string(const char *str)
{
	const char *err = NULL;

	while (*str) {
		if (*str == '\\') {
			err = str; /* in case of error, we return the pointer to the backslash */
			str++;
			if (!*str)
				return err;

			if (*str == 'x') {
				str++;
				if (!ishex(*str))
					return err;
				str++;
				if (!ishex(*str))
					return err;
			}
			else if (*str != '\\' && !isdigit((unsigned char)*str)) {
				/* An escaped backslash is explicitly listed as valid in
				 * the message below, so it must not trigger it.
				 */
				Warning("'\\%c' : deprecated use of a backslash before something not '\\','x' or a digit.\n", *str);
			}
			err = NULL;
		}
		str++;
	}
	return err;
}
125
126
127/* returns the pointer to an error in the replacement string, or NULL if OK */
Thierry FOURNIER09af0d62014-06-18 11:35:54 +0200128const char *chain_regex(struct hdr_exp **head, struct my_regex *preg,
Willy Tarreauf4f04122010-01-28 18:10:50 +0100129 int action, const char *replace, void *cond)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200130{
131 struct hdr_exp *exp;
132
133 if (replace != NULL) {
Willy Tarreaub17916e2006-10-15 15:17:57 +0200134 const char *err;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200135 err = check_replace_string(replace);
136 if (err)
137 return err;
138 }
139
140 while (*head != NULL)
141 head = &(*head)->next;
142
Vincent Bernat02779b62016-04-03 13:48:43 +0200143 exp = calloc(1, sizeof(*exp));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200144
145 exp->preg = preg;
146 exp->replace = replace;
147 exp->action = action;
Willy Tarreauf4f04122010-01-28 18:10:50 +0100148 exp->cond = cond;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200149 *head = exp;
150
151 return NULL;
152}
153
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200154/* This function apply regex. It take const null terminated char as input.
155 * If the function doesn't match, it returns false, else it returns true.
156 * When it is compiled with JIT, this function execute strlen on the subject.
Willy Tarreau15a53a42015-01-21 13:39:42 +0100157 * Currently the only supported flag is REG_NOTBOL.
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200158 */
159int regex_exec_match(const struct my_regex *preg, const char *subject,
Willy Tarreau15a53a42015-01-21 13:39:42 +0100160 size_t nmatch, regmatch_t pmatch[], int flags) {
Thierry FOURNIER26202762014-06-18 11:50:51 +0200161#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200162 int ret;
163 int matches[MAX_MATCH * 3];
164 int enmatch;
165 int i;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100166 int options;
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200167
168 /* Silently limit the number of allowed matches. max
169 * match i the maximum value for match, in fact this
170 * limit is not applyied.
171 */
172 enmatch = nmatch;
173 if (enmatch > MAX_MATCH)
174 enmatch = MAX_MATCH;
175
Willy Tarreau15a53a42015-01-21 13:39:42 +0100176 options = 0;
177 if (flags & REG_NOTBOL)
178 options |= PCRE_NOTBOL;
179
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200180 /* The value returned by pcre_exec() is one more than the highest numbered
181 * pair that has been set. For example, if two substrings have been captured,
182 * the returned value is 3. If there are no capturing subpatterns, the return
183 * value from a successful match is 1, indicating that just the first pair of
184 * offsets has been set.
185 *
186 * It seems that this function returns 0 if it detect more matches than avalaible
187 * space in the matches array.
188 */
Willy Tarreau15a53a42015-01-21 13:39:42 +0100189 ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200190 if (ret < 0)
191 return 0;
192
193 if (ret == 0)
194 ret = enmatch;
195
196 for (i=0; i<nmatch; i++) {
197 /* Copy offset. */
198 if (i < ret) {
199 pmatch[i].rm_so = matches[(i*2)];
200 pmatch[i].rm_eo = matches[(i*2)+1];
201 continue;
202 }
203 /* Set the unmatvh flag (-1). */
204 pmatch[i].rm_so = -1;
205 pmatch[i].rm_eo = -1;
206 }
207 return 1;
208#else
209 int match;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100210
211 flags &= REG_NOTBOL;
212 match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200213 if (match == REG_NOMATCH)
214 return 0;
215 return 1;
216#endif
217}
218
219/* This function apply regex. It take a "char *" ans length as input. The
220 * <subject> can be modified during the processing. If the function doesn't
221 * match, it returns false, else it returns true.
222 * When it is compiled with standard POSIX regex or PCRE, this function add
223 * a temporary null chracters at the end of the <subject>. The <subject> must
Willy Tarreau15a53a42015-01-21 13:39:42 +0100224 * have a real length of <length> + 1. Currently the only supported flag is
225 * REG_NOTBOL.
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200226 */
227int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
Willy Tarreau15a53a42015-01-21 13:39:42 +0100228 size_t nmatch, regmatch_t pmatch[], int flags) {
Thierry FOURNIER26202762014-06-18 11:50:51 +0200229#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200230 int ret;
231 int matches[MAX_MATCH * 3];
232 int enmatch;
233 int i;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100234 int options;
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200235
236 /* Silently limit the number of allowed matches. max
237 * match i the maximum value for match, in fact this
238 * limit is not applyied.
239 */
240 enmatch = nmatch;
241 if (enmatch > MAX_MATCH)
242 enmatch = MAX_MATCH;
243
Willy Tarreau15a53a42015-01-21 13:39:42 +0100244 options = 0;
245 if (flags & REG_NOTBOL)
246 options |= PCRE_NOTBOL;
247
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200248 /* The value returned by pcre_exec() is one more than the highest numbered
249 * pair that has been set. For example, if two substrings have been captured,
250 * the returned value is 3. If there are no capturing subpatterns, the return
251 * value from a successful match is 1, indicating that just the first pair of
252 * offsets has been set.
253 *
254 * It seems that this function returns 0 if it detect more matches than avalaible
255 * space in the matches array.
256 */
Willy Tarreau15a53a42015-01-21 13:39:42 +0100257 ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200258 if (ret < 0)
259 return 0;
260
261 if (ret == 0)
262 ret = enmatch;
263
264 for (i=0; i<nmatch; i++) {
265 /* Copy offset. */
266 if (i < ret) {
267 pmatch[i].rm_so = matches[(i*2)];
268 pmatch[i].rm_eo = matches[(i*2)+1];
269 continue;
270 }
271 /* Set the unmatvh flag (-1). */
272 pmatch[i].rm_so = -1;
273 pmatch[i].rm_eo = -1;
274 }
275 return 1;
276#else
277 char old_char = subject[length];
278 int match;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100279
280 flags &= REG_NOTBOL;
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200281 subject[length] = 0;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100282 match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200283 subject[length] = old_char;
284 if (match == REG_NOMATCH)
285 return 0;
286 return 1;
287#endif
288}
289
Thierry FOURNIER799c0422013-12-06 20:36:20 +0100290int regex_comp(const char *str, struct my_regex *regex, int cs, int cap, char **err)
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200291{
Thierry FOURNIER26202762014-06-18 11:50:51 +0200292#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200293 int flags = 0;
294 const char *error;
295 int erroffset;
296
297 if (!cs)
298 flags |= PCRE_CASELESS;
299 if (!cap)
300 flags |= PCRE_NO_AUTO_CAPTURE;
301
302 regex->reg = pcre_compile(str, flags, &error, &erroffset, NULL);
303 if (!regex->reg) {
304 memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%d)", str, error, erroffset);
305 return 0;
306 }
307
308 regex->extra = pcre_study(regex->reg, PCRE_STUDY_JIT_COMPILE, &error);
Christian Ruppert955f4612014-10-29 17:05:53 +0100309 if (!regex->extra && error != NULL) {
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200310 pcre_free(regex->reg);
311 memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
312 return 0;
313 }
314#else
315 int flags = REG_EXTENDED;
316
317 if (!cs)
318 flags |= REG_ICASE;
319 if (!cap)
320 flags |= REG_NOSUB;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200321
Thierry FOURNIER799c0422013-12-06 20:36:20 +0100322 if (regcomp(&regex->regex, str, flags) != 0) {
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200323 memprintf(err, "regex '%s' is invalid", str);
324 return 0;
325 }
326#endif
327 return 1;
328}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200329
/* Runs before main() (constructor attribute). Builds a human-readable
 * description of the regex backend this file was compiled with and hands it
 * to hap_register_build_opts(). With USE_PCRE, the description holds the PCRE
 * version seen at build time, the version returned by pcre_version() at run
 * time, and whether JIT is usable.
 */
__attribute__((constructor))
static void __regex_init(void)
{
	char *ptr = NULL;
#ifdef USE_PCRE
	const char *jit_status;
#ifdef USE_PCRE_JIT
	int has_jit;
#endif

	/* NOTE(review): assuming HAP_XSTRING() stringifies its expanded argument,
	 * "Z PCRE_PRERELEASE" yields just "Z" when PCRE_PRERELEASE is empty,
	 * which is what the test on the second character detects - confirm.
	 */
	memprintf(&ptr, "Built with PCRE version : %s", (HAP_XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
	          HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
	          HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR) HAP_XSTRING(PCRE_PRERELEASE PCRE_DATE));
	memprintf(&ptr, "%s\nRunning on PCRE version : %s", ptr, pcre_version());

#ifdef USE_PCRE_JIT
	/* ask the library itself whether it was built with JIT support */
	pcre_config(PCRE_CONFIG_JIT, &has_jit);
	jit_status = has_jit ? "yes" : "no (libpcre build without JIT?)";
#else
	jit_status = "no (USE_PCRE_JIT not set)";
#endif
	memprintf(&ptr, "%s\nPCRE library supports JIT : %s", ptr, jit_status);
#else
	memprintf(&ptr, "Built without PCRE support (using libc's regex instead)");
#endif
	/* NOTE(review): the second argument presumably tells the callee that the
	 * string was dynamically allocated - confirm against its prototype.
	 */
	hap_register_build_opts(ptr, 1);
}
357
Willy Tarreaubaaee002006-06-26 02:48:02 +0200358/*
359 * Local variables:
360 * c-indent-level: 8
361 * c-basic-offset: 8
362 * End:
363 */