blob: be4fe5b24596dc0594a8bad4b0a02833fb08b654 [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Regex and string management functions.
3 *
Willy Tarreauf4f04122010-01-28 18:10:50 +01004 * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <ctype.h>
14#include <stdlib.h>
15#include <string.h>
16
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020017#include <common/config.h>
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +020018#include <common/defaults.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020019#include <common/regex.h>
20#include <common/standard.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020021#include <proto/log.h>
22
23/* Regex trash buffer used by various regex tests: an array of MAX_MATCH
 * match-offset slots. It is defined without "static", so it is visible to
 * other translation units. Note that the regex_exec_match*() functions in
 * this file take a parameter of the same name, which hides this array
 * inside their bodies.
 */
24regmatch_t pmatch[MAX_MATCH]; /* rm_so, rm_eo for regular expressions */
25
/* Expands the replacement pattern <str> into <dst>, which can hold at most
 * <dst_size> bytes including the trailing NUL. The pattern is processed as
 * follows :
 *   - "\N" where N is a single decimal digit is replaced with the part of
 *     <src> delimited by matches[N]; nothing is emitted if either offset of
 *     that entry is negative (unset group) ;
 *   - "\xHH" emits the single byte whose hexadecimal value is HH. The two
 *     characters are NOT validated here, they are only required to be
 *     present ;
 *   - "\c" for any other character <c> emits <c> ;
 *   - any other character is copied verbatim.
 *
 * Returns the number of bytes written (trailing NUL excluded), or -1 if the
 * result does not fit into <dst>, if <str> ends in the middle of an escape
 * sequence, or if a match entry describes a negative length.
 */
int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches)
{
	char *old_dst = dst;
	char *dst_end = dst + dst_size;

	while (*str) {
		if (*str == '\\') {
			str++;
			if (!*str)
				return -1;

			if (isdigit((unsigned char)*str)) {
				int len, num;

				num = *str - '0';
				str++;

				if (matches[num].rm_eo > -1 && matches[num].rm_so > -1) {
					len = matches[num].rm_eo - matches[num].rm_so;

					/* Compare lengths rather than pointers so that no
					 * out-of-bounds pointer is ever formed, and reject
					 * inverted offsets before they reach memcpy(). One
					 * byte is always kept for the trailing NUL.
					 */
					if (len < 0 || len >= dst_end - dst)
						return -1;

					memcpy(dst, src + matches[num].rm_so, len);
					dst += len;
				}

			} else if (*str == 'x') {
				unsigned char hex1, hex2;
				str++;

				if (!*str)
					return -1;

				/* <ctype.h> functions require an unsigned char value */
				hex1 = toupper((unsigned char)*str++) - '0';

				if (!*str)
					return -1;

				hex2 = toupper((unsigned char)*str++) - '0';

				/* map 'A'..'F' (17..22 after the subtraction) to 10..15 */
				if (hex1 > 9) hex1 -= 'A' - '9' - 1;
				if (hex2 > 9) hex2 -= 'A' - '9' - 1;

				if (dst >= dst_end)
					return -1;

				*dst++ = (hex1<<4) + hex2;
			} else {
				if (dst >= dst_end)
					return -1;

				*dst++ = *str++;
			}
		} else {
			if (dst >= dst_end)
				return -1;

			*dst++ = *str++;
		}
	}
	if (dst >= dst_end)
		return -1;

	*dst = '\0';
	return dst - old_dst;
}
93
/* Checks that the replacement string <str> only contains well-formed escape
 * sequences. Returns NULL if the string is valid, otherwise a pointer to the
 * backslash which starts the first faulty sequence. A sequence is faulty when
 * the backslash is the last character, or when "\x" is not followed by two
 * characters accepted by ishex(). A backslash followed by anything other than
 * a digit or 'x' is accepted but reported through Warning().
 */
const char *check_replace_string(const char *str)
{
	const char *bslash;

	for (; *str; str++) {
		if (*str != '\\')
			continue;

		/* remember where the sequence starts, then look at what follows */
		bslash = str;
		str++;

		if (!*str)
			return bslash;

		if (isdigit((unsigned char)*str))
			continue;

		if (*str != 'x') {
			Warning("'\\%c' : deprecated use of a backslash before something not '\\','x' or a digit.\n", *str);
			continue;
		}

		/* "\x" : both following characters must be hex digits */
		str++;
		if (!ishex(*str))
			return bslash;
		str++;
		if (!ishex(*str))
			return bslash;
	}
	return NULL;
}
124
125
126/* returns the pointer to an error in the replacement string, or NULL if OK */
Thierry FOURNIER09af0d62014-06-18 11:35:54 +0200127const char *chain_regex(struct hdr_exp **head, struct my_regex *preg,
Willy Tarreauf4f04122010-01-28 18:10:50 +0100128 int action, const char *replace, void *cond)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200129{
130 struct hdr_exp *exp;
131
132 if (replace != NULL) {
Willy Tarreaub17916e2006-10-15 15:17:57 +0200133 const char *err;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200134 err = check_replace_string(replace);
135 if (err)
136 return err;
137 }
138
139 while (*head != NULL)
140 head = &(*head)->next;
141
Vincent Bernat02779b62016-04-03 13:48:43 +0200142 exp = calloc(1, sizeof(*exp));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200143
144 exp->preg = preg;
145 exp->replace = replace;
146 exp->action = action;
Willy Tarreauf4f04122010-01-28 18:10:50 +0100147 exp->cond = cond;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200148 *head = exp;
149
150 return NULL;
151}
152
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200153/* This function apply regex. It take const null terminated char as input.
154 * If the function doesn't match, it returns false, else it returns true.
155 * When it is compiled with JIT, this function execute strlen on the subject.
Willy Tarreau15a53a42015-01-21 13:39:42 +0100156 * Currently the only supported flag is REG_NOTBOL.
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200157 */
158int regex_exec_match(const struct my_regex *preg, const char *subject,
Willy Tarreau15a53a42015-01-21 13:39:42 +0100159 size_t nmatch, regmatch_t pmatch[], int flags) {
Thierry FOURNIER26202762014-06-18 11:50:51 +0200160#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200161 int ret;
162 int matches[MAX_MATCH * 3];
163 int enmatch;
164 int i;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100165 int options;
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200166
167 /* Silently limit the number of allowed matches. max
168 * match i the maximum value for match, in fact this
169 * limit is not applyied.
170 */
171 enmatch = nmatch;
172 if (enmatch > MAX_MATCH)
173 enmatch = MAX_MATCH;
174
Willy Tarreau15a53a42015-01-21 13:39:42 +0100175 options = 0;
176 if (flags & REG_NOTBOL)
177 options |= PCRE_NOTBOL;
178
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200179 /* The value returned by pcre_exec() is one more than the highest numbered
180 * pair that has been set. For example, if two substrings have been captured,
181 * the returned value is 3. If there are no capturing subpatterns, the return
182 * value from a successful match is 1, indicating that just the first pair of
183 * offsets has been set.
184 *
185 * It seems that this function returns 0 if it detect more matches than avalaible
186 * space in the matches array.
187 */
Willy Tarreau15a53a42015-01-21 13:39:42 +0100188 ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200189 if (ret < 0)
190 return 0;
191
192 if (ret == 0)
193 ret = enmatch;
194
195 for (i=0; i<nmatch; i++) {
196 /* Copy offset. */
197 if (i < ret) {
198 pmatch[i].rm_so = matches[(i*2)];
199 pmatch[i].rm_eo = matches[(i*2)+1];
200 continue;
201 }
202 /* Set the unmatvh flag (-1). */
203 pmatch[i].rm_so = -1;
204 pmatch[i].rm_eo = -1;
205 }
206 return 1;
207#else
208 int match;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100209
210 flags &= REG_NOTBOL;
211 match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200212 if (match == REG_NOMATCH)
213 return 0;
214 return 1;
215#endif
216}
217
218/* This function apply regex. It take a "char *" ans length as input. The
219 * <subject> can be modified during the processing. If the function doesn't
220 * match, it returns false, else it returns true.
221 * When it is compiled with standard POSIX regex or PCRE, this function add
222 * a temporary null chracters at the end of the <subject>. The <subject> must
Willy Tarreau15a53a42015-01-21 13:39:42 +0100223 * have a real length of <length> + 1. Currently the only supported flag is
224 * REG_NOTBOL.
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200225 */
226int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
Willy Tarreau15a53a42015-01-21 13:39:42 +0100227 size_t nmatch, regmatch_t pmatch[], int flags) {
Thierry FOURNIER26202762014-06-18 11:50:51 +0200228#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200229 int ret;
230 int matches[MAX_MATCH * 3];
231 int enmatch;
232 int i;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100233 int options;
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200234
235 /* Silently limit the number of allowed matches. max
236 * match i the maximum value for match, in fact this
237 * limit is not applyied.
238 */
239 enmatch = nmatch;
240 if (enmatch > MAX_MATCH)
241 enmatch = MAX_MATCH;
242
Willy Tarreau15a53a42015-01-21 13:39:42 +0100243 options = 0;
244 if (flags & REG_NOTBOL)
245 options |= PCRE_NOTBOL;
246
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200247 /* The value returned by pcre_exec() is one more than the highest numbered
248 * pair that has been set. For example, if two substrings have been captured,
249 * the returned value is 3. If there are no capturing subpatterns, the return
250 * value from a successful match is 1, indicating that just the first pair of
251 * offsets has been set.
252 *
253 * It seems that this function returns 0 if it detect more matches than avalaible
254 * space in the matches array.
255 */
Willy Tarreau15a53a42015-01-21 13:39:42 +0100256 ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200257 if (ret < 0)
258 return 0;
259
260 if (ret == 0)
261 ret = enmatch;
262
263 for (i=0; i<nmatch; i++) {
264 /* Copy offset. */
265 if (i < ret) {
266 pmatch[i].rm_so = matches[(i*2)];
267 pmatch[i].rm_eo = matches[(i*2)+1];
268 continue;
269 }
270 /* Set the unmatvh flag (-1). */
271 pmatch[i].rm_so = -1;
272 pmatch[i].rm_eo = -1;
273 }
274 return 1;
275#else
276 char old_char = subject[length];
277 int match;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100278
279 flags &= REG_NOTBOL;
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200280 subject[length] = 0;
Willy Tarreau15a53a42015-01-21 13:39:42 +0100281 match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200282 subject[length] = old_char;
283 if (match == REG_NOMATCH)
284 return 0;
285 return 1;
286#endif
287}
288
Thierry FOURNIER799c0422013-12-06 20:36:20 +0100289int regex_comp(const char *str, struct my_regex *regex, int cs, int cap, char **err)
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200290{
Thierry FOURNIER26202762014-06-18 11:50:51 +0200291#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200292 int flags = 0;
293 const char *error;
294 int erroffset;
295
296 if (!cs)
297 flags |= PCRE_CASELESS;
298 if (!cap)
299 flags |= PCRE_NO_AUTO_CAPTURE;
300
301 regex->reg = pcre_compile(str, flags, &error, &erroffset, NULL);
302 if (!regex->reg) {
303 memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%d)", str, error, erroffset);
304 return 0;
305 }
306
307 regex->extra = pcre_study(regex->reg, PCRE_STUDY_JIT_COMPILE, &error);
Christian Ruppert955f4612014-10-29 17:05:53 +0100308 if (!regex->extra && error != NULL) {
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200309 pcre_free(regex->reg);
310 memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
311 return 0;
312 }
313#else
314 int flags = REG_EXTENDED;
315
316 if (!cs)
317 flags |= REG_ICASE;
318 if (!cap)
319 flags |= REG_NOSUB;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200320
Thierry FOURNIER799c0422013-12-06 20:36:20 +0100321 if (regcomp(&regex->regex, str, flags) != 0) {
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200322 memprintf(err, "regex '%s' is invalid", str);
323 return 0;
324 }
325#endif
326 return 1;
327}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200328
329/*
330 * Local variables:
331 * c-indent-level: 8
332 * c-basic-offset: 8
333 * End:
334 */