blob: dda666db22083fc4c840c1745731be087e91ab09 [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Regex and string management functions.
3 *
Willy Tarreauf4f04122010-01-28 18:10:50 +01004 * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <ctype.h>
14#include <stdlib.h>
15#include <string.h>
16
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020017#include <common/config.h>
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +020018#include <common/defaults.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020019#include <common/regex.h>
20#include <common/standard.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020021#include <proto/log.h>
22
23/* Shared regex scratch buffer used by various regex tests. It provides
 * MAX_MATCH match slots. Note that regex_exec_match() and regex_exec_match2()
 * take a parameter with the same name, which hides this global inside them.
 */
24regmatch_t pmatch[MAX_MATCH]; /* rm_so, rm_eo for regular expressions */
25
/*
 * Expands the replacement template <str> into <dst>, picking captured
 * sub-strings out of the subject <src> according to <matches>.
 *
 * Escape sequences recognised in <str>:
 *   \0 .. \9  insert capture group N (nothing is inserted if either offset
 *             of that group is negative, i.e. the group did not match)
 *   \xHH      insert the byte whose value is given by the two characters HH
 *   \c        any other character <c> is copied as-is
 *
 * <dst_size> is the total room in <dst>, including the trailing zero which
 * is always written on success. <matches> must hold at least 10 entries
 * since any digit may be used as an index.
 *
 * Returns the number of bytes written (not counting the trailing zero), or
 * -1 if the result does not fit, if <str> ends in the middle of an escape
 * sequence, or if a capture group describes an inverted range.
 *
 * The two characters following \x are not validated here; see
 * check_replace_string() for the validation of replacement strings.
 */
int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches)
{
	char *old_dst = dst;
	char *dst_end = dst + dst_size;

	while (*str) {
		if (*str == '\\') {
			str++;
			if (!*str)
				return -1;

			if (isdigit((unsigned char)*str)) {
				int len, num;

				num = *str - '0';
				str++;

				if (matches[num].rm_eo > -1 && matches[num].rm_so > -1) {
					len = matches[num].rm_eo - matches[num].rm_so;

					/* Compare lengths rather than pointers so that no
					 * out-of-range pointer is ever formed, and refuse
					 * inverted ranges which would otherwise reach
					 * memcpy() as a huge size.
					 */
					if (len < 0 || (size_t)len >= (size_t)(dst_end - dst))
						return -1;

					memcpy(dst, src + matches[num].rm_so, (size_t)len);
					dst += len;
				}

			} else if (*str == 'x') {
				unsigned char hex1, hex2;

				str++;
				if (!*str)
					return -1;

				/* <ctype.h> functions require a value representable
				 * as unsigned char, hence the casts.
				 */
				hex1 = toupper((unsigned char)*str++) - '0';

				if (!*str)
					return -1;

				hex2 = toupper((unsigned char)*str++) - '0';

				/* map 'A'..'F' (already shifted by '0') onto 10..15 */
				if (hex1 > 9) hex1 -= 'A' - '9' - 1;
				if (hex2 > 9) hex2 -= 'A' - '9' - 1;

				if (dst >= dst_end)
					return -1;

				*dst++ = (hex1 << 4) + hex2;
			} else {
				/* backslash before anything else: copy that character */
				if (dst >= dst_end)
					return -1;

				*dst++ = *str++;
			}
		} else {
			if (dst >= dst_end)
				return -1;

			*dst++ = *str++;
		}
	}

	/* room for the trailing zero */
	if (dst >= dst_end)
		return -1;

	*dst = '\0';
	return dst - old_dst;
}
93
/* Validates the replacement string <str>. Returns NULL when it is valid,
 * otherwise a pointer to the backslash that starts the faulty sequence
 * (a trailing backslash, or "\x" not followed by two hex digits). A
 * backslash followed by anything other than a digit or 'x' is accepted but
 * reported through a warning.
 */
const char *check_replace_string(const char *str)
{
	const char *p;

	for (p = str; *p; p++) {
		const char *bslash;

		if (*p != '\\')
			continue;

		/* remember the backslash, this is what gets reported on error */
		bslash = p;
		p++;

		if (*p == '\0')
			return bslash;

		if (isdigit((unsigned char)*p))
			continue;

		if (*p == 'x') {
			/* both following characters must be hex digits */
			if (!ishex(p[1]) || !ishex(p[2]))
				return bslash;
			p += 2;
			continue;
		}

		Warning("'\\%c' : deprecated use of a backslash before something not '\\','x' or a digit.\n", *p);
	}
	return NULL;
}
124
125
126/* returns the pointer to an error in the replacement string, or NULL if OK */
Thierry FOURNIER09af0d62014-06-18 11:35:54 +0200127const char *chain_regex(struct hdr_exp **head, struct my_regex *preg,
Willy Tarreauf4f04122010-01-28 18:10:50 +0100128 int action, const char *replace, void *cond)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200129{
130 struct hdr_exp *exp;
131
132 if (replace != NULL) {
Willy Tarreaub17916e2006-10-15 15:17:57 +0200133 const char *err;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200134 err = check_replace_string(replace);
135 if (err)
136 return err;
137 }
138
139 while (*head != NULL)
140 head = &(*head)->next;
141
142 exp = calloc(1, sizeof(struct hdr_exp));
143
144 exp->preg = preg;
145 exp->replace = replace;
146 exp->action = action;
Willy Tarreauf4f04122010-01-28 18:10:50 +0100147 exp->cond = cond;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200148 *head = exp;
149
150 return NULL;
151}
152
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200153/* This function apply regex. It take const null terminated char as input.
154 * If the function doesn't match, it returns false, else it returns true.
155 * When it is compiled with JIT, this function execute strlen on the subject.
156 */
157int regex_exec_match(const struct my_regex *preg, const char *subject,
158 size_t nmatch, regmatch_t pmatch[]) {
Thierry FOURNIER26202762014-06-18 11:50:51 +0200159#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200160 int ret;
161 int matches[MAX_MATCH * 3];
162 int enmatch;
163 int i;
164
165 /* Silently limit the number of allowed matches. max
166 * match i the maximum value for match, in fact this
167 * limit is not applyied.
168 */
169 enmatch = nmatch;
170 if (enmatch > MAX_MATCH)
171 enmatch = MAX_MATCH;
172
173 /* The value returned by pcre_exec() is one more than the highest numbered
174 * pair that has been set. For example, if two substrings have been captured,
175 * the returned value is 3. If there are no capturing subpatterns, the return
176 * value from a successful match is 1, indicating that just the first pair of
177 * offsets has been set.
178 *
179 * It seems that this function returns 0 if it detect more matches than avalaible
180 * space in the matches array.
181 */
182 ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, 0, matches, enmatch * 3);
183 if (ret < 0)
184 return 0;
185
186 if (ret == 0)
187 ret = enmatch;
188
189 for (i=0; i<nmatch; i++) {
190 /* Copy offset. */
191 if (i < ret) {
192 pmatch[i].rm_so = matches[(i*2)];
193 pmatch[i].rm_eo = matches[(i*2)+1];
194 continue;
195 }
196 /* Set the unmatvh flag (-1). */
197 pmatch[i].rm_so = -1;
198 pmatch[i].rm_eo = -1;
199 }
200 return 1;
201#else
202 int match;
203 match = regexec(&preg->regex, subject, nmatch, pmatch, 0);
204 if (match == REG_NOMATCH)
205 return 0;
206 return 1;
207#endif
208}
209
210/* This function apply regex. It take a "char *" ans length as input. The
211 * <subject> can be modified during the processing. If the function doesn't
212 * match, it returns false, else it returns true.
213 * When it is compiled with standard POSIX regex or PCRE, this function add
214 * a temporary null chracters at the end of the <subject>. The <subject> must
215 * have a real length of <length> + 1.
216 */
217int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
218 size_t nmatch, regmatch_t pmatch[]) {
Thierry FOURNIER26202762014-06-18 11:50:51 +0200219#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200220 int ret;
221 int matches[MAX_MATCH * 3];
222 int enmatch;
223 int i;
224
225 /* Silently limit the number of allowed matches. max
226 * match i the maximum value for match, in fact this
227 * limit is not applyied.
228 */
229 enmatch = nmatch;
230 if (enmatch > MAX_MATCH)
231 enmatch = MAX_MATCH;
232
233 /* The value returned by pcre_exec() is one more than the highest numbered
234 * pair that has been set. For example, if two substrings have been captured,
235 * the returned value is 3. If there are no capturing subpatterns, the return
236 * value from a successful match is 1, indicating that just the first pair of
237 * offsets has been set.
238 *
239 * It seems that this function returns 0 if it detect more matches than avalaible
240 * space in the matches array.
241 */
242 ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, 0, matches, enmatch * 3);
243 if (ret < 0)
244 return 0;
245
246 if (ret == 0)
247 ret = enmatch;
248
249 for (i=0; i<nmatch; i++) {
250 /* Copy offset. */
251 if (i < ret) {
252 pmatch[i].rm_so = matches[(i*2)];
253 pmatch[i].rm_eo = matches[(i*2)+1];
254 continue;
255 }
256 /* Set the unmatvh flag (-1). */
257 pmatch[i].rm_so = -1;
258 pmatch[i].rm_eo = -1;
259 }
260 return 1;
261#else
262 char old_char = subject[length];
263 int match;
264 subject[length] = 0;
265 match = regexec(&preg->regex, subject, nmatch, pmatch, 0);
266 subject[length] = old_char;
267 if (match == REG_NOMATCH)
268 return 0;
269 return 1;
270#endif
271}
272
Thierry FOURNIER799c0422013-12-06 20:36:20 +0100273int regex_comp(const char *str, struct my_regex *regex, int cs, int cap, char **err)
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200274{
Thierry FOURNIER26202762014-06-18 11:50:51 +0200275#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200276 int flags = 0;
277 const char *error;
278 int erroffset;
279
280 if (!cs)
281 flags |= PCRE_CASELESS;
282 if (!cap)
283 flags |= PCRE_NO_AUTO_CAPTURE;
284
285 regex->reg = pcre_compile(str, flags, &error, &erroffset, NULL);
286 if (!regex->reg) {
287 memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%d)", str, error, erroffset);
288 return 0;
289 }
290
Thierry FOURNIER26202762014-06-18 11:50:51 +0200291#ifdef USE_PCRE_JIT
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200292 regex->extra = pcre_study(regex->reg, PCRE_STUDY_JIT_COMPILE, &error);
293 if (!regex->extra) {
294 pcre_free(regex->reg);
295 memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
296 return 0;
297 }
298#else
Thierry FOURNIER26202762014-06-18 11:50:51 +0200299 regex->extra = NULL;
300#endif
301#else
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200302 int flags = REG_EXTENDED;
303
304 if (!cs)
305 flags |= REG_ICASE;
306 if (!cap)
307 flags |= REG_NOSUB;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200308
Thierry FOURNIER799c0422013-12-06 20:36:20 +0100309 if (regcomp(&regex->regex, str, flags) != 0) {
Thierry FOURNIERed5a4ae2013-10-14 14:07:36 +0200310 memprintf(err, "regex '%s' is invalid", str);
311 return 0;
312 }
313#endif
314 return 1;
315}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200316
317/*
318 * Local variables:
319 * c-indent-level: 8
320 * c-basic-offset: 8
321 * End:
322 */