blob: b6f75021a400935512fee2879962ccb045c4420d [file] [log] [blame]
Thierry FOURNIERed66c292013-11-28 11:05:19 +01001/*
2 * Pattern management functions.
3 *
4 * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <ctype.h>
14#include <stdio.h>
15
16#include <common/config.h>
17#include <common/standard.h>
18
19#include <types/global.h>
20#include <types/pattern.h>
21
22#include <proto/pattern.h>
23
24#include <ebsttree.h>
25
Thierry FOURNIERa65b3432013-11-28 18:22:00 +010026char *pat_match_names[PAT_MATCH_NUM] = {
27 [PAT_MATCH_FOUND] = "found",
28 [PAT_MATCH_BOOL] = "bool",
29 [PAT_MATCH_INT] = "int",
30 [PAT_MATCH_IP] = "ip",
31 [PAT_MATCH_BIN] = "bin",
32 [PAT_MATCH_LEN] = "len",
33 [PAT_MATCH_STR] = "str",
34 [PAT_MATCH_BEG] = "beg",
35 [PAT_MATCH_SUB] = "sub",
36 [PAT_MATCH_DIR] = "dir",
37 [PAT_MATCH_DOM] = "dom",
38 [PAT_MATCH_END] = "end",
39 [PAT_MATCH_REG] = "reg",
Thierry FOURNIERed66c292013-11-28 11:05:19 +010040};
41
Thierry FOURNIERa65b3432013-11-28 18:22:00 +010042int (*pat_parse_fcts[PAT_MATCH_NUM])(const char **, struct pattern *, struct sample_storage *, int *, char **) = {
43 [PAT_MATCH_FOUND] = pat_parse_nothing,
44 [PAT_MATCH_BOOL] = pat_parse_nothing,
45 [PAT_MATCH_INT] = pat_parse_int,
46 [PAT_MATCH_IP] = pat_parse_ip,
47 [PAT_MATCH_BIN] = pat_parse_bin,
48 [PAT_MATCH_LEN] = pat_parse_int,
49 [PAT_MATCH_STR] = pat_parse_str,
50 [PAT_MATCH_BEG] = pat_parse_str,
51 [PAT_MATCH_SUB] = pat_parse_str,
52 [PAT_MATCH_DIR] = pat_parse_str,
53 [PAT_MATCH_DOM] = pat_parse_str,
54 [PAT_MATCH_END] = pat_parse_str,
55 [PAT_MATCH_REG] = pat_parse_reg,
Thierry FOURNIERed66c292013-11-28 11:05:19 +010056};
57
Willy Tarreau0cba6072013-11-28 22:21:02 +010058enum pat_match_res (*pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern *) = {
Thierry FOURNIERa65b3432013-11-28 18:22:00 +010059 [PAT_MATCH_FOUND] = NULL,
60 [PAT_MATCH_BOOL] = pat_match_nothing,
61 [PAT_MATCH_INT] = pat_match_int,
62 [PAT_MATCH_IP] = pat_match_ip,
63 [PAT_MATCH_BIN] = pat_match_bin,
64 [PAT_MATCH_LEN] = pat_match_len,
65 [PAT_MATCH_STR] = pat_match_str,
66 [PAT_MATCH_BEG] = pat_match_beg,
67 [PAT_MATCH_SUB] = pat_match_sub,
68 [PAT_MATCH_DIR] = pat_match_dir,
69 [PAT_MATCH_DOM] = pat_match_dom,
70 [PAT_MATCH_END] = pat_match_end,
71 [PAT_MATCH_REG] = pat_match_reg,
Thierry FOURNIERed66c292013-11-28 11:05:19 +010072};
73
74/*
75 * These functions are exported and may be used by any other component.
76 */
77
/* Parser for methods which take no pattern: none of the arguments is read or
 * modified, and 1 is always returned.
 */
int pat_parse_nothing(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
{
	return 1;
}
83
84/* always return false */
Willy Tarreau0cba6072013-11-28 22:21:02 +010085enum pat_match_res pat_match_nothing(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +010086{
Thierry FOURNIERa65b3432013-11-28 18:22:00 +010087 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +010088}
89
90
91/* NB: For two strings to be identical, it is required that their lengths match */
Willy Tarreau0cba6072013-11-28 22:21:02 +010092enum pat_match_res pat_match_str(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +010093{
94 int icase;
95
96 if (pattern->len != smp->data.str.len)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +010097 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +010098
Thierry FOURNIERa65b3432013-11-28 18:22:00 +010099 icase = pattern->flags & PAT_F_IGNORE_CASE;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100100 if ((icase && strncasecmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0) ||
101 (!icase && strncmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0))
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100102 return PAT_MATCH;
103 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100104}
105
106/* NB: For two binaries buf to be identical, it is required that their lengths match */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100107enum pat_match_res pat_match_bin(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100108{
109 if (pattern->len != smp->data.str.len)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100110 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100111
112 if (memcmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100113 return PAT_MATCH;
114 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100115}
116
117/* Lookup a string in the expression's pattern tree. The node is returned if it
118 * exists, otherwise NULL.
119 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100120static void *pat_lookup_str(struct sample *smp, struct pattern_expr *expr)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100121{
122 /* data are stored in a tree */
123 struct ebmb_node *node;
124 char prev;
125
126 /* we may have to force a trailing zero on the test pattern */
127 prev = smp->data.str.str[smp->data.str.len];
128 if (prev)
129 smp->data.str.str[smp->data.str.len] = '\0';
130 node = ebst_lookup(&expr->pattern_tree, smp->data.str.str);
131 if (prev)
132 smp->data.str.str[smp->data.str.len] = prev;
133 return node;
134}
135
136/* Executes a regex. It temporarily changes the data to add a trailing zero,
137 * and restores the previous character when leaving.
138 */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100139enum pat_match_res pat_match_reg(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100140{
141 if (regex_exec(pattern->ptr.reg, smp->data.str.str, smp->data.str.len) == 0)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100142 return PAT_MATCH;
143 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100144}
145
146/* Checks that the pattern matches the beginning of the tested string. */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100147enum pat_match_res pat_match_beg(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100148{
149 int icase;
150
151 if (pattern->len > smp->data.str.len)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100152 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100153
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100154 icase = pattern->flags & PAT_F_IGNORE_CASE;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100155 if ((icase && strncasecmp(pattern->ptr.str, smp->data.str.str, pattern->len) != 0) ||
156 (!icase && strncmp(pattern->ptr.str, smp->data.str.str, pattern->len) != 0))
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100157 return PAT_NOMATCH;
158 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100159}
160
161/* Checks that the pattern matches the end of the tested string. */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100162enum pat_match_res pat_match_end(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100163{
164 int icase;
165
166 if (pattern->len > smp->data.str.len)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100167 return PAT_NOMATCH;
168 icase = pattern->flags & PAT_F_IGNORE_CASE;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100169 if ((icase && strncasecmp(pattern->ptr.str, smp->data.str.str + smp->data.str.len - pattern->len, pattern->len) != 0) ||
170 (!icase && strncmp(pattern->ptr.str, smp->data.str.str + smp->data.str.len - pattern->len, pattern->len) != 0))
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100171 return PAT_NOMATCH;
172 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100173}
174
175/* Checks that the pattern is included inside the tested string.
176 * NB: Suboptimal, should be rewritten using a Boyer-Moore method.
177 */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100178enum pat_match_res pat_match_sub(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100179{
180 int icase;
181 char *end;
182 char *c;
183
184 if (pattern->len > smp->data.str.len)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100185 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100186
187 end = smp->data.str.str + smp->data.str.len - pattern->len;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100188 icase = pattern->flags & PAT_F_IGNORE_CASE;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100189 if (icase) {
190 for (c = smp->data.str.str; c <= end; c++) {
191 if (tolower(*c) != tolower(*pattern->ptr.str))
192 continue;
193 if (strncasecmp(pattern->ptr.str, c, pattern->len) == 0)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100194 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100195 }
196 } else {
197 for (c = smp->data.str.str; c <= end; c++) {
198 if (*c != *pattern->ptr.str)
199 continue;
200 if (strncmp(pattern->ptr.str, c, pattern->len) == 0)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100201 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100202 }
203 }
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100204 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100205}
206
/* Background: Fast way to find a zero byte in a word
 * http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
 * hasZeroByte = (v - 0x01010101UL) & ~v & 0x80808080UL;
 *
 * To look for 4 different byte values, xor the word with those bytes and
 * then check for zero bytes:
 *
 * v = (((unsigned char)c * 0x1010101U) ^ delimiter)
 * where <delimiter> is the 4 byte values to look for (as an uint)
 * and <c> is the character that is being tested
 *
 * Returns non-zero if <c> equals one of the 4 bytes packed in <mask>,
 * otherwise zero. The constants are unsigned so that the multiplication
 * is not performed on a signed int, which would overflow for c >= 0x80.
 */
static inline unsigned int is_delimiter(unsigned char c, unsigned int mask)
{
	mask ^= (c * 0x01010101U); /* propagate the char to all 4 bytes */
	return (mask - 0x01010101U) & ~mask & 0x80808080U;
}
223
/* Packs 4 delimiter bytes into one unsigned int suitable for is_delimiter(),
 * <d1> being the most significant byte and <d4> the least significant one.
 * Each byte is converted to unsigned int before being shifted so that a
 * value >= 0x80 in <d1> is not shifted into the sign bit of an int.
 */
static inline unsigned int make_4delim(unsigned char d1, unsigned char d2, unsigned char d3, unsigned char d4)
{
	return ((unsigned int)d1 << 24) |
	       ((unsigned int)d2 << 16) |
	       ((unsigned int)d3 << 8) |
	        (unsigned int)d4;
}
228
229/* This one is used by other real functions. It checks that the pattern is
230 * included inside the tested string, but enclosed between the specified
231 * delimiters or at the beginning or end of the string. The delimiters are
232 * provided as an unsigned int made by make_4delim() and match up to 4 different
233 * delimiters. Delimiters are stripped at the beginning and end of the pattern.
234 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100235static int match_word(struct sample *smp, struct pattern *pattern, unsigned int delimiters)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100236{
237 int may_match, icase;
238 char *c, *end;
239 char *ps;
240 int pl;
241
242 pl = pattern->len;
243 ps = pattern->ptr.str;
244
245 while (pl > 0 && is_delimiter(*ps, delimiters)) {
246 pl--;
247 ps++;
248 }
249
250 while (pl > 0 && is_delimiter(ps[pl - 1], delimiters))
251 pl--;
252
253 if (pl > smp->data.str.len)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100254 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100255
256 may_match = 1;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100257 icase = pattern->flags & PAT_F_IGNORE_CASE;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100258 end = smp->data.str.str + smp->data.str.len - pl;
259 for (c = smp->data.str.str; c <= end; c++) {
260 if (is_delimiter(*c, delimiters)) {
261 may_match = 1;
262 continue;
263 }
264
265 if (!may_match)
266 continue;
267
268 if (icase) {
269 if ((tolower(*c) == tolower(*ps)) &&
270 (strncasecmp(ps, c, pl) == 0) &&
271 (c == end || is_delimiter(c[pl], delimiters)))
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100272 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100273 } else {
274 if ((*c == *ps) &&
275 (strncmp(ps, c, pl) == 0) &&
276 (c == end || is_delimiter(c[pl], delimiters)))
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100277 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100278 }
279 may_match = 0;
280 }
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100281 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100282}
283
284/* Checks that the pattern is included inside the tested string, but enclosed
285 * between the delimiters '?' or '/' or at the beginning or end of the string.
286 * Delimiters at the beginning or end of the pattern are ignored.
287 */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100288enum pat_match_res pat_match_dir(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100289{
290 return match_word(smp, pattern, make_4delim('/', '?', '?', '?'));
291}
292
293/* Checks that the pattern is included inside the tested string, but enclosed
294 * between the delmiters '/', '?', '.' or ":" or at the beginning or end of
295 * the string. Delimiters at the beginning or end of the pattern are ignored.
296 */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100297enum pat_match_res pat_match_dom(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100298{
299 return match_word(smp, pattern, make_4delim('/', '?', '.', ':'));
300}
301
302/* Checks that the integer in <test> is included between min and max */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100303enum pat_match_res pat_match_int(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100304{
305 if ((!pattern->val.range.min_set || pattern->val.range.min <= smp->data.uint) &&
306 (!pattern->val.range.max_set || smp->data.uint <= pattern->val.range.max))
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100307 return PAT_MATCH;
308 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100309}
310
311/* Checks that the length of the pattern in <test> is included between min and max */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100312enum pat_match_res pat_match_len(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100313{
314 if ((!pattern->val.range.min_set || pattern->val.range.min <= smp->data.str.len) &&
315 (!pattern->val.range.max_set || smp->data.str.len <= pattern->val.range.max))
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100316 return PAT_MATCH;
317 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100318}
319
Willy Tarreau0cba6072013-11-28 22:21:02 +0100320enum pat_match_res pat_match_ip(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100321{
322 unsigned int v4; /* in network byte order */
323 struct in6_addr *v6;
324 int bits, pos;
325 struct in6_addr tmp6;
326
327 if (pattern->type == SMP_T_IPV4) {
328 if (smp->type == SMP_T_IPV4) {
329 v4 = smp->data.ipv4.s_addr;
330 }
331 else if (smp->type == SMP_T_IPV6) {
332 /* v4 match on a V6 sample. We want to check at least for
333 * the following forms :
334 * - ::ffff:ip:v4 (ipv4 mapped)
335 * - ::0000:ip:v4 (old ipv4 mapped)
336 * - 2002:ip:v4:: (6to4)
337 */
338 if (*(uint32_t*)&smp->data.ipv6.s6_addr[0] == 0 &&
339 *(uint32_t*)&smp->data.ipv6.s6_addr[4] == 0 &&
340 (*(uint32_t*)&smp->data.ipv6.s6_addr[8] == 0 ||
341 *(uint32_t*)&smp->data.ipv6.s6_addr[8] == htonl(0xFFFF))) {
342 v4 = *(uint32_t*)&smp->data.ipv6.s6_addr[12];
343 }
344 else if (*(uint16_t*)&smp->data.ipv6.s6_addr[0] == htons(0x2002)) {
345 v4 = htonl((ntohs(*(uint16_t*)&smp->data.ipv6.s6_addr[2]) << 16) +
346 ntohs(*(uint16_t*)&smp->data.ipv6.s6_addr[4]));
347 }
348 else
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100349 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100350 }
351 else
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100352 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100353
354 if (((v4 ^ pattern->val.ipv4.addr.s_addr) & pattern->val.ipv4.mask.s_addr) == 0)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100355 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100356 else
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100357 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100358 }
359 else if (pattern->type == SMP_T_IPV6) {
360 if (smp->type == SMP_T_IPV4) {
361 /* Convert the IPv4 sample address to IPv4 with the
362 * mapping method using the ::ffff: prefix.
363 */
364 memset(&tmp6, 0, 10);
365 *(uint16_t*)&tmp6.s6_addr[10] = htons(0xffff);
366 *(uint32_t*)&tmp6.s6_addr[12] = smp->data.ipv4.s_addr;
367 v6 = &tmp6;
368 }
369 else if (smp->type == SMP_T_IPV6) {
370 v6 = &smp->data.ipv6;
371 }
372 else {
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100373 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100374 }
375
376 bits = pattern->val.ipv6.mask;
377 for (pos = 0; bits > 0; pos += 4, bits -= 32) {
378 v4 = *(uint32_t*)&v6->s6_addr[pos] ^ *(uint32_t*)&pattern->val.ipv6.addr.s6_addr[pos];
379 if (bits < 32)
380 v4 &= htonl((~0U) << (32-bits));
381 if (v4)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100382 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100383 }
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100384 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100385 }
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100386 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100387}
388
389/* Lookup an IPv4 address in the expression's pattern tree using the longest
390 * match method. The node is returned if it exists, otherwise NULL.
391 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100392static void *pat_lookup_ip(struct sample *smp, struct pattern_expr *expr)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100393{
394 struct in_addr *s;
395
396 if (smp->type != SMP_T_IPV4)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100397 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100398
399 s = &smp->data.ipv4;
400 return ebmb_lookup_longest(&expr->pattern_tree, &s->s_addr);
401}
402
403/* Parse a string. It is allocated and duplicated. */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100404int pat_parse_str(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100405{
406 int len;
407
408 len = strlen(*text);
409 pattern->type = SMP_T_CSTR;
410
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100411 if (pattern->flags & PAT_F_TREE_OK) {
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100412 /* we're allowed to put the data in a tree whose root is pointed
413 * to by val.tree.
414 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100415 struct pat_idx_elt *node;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100416
417 node = calloc(1, sizeof(*node) + len + 1);
418 if (!node) {
419 memprintf(err, "out of memory while loading string pattern");
420 return 0;
421 }
422 node->smp = smp;
423 memcpy(node->node.key, *text, len + 1);
424 if (ebst_insert(pattern->val.tree, &node->node) != &node->node)
425 free(node); /* was a duplicate */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100426 pattern->flags |= PAT_F_TREE; /* this pattern now contains a tree */
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100427 return 1;
428 }
429
430 pattern->ptr.str = strdup(*text);
431 pattern->smp = smp;
432 if (!pattern->ptr.str) {
433 memprintf(err, "out of memory while loading string pattern");
434 return 0;
435 }
436 pattern->len = len;
437 return 1;
438}
439
440/* Parse a binary written in hexa. It is allocated. */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100441int pat_parse_bin(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100442{
443 int len;
444 const char *p = *text;
445 int i,j;
446
447 len = strlen(p);
448 if (len%2) {
449 memprintf(err, "an even number of hex digit is expected");
450 return 0;
451 }
452
453 pattern->type = SMP_T_CBIN;
454 pattern->len = len >> 1;
455 pattern->ptr.str = malloc(pattern->len);
456 pattern->smp = smp;
457 if (!pattern->ptr.str) {
458 memprintf(err, "out of memory while loading string pattern");
459 return 0;
460 }
461
462 i = j = 0;
463 while (j < pattern->len) {
464 if (!ishex(p[i++]))
465 goto bad_input;
466 if (!ishex(p[i++]))
467 goto bad_input;
468 pattern->ptr.str[j++] = (hex2i(p[i-2]) << 4) + hex2i(p[i-1]);
469 }
470 return 1;
471
472bad_input:
473 memprintf(err, "an hex digit is expected (found '%c')", p[i-1]);
474 free(pattern->ptr.str);
475 return 0;
476}
477
478/* Parse and concatenate all further strings into one. */
479int
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100480pat_parse_strcat(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100481{
482
483 int len = 0, i;
484 char *s;
485
486 for (i = 0; *text[i]; i++)
487 len += strlen(text[i])+1;
488
489 pattern->type = SMP_T_CSTR;
490 pattern->ptr.str = s = calloc(1, len);
491 pattern->smp = smp;
492 if (!pattern->ptr.str) {
493 memprintf(err, "out of memory while loading pattern");
494 return 0;
495 }
496
497 for (i = 0; *text[i]; i++)
498 s += sprintf(s, i?" %s":"%s", text[i]);
499
500 pattern->len = len;
501
502 return i;
503}
504
/* Release callback installed by pat_parse_reg() in <pattern->freeptrbuf>:
 * hands the regex pointed to by <ptr> over to regex_free().
 */
static void pat_free_reg(void *ptr)
{
	regex_free(ptr);
}
510
511/* Parse a regex. It is allocated. */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100512int pat_parse_reg(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100513{
514 regex *preg;
515
516 preg = calloc(1, sizeof(*preg));
517
518 if (!preg) {
519 memprintf(err, "out of memory while loading pattern");
520 return 0;
521 }
522
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100523 if (!regex_comp(*text, preg, !(pattern->flags & PAT_F_IGNORE_CASE), 0, err)) {
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100524 free(preg);
525 return 0;
526 }
527
528 pattern->ptr.reg = preg;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100529 pattern->freeptrbuf = &pat_free_reg;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100530 pattern->smp = smp;
531 return 1;
532}
533
534/* Parse a range of positive integers delimited by either ':' or '-'. If only
535 * one integer is read, it is set as both min and max. An operator may be
536 * specified as the prefix, among this list of 5 :
537 *
538 * 0:eq, 1:gt, 2:ge, 3:lt, 4:le
539 *
540 * The default operator is "eq". It supports range matching. Ranges are
541 * rejected for other operators. The operator may be changed at any time.
542 * The operator is stored in the 'opaque' argument.
543 *
544 * If err is non-NULL, an error message will be returned there on errors and
545 * the caller will have to free it.
546 *
547 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100548int pat_parse_int(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100549{
550 signed long long i;
551 unsigned int j, last, skip = 0;
552 const char *ptr = *text;
553
554 pattern->type = SMP_T_UINT;
555 pattern->smp = smp;
556 while (!isdigit((unsigned char)*ptr)) {
557 switch (get_std_op(ptr)) {
558 case STD_OP_EQ: *opaque = 0; break;
559 case STD_OP_GT: *opaque = 1; break;
560 case STD_OP_GE: *opaque = 2; break;
561 case STD_OP_LT: *opaque = 3; break;
562 case STD_OP_LE: *opaque = 4; break;
563 default:
564 memprintf(err, "'%s' is neither a number nor a supported operator", ptr);
565 return 0;
566 }
567
568 skip++;
569 ptr = text[skip];
570 }
571
572 last = i = 0;
573 while (1) {
574 j = *ptr++;
575 if ((j == '-' || j == ':') && !last) {
576 last++;
577 pattern->val.range.min = i;
578 i = 0;
579 continue;
580 }
581 j -= '0';
582 if (j > 9)
583 // also catches the terminating zero
584 break;
585 i *= 10;
586 i += j;
587 }
588
589 if (last && *opaque >= 1 && *opaque <= 4) {
590 /* having a range with a min or a max is absurd */
591 memprintf(err, "integer range '%s' specified with a comparison operator", text[skip]);
592 return 0;
593 }
594
595 if (!last)
596 pattern->val.range.min = i;
597 pattern->val.range.max = i;
598
599 switch (*opaque) {
600 case 0: /* eq */
601 pattern->val.range.min_set = 1;
602 pattern->val.range.max_set = 1;
603 break;
604 case 1: /* gt */
605 pattern->val.range.min++; /* gt = ge + 1 */
606 case 2: /* ge */
607 pattern->val.range.min_set = 1;
608 pattern->val.range.max_set = 0;
609 break;
610 case 3: /* lt */
611 pattern->val.range.max--; /* lt = le - 1 */
612 case 4: /* le */
613 pattern->val.range.min_set = 0;
614 pattern->val.range.max_set = 1;
615 break;
616 }
617 return skip + 1;
618}
619
620/* Parse a range of positive 2-component versions delimited by either ':' or
621 * '-'. The version consists in a major and a minor, both of which must be
622 * smaller than 65536, because internally they will be represented as a 32-bit
623 * integer.
624 * If only one version is read, it is set as both min and max. Just like for
625 * pure integers, an operator may be specified as the prefix, among this list
626 * of 5 :
627 *
628 * 0:eq, 1:gt, 2:ge, 3:lt, 4:le
629 *
630 * The default operator is "eq". It supports range matching. Ranges are
631 * rejected for other operators. The operator may be changed at any time.
632 * The operator is stored in the 'opaque' argument. This allows constructs
633 * such as the following one :
634 *
635 * acl obsolete_ssl ssl_req_proto lt 3
636 * acl unsupported_ssl ssl_req_proto gt 3.1
637 * acl valid_ssl ssl_req_proto 3.0-3.1
638 *
639 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100640int pat_parse_dotted_ver(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100641{
642 signed long long i;
643 unsigned int j, last, skip = 0;
644 const char *ptr = *text;
645
646
647 while (!isdigit((unsigned char)*ptr)) {
648 switch (get_std_op(ptr)) {
649 case STD_OP_EQ: *opaque = 0; break;
650 case STD_OP_GT: *opaque = 1; break;
651 case STD_OP_GE: *opaque = 2; break;
652 case STD_OP_LT: *opaque = 3; break;
653 case STD_OP_LE: *opaque = 4; break;
654 default:
655 memprintf(err, "'%s' is neither a number nor a supported operator", ptr);
656 return 0;
657 }
658
659 skip++;
660 ptr = text[skip];
661 }
662
663 last = i = 0;
664 while (1) {
665 j = *ptr++;
666 if (j == '.') {
667 /* minor part */
668 if (i >= 65536)
669 return 0;
670 i <<= 16;
671 continue;
672 }
673 if ((j == '-' || j == ':') && !last) {
674 last++;
675 if (i < 65536)
676 i <<= 16;
677 pattern->val.range.min = i;
678 i = 0;
679 continue;
680 }
681 j -= '0';
682 if (j > 9)
683 // also catches the terminating zero
684 break;
685 i = (i & 0xFFFF0000) + (i & 0xFFFF) * 10;
686 i += j;
687 }
688
689 /* if we only got a major version, let's shift it now */
690 if (i < 65536)
691 i <<= 16;
692
693 if (last && *opaque >= 1 && *opaque <= 4) {
694 /* having a range with a min or a max is absurd */
695 memprintf(err, "version range '%s' specified with a comparison operator", text[skip]);
696 return 0;
697 }
698
699 pattern->smp = smp;
700
701 if (!last)
702 pattern->val.range.min = i;
703 pattern->val.range.max = i;
704
705 switch (*opaque) {
706 case 0: /* eq */
707 pattern->val.range.min_set = 1;
708 pattern->val.range.max_set = 1;
709 break;
710 case 1: /* gt */
711 pattern->val.range.min++; /* gt = ge + 1 */
712 case 2: /* ge */
713 pattern->val.range.min_set = 1;
714 pattern->val.range.max_set = 0;
715 break;
716 case 3: /* lt */
717 pattern->val.range.max--; /* lt = le - 1 */
718 case 4: /* le */
719 pattern->val.range.min_set = 0;
720 pattern->val.range.max_set = 1;
721 break;
722 }
723 return skip + 1;
724}
725
726/* Parse an IP address and an optional mask in the form addr[/mask].
727 * The addr may either be an IPv4 address or a hostname. The mask
728 * may either be a dotted mask or a number of bits. Returns 1 if OK,
729 * otherwise 0. NOTE: IP address patterns are typed (IPV4/IPV6).
730 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100731int pat_parse_ip(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100732{
733 struct eb_root *tree = NULL;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100734 if (pattern->flags & PAT_F_TREE_OK)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100735 tree = pattern->val.tree;
736
737 if (str2net(*text, &pattern->val.ipv4.addr, &pattern->val.ipv4.mask)) {
738 unsigned int mask = ntohl(pattern->val.ipv4.mask.s_addr);
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100739 struct pat_idx_elt *node;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100740 /* check if the mask is contiguous so that we can insert the
741 * network into the tree. A continuous mask has only ones on
742 * the left. This means that this mask + its lower bit added
743 * once again is null.
744 */
745 pattern->type = SMP_T_IPV4;
746 if (mask + (mask & -mask) == 0 && tree) {
747 mask = mask ? 33 - flsnz(mask & -mask) : 0; /* equals cidr value */
748 /* FIXME: insert <addr>/<mask> into the tree here */
749 node = calloc(1, sizeof(*node) + 4); /* reserve 4 bytes for IPv4 address */
750 if (!node) {
751 memprintf(err, "out of memory while loading IPv4 pattern");
752 return 0;
753 }
754 node->smp = smp;
755 memcpy(node->node.key, &pattern->val.ipv4.addr, 4); /* network byte order */
756 node->node.node.pfx = mask;
757 if (ebmb_insert_prefix(tree, &node->node, 4) != &node->node)
758 free(node); /* was a duplicate */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100759 pattern->flags |= PAT_F_TREE;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100760 return 1;
761 }
762 return 1;
763 }
764 else if (str62net(*text, &pattern->val.ipv6.addr, &pattern->val.ipv6.mask)) {
765 /* no tree support right now */
766 pattern->type = SMP_T_IPV6;
767 return 1;
768 }
769 else {
770 memprintf(err, "'%s' is not a valid IPv4 or IPv6 address", *text);
771 return 0;
772 }
773}
774
775/* NB: does nothing if <pat> is NULL */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100776void pattern_free(struct pattern *pat)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100777{
778 if (!pat)
779 return;
780
781 if (pat->ptr.ptr) {
782 if (pat->freeptrbuf)
783 pat->freeptrbuf(pat->ptr.ptr);
784
785 free(pat->ptr.ptr);
786 }
787
788 free(pat);
789}
790
791void free_pattern_list(struct list *head)
792{
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100793 struct pattern *pat, *tmp;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100794 list_for_each_entry_safe(pat, tmp, head, list)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100795 pattern_free(pat);
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100796}
797
/* Removes every node from the tree <root> and frees it.
 * NOTE(review): free() is called on the tree node pointer itself, which
 * presumably requires the node to sit at the start of its allocation --
 * confirm against the layout of the elements inserted in these trees.
 */
void free_pattern_tree(struct eb_root *root)
{
	struct eb_node *cur, *following;

	for (cur = eb_first(root); cur; cur = following) {
		/* fetch the successor before <cur> is removed */
		following = eb_next(cur);
		eb_delete(cur);
		free(cur);
	}
}
809
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100810void pattern_prune_expr(struct pattern_expr *expr)
Thierry FOURNIERd163e1c2013-11-28 11:41:23 +0100811{
812 free_pattern_list(&expr->patterns);
813 free_pattern_tree(&expr->pattern_tree);
814 LIST_INIT(&expr->patterns);
815}
816
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100817void pattern_init_expr(struct pattern_expr *expr)
Thierry FOURNIERd163e1c2013-11-28 11:41:23 +0100818{
819 LIST_INIT(&expr->patterns);
820 expr->pattern_tree = EB_ROOT_UNIQUE;
821}
822
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100823/* return 1 if the process is ok
824 * return -1 if the parser fail. The err message is filled.
825 * return -2 if out of memory
826 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100827int pattern_register(struct pattern_expr *expr, char *text,
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100828 struct sample_storage *smp,
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100829 struct pattern **pattern,
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100830 int patflags, char **err)
831{
832 const char *args[2];
833 int opaque = 0;
834
835 args[0] = text;
836 args[1] = "";
837
838 /* we keep the previous pattern along iterations as long as it's not used */
839 if (!*pattern)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100840 *pattern = (struct pattern *)malloc(sizeof(**pattern));
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100841 if (!*pattern)
842 return -1;
843
844 memset(*pattern, 0, sizeof(**pattern));
845 (*pattern)->flags = patflags;
846
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100847 if (!((*pattern)->flags & PAT_F_IGNORE_CASE) &&
848 (expr->match == pat_match_str || expr->match == pat_match_ip)) {
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100849 /* we pre-set the data pointer to the tree's head so that functions
850 * which are able to insert in a tree know where to do that.
851 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100852 (*pattern)->flags |= PAT_F_TREE_OK;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100853 (*pattern)->val.tree = &expr->pattern_tree;
854 }
855
856 (*pattern)->type = SMP_TYPES; /* unspecified type by default */
857 if (!expr->parse(args, *pattern, smp, &opaque, err))
858 return -1;
859
860 /* if the parser did not feed the tree, let's chain the pattern to the list */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100861 if (!((*pattern)->flags & PAT_F_TREE)) {
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100862 LIST_ADDQ(&expr->patterns, &(*pattern)->list);
863 *pattern = NULL; /* get a new one */
864 }
865
866 return 1;
867}
868
869/* Reads patterns from a file. If <err_msg> is non-NULL, an error message will
870 * be returned there on errors and the caller will have to free it.
871 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100872int pattern_read_from_file(struct pattern_expr *expr,
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100873 const char *filename, int patflags,
874 char **err)
875{
876 FILE *file;
877 char *c;
878 char *arg;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100879 struct pattern *pattern;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100880 int ret = 0;
881 int line = 0;
882 int code;
883
884 file = fopen(filename, "r");
885 if (!file) {
886 memprintf(err, "failed to open pattern file <%s>", filename);
887 return 0;
888 }
889
890 /* now parse all patterns. The file may contain only one pattern per
891 * line. If the line contains spaces, they will be part of the pattern.
892 * The pattern stops at the first CR, LF or EOF encountered.
893 */
894 pattern = NULL;
895 while (fgets(trash.str, trash.size, file) != NULL) {
896 line++;
897 c = trash.str;
898
899 /* ignore lines beginning with a dash */
900 if (*c == '#')
901 continue;
902
903 /* strip leading spaces and tabs */
904 while (*c == ' ' || *c == '\t')
905 c++;
906
907
908 arg = c;
909 while (*c && *c != '\n' && *c != '\r')
910 c++;
911 *c = 0;
912
913 /* empty lines are ignored too */
914 if (c == arg)
915 continue;
916
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100917 code = pattern_register(expr, arg, NULL, &pattern, patflags, err);
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100918 if (code == -2) {
919 memprintf(err, "out of memory when loading patterns from file <%s>", filename);
920 goto out_close;
921 }
922 else if (code < 0) {
923 memprintf(err, "%s when loading patterns from file <%s>", *err, filename);
924 goto out_free_pattern;
925 }
926 }
927
928 ret = 1; /* success */
929
930 out_free_pattern:
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100931 pattern_free(pattern);
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100932 out_close:
933 fclose(file);
934 return ret;
935}
936
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100937/* This function matches a sample <smp> against a set of patterns presented in
938 * pattern expression <expr>. Upon success, if <sample> is not NULL, it is fed
939 * with the pointer associated with the matching pattern. This function returns
940 * PAT_NOMATCH or PAT_MATCH.
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100941 */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100942enum pat_match_res pattern_exec_match(struct pattern_expr *expr, struct sample *smp,
943 struct sample_storage **sample)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100944{
Willy Tarreau0cba6072013-11-28 22:21:02 +0100945 enum pat_match_res pat_res = PAT_NOMATCH;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100946 struct pattern *pattern;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100947 struct ebmb_node *node = NULL;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100948 struct pat_idx_elt *elt;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100949
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100950 if (expr->match == pat_match_nothing) {
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100951 if (smp->data.uint)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100952 pat_res |= PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100953 else
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100954 pat_res |= PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100955 }
956 else if (!expr->match) {
957 /* just check for existence */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100958 pat_res |= PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100959 }
960 else {
961 if (!eb_is_empty(&expr->pattern_tree)) {
962 /* a tree is present, let's check what type it is */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100963 if (expr->match == pat_match_str)
964 node = pat_lookup_str(smp, expr);
965 else if (expr->match == pat_match_ip)
966 node = pat_lookup_ip(smp, expr);
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100967 if (node) {
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100968 pat_res |= PAT_MATCH;
969 elt = ebmb_entry(node, struct pat_idx_elt, node);
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100970 if (sample)
971 *sample = elt->smp;
972 }
973 }
974
975 /* call the match() function for all tests on this value */
976 list_for_each_entry(pattern, &expr->patterns, list) {
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100977 if (pat_res == PAT_MATCH)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100978 break;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100979 pat_res |= expr->match(smp, pattern);
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100980 if (sample)
981 *sample = pattern->smp;
982 }
983 }
984
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100985 return pat_res;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100986}
987