blob: aef2a00537286d190be1915a126e2defd4c93def [file] [log] [blame]
Thierry FOURNIERed66c292013-11-28 11:05:19 +01001/*
2 * Pattern management functions.
3 *
4 * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <ctype.h>
14#include <stdio.h>
15
16#include <common/config.h>
17#include <common/standard.h>
18
19#include <types/global.h>
20#include <types/pattern.h>
21
22#include <proto/pattern.h>
23
24#include <ebsttree.h>
25
Thierry FOURNIERa65b3432013-11-28 18:22:00 +010026char *pat_match_names[PAT_MATCH_NUM] = {
27 [PAT_MATCH_FOUND] = "found",
28 [PAT_MATCH_BOOL] = "bool",
29 [PAT_MATCH_INT] = "int",
30 [PAT_MATCH_IP] = "ip",
31 [PAT_MATCH_BIN] = "bin",
32 [PAT_MATCH_LEN] = "len",
33 [PAT_MATCH_STR] = "str",
34 [PAT_MATCH_BEG] = "beg",
35 [PAT_MATCH_SUB] = "sub",
36 [PAT_MATCH_DIR] = "dir",
37 [PAT_MATCH_DOM] = "dom",
38 [PAT_MATCH_END] = "end",
39 [PAT_MATCH_REG] = "reg",
Thierry FOURNIERed66c292013-11-28 11:05:19 +010040};
41
Thierry FOURNIERa65b3432013-11-28 18:22:00 +010042int (*pat_parse_fcts[PAT_MATCH_NUM])(const char **, struct pattern *, struct sample_storage *, int *, char **) = {
43 [PAT_MATCH_FOUND] = pat_parse_nothing,
44 [PAT_MATCH_BOOL] = pat_parse_nothing,
45 [PAT_MATCH_INT] = pat_parse_int,
46 [PAT_MATCH_IP] = pat_parse_ip,
47 [PAT_MATCH_BIN] = pat_parse_bin,
48 [PAT_MATCH_LEN] = pat_parse_int,
49 [PAT_MATCH_STR] = pat_parse_str,
50 [PAT_MATCH_BEG] = pat_parse_str,
51 [PAT_MATCH_SUB] = pat_parse_str,
52 [PAT_MATCH_DIR] = pat_parse_str,
53 [PAT_MATCH_DOM] = pat_parse_str,
54 [PAT_MATCH_END] = pat_parse_str,
55 [PAT_MATCH_REG] = pat_parse_reg,
Thierry FOURNIERed66c292013-11-28 11:05:19 +010056};
57
Willy Tarreau0cba6072013-11-28 22:21:02 +010058enum pat_match_res (*pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern *) = {
Thierry FOURNIERa65b3432013-11-28 18:22:00 +010059 [PAT_MATCH_FOUND] = NULL,
60 [PAT_MATCH_BOOL] = pat_match_nothing,
61 [PAT_MATCH_INT] = pat_match_int,
62 [PAT_MATCH_IP] = pat_match_ip,
63 [PAT_MATCH_BIN] = pat_match_bin,
64 [PAT_MATCH_LEN] = pat_match_len,
65 [PAT_MATCH_STR] = pat_match_str,
66 [PAT_MATCH_BEG] = pat_match_beg,
67 [PAT_MATCH_SUB] = pat_match_sub,
68 [PAT_MATCH_DIR] = pat_match_dir,
69 [PAT_MATCH_DOM] = pat_match_dom,
70 [PAT_MATCH_END] = pat_match_end,
71 [PAT_MATCH_REG] = pat_match_reg,
Thierry FOURNIERed66c292013-11-28 11:05:19 +010072};
73
74/*
75 * These functions are exported and may be used by any other component.
76 */
77
/* Parser for methods which take no pattern: none of the arguments is looked
 * at and <pattern> is left untouched. Always returns 1.
 */
int pat_parse_nothing(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
{
	return 1;
}
83
84/* always return false */
Willy Tarreau0cba6072013-11-28 22:21:02 +010085enum pat_match_res pat_match_nothing(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +010086{
Thierry FOURNIERa65b3432013-11-28 18:22:00 +010087 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +010088}
89
90
91/* NB: For two strings to be identical, it is required that their lengths match */
Willy Tarreau0cba6072013-11-28 22:21:02 +010092enum pat_match_res pat_match_str(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +010093{
94 int icase;
95
96 if (pattern->len != smp->data.str.len)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +010097 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +010098
Thierry FOURNIERa65b3432013-11-28 18:22:00 +010099 icase = pattern->flags & PAT_F_IGNORE_CASE;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100100 if ((icase && strncasecmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0) ||
101 (!icase && strncmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0))
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100102 return PAT_MATCH;
103 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100104}
105
106/* NB: For two binaries buf to be identical, it is required that their lengths match */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100107enum pat_match_res pat_match_bin(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100108{
109 if (pattern->len != smp->data.str.len)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100110 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100111
112 if (memcmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100113 return PAT_MATCH;
114 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100115}
116
117/* Lookup a string in the expression's pattern tree. The node is returned if it
118 * exists, otherwise NULL.
119 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100120static void *pat_lookup_str(struct sample *smp, struct pattern_expr *expr)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100121{
122 /* data are stored in a tree */
123 struct ebmb_node *node;
124 char prev;
125
126 /* we may have to force a trailing zero on the test pattern */
127 prev = smp->data.str.str[smp->data.str.len];
128 if (prev)
129 smp->data.str.str[smp->data.str.len] = '\0';
130 node = ebst_lookup(&expr->pattern_tree, smp->data.str.str);
131 if (prev)
132 smp->data.str.str[smp->data.str.len] = prev;
133 return node;
134}
135
136/* Executes a regex. It temporarily changes the data to add a trailing zero,
137 * and restores the previous character when leaving.
138 */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100139enum pat_match_res pat_match_reg(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100140{
141 if (regex_exec(pattern->ptr.reg, smp->data.str.str, smp->data.str.len) == 0)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100142 return PAT_MATCH;
143 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100144}
145
146/* Checks that the pattern matches the beginning of the tested string. */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100147enum pat_match_res pat_match_beg(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100148{
149 int icase;
150
151 if (pattern->len > smp->data.str.len)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100152 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100153
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100154 icase = pattern->flags & PAT_F_IGNORE_CASE;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100155 if ((icase && strncasecmp(pattern->ptr.str, smp->data.str.str, pattern->len) != 0) ||
156 (!icase && strncmp(pattern->ptr.str, smp->data.str.str, pattern->len) != 0))
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100157 return PAT_NOMATCH;
158 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100159}
160
161/* Checks that the pattern matches the end of the tested string. */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100162enum pat_match_res pat_match_end(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100163{
164 int icase;
165
166 if (pattern->len > smp->data.str.len)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100167 return PAT_NOMATCH;
168 icase = pattern->flags & PAT_F_IGNORE_CASE;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100169 if ((icase && strncasecmp(pattern->ptr.str, smp->data.str.str + smp->data.str.len - pattern->len, pattern->len) != 0) ||
170 (!icase && strncmp(pattern->ptr.str, smp->data.str.str + smp->data.str.len - pattern->len, pattern->len) != 0))
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100171 return PAT_NOMATCH;
172 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100173}
174
175/* Checks that the pattern is included inside the tested string.
176 * NB: Suboptimal, should be rewritten using a Boyer-Moore method.
177 */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100178enum pat_match_res pat_match_sub(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100179{
180 int icase;
181 char *end;
182 char *c;
183
184 if (pattern->len > smp->data.str.len)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100185 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100186
187 end = smp->data.str.str + smp->data.str.len - pattern->len;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100188 icase = pattern->flags & PAT_F_IGNORE_CASE;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100189 if (icase) {
190 for (c = smp->data.str.str; c <= end; c++) {
191 if (tolower(*c) != tolower(*pattern->ptr.str))
192 continue;
193 if (strncasecmp(pattern->ptr.str, c, pattern->len) == 0)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100194 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100195 }
196 } else {
197 for (c = smp->data.str.str; c <= end; c++) {
198 if (*c != *pattern->ptr.str)
199 continue;
200 if (strncmp(pattern->ptr.str, c, pattern->len) == 0)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100201 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100202 }
203 }
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100204 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100205}
206
/* Background: Fast way to find a zero byte in a word
 * http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
 * hasZeroByte = (v - 0x01010101UL) & ~v & 0x80808080UL;
 *
 * To look for 4 different byte values, xor the word with those bytes and
 * then check for zero bytes:
 *
 * v = (((unsigned char)c * 0x1010101U) ^ delimiter)
 * where <delimiter> is the 4 byte values to look for (as an uint)
 * and <c> is the character that is being tested
 *
 * Returns non-zero if <c> equals one of the 4 bytes packed in <mask>, zero
 * otherwise. All the arithmetic is done on unsigned values: a signed multiply
 * would overflow for any <c> above 0x7f.
 */
static inline unsigned int is_delimiter(unsigned char c, unsigned int mask)
{
	mask ^= (c * 0x01010101U); /* propagate the char to all 4 bytes */
	return (mask - 0x01010101U) & ~mask & 0x80808080U;
}
223
/* Packs 4 delimiter characters into a single unsigned int suitable for
 * is_delimiter(), <d1> being the most significant byte and <d4> the least
 * significant one. Each byte is converted to unsigned before being shifted so
 * that values above 0x7f never get shifted into the sign bit of an int.
 */
static inline unsigned int make_4delim(unsigned char d1, unsigned char d2, unsigned char d3, unsigned char d4)
{
	return ((unsigned int)d1 << 24) |
	       ((unsigned int)d2 << 16) |
	       ((unsigned int)d3 << 8)  |
	        (unsigned int)d4;
}
228
229/* This one is used by other real functions. It checks that the pattern is
230 * included inside the tested string, but enclosed between the specified
231 * delimiters or at the beginning or end of the string. The delimiters are
232 * provided as an unsigned int made by make_4delim() and match up to 4 different
233 * delimiters. Delimiters are stripped at the beginning and end of the pattern.
234 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100235static int match_word(struct sample *smp, struct pattern *pattern, unsigned int delimiters)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100236{
237 int may_match, icase;
238 char *c, *end;
239 char *ps;
240 int pl;
241
242 pl = pattern->len;
243 ps = pattern->ptr.str;
244
245 while (pl > 0 && is_delimiter(*ps, delimiters)) {
246 pl--;
247 ps++;
248 }
249
250 while (pl > 0 && is_delimiter(ps[pl - 1], delimiters))
251 pl--;
252
253 if (pl > smp->data.str.len)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100254 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100255
256 may_match = 1;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100257 icase = pattern->flags & PAT_F_IGNORE_CASE;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100258 end = smp->data.str.str + smp->data.str.len - pl;
259 for (c = smp->data.str.str; c <= end; c++) {
260 if (is_delimiter(*c, delimiters)) {
261 may_match = 1;
262 continue;
263 }
264
265 if (!may_match)
266 continue;
267
268 if (icase) {
269 if ((tolower(*c) == tolower(*ps)) &&
270 (strncasecmp(ps, c, pl) == 0) &&
271 (c == end || is_delimiter(c[pl], delimiters)))
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100272 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100273 } else {
274 if ((*c == *ps) &&
275 (strncmp(ps, c, pl) == 0) &&
276 (c == end || is_delimiter(c[pl], delimiters)))
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100277 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100278 }
279 may_match = 0;
280 }
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100281 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100282}
283
284/* Checks that the pattern is included inside the tested string, but enclosed
285 * between the delimiters '?' or '/' or at the beginning or end of the string.
286 * Delimiters at the beginning or end of the pattern are ignored.
287 */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100288enum pat_match_res pat_match_dir(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100289{
290 return match_word(smp, pattern, make_4delim('/', '?', '?', '?'));
291}
292
293/* Checks that the pattern is included inside the tested string, but enclosed
294 * between the delmiters '/', '?', '.' or ":" or at the beginning or end of
295 * the string. Delimiters at the beginning or end of the pattern are ignored.
296 */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100297enum pat_match_res pat_match_dom(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100298{
299 return match_word(smp, pattern, make_4delim('/', '?', '.', ':'));
300}
301
302/* Checks that the integer in <test> is included between min and max */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100303enum pat_match_res pat_match_int(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100304{
305 if ((!pattern->val.range.min_set || pattern->val.range.min <= smp->data.uint) &&
306 (!pattern->val.range.max_set || smp->data.uint <= pattern->val.range.max))
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100307 return PAT_MATCH;
308 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100309}
310
311/* Checks that the length of the pattern in <test> is included between min and max */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100312enum pat_match_res pat_match_len(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100313{
314 if ((!pattern->val.range.min_set || pattern->val.range.min <= smp->data.str.len) &&
315 (!pattern->val.range.max_set || smp->data.str.len <= pattern->val.range.max))
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100316 return PAT_MATCH;
317 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100318}
319
Willy Tarreau0cba6072013-11-28 22:21:02 +0100320enum pat_match_res pat_match_ip(struct sample *smp, struct pattern *pattern)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100321{
322 unsigned int v4; /* in network byte order */
323 struct in6_addr *v6;
324 int bits, pos;
325 struct in6_addr tmp6;
326
327 if (pattern->type == SMP_T_IPV4) {
328 if (smp->type == SMP_T_IPV4) {
329 v4 = smp->data.ipv4.s_addr;
330 }
331 else if (smp->type == SMP_T_IPV6) {
332 /* v4 match on a V6 sample. We want to check at least for
333 * the following forms :
334 * - ::ffff:ip:v4 (ipv4 mapped)
335 * - ::0000:ip:v4 (old ipv4 mapped)
336 * - 2002:ip:v4:: (6to4)
337 */
338 if (*(uint32_t*)&smp->data.ipv6.s6_addr[0] == 0 &&
339 *(uint32_t*)&smp->data.ipv6.s6_addr[4] == 0 &&
340 (*(uint32_t*)&smp->data.ipv6.s6_addr[8] == 0 ||
341 *(uint32_t*)&smp->data.ipv6.s6_addr[8] == htonl(0xFFFF))) {
342 v4 = *(uint32_t*)&smp->data.ipv6.s6_addr[12];
343 }
344 else if (*(uint16_t*)&smp->data.ipv6.s6_addr[0] == htons(0x2002)) {
345 v4 = htonl((ntohs(*(uint16_t*)&smp->data.ipv6.s6_addr[2]) << 16) +
346 ntohs(*(uint16_t*)&smp->data.ipv6.s6_addr[4]));
347 }
348 else
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100349 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100350 }
351 else
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100352 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100353
354 if (((v4 ^ pattern->val.ipv4.addr.s_addr) & pattern->val.ipv4.mask.s_addr) == 0)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100355 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100356 else
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100357 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100358 }
359 else if (pattern->type == SMP_T_IPV6) {
360 if (smp->type == SMP_T_IPV4) {
361 /* Convert the IPv4 sample address to IPv4 with the
362 * mapping method using the ::ffff: prefix.
363 */
364 memset(&tmp6, 0, 10);
365 *(uint16_t*)&tmp6.s6_addr[10] = htons(0xffff);
366 *(uint32_t*)&tmp6.s6_addr[12] = smp->data.ipv4.s_addr;
367 v6 = &tmp6;
368 }
369 else if (smp->type == SMP_T_IPV6) {
370 v6 = &smp->data.ipv6;
371 }
372 else {
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100373 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100374 }
375
376 bits = pattern->val.ipv6.mask;
377 for (pos = 0; bits > 0; pos += 4, bits -= 32) {
378 v4 = *(uint32_t*)&v6->s6_addr[pos] ^ *(uint32_t*)&pattern->val.ipv6.addr.s6_addr[pos];
379 if (bits < 32)
380 v4 &= htonl((~0U) << (32-bits));
381 if (v4)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100382 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100383 }
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100384 return PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100385 }
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100386 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100387}
388
389/* Lookup an IPv4 address in the expression's pattern tree using the longest
390 * match method. The node is returned if it exists, otherwise NULL.
391 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100392static void *pat_lookup_ip(struct sample *smp, struct pattern_expr *expr)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100393{
394 struct in_addr *s;
395
396 if (smp->type != SMP_T_IPV4)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100397 return PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100398
399 s = &smp->data.ipv4;
400 return ebmb_lookup_longest(&expr->pattern_tree, &s->s_addr);
401}
402
403/* Parse a string. It is allocated and duplicated. */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100404int pat_parse_str(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100405{
406 int len;
407
408 len = strlen(*text);
409 pattern->type = SMP_T_CSTR;
410
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100411 if (pattern->flags & PAT_F_TREE_OK) {
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100412 /* we're allowed to put the data in a tree whose root is pointed
413 * to by val.tree.
414 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100415 struct pat_idx_elt *node;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100416
417 node = calloc(1, sizeof(*node) + len + 1);
418 if (!node) {
419 memprintf(err, "out of memory while loading string pattern");
420 return 0;
421 }
422 node->smp = smp;
423 memcpy(node->node.key, *text, len + 1);
424 if (ebst_insert(pattern->val.tree, &node->node) != &node->node)
425 free(node); /* was a duplicate */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100426 pattern->flags |= PAT_F_TREE; /* this pattern now contains a tree */
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100427 return 1;
428 }
429
430 pattern->ptr.str = strdup(*text);
431 pattern->smp = smp;
432 if (!pattern->ptr.str) {
433 memprintf(err, "out of memory while loading string pattern");
434 return 0;
435 }
436 pattern->len = len;
437 return 1;
438}
439
440/* Parse a binary written in hexa. It is allocated. */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100441int pat_parse_bin(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100442{
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100443 pattern->type = SMP_T_CBIN;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100444 pattern->smp = smp;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100445
Willy Tarreau126d4062013-12-03 17:50:47 +0100446 return parse_binary(*text, &pattern->ptr.str, &pattern->len, err);
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100447}
448
449/* Parse and concatenate all further strings into one. */
450int
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100451pat_parse_strcat(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100452{
453
454 int len = 0, i;
455 char *s;
456
457 for (i = 0; *text[i]; i++)
458 len += strlen(text[i])+1;
459
460 pattern->type = SMP_T_CSTR;
461 pattern->ptr.str = s = calloc(1, len);
462 pattern->smp = smp;
463 if (!pattern->ptr.str) {
464 memprintf(err, "out of memory while loading pattern");
465 return 0;
466 }
467
468 for (i = 0; *text[i]; i++)
469 s += sprintf(s, i?" %s":"%s", text[i]);
470
471 pattern->len = len;
472
473 return i;
474}
475
/* Callback installed by pat_parse_reg() into <pattern->freeptrbuf>: it passes
 * the regex pointed to by <ptr> to regex_free().
 */
static void pat_free_reg(void *ptr)
{
	regex_free(ptr);
}
481
482/* Parse a regex. It is allocated. */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100483int pat_parse_reg(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100484{
485 regex *preg;
486
487 preg = calloc(1, sizeof(*preg));
488
489 if (!preg) {
490 memprintf(err, "out of memory while loading pattern");
491 return 0;
492 }
493
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100494 if (!regex_comp(*text, preg, !(pattern->flags & PAT_F_IGNORE_CASE), 0, err)) {
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100495 free(preg);
496 return 0;
497 }
498
499 pattern->ptr.reg = preg;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100500 pattern->freeptrbuf = &pat_free_reg;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100501 pattern->smp = smp;
502 return 1;
503}
504
505/* Parse a range of positive integers delimited by either ':' or '-'. If only
506 * one integer is read, it is set as both min and max. An operator may be
507 * specified as the prefix, among this list of 5 :
508 *
509 * 0:eq, 1:gt, 2:ge, 3:lt, 4:le
510 *
511 * The default operator is "eq". It supports range matching. Ranges are
512 * rejected for other operators. The operator may be changed at any time.
513 * The operator is stored in the 'opaque' argument.
514 *
515 * If err is non-NULL, an error message will be returned there on errors and
516 * the caller will have to free it.
517 *
518 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100519int pat_parse_int(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100520{
521 signed long long i;
522 unsigned int j, last, skip = 0;
523 const char *ptr = *text;
524
525 pattern->type = SMP_T_UINT;
526 pattern->smp = smp;
527 while (!isdigit((unsigned char)*ptr)) {
528 switch (get_std_op(ptr)) {
529 case STD_OP_EQ: *opaque = 0; break;
530 case STD_OP_GT: *opaque = 1; break;
531 case STD_OP_GE: *opaque = 2; break;
532 case STD_OP_LT: *opaque = 3; break;
533 case STD_OP_LE: *opaque = 4; break;
534 default:
535 memprintf(err, "'%s' is neither a number nor a supported operator", ptr);
536 return 0;
537 }
538
539 skip++;
540 ptr = text[skip];
541 }
542
543 last = i = 0;
544 while (1) {
545 j = *ptr++;
546 if ((j == '-' || j == ':') && !last) {
547 last++;
548 pattern->val.range.min = i;
549 i = 0;
550 continue;
551 }
552 j -= '0';
553 if (j > 9)
554 // also catches the terminating zero
555 break;
556 i *= 10;
557 i += j;
558 }
559
560 if (last && *opaque >= 1 && *opaque <= 4) {
561 /* having a range with a min or a max is absurd */
562 memprintf(err, "integer range '%s' specified with a comparison operator", text[skip]);
563 return 0;
564 }
565
566 if (!last)
567 pattern->val.range.min = i;
568 pattern->val.range.max = i;
569
570 switch (*opaque) {
571 case 0: /* eq */
572 pattern->val.range.min_set = 1;
573 pattern->val.range.max_set = 1;
574 break;
575 case 1: /* gt */
576 pattern->val.range.min++; /* gt = ge + 1 */
577 case 2: /* ge */
578 pattern->val.range.min_set = 1;
579 pattern->val.range.max_set = 0;
580 break;
581 case 3: /* lt */
582 pattern->val.range.max--; /* lt = le - 1 */
583 case 4: /* le */
584 pattern->val.range.min_set = 0;
585 pattern->val.range.max_set = 1;
586 break;
587 }
588 return skip + 1;
589}
590
591/* Parse a range of positive 2-component versions delimited by either ':' or
592 * '-'. The version consists in a major and a minor, both of which must be
593 * smaller than 65536, because internally they will be represented as a 32-bit
594 * integer.
595 * If only one version is read, it is set as both min and max. Just like for
596 * pure integers, an operator may be specified as the prefix, among this list
597 * of 5 :
598 *
599 * 0:eq, 1:gt, 2:ge, 3:lt, 4:le
600 *
601 * The default operator is "eq". It supports range matching. Ranges are
602 * rejected for other operators. The operator may be changed at any time.
603 * The operator is stored in the 'opaque' argument. This allows constructs
604 * such as the following one :
605 *
606 * acl obsolete_ssl ssl_req_proto lt 3
607 * acl unsupported_ssl ssl_req_proto gt 3.1
608 * acl valid_ssl ssl_req_proto 3.0-3.1
609 *
610 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100611int pat_parse_dotted_ver(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100612{
613 signed long long i;
614 unsigned int j, last, skip = 0;
615 const char *ptr = *text;
616
617
618 while (!isdigit((unsigned char)*ptr)) {
619 switch (get_std_op(ptr)) {
620 case STD_OP_EQ: *opaque = 0; break;
621 case STD_OP_GT: *opaque = 1; break;
622 case STD_OP_GE: *opaque = 2; break;
623 case STD_OP_LT: *opaque = 3; break;
624 case STD_OP_LE: *opaque = 4; break;
625 default:
626 memprintf(err, "'%s' is neither a number nor a supported operator", ptr);
627 return 0;
628 }
629
630 skip++;
631 ptr = text[skip];
632 }
633
634 last = i = 0;
635 while (1) {
636 j = *ptr++;
637 if (j == '.') {
638 /* minor part */
639 if (i >= 65536)
640 return 0;
641 i <<= 16;
642 continue;
643 }
644 if ((j == '-' || j == ':') && !last) {
645 last++;
646 if (i < 65536)
647 i <<= 16;
648 pattern->val.range.min = i;
649 i = 0;
650 continue;
651 }
652 j -= '0';
653 if (j > 9)
654 // also catches the terminating zero
655 break;
656 i = (i & 0xFFFF0000) + (i & 0xFFFF) * 10;
657 i += j;
658 }
659
660 /* if we only got a major version, let's shift it now */
661 if (i < 65536)
662 i <<= 16;
663
664 if (last && *opaque >= 1 && *opaque <= 4) {
665 /* having a range with a min or a max is absurd */
666 memprintf(err, "version range '%s' specified with a comparison operator", text[skip]);
667 return 0;
668 }
669
670 pattern->smp = smp;
671
672 if (!last)
673 pattern->val.range.min = i;
674 pattern->val.range.max = i;
675
676 switch (*opaque) {
677 case 0: /* eq */
678 pattern->val.range.min_set = 1;
679 pattern->val.range.max_set = 1;
680 break;
681 case 1: /* gt */
682 pattern->val.range.min++; /* gt = ge + 1 */
683 case 2: /* ge */
684 pattern->val.range.min_set = 1;
685 pattern->val.range.max_set = 0;
686 break;
687 case 3: /* lt */
688 pattern->val.range.max--; /* lt = le - 1 */
689 case 4: /* le */
690 pattern->val.range.min_set = 0;
691 pattern->val.range.max_set = 1;
692 break;
693 }
694 return skip + 1;
695}
696
697/* Parse an IP address and an optional mask in the form addr[/mask].
698 * The addr may either be an IPv4 address or a hostname. The mask
699 * may either be a dotted mask or a number of bits. Returns 1 if OK,
700 * otherwise 0. NOTE: IP address patterns are typed (IPV4/IPV6).
701 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100702int pat_parse_ip(const char **text, struct pattern *pattern, struct sample_storage *smp, int *opaque, char **err)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100703{
704 struct eb_root *tree = NULL;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100705 if (pattern->flags & PAT_F_TREE_OK)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100706 tree = pattern->val.tree;
707
708 if (str2net(*text, &pattern->val.ipv4.addr, &pattern->val.ipv4.mask)) {
709 unsigned int mask = ntohl(pattern->val.ipv4.mask.s_addr);
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100710 struct pat_idx_elt *node;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100711 /* check if the mask is contiguous so that we can insert the
712 * network into the tree. A continuous mask has only ones on
713 * the left. This means that this mask + its lower bit added
714 * once again is null.
715 */
716 pattern->type = SMP_T_IPV4;
717 if (mask + (mask & -mask) == 0 && tree) {
718 mask = mask ? 33 - flsnz(mask & -mask) : 0; /* equals cidr value */
719 /* FIXME: insert <addr>/<mask> into the tree here */
720 node = calloc(1, sizeof(*node) + 4); /* reserve 4 bytes for IPv4 address */
721 if (!node) {
722 memprintf(err, "out of memory while loading IPv4 pattern");
723 return 0;
724 }
725 node->smp = smp;
726 memcpy(node->node.key, &pattern->val.ipv4.addr, 4); /* network byte order */
727 node->node.node.pfx = mask;
728 if (ebmb_insert_prefix(tree, &node->node, 4) != &node->node)
729 free(node); /* was a duplicate */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100730 pattern->flags |= PAT_F_TREE;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100731 return 1;
732 }
733 return 1;
734 }
735 else if (str62net(*text, &pattern->val.ipv6.addr, &pattern->val.ipv6.mask)) {
736 /* no tree support right now */
737 pattern->type = SMP_T_IPV6;
738 return 1;
739 }
740 else {
741 memprintf(err, "'%s' is not a valid IPv4 or IPv6 address", *text);
742 return 0;
743 }
744}
745
746/* NB: does nothing if <pat> is NULL */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100747void pattern_free(struct pattern *pat)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100748{
749 if (!pat)
750 return;
751
752 if (pat->ptr.ptr) {
753 if (pat->freeptrbuf)
754 pat->freeptrbuf(pat->ptr.ptr);
755
756 free(pat->ptr.ptr);
757 }
758
759 free(pat);
760}
761
762void free_pattern_list(struct list *head)
763{
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100764 struct pattern *pat, *tmp;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100765 list_for_each_entry_safe(pat, tmp, head, list)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100766 pattern_free(pat);
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100767}
768
769void free_pattern_tree(struct eb_root *root)
770{
771 struct eb_node *node, *next;
Thierry FOURNIER3ce88c72013-12-09 11:29:46 +0100772 struct pat_idx_elt *elt;
773
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100774 node = eb_first(root);
775 while (node) {
776 next = eb_next(node);
777 eb_delete(node);
Thierry FOURNIER3ce88c72013-12-09 11:29:46 +0100778 elt = container_of(node, struct pat_idx_elt, node);
779 free(elt);
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100780 node = next;
781 }
782}
783
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100784void pattern_prune_expr(struct pattern_expr *expr)
Thierry FOURNIERd163e1c2013-11-28 11:41:23 +0100785{
786 free_pattern_list(&expr->patterns);
787 free_pattern_tree(&expr->pattern_tree);
788 LIST_INIT(&expr->patterns);
789}
790
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100791void pattern_init_expr(struct pattern_expr *expr)
Thierry FOURNIERd163e1c2013-11-28 11:41:23 +0100792{
793 LIST_INIT(&expr->patterns);
794 expr->pattern_tree = EB_ROOT_UNIQUE;
795}
796
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100797/* return 1 if the process is ok
798 * return -1 if the parser fail. The err message is filled.
799 * return -2 if out of memory
800 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100801int pattern_register(struct pattern_expr *expr, char *text,
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100802 struct sample_storage *smp,
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100803 struct pattern **pattern,
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100804 int patflags, char **err)
805{
806 const char *args[2];
807 int opaque = 0;
808
809 args[0] = text;
810 args[1] = "";
811
812 /* we keep the previous pattern along iterations as long as it's not used */
813 if (!*pattern)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100814 *pattern = (struct pattern *)malloc(sizeof(**pattern));
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100815 if (!*pattern)
816 return -1;
817
818 memset(*pattern, 0, sizeof(**pattern));
819 (*pattern)->flags = patflags;
820
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100821 if (!((*pattern)->flags & PAT_F_IGNORE_CASE) &&
822 (expr->match == pat_match_str || expr->match == pat_match_ip)) {
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100823 /* we pre-set the data pointer to the tree's head so that functions
824 * which are able to insert in a tree know where to do that.
825 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100826 (*pattern)->flags |= PAT_F_TREE_OK;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100827 (*pattern)->val.tree = &expr->pattern_tree;
828 }
829
830 (*pattern)->type = SMP_TYPES; /* unspecified type by default */
831 if (!expr->parse(args, *pattern, smp, &opaque, err))
832 return -1;
833
834 /* if the parser did not feed the tree, let's chain the pattern to the list */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100835 if (!((*pattern)->flags & PAT_F_TREE)) {
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100836 LIST_ADDQ(&expr->patterns, &(*pattern)->list);
837 *pattern = NULL; /* get a new one */
838 }
839
840 return 1;
841}
842
843/* Reads patterns from a file. If <err_msg> is non-NULL, an error message will
844 * be returned there on errors and the caller will have to free it.
845 */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100846int pattern_read_from_file(struct pattern_expr *expr,
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100847 const char *filename, int patflags,
848 char **err)
849{
850 FILE *file;
851 char *c;
852 char *arg;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100853 struct pattern *pattern;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100854 int ret = 0;
855 int line = 0;
856 int code;
857
858 file = fopen(filename, "r");
859 if (!file) {
860 memprintf(err, "failed to open pattern file <%s>", filename);
861 return 0;
862 }
863
864 /* now parse all patterns. The file may contain only one pattern per
865 * line. If the line contains spaces, they will be part of the pattern.
866 * The pattern stops at the first CR, LF or EOF encountered.
867 */
868 pattern = NULL;
869 while (fgets(trash.str, trash.size, file) != NULL) {
870 line++;
871 c = trash.str;
872
873 /* ignore lines beginning with a dash */
874 if (*c == '#')
875 continue;
876
877 /* strip leading spaces and tabs */
878 while (*c == ' ' || *c == '\t')
879 c++;
880
881
882 arg = c;
883 while (*c && *c != '\n' && *c != '\r')
884 c++;
885 *c = 0;
886
887 /* empty lines are ignored too */
888 if (c == arg)
889 continue;
890
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100891 code = pattern_register(expr, arg, NULL, &pattern, patflags, err);
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100892 if (code == -2) {
893 memprintf(err, "out of memory when loading patterns from file <%s>", filename);
894 goto out_close;
895 }
896 else if (code < 0) {
897 memprintf(err, "%s when loading patterns from file <%s>", *err, filename);
898 goto out_free_pattern;
899 }
900 }
901
902 ret = 1; /* success */
903
904 out_free_pattern:
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100905 pattern_free(pattern);
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100906 out_close:
907 fclose(file);
908 return ret;
909}
910
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100911/* This function matches a sample <smp> against a set of patterns presented in
912 * pattern expression <expr>. Upon success, if <sample> is not NULL, it is fed
913 * with the pointer associated with the matching pattern. This function returns
914 * PAT_NOMATCH or PAT_MATCH.
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100915 */
Willy Tarreau0cba6072013-11-28 22:21:02 +0100916enum pat_match_res pattern_exec_match(struct pattern_expr *expr, struct sample *smp,
917 struct sample_storage **sample)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100918{
Willy Tarreau0cba6072013-11-28 22:21:02 +0100919 enum pat_match_res pat_res = PAT_NOMATCH;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100920 struct pattern *pattern;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100921 struct ebmb_node *node = NULL;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100922 struct pat_idx_elt *elt;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100923
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100924 if (expr->match == pat_match_nothing) {
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100925 if (smp->data.uint)
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100926 pat_res |= PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100927 else
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100928 pat_res |= PAT_NOMATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100929 }
930 else if (!expr->match) {
931 /* just check for existence */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100932 pat_res |= PAT_MATCH;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100933 }
934 else {
935 if (!eb_is_empty(&expr->pattern_tree)) {
936 /* a tree is present, let's check what type it is */
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100937 if (expr->match == pat_match_str)
938 node = pat_lookup_str(smp, expr);
939 else if (expr->match == pat_match_ip)
940 node = pat_lookup_ip(smp, expr);
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100941 if (node) {
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100942 pat_res |= PAT_MATCH;
943 elt = ebmb_entry(node, struct pat_idx_elt, node);
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100944 if (sample)
945 *sample = elt->smp;
946 }
947 }
948
949 /* call the match() function for all tests on this value */
950 list_for_each_entry(pattern, &expr->patterns, list) {
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100951 if (pat_res == PAT_MATCH)
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100952 break;
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100953 pat_res |= expr->match(smp, pattern);
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100954 if (sample)
955 *sample = pattern->smp;
956 }
957 }
958
Thierry FOURNIERa65b3432013-11-28 18:22:00 +0100959 return pat_res;
Thierry FOURNIERed66c292013-11-28 11:05:19 +0100960}
961