blob: d106b8690fc96dd062f42782608936f4ec5e5e4d [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
Willy Tarreauf4f04122010-01-28 18:10:50 +01002 * include/common/regex.h
3 * This file defines everything related to regular expressions.
4 *
5 * Copyright (C) 2000-2010 Willy Tarreau - w@1wt.eu
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation, version 2.1
10 * exclusively.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
Willy Tarreaubaaee002006-06-26 02:48:02 +020021
Willy Tarreau2dd0d472006-06-29 17:53:05 +020022#ifndef _COMMON_REGEX_H
23#define _COMMON_REGEX_H
Willy Tarreaubaaee002006-06-26 02:48:02 +020024
Thierry FOURNIERe28f1ec2013-10-09 15:23:01 +020025#include <stdlib.h>
Thierry FOURNIERec9a58c2015-11-26 19:33:54 +010026#include <string.h>
Thierry FOURNIERe28f1ec2013-10-09 15:23:01 +020027
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020028#include <haproxy/api.h>
Willy Tarreau5778fea2020-05-09 09:08:09 +020029#include <common/hathreads.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020030
/* Regex backend selection: PCRE when USE_PCRE is defined, PCRE2 when
 * USE_PCRE2 is defined, and the POSIX regex API otherwise. The tests use
 * defined() so that they agree with the backend checks performed by the
 * inline functions below and stay quiet under -Wundef.
 */
#if defined(USE_PCRE)
#include <pcre.h>
#include <pcreposix.h>

/* For pre-8.20 PCRE compatibility */
#ifndef PCRE_STUDY_JIT_COMPILE
#define PCRE_STUDY_JIT_COMPILE 0
#endif

#elif defined(USE_PCRE2)
#include <pcre2.h>
#include <pcre2posix.h>

#else /* no PCRE, nor PCRE2 */
#include <regex.h>
#endif
Hiroaki Nakamura70351322013-01-13 15:00:42 +090047
/* Compiled regular expression. Exactly one backend is selected at build time:
 * PCRE (USE_PCRE), PCRE2 (USE_PCRE2) or the POSIX regex API when neither is
 * defined. The backend tests use defined() so that they agree with the ones
 * found in the inline functions operating on this structure.
 */
struct my_regex {
#if defined(USE_PCRE)
	pcre *reg;		/* compiled pattern */
	pcre_extra *extra;	/* extra data passed to pcre_exec() along with <reg> */
#if defined(USE_PCRE_JIT) && !defined(PCRE_CONFIG_JIT)
#error "The PCRE lib doesn't support JIT. Change your lib, or remove the option USE_PCRE_JIT."
#endif
#elif defined(USE_PCRE2)
	pcre2_code *reg;	/* compiled pattern */
#else /* no PCRE */
	regex_t regex;		/* POSIX compiled pattern */
#endif
};
Willy Tarreaubaaee002006-06-26 02:48:02 +020063
/* One regex-based replacement rule. Rules can be chained through <next>.
 * NOTE(review): how the chain is terminated and who owns <preg>, <replace>
 * and <cond> is not visible from this header - confirm against the users.
 */
struct hdr_exp {
	struct hdr_exp *next;		/* next rule in the chain */
	struct my_regex *preg;		/* expression to look for */
	const char *replace;		/* expression to set instead */
	void *cond;			/* a possible condition or NULL */
};
70
/* THREAD_LOCAL array of MAX_MATCH match slots, defined outside this header. */
extern THREAD_LOCAL regmatch_t pmatch[MAX_MATCH];

/* Compiles a regular expression.
 * "str" is the string that contains the regex to compile.
 * "cs" is the case sensitive flag. If cs is true, case sensitivity is enabled.
 * "cap" is the capture flag. If cap is true, the regex can capture
 *       parenthesized sub-strings.
 * "err" is the standard error message pointer.
 *
 * The function returns a pointer to the compiled regex.
 * NOTE(review): the failure convention is not visible from this header;
 * presumably NULL is returned and <err> is filled - confirm against the
 * implementation. The result is expected to be released with regex_free().
 */
struct my_regex *regex_comp(const char *str, int cs, int cap, char **err);
int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches);
const char *check_replace_string(const char *str);
Willy Tarreaubaaee002006-06-26 02:48:02 +020086
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +020087/* If the function doesn't match, it returns false, else it returns true.
88 */
89static inline int regex_exec(const struct my_regex *preg, char *subject) {
Thierry FOURNIER26202762014-06-18 11:50:51 +020090#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +020091 if (pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, 0, NULL, 0) < 0)
92 return 0;
93 return 1;
David Carlierf2592b22016-11-21 21:25:58 +000094#elif defined(USE_PCRE2)
95 pcre2_match_data *pm;
96 int ret;
97
98 pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
99 ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject),
100 0, 0, pm, NULL);
101 pcre2_match_data_free(pm);
102 if (ret < 0)
103 return 0;
104 return 1;
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200105#else
106 int match;
107 match = regexec(&preg->regex, subject, 0, NULL, 0);
108 if (match == REG_NOMATCH)
109 return 0;
110 return 1;
111#endif
112}
113
Thierry FOURNIERef37a662013-10-15 13:41:44 +0200114/* Note that <subject> MUST be at least <length+1> characters long and must
115 * be writable because the function will temporarily force a zero past the
116 * last character.
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200117 *
118 * If the function doesn't match, it returns false, else it returns true.
Thierry FOURNIERef37a662013-10-15 13:41:44 +0200119 */
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200120static inline int regex_exec2(const struct my_regex *preg, char *subject, int length) {
Thierry FOURNIER26202762014-06-18 11:50:51 +0200121#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200122 if (pcre_exec(preg->reg, preg->extra, subject, length, 0, 0, NULL, 0) < 0)
123 return 0;
124 return 1;
David Carlierf2592b22016-11-21 21:25:58 +0000125#elif defined(USE_PCRE2)
126 pcre2_match_data *pm;
127 int ret;
128
129 pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
130 ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length,
131 0, 0, pm, NULL);
132 pcre2_match_data_free(pm);
133 if (ret < 0)
134 return 0;
135 return 1;
Hiroaki Nakamura70351322013-01-13 15:00:42 +0900136#else
Thierry FOURNIERef37a662013-10-15 13:41:44 +0200137 int match;
138 char old_char = subject[length];
139 subject[length] = 0;
Thierry FOURNIER799c0422013-12-06 20:36:20 +0100140 match = regexec(&preg->regex, subject, 0, NULL, 0);
Thierry FOURNIERef37a662013-10-15 13:41:44 +0200141 subject[length] = old_char;
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200142 if (match == REG_NOMATCH)
143 return 0;
144 return 1;
Hiroaki Nakamura70351322013-01-13 15:00:42 +0900145#endif
146}
147
/* Matching variants taking a match array, implemented outside this header.
 * NOTE(review): judging from the prototypes only, <pmatch> is an array of
 * <nmatch> entries receiving the match positions, and regex_exec_match2()
 * takes an explicit <length> for a non-const <subject>. The return
 * convention and the meaning of <flags> are not visible here - confirm
 * against the implementation.
 */
int regex_exec_match(const struct my_regex *preg, const char *subject,
                     size_t nmatch, regmatch_t pmatch[], int flags);
int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
                      size_t nmatch, regmatch_t pmatch[], int flags);
Thierry FOURNIERb8f980c2014-06-11 13:59:05 +0200152
Thierry FOURNIER799c0422013-12-06 20:36:20 +0100153static inline void regex_free(struct my_regex *preg) {
Dragan Dosen26743032019-04-30 15:54:36 +0200154 if (!preg)
155 return;
Thierry FOURNIER26202762014-06-18 11:50:51 +0200156#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
157 pcre_free(preg->reg);
Christian Ruppertde898712014-11-18 13:03:58 +0100158/* PCRE < 8.20 requires pcre_free() while >= 8.20 requires pcre_study_free(),
159 * which is easily detected using PCRE_CONFIG_JIT.
160 */
161#ifdef PCRE_CONFIG_JIT
Hiroaki Nakamura70351322013-01-13 15:00:42 +0900162 pcre_free_study(preg->extra);
Christian Ruppertde898712014-11-18 13:03:58 +0100163#else /* PCRE_CONFIG_JIT */
164 pcre_free(preg->extra);
165#endif /* PCRE_CONFIG_JIT */
David Carlierf2592b22016-11-21 21:25:58 +0000166#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
167 pcre2_code_free(preg->reg);
Hiroaki Nakamura70351322013-01-13 15:00:42 +0900168#else
Thierry FOURNIER799c0422013-12-06 20:36:20 +0100169 regfree(&preg->regex);
Hiroaki Nakamura70351322013-01-13 15:00:42 +0900170#endif
Dragan Dosen26743032019-04-30 15:54:36 +0200171 free(preg);
Hiroaki Nakamura70351322013-01-13 15:00:42 +0900172}
173
Willy Tarreau2dd0d472006-06-29 17:53:05 +0200174#endif /* _COMMON_REGEX_H */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200175
176/*
177 * Local variables:
178 * c-indent-level: 8
179 * c-basic-offset: 8
180 * End:
181 */