Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 1 | /* |
Willy Tarreau | f4f0412 | 2010-01-28 18:10:50 +0100 | [diff] [blame] | 2 | * include/common/regex.h |
| 3 | * This file defines everything related to regular expressions. |
| 4 | * |
| 5 | * Copyright (C) 2000-2010 Willy Tarreau - w@1wt.eu |
| 6 | * |
| 7 | * This library is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU Lesser General Public |
| 9 | * License as published by the Free Software Foundation, version 2.1 |
| 10 | * exclusively. |
| 11 | * |
| 12 | * This library is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | * Lesser General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU Lesser General Public |
| 18 | * License along with this library; if not, write to the Free Software |
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 | */ |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 21 | |
Willy Tarreau | 2dd0d47 | 2006-06-29 17:53:05 +0200 | [diff] [blame] | 22 | #ifndef _COMMON_REGEX_H |
| 23 | #define _COMMON_REGEX_H |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 24 | |
Thierry FOURNIER | e28f1ec | 2013-10-09 15:23:01 +0200 | [diff] [blame] | 25 | #include <stdlib.h> |
Thierry FOURNIER | ec9a58c | 2015-11-26 19:33:54 +0100 | [diff] [blame] | 26 | #include <string.h> |
Thierry FOURNIER | e28f1ec | 2013-10-09 15:23:01 +0200 | [diff] [blame] | 27 | |
Willy Tarreau | e3ba5f0 | 2006-06-29 18:54:54 +0200 | [diff] [blame] | 28 | #include <common/config.h> |
Willy Tarreau | 5778fea | 2020-05-09 09:08:09 +0200 | [diff] [blame] | 29 | #include <common/hathreads.h> |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 30 | |
| 31 | #ifdef USE_PCRE |
| 32 | #include <pcre.h> |
| 33 | #include <pcreposix.h> |
Christian Ruppert | de89871 | 2014-11-18 13:03:58 +0100 | [diff] [blame] | 34 | |
| 35 | /* For pre-8.20 PCRE compatibility */ |
| 36 | #ifndef PCRE_STUDY_JIT_COMPILE |
| 37 | #define PCRE_STUDY_JIT_COMPILE 0 |
| 38 | #endif |
| 39 | |
David Carlier | f2592b2 | 2016-11-21 21:25:58 +0000 | [diff] [blame] | 40 | #elif USE_PCRE2 |
| 41 | #include <pcre2.h> |
| 42 | #include <pcre2posix.h> |
| 43 | |
| 44 | #else /* no PCRE, nor PCRE2 */ |
Thierry FOURNIER | 799c042 | 2013-12-06 20:36:20 +0100 | [diff] [blame] | 45 | #include <regex.h> |
| 46 | #endif |
Hiroaki Nakamura | 7035132 | 2013-01-13 15:00:42 +0900 | [diff] [blame] | 47 | |
/* Compiled regular expression. The layout depends on the regex engine
 * selected at build time: PCRE when USE_PCRE is defined, PCRE2 when
 * USE_PCRE2 is defined, and the POSIX <regex.h> implementation otherwise.
 */
struct my_regex {
#ifdef USE_PCRE
	pcre *reg;		/* compiled pattern, given to pcre_exec() */
	pcre_extra *extra;	/* extra data given to pcre_exec() along with <reg> */
#ifdef USE_PCRE_JIT
#ifndef PCRE_CONFIG_JIT
#error "The PCRE lib doesn't support JIT. Change your lib, or remove the option USE_PCRE_JIT."
#endif
#endif
#elif defined(USE_PCRE2)	/* test definedness, like the functions using this struct */
	pcre2_code *reg;	/* compiled pattern, given to pcre2_match() */
#else /* no PCRE */
	regex_t regex;		/* POSIX compiled pattern, given to regexec() */
#endif
};
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 63 | |
/* what to do when a header matches a regex; these are the values stored in
 * the <action> field of struct hdr_exp.
 */
#define ACT_ALLOW	0	/* allow the request */
#define ACT_REPLACE	1	/* replace the matching header */
#define ACT_REMOVE	2	/* remove the matching header */
#define ACT_DENY	3	/* deny the request */
#define ACT_PASS	4	/* pass this header without allowing or denying the request */
#define ACT_TARPIT	5	/* tarpit the connection matching this request */
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 71 | |
/* One header-matching rule: a compiled regex plus the action to apply when
 * it matches. Rules are chained through <next>.
 */
struct hdr_exp {
	struct hdr_exp *next;	/* next rule in the chain */
	struct my_regex *preg;	/* expression to look for */
	int action;		/* one of the ACT_* values, ACT_ALLOW through ACT_TARPIT */
	const char *replace;	/* expression to set instead */
	void *cond;		/* a possible condition or NULL */
};
| 79 | |
/* Shared array of MAX_MATCH match slots, defined in another translation unit.
 * THREAD_LOCAL and MAX_MATCH come from the included project headers;
 * presumably THREAD_LOCAL gives each thread its own copy -- confirm there.
 */
extern THREAD_LOCAL regmatch_t pmatch[MAX_MATCH];
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 81 | |
/* "str" is the string that contains the regex to compile.
 * "cs" is the case sensitive flag. If cs is true, case sensitivity is enabled.
 * "cap" is the capture flag. If cap is true the regex can capture
 *       parenthesized sub-strings.
 * "err" is the standard error message pointer.
 *
 * The function returns a pointer to the compiled regex, to be released with
 * regex_free(). NOTE(review): on failure it presumably returns NULL with
 * <err> filled -- confirm against the implementation.
 */
struct my_regex *regex_comp(const char *str, int cs, int cap, char **err);

/* The three functions below are implemented elsewhere; refer to their
 * definitions for the exact contracts.
 */
int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches);
const char *check_replace_string(const char *str);
const char *chain_regex(struct hdr_exp **head, struct my_regex *preg,
			int action, const char *replace, void *cond);
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 97 | |
Thierry FOURNIER | b8f980c | 2014-06-11 13:59:05 +0200 | [diff] [blame] | 98 | /* If the function doesn't match, it returns false, else it returns true. |
| 99 | */ |
| 100 | static inline int regex_exec(const struct my_regex *preg, char *subject) { |
Thierry FOURNIER | 2620276 | 2014-06-18 11:50:51 +0200 | [diff] [blame] | 101 | #if defined(USE_PCRE) || defined(USE_PCRE_JIT) |
Thierry FOURNIER | b8f980c | 2014-06-11 13:59:05 +0200 | [diff] [blame] | 102 | if (pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, 0, NULL, 0) < 0) |
| 103 | return 0; |
| 104 | return 1; |
David Carlier | f2592b2 | 2016-11-21 21:25:58 +0000 | [diff] [blame] | 105 | #elif defined(USE_PCRE2) |
| 106 | pcre2_match_data *pm; |
| 107 | int ret; |
| 108 | |
| 109 | pm = pcre2_match_data_create_from_pattern(preg->reg, NULL); |
| 110 | ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), |
| 111 | 0, 0, pm, NULL); |
| 112 | pcre2_match_data_free(pm); |
| 113 | if (ret < 0) |
| 114 | return 0; |
| 115 | return 1; |
Thierry FOURNIER | b8f980c | 2014-06-11 13:59:05 +0200 | [diff] [blame] | 116 | #else |
| 117 | int match; |
| 118 | match = regexec(&preg->regex, subject, 0, NULL, 0); |
| 119 | if (match == REG_NOMATCH) |
| 120 | return 0; |
| 121 | return 1; |
| 122 | #endif |
| 123 | } |
| 124 | |
Thierry FOURNIER | ef37a66 | 2013-10-15 13:41:44 +0200 | [diff] [blame] | 125 | /* Note that <subject> MUST be at least <length+1> characters long and must |
| 126 | * be writable because the function will temporarily force a zero past the |
| 127 | * last character. |
Thierry FOURNIER | b8f980c | 2014-06-11 13:59:05 +0200 | [diff] [blame] | 128 | * |
| 129 | * If the function doesn't match, it returns false, else it returns true. |
Thierry FOURNIER | ef37a66 | 2013-10-15 13:41:44 +0200 | [diff] [blame] | 130 | */ |
Thierry FOURNIER | b8f980c | 2014-06-11 13:59:05 +0200 | [diff] [blame] | 131 | static inline int regex_exec2(const struct my_regex *preg, char *subject, int length) { |
Thierry FOURNIER | 2620276 | 2014-06-18 11:50:51 +0200 | [diff] [blame] | 132 | #if defined(USE_PCRE) || defined(USE_PCRE_JIT) |
Thierry FOURNIER | b8f980c | 2014-06-11 13:59:05 +0200 | [diff] [blame] | 133 | if (pcre_exec(preg->reg, preg->extra, subject, length, 0, 0, NULL, 0) < 0) |
| 134 | return 0; |
| 135 | return 1; |
David Carlier | f2592b2 | 2016-11-21 21:25:58 +0000 | [diff] [blame] | 136 | #elif defined(USE_PCRE2) |
| 137 | pcre2_match_data *pm; |
| 138 | int ret; |
| 139 | |
| 140 | pm = pcre2_match_data_create_from_pattern(preg->reg, NULL); |
| 141 | ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, |
| 142 | 0, 0, pm, NULL); |
| 143 | pcre2_match_data_free(pm); |
| 144 | if (ret < 0) |
| 145 | return 0; |
| 146 | return 1; |
Hiroaki Nakamura | 7035132 | 2013-01-13 15:00:42 +0900 | [diff] [blame] | 147 | #else |
Thierry FOURNIER | ef37a66 | 2013-10-15 13:41:44 +0200 | [diff] [blame] | 148 | int match; |
| 149 | char old_char = subject[length]; |
| 150 | subject[length] = 0; |
Thierry FOURNIER | 799c042 | 2013-12-06 20:36:20 +0100 | [diff] [blame] | 151 | match = regexec(&preg->regex, subject, 0, NULL, 0); |
Thierry FOURNIER | ef37a66 | 2013-10-15 13:41:44 +0200 | [diff] [blame] | 152 | subject[length] = old_char; |
Thierry FOURNIER | b8f980c | 2014-06-11 13:59:05 +0200 | [diff] [blame] | 153 | if (match == REG_NOMATCH) |
| 154 | return 0; |
| 155 | return 1; |
Hiroaki Nakamura | 7035132 | 2013-01-13 15:00:42 +0900 | [diff] [blame] | 156 | #endif |
| 157 | } |
| 158 | |
/* Matching variants that also take an array <pmatch> of <nmatch> regmatch_t
 * entries and a <flags> value. regex_exec_match() takes only <subject>,
 * while regex_exec_match2() additionally takes an explicit <length>. Both
 * are implemented elsewhere.
 * NOTE(review): presumably <pmatch> receives the captured sub-matches and
 * the return value follows the true/false convention of regex_exec() --
 * confirm against the implementation.
 */
int regex_exec_match(const struct my_regex *preg, const char *subject,
		     size_t nmatch, regmatch_t pmatch[], int flags);
int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
		      size_t nmatch, regmatch_t pmatch[], int flags);
Thierry FOURNIER | b8f980c | 2014-06-11 13:59:05 +0200 | [diff] [blame] | 163 | |
Thierry FOURNIER | 799c042 | 2013-12-06 20:36:20 +0100 | [diff] [blame] | 164 | static inline void regex_free(struct my_regex *preg) { |
Dragan Dosen | 2674303 | 2019-04-30 15:54:36 +0200 | [diff] [blame] | 165 | if (!preg) |
| 166 | return; |
Thierry FOURNIER | 2620276 | 2014-06-18 11:50:51 +0200 | [diff] [blame] | 167 | #if defined(USE_PCRE) || defined(USE_PCRE_JIT) |
| 168 | pcre_free(preg->reg); |
Christian Ruppert | de89871 | 2014-11-18 13:03:58 +0100 | [diff] [blame] | 169 | /* PCRE < 8.20 requires pcre_free() while >= 8.20 requires pcre_study_free(), |
| 170 | * which is easily detected using PCRE_CONFIG_JIT. |
| 171 | */ |
| 172 | #ifdef PCRE_CONFIG_JIT |
Hiroaki Nakamura | 7035132 | 2013-01-13 15:00:42 +0900 | [diff] [blame] | 173 | pcre_free_study(preg->extra); |
Christian Ruppert | de89871 | 2014-11-18 13:03:58 +0100 | [diff] [blame] | 174 | #else /* PCRE_CONFIG_JIT */ |
| 175 | pcre_free(preg->extra); |
| 176 | #endif /* PCRE_CONFIG_JIT */ |
David Carlier | f2592b2 | 2016-11-21 21:25:58 +0000 | [diff] [blame] | 177 | #elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT) |
| 178 | pcre2_code_free(preg->reg); |
Hiroaki Nakamura | 7035132 | 2013-01-13 15:00:42 +0900 | [diff] [blame] | 179 | #else |
Thierry FOURNIER | 799c042 | 2013-12-06 20:36:20 +0100 | [diff] [blame] | 180 | regfree(&preg->regex); |
Hiroaki Nakamura | 7035132 | 2013-01-13 15:00:42 +0900 | [diff] [blame] | 181 | #endif |
Dragan Dosen | 2674303 | 2019-04-30 15:54:36 +0200 | [diff] [blame] | 182 | free(preg); |
Hiroaki Nakamura | 7035132 | 2013-01-13 15:00:42 +0900 | [diff] [blame] | 183 | } |
| 184 | |
Willy Tarreau | 2dd0d47 | 2006-06-29 17:53:05 +0200 | [diff] [blame] | 185 | #endif /* _COMMON_REGEX_H */ |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 186 | |
| 187 | /* |
| 188 | * Local variables: |
| 189 | * c-indent-level: 8 |
| 190 | * c-basic-offset: 8 |
| 191 | * End: |
| 192 | */ |