Wolfgang Denk | 2aceea1 | 2013-03-23 23:50:31 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2004-2005 Sergey Lyubka <valenok@gmail.com> |
| 3 | * All rights reserved |
| 4 | * |
| 5 | * "THE BEER-WARE LICENSE" (Revision 42): |
| 6 | * Sergey Lyubka wrote this file. As long as you retain this notice you |
| 7 | * can do whatever you want with this stuff. If we meet some day, and you think |
| 8 | * this stuff is worth it, you can buy me a beer in return. |
| 9 | */ |
| 10 | |
| 11 | /* |
| 12 | * Downloaded Sat Nov 5 17:43:06 CET 2011 at |
| 13 | * http://slre.sourceforge.net/1.0/slre.c |
| 14 | */ |
| 15 | |
| 16 | #ifdef SLRE_TEST |
| 17 | #include <stdio.h> |
| 18 | #include <assert.h> |
| 19 | #include <ctype.h> |
| 20 | #include <stdlib.h> |
| 21 | #include <string.h> |
| 22 | #else |
Simon Glass | 0f2af88 | 2020-05-10 11:40:05 -0600 | [diff] [blame] | 23 | #include <log.h> |
Wolfgang Denk | 2aceea1 | 2013-03-23 23:50:31 +0000 | [diff] [blame] | 24 | #include <linux/ctype.h> |
Tom Rini | a877ce1 | 2023-12-14 13:16:58 -0500 | [diff] [blame] | 25 | #include <linux/string.h> |
Wolfgang Denk | 2aceea1 | 2013-03-23 23:50:31 +0000 | [diff] [blame] | 26 | #endif /* SLRE_TEST */ |
| 27 | |
| 28 | #include <errno.h> |
| 29 | |
| 30 | #include <slre.h> |
| 31 | |
| 32 | enum {END, BRANCH, ANY, EXACT, ANYOF, ANYBUT, OPEN, CLOSE, BOL, EOL, |
| 33 | STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT}; |
| 34 | |
| 35 | #ifdef SLRE_TEST |
| 36 | static struct { |
| 37 | const char *name; |
| 38 | int narg; |
| 39 | const char *flags; |
| 40 | } opcodes[] = { |
| 41 | {"END", 0, ""}, /* End of code block or program */ |
| 42 | {"BRANCH", 2, "oo"}, /* Alternative operator, "|" */ |
| 43 | {"ANY", 0, ""}, /* Match any character, "." */ |
| 44 | {"EXACT", 2, "d"}, /* Match exact string */ |
| 45 | {"ANYOF", 2, "D"}, /* Match any from set, "[]" */ |
| 46 | {"ANYBUT", 2, "D"}, /* Match any but from set, "[^]"*/ |
| 47 | {"OPEN ", 1, "i"}, /* Capture start, "(" */ |
| 48 | {"CLOSE", 1, "i"}, /* Capture end, ")" */ |
| 49 | {"BOL", 0, ""}, /* Beginning of string, "^" */ |
| 50 | {"EOL", 0, ""}, /* End of string, "$" */ |
| 51 | {"STAR", 1, "o"}, /* Match zero or more times "*" */ |
| 52 | {"PLUS", 1, "o"}, /* Match one or more times, "+" */ |
| 53 | {"STARQ", 1, "o"}, /* Non-greedy STAR, "*?" */ |
| 54 | {"PLUSQ", 1, "o"}, /* Non-greedy PLUS, "+?" */ |
| 55 | {"QUEST", 1, "o"}, /* Match zero or one time, "?" */ |
| 56 | {"SPACE", 0, ""}, /* Match whitespace, "\s" */ |
| 57 | {"NONSPACE", 0, ""}, /* Match non-space, "\S" */ |
| 58 | {"DIGIT", 0, ""} /* Match digit, "\d" */ |
| 59 | }; |
| 60 | #endif /* SLRE_TEST */ |
| 61 | |
| 62 | /* |
| 63 | * Commands and operands are all unsigned char (1 byte long). All code offsets |
| 64 | * are relative to current address, and positive (always point forward). Data |
| 65 | * offsets are absolute. Commands with operands: |
| 66 | * |
| 67 | * BRANCH offset1 offset2 |
| 68 | * Try to match the code block that follows the BRANCH instruction |
| 69 | * (code block ends with END). If no match, try to match code block that |
| 70 | * starts at offset1. If either of these match, jump to offset2. |
| 71 | * |
| 72 | * EXACT data_offset data_length |
| 73 | * Try to match exact string. String is recorded in data section from |
| 74 | * data_offset, and has length data_length. |
| 75 | * |
| 76 | * OPEN capture_number |
| 77 | * CLOSE capture_number |
| 78 | * If the user have passed 'struct cap' array for captures, OPEN |
| 79 | * records the beginning of the matched substring (cap->ptr), CLOSE |
| 80 | * sets the length (cap->len) for respective capture_number. |
| 81 | * |
| 82 | * STAR code_offset |
| 83 | * PLUS code_offset |
| 84 | * QUEST code_offset |
| 85 | * *, +, ?, respectively. Try to gobble as much as possible from the |
| 86 | * matched buffer, until code block that follows these instructions |
| 87 | * matches. When the longest possible string is matched, |
| 88 | * jump to code_offset |
| 89 | * |
| 90 | * STARQ, PLUSQ are non-greedy versions of STAR and PLUS. |
| 91 | */ |
| 92 | |
| 93 | static const char *meta_chars = "|.^$*+?()[\\"; |
| 94 | |
| 95 | #ifdef SLRE_TEST |
| 96 | |
| 97 | static void |
| 98 | print_character_set(FILE *fp, const unsigned char *p, int len) |
| 99 | { |
| 100 | int i; |
| 101 | |
| 102 | for (i = 0; i < len; i++) { |
| 103 | if (i > 0) |
| 104 | (void) fputc(',', fp); |
| 105 | if (p[i] == 0) { |
| 106 | i++; |
| 107 | if (p[i] == 0) |
| 108 | (void) fprintf(fp, "\\x%02x", p[i]); |
| 109 | else |
| 110 | (void) fprintf(fp, "%s", opcodes[p[i]].name); |
| 111 | } else if (isprint(p[i])) { |
| 112 | (void) fputc(p[i], fp); |
| 113 | } else { |
| 114 | (void) fprintf(fp, "\\x%02x", p[i]); |
| 115 | } |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | void |
| 120 | slre_dump(const struct slre *r, FILE *fp) |
| 121 | { |
| 122 | int i, j, ch, op, pc; |
| 123 | |
| 124 | for (pc = 0; pc < r->code_size; pc++) { |
| 125 | |
| 126 | op = r->code[pc]; |
| 127 | (void) fprintf(fp, "%3d %s ", pc, opcodes[op].name); |
| 128 | |
| 129 | for (i = 0; opcodes[op].flags[i] != '\0'; i++) |
| 130 | switch (opcodes[op].flags[i]) { |
| 131 | case 'i': |
| 132 | (void) fprintf(fp, "%d ", r->code[pc + 1]); |
| 133 | pc++; |
| 134 | break; |
| 135 | case 'o': |
| 136 | (void) fprintf(fp, "%d ", |
| 137 | pc + r->code[pc + 1] - i); |
| 138 | pc++; |
| 139 | break; |
| 140 | case 'D': |
| 141 | print_character_set(fp, r->data + |
| 142 | r->code[pc + 1], r->code[pc + 2]); |
| 143 | pc += 2; |
| 144 | break; |
| 145 | case 'd': |
| 146 | (void) fputc('"', fp); |
| 147 | for (j = 0; j < r->code[pc + 2]; j++) { |
| 148 | ch = r->data[r->code[pc + 1] + j]; |
| 149 | if (isprint(ch)) { |
| 150 | (void) fputc(ch, fp); |
| 151 | } else { |
| 152 | (void) fprintf(fp, |
| 153 | "\\x%02x", ch); |
| 154 | } |
| 155 | } |
| 156 | (void) fputc('"', fp); |
| 157 | pc += 2; |
| 158 | break; |
| 159 | } |
| 160 | |
| 161 | (void) fputc('\n', fp); |
| 162 | } |
| 163 | } |
| 164 | #endif /* SLRE_TEST */ |
| 165 | |
| 166 | static void |
| 167 | set_jump_offset(struct slre *r, int pc, int offset) |
| 168 | { |
| 169 | assert(offset < r->code_size); |
| 170 | |
| 171 | if (r->code_size - offset > 0xff) |
| 172 | r->err_str = "Jump offset is too big"; |
| 173 | else |
| 174 | r->code[pc] = (unsigned char) (r->code_size - offset); |
| 175 | } |
| 176 | |
| 177 | static void |
| 178 | emit(struct slre *r, int code) |
| 179 | { |
| 180 | if (r->code_size >= (int) (sizeof(r->code) / sizeof(r->code[0]))) |
| 181 | r->err_str = "RE is too long (code overflow)"; |
| 182 | else |
| 183 | r->code[r->code_size++] = (unsigned char) code; |
| 184 | } |
| 185 | |
| 186 | static void |
| 187 | store_char_in_data(struct slre *r, int ch) |
| 188 | { |
| 189 | if (r->data_size >= (int) sizeof(r->data)) |
| 190 | r->err_str = "RE is too long (data overflow)"; |
| 191 | else |
| 192 | r->data[r->data_size++] = ch; |
| 193 | } |
| 194 | |
| 195 | static void |
| 196 | exact(struct slre *r, const char **re) |
| 197 | { |
| 198 | int old_data_size = r->data_size; |
| 199 | |
| 200 | while (**re != '\0' && (strchr(meta_chars, **re)) == NULL) |
| 201 | store_char_in_data(r, *(*re)++); |
| 202 | |
| 203 | emit(r, EXACT); |
| 204 | emit(r, old_data_size); |
| 205 | emit(r, r->data_size - old_data_size); |
| 206 | } |
| 207 | |
| 208 | static int |
| 209 | get_escape_char(const char **re) |
| 210 | { |
| 211 | int res; |
| 212 | |
| 213 | switch (*(*re)++) { |
| 214 | case 'n': |
| 215 | res = '\n'; |
| 216 | break; |
| 217 | case 'r': |
| 218 | res = '\r'; |
| 219 | break; |
| 220 | case 't': |
| 221 | res = '\t'; |
| 222 | break; |
| 223 | case '0': |
| 224 | res = 0; |
| 225 | break; |
| 226 | case 'S': |
| 227 | res = NONSPACE << 8; |
| 228 | break; |
| 229 | case 's': |
| 230 | res = SPACE << 8; |
| 231 | break; |
| 232 | case 'd': |
| 233 | res = DIGIT << 8; |
| 234 | break; |
| 235 | default: |
| 236 | res = (*re)[-1]; |
| 237 | break; |
| 238 | } |
| 239 | |
| 240 | return res; |
| 241 | } |
| 242 | |
| 243 | static void |
| 244 | anyof(struct slre *r, const char **re) |
| 245 | { |
| 246 | int esc, old_data_size = r->data_size, op = ANYOF; |
| 247 | |
| 248 | if (**re == '^') { |
| 249 | op = ANYBUT; |
| 250 | (*re)++; |
| 251 | } |
| 252 | |
| 253 | while (**re != '\0') |
| 254 | |
| 255 | switch (*(*re)++) { |
| 256 | case ']': |
| 257 | emit(r, op); |
| 258 | emit(r, old_data_size); |
| 259 | emit(r, r->data_size - old_data_size); |
| 260 | return; |
| 261 | /* NOTREACHED */ |
| 262 | break; |
| 263 | case '\\': |
| 264 | esc = get_escape_char(re); |
| 265 | if ((esc & 0xff) == 0) { |
| 266 | store_char_in_data(r, 0); |
| 267 | store_char_in_data(r, esc >> 8); |
| 268 | } else { |
| 269 | store_char_in_data(r, esc); |
| 270 | } |
| 271 | break; |
| 272 | default: |
| 273 | store_char_in_data(r, (*re)[-1]); |
| 274 | break; |
| 275 | } |
| 276 | |
| 277 | r->err_str = "No closing ']' bracket"; |
| 278 | } |
| 279 | |
| 280 | static void |
| 281 | relocate(struct slre *r, int begin, int shift) |
| 282 | { |
| 283 | emit(r, END); |
| 284 | memmove(r->code + begin + shift, r->code + begin, r->code_size - begin); |
| 285 | r->code_size += shift; |
| 286 | } |
| 287 | |
| 288 | static void |
| 289 | quantifier(struct slre *r, int prev, int op) |
| 290 | { |
| 291 | if (r->code[prev] == EXACT && r->code[prev + 2] > 1) { |
| 292 | r->code[prev + 2]--; |
| 293 | emit(r, EXACT); |
| 294 | emit(r, r->code[prev + 1] + r->code[prev + 2]); |
| 295 | emit(r, 1); |
| 296 | prev = r->code_size - 3; |
| 297 | } |
| 298 | relocate(r, prev, 2); |
| 299 | r->code[prev] = op; |
| 300 | set_jump_offset(r, prev + 1, prev); |
| 301 | } |
| 302 | |
| 303 | static void |
| 304 | exact_one_char(struct slre *r, int ch) |
| 305 | { |
| 306 | emit(r, EXACT); |
| 307 | emit(r, r->data_size); |
| 308 | emit(r, 1); |
| 309 | store_char_in_data(r, ch); |
| 310 | } |
| 311 | |
| 312 | static void |
| 313 | fixup_branch(struct slre *r, int fixup) |
| 314 | { |
| 315 | if (fixup > 0) { |
| 316 | emit(r, END); |
| 317 | set_jump_offset(r, fixup, fixup - 2); |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | static void |
| 322 | compile(struct slre *r, const char **re) |
| 323 | { |
| 324 | int op, esc, branch_start, last_op, fixup, cap_no, level; |
| 325 | |
| 326 | fixup = 0; |
| 327 | level = r->num_caps; |
| 328 | branch_start = last_op = r->code_size; |
| 329 | |
| 330 | for (;;) |
| 331 | switch (*(*re)++) { |
| 332 | case '\0': |
| 333 | (*re)--; |
| 334 | return; |
| 335 | /* NOTREACHED */ |
| 336 | break; |
| 337 | case '^': |
| 338 | emit(r, BOL); |
| 339 | break; |
| 340 | case '$': |
| 341 | emit(r, EOL); |
| 342 | break; |
| 343 | case '.': |
| 344 | last_op = r->code_size; |
| 345 | emit(r, ANY); |
| 346 | break; |
| 347 | case '[': |
| 348 | last_op = r->code_size; |
| 349 | anyof(r, re); |
| 350 | break; |
| 351 | case '\\': |
| 352 | last_op = r->code_size; |
| 353 | esc = get_escape_char(re); |
| 354 | if (esc & 0xff00) |
| 355 | emit(r, esc >> 8); |
| 356 | else |
| 357 | exact_one_char(r, esc); |
| 358 | break; |
| 359 | case '(': |
| 360 | last_op = r->code_size; |
| 361 | cap_no = ++r->num_caps; |
| 362 | emit(r, OPEN); |
| 363 | emit(r, cap_no); |
| 364 | |
| 365 | compile(r, re); |
| 366 | if (*(*re)++ != ')') { |
| 367 | r->err_str = "No closing bracket"; |
| 368 | return; |
| 369 | } |
| 370 | |
| 371 | emit(r, CLOSE); |
| 372 | emit(r, cap_no); |
| 373 | break; |
| 374 | case ')': |
| 375 | (*re)--; |
| 376 | fixup_branch(r, fixup); |
| 377 | if (level == 0) { |
| 378 | r->err_str = "Unbalanced brackets"; |
| 379 | return; |
| 380 | } |
| 381 | return; |
| 382 | /* NOTREACHED */ |
| 383 | break; |
| 384 | case '+': |
| 385 | case '*': |
| 386 | op = (*re)[-1] == '*' ? STAR : PLUS; |
| 387 | if (**re == '?') { |
| 388 | (*re)++; |
| 389 | op = op == STAR ? STARQ : PLUSQ; |
| 390 | } |
| 391 | quantifier(r, last_op, op); |
| 392 | break; |
| 393 | case '?': |
| 394 | quantifier(r, last_op, QUEST); |
| 395 | break; |
| 396 | case '|': |
| 397 | fixup_branch(r, fixup); |
| 398 | relocate(r, branch_start, 3); |
| 399 | r->code[branch_start] = BRANCH; |
| 400 | set_jump_offset(r, branch_start + 1, branch_start); |
| 401 | fixup = branch_start + 2; |
| 402 | r->code[fixup] = 0xff; |
| 403 | break; |
| 404 | default: |
| 405 | (*re)--; |
| 406 | last_op = r->code_size; |
| 407 | exact(r, re); |
| 408 | break; |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | int |
| 413 | slre_compile(struct slre *r, const char *re) |
| 414 | { |
| 415 | r->err_str = NULL; |
| 416 | r->code_size = r->data_size = r->num_caps = r->anchored = 0; |
| 417 | |
| 418 | if (*re == '^') |
| 419 | r->anchored++; |
| 420 | |
| 421 | emit(r, OPEN); /* This will capture what matches full RE */ |
| 422 | emit(r, 0); |
| 423 | |
| 424 | while (*re != '\0') |
| 425 | compile(r, &re); |
| 426 | |
| 427 | if (r->code[2] == BRANCH) |
| 428 | fixup_branch(r, 4); |
| 429 | |
| 430 | emit(r, CLOSE); |
| 431 | emit(r, 0); |
| 432 | emit(r, END); |
| 433 | |
| 434 | return (r->err_str == NULL ? 1 : 0); |
| 435 | } |
| 436 | |
| 437 | static int match(const struct slre *, int, |
| 438 | const char *, int, int *, struct cap *); |
| 439 | |
| 440 | static void |
| 441 | loop_greedy(const struct slre *r, int pc, const char *s, int len, int *ofs) |
| 442 | { |
| 443 | int saved_offset, matched_offset; |
| 444 | |
xypron.glpk@gmx.de | d6462bb | 2017-05-08 21:13:43 +0200 | [diff] [blame] | 445 | matched_offset = *ofs; |
Wolfgang Denk | 2aceea1 | 2013-03-23 23:50:31 +0000 | [diff] [blame] | 446 | |
| 447 | while (match(r, pc + 2, s, len, ofs, NULL)) { |
| 448 | saved_offset = *ofs; |
| 449 | if (match(r, pc + r->code[pc + 1], s, len, ofs, NULL)) |
| 450 | matched_offset = saved_offset; |
| 451 | *ofs = saved_offset; |
| 452 | } |
| 453 | |
| 454 | *ofs = matched_offset; |
| 455 | } |
| 456 | |
| 457 | static void |
| 458 | loop_non_greedy(const struct slre *r, int pc, const char *s, int len, int *ofs) |
| 459 | { |
| 460 | int saved_offset = *ofs; |
| 461 | |
| 462 | while (match(r, pc + 2, s, len, ofs, NULL)) { |
| 463 | saved_offset = *ofs; |
| 464 | if (match(r, pc + r->code[pc + 1], s, len, ofs, NULL)) |
| 465 | break; |
| 466 | } |
| 467 | |
| 468 | *ofs = saved_offset; |
| 469 | } |
| 470 | |
| 471 | static int |
| 472 | is_any_of(const unsigned char *p, int len, const char *s, int *ofs) |
| 473 | { |
| 474 | int i, ch; |
| 475 | |
| 476 | ch = s[*ofs]; |
| 477 | |
| 478 | for (i = 0; i < len; i++) |
| 479 | if (p[i] == ch) { |
| 480 | (*ofs)++; |
| 481 | return 1; |
| 482 | } |
| 483 | |
| 484 | return 0; |
| 485 | } |
| 486 | |
| 487 | static int |
| 488 | is_any_but(const unsigned char *p, int len, const char *s, int *ofs) |
| 489 | { |
| 490 | int i, ch; |
| 491 | |
| 492 | ch = s[*ofs]; |
| 493 | |
| 494 | for (i = 0; i < len; i++) { |
| 495 | if (p[i] == ch) |
| 496 | return 0; |
| 497 | } |
| 498 | |
| 499 | (*ofs)++; |
| 500 | return 1; |
| 501 | } |
| 502 | |
| 503 | static int |
| 504 | match(const struct slre *r, int pc, const char *s, int len, |
| 505 | int *ofs, struct cap *caps) |
| 506 | { |
| 507 | int n, saved_offset, res = 1; |
| 508 | |
| 509 | while (res && r->code[pc] != END) { |
| 510 | |
| 511 | assert(pc < r->code_size); |
| 512 | assert(pc < (int) (sizeof(r->code) / sizeof(r->code[0]))); |
| 513 | |
| 514 | switch (r->code[pc]) { |
| 515 | case BRANCH: |
| 516 | saved_offset = *ofs; |
| 517 | res = match(r, pc + 3, s, len, ofs, caps); |
| 518 | if (res == 0) { |
| 519 | *ofs = saved_offset; |
| 520 | res = match(r, pc + r->code[pc + 1], |
| 521 | s, len, ofs, caps); |
| 522 | } |
| 523 | pc += r->code[pc + 2]; |
| 524 | break; |
| 525 | case EXACT: |
| 526 | res = 0; |
| 527 | n = r->code[pc + 2]; /* String length */ |
| 528 | if (n <= len - *ofs && !memcmp(s + *ofs, r->data + |
| 529 | r->code[pc + 1], n)) { |
| 530 | (*ofs) += n; |
| 531 | res = 1; |
| 532 | } |
| 533 | pc += 3; |
| 534 | break; |
| 535 | case QUEST: |
| 536 | res = 1; |
| 537 | saved_offset = *ofs; |
| 538 | if (!match(r, pc + 2, s, len, ofs, caps)) |
| 539 | *ofs = saved_offset; |
| 540 | pc += r->code[pc + 1]; |
| 541 | break; |
| 542 | case STAR: |
| 543 | res = 1; |
| 544 | loop_greedy(r, pc, s, len, ofs); |
| 545 | pc += r->code[pc + 1]; |
| 546 | break; |
| 547 | case STARQ: |
| 548 | res = 1; |
| 549 | loop_non_greedy(r, pc, s, len, ofs); |
| 550 | pc += r->code[pc + 1]; |
| 551 | break; |
| 552 | case PLUS: |
| 553 | res = match(r, pc + 2, s, len, ofs, caps); |
| 554 | if (res == 0) |
| 555 | break; |
| 556 | |
| 557 | loop_greedy(r, pc, s, len, ofs); |
| 558 | pc += r->code[pc + 1]; |
| 559 | break; |
| 560 | case PLUSQ: |
| 561 | res = match(r, pc + 2, s, len, ofs, caps); |
| 562 | if (res == 0) |
| 563 | break; |
| 564 | |
| 565 | loop_non_greedy(r, pc, s, len, ofs); |
| 566 | pc += r->code[pc + 1]; |
| 567 | break; |
| 568 | case SPACE: |
| 569 | res = 0; |
| 570 | if (*ofs < len && isspace(((unsigned char *)s)[*ofs])) { |
| 571 | (*ofs)++; |
| 572 | res = 1; |
| 573 | } |
| 574 | pc++; |
| 575 | break; |
| 576 | case NONSPACE: |
| 577 | res = 0; |
| 578 | if (*ofs < len && |
| 579 | !isspace(((unsigned char *)s)[*ofs])) { |
| 580 | (*ofs)++; |
| 581 | res = 1; |
| 582 | } |
| 583 | pc++; |
| 584 | break; |
| 585 | case DIGIT: |
| 586 | res = 0; |
| 587 | if (*ofs < len && isdigit(((unsigned char *)s)[*ofs])) { |
| 588 | (*ofs)++; |
| 589 | res = 1; |
| 590 | } |
| 591 | pc++; |
| 592 | break; |
| 593 | case ANY: |
| 594 | res = 0; |
| 595 | if (*ofs < len) { |
| 596 | (*ofs)++; |
| 597 | res = 1; |
| 598 | } |
| 599 | pc++; |
| 600 | break; |
| 601 | case ANYOF: |
| 602 | res = 0; |
| 603 | if (*ofs < len) |
| 604 | res = is_any_of(r->data + r->code[pc + 1], |
| 605 | r->code[pc + 2], s, ofs); |
| 606 | pc += 3; |
| 607 | break; |
| 608 | case ANYBUT: |
| 609 | res = 0; |
| 610 | if (*ofs < len) |
| 611 | res = is_any_but(r->data + r->code[pc + 1], |
| 612 | r->code[pc + 2], s, ofs); |
| 613 | pc += 3; |
| 614 | break; |
| 615 | case BOL: |
| 616 | res = *ofs == 0 ? 1 : 0; |
| 617 | pc++; |
| 618 | break; |
| 619 | case EOL: |
| 620 | res = *ofs == len ? 1 : 0; |
| 621 | pc++; |
| 622 | break; |
| 623 | case OPEN: |
| 624 | if (caps != NULL) |
| 625 | caps[r->code[pc + 1]].ptr = s + *ofs; |
| 626 | pc += 2; |
| 627 | break; |
| 628 | case CLOSE: |
| 629 | if (caps != NULL) |
| 630 | caps[r->code[pc + 1]].len = (s + *ofs) - |
| 631 | caps[r->code[pc + 1]].ptr; |
| 632 | pc += 2; |
| 633 | break; |
| 634 | case END: |
| 635 | pc++; |
| 636 | break; |
| 637 | default: |
| 638 | printf("unknown cmd (%d) at %d\n", r->code[pc], pc); |
| 639 | assert(0); |
| 640 | break; |
| 641 | } |
| 642 | } |
| 643 | |
| 644 | return res; |
| 645 | } |
| 646 | |
| 647 | int |
| 648 | slre_match(const struct slre *r, const char *buf, int len, |
| 649 | struct cap *caps) |
| 650 | { |
| 651 | int i, ofs = 0, res = 0; |
| 652 | |
| 653 | if (r->anchored) { |
| 654 | res = match(r, 0, buf, len, &ofs, caps); |
| 655 | } else { |
| 656 | for (i = 0; i < len && res == 0; i++) { |
| 657 | ofs = i; |
| 658 | res = match(r, 0, buf, len, &ofs, caps); |
| 659 | } |
| 660 | } |
| 661 | |
| 662 | return res; |
| 663 | } |
| 664 | |
| 665 | #ifdef SLRE_TEST |
| 666 | #define N_CAPS 5 |
| 667 | |
| 668 | int main(int argc, char *argv[]) |
| 669 | { |
| 670 | struct slre slre; |
| 671 | struct cap caps[N_CAPS]; |
| 672 | unsigned char data[1 * 1024 * 1024]; |
| 673 | FILE *fp; |
| 674 | int i, res, len; |
| 675 | |
| 676 | if (argc < 2) { |
| 677 | fprintf(stderr, "Usage: %s 'slre' <file>\n", argv[0]); |
| 678 | return 1; |
| 679 | } |
| 680 | |
| 681 | fp = fopen(argv[2], "rb"); |
| 682 | if (fp == NULL) { |
| 683 | fprintf(stderr, "Error: cannot open %s:%s\n", |
| 684 | argv[2], strerror(errno)); |
| 685 | return 1; |
| 686 | } |
| 687 | |
| 688 | if (!slre_compile(&slre, argv[1])) { |
Francois Berder | a341321 | 2023-11-12 20:16:50 +0100 | [diff] [blame] | 689 | (void) fclose(fp); |
Wolfgang Denk | 2aceea1 | 2013-03-23 23:50:31 +0000 | [diff] [blame] | 690 | fprintf(stderr, "Error compiling slre: %s\n", slre.err_str); |
| 691 | return 1; |
| 692 | } |
Wolfgang Denk | bd8ec7e | 2013-10-07 13:07:26 +0200 | [diff] [blame] | 693 | |
Wolfgang Denk | 2aceea1 | 2013-03-23 23:50:31 +0000 | [diff] [blame] | 694 | slre_dump(&slre, stderr); |
| 695 | |
| 696 | while (fgets(data, sizeof(data), fp) != NULL) { |
| 697 | len = strlen(data); |
| 698 | |
| 699 | if ((len > 0) && (data[len-1] == '\n')) { |
| 700 | data[len-1] = '\0'; |
| 701 | --len; |
| 702 | } |
| 703 | |
| 704 | printf("Data = \"%s\"\n", data); |
| 705 | |
| 706 | (void) memset(caps, 0, sizeof(caps)); |
| 707 | |
Wolfgang Denk | 2aceea1 | 2013-03-23 23:50:31 +0000 | [diff] [blame] | 708 | res = slre_match(&slre, data, len, caps); |
| 709 | printf("Result [%d]: %d\n", i, res); |
| 710 | |
| 711 | for (i = 0; i < N_CAPS; i++) { |
| 712 | if (caps[i].len > 0) { |
| 713 | printf("Substring %d: len=%d [%.*s]\n", i, |
| 714 | caps[i].len, |
| 715 | caps[i].len, caps[i].ptr); |
| 716 | } |
| 717 | } |
| 718 | printf("----------------------------------------------------\n"); |
| 719 | } |
| 720 | (void) fclose(fp); |
| 721 | |
| 722 | return 0; |
| 723 | } |
| 724 | #endif /* SLRE_TEST */ |