1 // -V::506
2
3 /* Copyright (c) 2014 by Ian Piumarta
4 * All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the 'Software'),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, provided that the above copyright notice(s) and this
12 * permission notice appear in all copies of the Software. Acknowledgement
13 * of the use of this Software in supporting documentation would be
14 * appreciated but is not required.
15 *
16 * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK.
17 */
18
19 #include <ctype.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <setjmp.h>
23 #include <assert.h>
24
25 #include "lwre.h"
26
27 #ifndef RE_ERROR
28 # define RE_ERROR(RE, CODE, MESSAGE) { (RE)->error_message = (MESSAGE); \
29 longjmp(*(RE)->error_env, (re->error_code = RE_ERROR_ ## CODE)); }
30 #endif
31
32 #ifndef RE_MALLOC
33 # define RE_MALLOC(RE, SIZE) re__malloc((RE), (SIZE))
34
re__malloc(struct re * re,size_t size)35 static void *re__malloc(struct re *re, size_t size) {
36 void *p = malloc(size);
37 if (!p) {
38 RE_ERROR(re, NOMEM, "out of memory");
39 }
40 return p;
41 }
42
43 #endif
44
45 #ifndef RE_CALLOC
46 # define RE_CALLOC(RE, NMEMB, SIZE) re__calloc((RE), (NMEMB), (SIZE))
47
re__calloc(struct re * re,size_t nmemb,size_t size)48 static void *re__calloc(struct re *re, size_t nmemb, size_t size) {
49 void *p = calloc(nmemb, size);
50 if (p) {
51 return p;
52 }
53 if (re) {
54 RE_ERROR(re, NOMEM, "out of memory");
55 }
56 return p;
57 }
58
59 #endif
60
61 #ifndef RE_REALLOC
62 # define RE_REALLOC(RE, PTR, SIZE) re__realloc((RE), (PTR), (SIZE))
63
re__realloc(struct re * re,void * ptr,size_t size)64 static inline void *re__realloc(struct re *re, void *ptr, size_t size) {
65 void *p = realloc(ptr, size);
66 if (!p) {
67 RE_ERROR(re, NOMEM, "out of memory");
68 }
69 return p;
70 }
71
72 #endif
73
74 #ifndef RE_FREE
75 # define RE_FREE(RE, PTR) free(PTR)
76 #endif
77
78 /* arrays */
79
80 #define re_array_of(TYPE) \
81 struct { \
82 int size; \
83 int capacity; \
84 TYPE *at; \
85 }
86
87 #define RE_ARRAY_OF_INITIALISER { 0, 0, 0 }
88
89 #define re_array_append(RE, ARRAY, ELEMENT) \
90 ((ARRAY).size++, \
91 (((ARRAY).size > (ARRAY).capacity) \
92 ? ((ARRAY).at = RE_REALLOC((RE), (ARRAY).at, \
93 sizeof(*(ARRAY).at) * ((ARRAY).capacity = ((ARRAY).capacity \
94 ? ((ARRAY).capacity * 2) \
95 : 8)))) \
96 : (ARRAY).at) \
97 [(ARRAY).size - 1] = (ELEMENT))
98
99 #define re_array_copy(RE, ARRAY) \
100 { (ARRAY).size, \
101 (ARRAY).size, \
102 memcpy(RE_MALLOC((RE), sizeof((ARRAY).at[0]) * (ARRAY).size), \
103 (ARRAY).at, \
104 sizeof((ARRAY).at[0]) * (ARRAY).size) }
105
106 #define re_array_release(RE, ARRAY) { \
107 if ((ARRAY).at) { \
108 RE_FREE((RE), (ARRAY).at); \
109 (ARRAY).at = 0; \
110 } \
111 }
112
113 /* bit sets */
114
115 struct RE_BitSet;
116 typedef struct RE_BitSet RE_BitSet;
117
118 struct RE_BitSet {
119 int inverted;
120 unsigned char bits[256 / sizeof(unsigned char)];
121 };
122
re_bitset__includes(RE_BitSet * c,int i)123 static int re_bitset__includes(RE_BitSet *c, int i) {
124 if ((i < 0) || (255 < i)) {
125 return 0;
126 }
127 return (c->bits[i / 8] >> (i % 8)) & 1;
128 }
129
re_bitset_includes(RE_BitSet * c,int i)130 static int re_bitset_includes(RE_BitSet *c, int i) {
131 int inc = re_bitset__includes(c, i);
132 if (c->inverted) {
133 inc = !inc;
134 }
135 return inc;
136 }
137
re_bitset_add(RE_BitSet * c,int i)138 static void re_bitset_add(RE_BitSet *c, int i) {
139 if ((i < 0) || (255 < i)) {
140 return;
141 }
142 c->bits[i / 8] |= (1 << (i % 8));
143 }
144
145 /* character classes */
146
re_make_char(struct re * re)147 static int re_make_char(struct re *re) {
148 const char *p = re->position;
149 if (!*p) {
150 return 0;
151 }
152 int c = *p++;
153 if (('\\' == c) && *p) {
154 c = *p++;
155 }
156 re->position = p;
157 return c;
158 }
159
re_make_class(struct re * re)160 static RE_BitSet *re_make_class(struct re *re) {
161 RE_BitSet *c = RE_CALLOC(re, 1, sizeof(RE_BitSet));
162 int last = -1;
163 c->inverted = ('^' == *re->position); // -V522
164 if (c->inverted) {
165 re->position++;
166 }
167 while (*re->position && (']' != *re->position)) {
168 int this = re->position[0];
169 if (('-' == this) && (last >= 0) && re->position[1] && (']' != re->position[1])) {
170 re->position++;
171 this = re_make_char(re);
172 do {
173 re_bitset_add(c, last++);
174 } while (last <= this);
175 last = -1;
176 } else {
177 this = re_make_char(re);
178 re_bitset_add(c, this);
179 last = this;
180 }
181 }
182 return c;
183 }
184
185 /* instructions */
186
187 enum { RE_Any, RE_Char, RE_Class, RE_Accept, RE_Jump, RE_Fork, RE_Begin, RE_End, };
188
189 struct RE_Insn;
190 typedef struct RE_Insn RE_Insn;
191
192 struct RE_Insn {
193 int opcode;
194 long x;
195 union {
196 long y;
197 RE_BitSet *c;
198 };
199 union {
200 RE_Insn *next;
201 char *stamp;
202 };
203 };
204
205 struct RE_Compiled;
206 typedef struct RE_Compiled RE_Compiled;
207
208 /*
209 struct RE_Compiled
210 {
211 int size;
212 RE_Insn *first;
213 RE_Insn *last;
214 };
215
216 #define RE_COMPILED_INITIALISER { 0, 0, 0 }
217 */
218
re_insn_new(struct re * re,int opc)219 static RE_Compiled re_insn_new(struct re *re, int opc) {
220 RE_Insn *insn = RE_CALLOC(re, 1, sizeof(RE_Insn));
221 insn->opcode = opc; // -V522
222 RE_Compiled insns = { 1, insn, insn };
223 return insns;
224 }
225
re_new_Any(struct re * re)226 static RE_Compiled re_new_Any(struct re *re) {
227 RE_Compiled insns = re_insn_new(re, RE_Any);
228 return insns;
229 }
230
re_new_Char(struct re * re,int c)231 static RE_Compiled re_new_Char(struct re *re, int c) {
232 RE_Compiled insns = re_insn_new(re, RE_Char);
233 insns.first->x = c;
234 return insns;
235 }
236
re_new_Class(struct re * re,RE_BitSet * c)237 static RE_Compiled re_new_Class(struct re *re, RE_BitSet *c) {
238 RE_Compiled insns = re_insn_new(re, RE_Class);
239 insns.first->c = c;
240 return insns;
241 }
242
re_new_Accept(struct re * re)243 static RE_Compiled re_new_Accept(struct re *re) {
244 RE_Compiled insns = re_insn_new(re, RE_Accept);
245 return insns;
246 }
247
re_new_Jump(struct re * re,int x)248 static RE_Compiled re_new_Jump(struct re *re, int x) {
249 RE_Compiled insns = re_insn_new(re, RE_Jump);
250 insns.first->x = x;
251 return insns;
252 }
253
re_new_Fork(struct re * re,int x,int y)254 static RE_Compiled re_new_Fork(struct re *re, int x, int y) {
255 RE_Compiled insns = re_insn_new(re, RE_Fork);
256 insns.first->x = x;
257 insns.first->y = y;
258 return insns;
259 }
260
re_new_Begin(struct re * re)261 static RE_Compiled re_new_Begin(struct re *re) {
262 RE_Compiled insns = re_insn_new(re, RE_Begin);
263 return insns;
264 }
265
re_new_End(struct re * re)266 static RE_Compiled re_new_End(struct re *re) {
267 RE_Compiled insns = re_insn_new(re, RE_End);
268 return insns;
269 }
270
re_program_append(RE_Compiled * insns,RE_Compiled tail)271 static void re_program_append(RE_Compiled *insns, RE_Compiled tail) {
272 insns->last->next = tail.first;
273 insns->last = tail.last;
274 insns->size += tail.size;
275 }
276
re_program_prepend(RE_Compiled * insns,RE_Compiled head)277 static void re_program_prepend(RE_Compiled *insns, RE_Compiled head) {
278 head.last->next = insns->first;
279 insns->first = head.first;
280 insns->size += head.size;
281 }
282
re_program_free(struct re * re,RE_Compiled * insns)283 static void re_program_free(struct re *re, RE_Compiled *insns) {
284 int i;
285 for (i = 0; i < insns->size; ++i) {
286 switch (insns->first[i].opcode) {
287 case RE_Class: {
288 RE_FREE(re, insns->first[i].c);
289 insns->first[i].c = 0;
290 break;
291 }
292 }
293 }
294 RE_FREE(re, insns->first);
295 insns->first = insns->last = 0;
296 insns->size = 0;
297 }
298
299 /* compilation */
300
301 /*
302 struct re
303 {
304 char *expression;
305 char *position;
306 jmp_buf *error_env;
307 int error_code;
308 char *error_message;
309 struct RE_Compiled code;
310 char **matches;
311 int nmatches;
312 };
313 */
314
315 static RE_Compiled re_compile_expression(struct re *re);
316
re_compile_primary(struct re * re)317 static RE_Compiled re_compile_primary(struct re *re) {
318 int c = *re->position++;
319 assert(0 != c);
320 switch (c) {
321 case '\\': {
322 if (*re->position) {
323 c = *re->position++;
324 }
325 break;
326 }
327 case '.': {
328 return re_new_Any(re);
329 }
330 case '[': {
331 RE_BitSet *cc = re_make_class(re);
332 if (']' != *re->position) {
333 RE_FREE(re, cc);
334 RE_ERROR(re, CHARSET, "expected ']' at end of character set");
335 }
336 re->position++;
337 return re_new_Class(re, cc);
338 };
339 case '(': {
340 RE_Compiled insns = re_compile_expression(re);
341 if (')' != *re->position) {
342 RE_Insn *insn, *next;
343 for (insn = insns.first; insn; insn = next) {
344 next = insn->next;
345 RE_FREE(re, insn);
346 }
347 RE_ERROR(re, SUBEXP, "expected ')' at end of subexpression");
348 }
349 re->position++;
350 return insns;
351 }
352 case '{': {
353 RE_Compiled insns = re_compile_expression(re);
354 if ('}' != *re->position) {
355 RE_Insn *insn, *next;
356 for (insn = insns.first; insn; insn = next) {
357 next = insn->next;
358 RE_FREE(re, insn);
359 }
360 RE_ERROR(re, SUBMATCH, "expected '}' at end of submatch");
361 }
362 re_program_prepend(&insns, re_new_Begin(re));
363 re_program_append(&insns, re_new_End(re));
364 re->position++;
365 return insns;
366 }
367 }
368 return re_new_Char(re, c);
369 }
370
re_compile_suffix(struct re * re)371 static RE_Compiled re_compile_suffix(struct re *re) {
372 RE_Compiled insns = re_compile_primary(re);
373 switch (*re->position) {
374 case '?': {
375 re->position++;
376 if ('?' == *re->position) {
377 re->position++;
378 re_program_prepend(&insns, re_new_Fork(re, insns.size, 0));
379 } else {
380 re_program_prepend(&insns, re_new_Fork(re, 0, insns.size));
381 }
382 break;
383 }
384 case '*': {
385 re->position++;
386 if ('?' == *re->position) {
387 re->position++;
388 re_program_prepend(&insns, re_new_Fork(re, insns.size + 1, 0));
389 } else {
390 re_program_prepend(&insns, re_new_Fork(re, 0, insns.size + 1));
391 }
392 re_program_append(&insns, re_new_Jump(re, -(insns.size + 1)));
393 break;
394 }
395 case '+': {
396 re->position++;
397 if ('?' == *re->position) {
398 re->position++;
399 re_program_append(&insns, re_new_Fork(re, 0, -(insns.size + 1)));
400 } else {
401 re_program_append(&insns, re_new_Fork(re, -(insns.size + 1), 0));
402 }
403 break;
404 }
405 }
406 return insns;
407 }
408
re_compile_sequence(struct re * re)409 static RE_Compiled re_compile_sequence(struct re *re) {
410 if (!*re->position) {
411 return re_new_Accept(re);
412 }
413 RE_Compiled head = re_compile_suffix(re);
414 while (*re->position && !strchr("|)}>", *re->position))
415 re_program_append(&head, re_compile_suffix(re));
416 if (!*re->position) {
417 re_program_append(&head, re_new_Accept(re));
418 }
419 return head;
420 }
421
re_compile_expression(struct re * re)422 static RE_Compiled re_compile_expression(struct re *re) {
423 RE_Compiled head = re_compile_sequence(re);
424 while ('|' == *re->position) {
425 re->position++;
426 RE_Compiled tail = re_compile_sequence(re);
427 re_program_append(&head, re_new_Jump(re, tail.size));
428 re_program_prepend(&head, re_new_Fork(re, 0, head.size));
429 re_program_append(&head, tail);
430 }
431 return head;
432 }
433
re_compile(struct re * re)434 static RE_Compiled re_compile(struct re *re) {
435 jmp_buf env;
436 RE_Compiled insns = RE_COMPILED_INITIALISER;
437 re->error_env = &env;
438 if (setjmp(env)) { /* syntax error */
439 return insns;
440 }
441 insns = re_compile_expression(re);
442 re_array_of(RE_Insn) program = RE_ARRAY_OF_INITIALISER;
443 RE_Insn *insn, *next;
444 for (insn = insns.first; insn; insn = next) {
445 re_array_append(re, program, *insn);
446 next = insn->next;
447 RE_FREE(re, insn);
448 }
449
450 #if 0
451 int i;
452 for (i = 0; i < program.size; ++i) {
453 RE_Insn *insn = &program.at[i];
454 printf("%03i ", i);
455 switch (insn->opcode) {
456 case RE_Any:
457 printf("Any\n");
458 break;
459 case RE_Char:
460 printf("Char %li\n", insn->x);
461 break;
462 case RE_Class: {
463 printf("Class ");
464 {
465 RE_BitSet *c = insn->c;
466 int i;
467 putchar('[');
468 if (c->inverted) {
469 putchar('^');
470 }
471 for (i = 0; i < 256; ++i)
472 if (re_bitset__includes(c, i)) {
473 switch (i) {
474 case '\a':
475 printf("\\a");
476 break;
477 case '\b':
478 printf("\\b");
479 break;
480 case '\t':
481 printf("\\t");
482 break;
483 case '\n':
484 printf("\\n");
485 break;
486 case '\v':
487 printf("\\v");
488 break;
489 case '\f':
490 printf("\\f");
491 break;
492 case '\r':
493 printf("\\r");
494 break;
495 case '\\':
496 printf("\\\\");
497 break;
498 case ']':
499 printf("\\]");
500 break;
501 case '^':
502 printf("\\^");
503 break;
504 case '-':
505 printf("\\-");
506 break;
507 default:
508 if (isprint(i)) {
509 putchar(i);
510 } else {
511 printf("\\x%02x", i);
512 }
513 }
514 }
515 putchar(']');
516 }
517 putchar('\n');
518 break;
519 }
520 case RE_Accept:
521 printf("Accept\n");
522 break;
523 case RE_Jump:
524 printf("Jump %li -> %03li\n", insn->x, i + 1 + insn->x);
525 break;
526 case RE_Fork:
527 printf("Fork %li %li -> %03li %03li\n", insn->x, insn->y, i + 1 + insn->x, i + 1 + insn->y);
528 break;
529 case RE_Begin:
530 printf("Begin\n");
531 break;
532 case RE_End:
533 printf("End\n");
534 break;
535 default:
536 printf("?%i\n", insn->opcode);
537 break;
538 }
539 }
540 #endif
541
542 assert(program.size == insns.size);
543
544 insns.first = program.at;
545 insns.last = insns.first + insns.size;
546
547 return insns;
548 }
549
550 /* submatch recording */
551
552 typedef re_array_of(char*) re_array_of_charp;
553
554 struct RE_Submatches;
555 typedef struct RE_Submatches RE_Submatches;
556
557 struct RE_Submatches {
558 int refs;
559 re_array_of_charp beginnings;
560 re_array_of_charp endings;
561 };
562
re_submatches_copy(struct re * re,RE_Submatches * orig)563 static RE_Submatches *re_submatches_copy(struct re *re, RE_Submatches *orig) {
564 RE_Submatches *subs = RE_CALLOC(re, 1, sizeof(RE_Submatches));
565 if (orig) {
566 subs->beginnings = (re_array_of_charp) re_array_copy(re, orig->beginnings); // -V522
567 subs->endings = (re_array_of_charp) re_array_copy(re, orig->endings);
568 }
569 return subs;
570 }
571
re_submatches_free(struct re * re,RE_Submatches * subs)572 static void re_submatches_free(struct re *re, RE_Submatches *subs) {
573 assert(subs);
574 assert(!subs->refs);
575 re_array_release(re, subs->beginnings);
576 re_array_release(re, subs->endings);
577 RE_FREE(re, subs);
578 }
579
re_submatches_link(RE_Submatches * subs)580 static inline RE_Submatches *re_submatches_link(RE_Submatches *subs) {
581 if (subs) {
582 subs->refs++;
583 }
584 return subs;
585 }
586
re_submatches_unlink(struct re * re,RE_Submatches * subs)587 static inline void re_submatches_unlink(struct re *re, RE_Submatches *subs) {
588 if (subs && (0 == --(subs->refs))) {
589 re_submatches_free(re, subs);
590 }
591 }
592
593 /* matching */
594
595 struct RE_Thread;
596 typedef struct RE_Thread RE_Thread;
597
598 struct RE_Thread {
599 RE_Insn *pc;
600 RE_Submatches *submatches;
601 };
602
603 struct RE_ThreadList;
604 typedef struct RE_ThreadList RE_ThreadList;
605
606 struct RE_ThreadList {
607 int size;
608 RE_Thread *at;
609 };
610
re_thread(RE_Insn * pc,RE_Submatches * subs)611 static inline RE_Thread re_thread(RE_Insn *pc, RE_Submatches *subs) {
612 return (RE_Thread) {
613 pc, subs
614 };
615 }
616
re_thread_schedule(struct re * re,RE_ThreadList * threads,RE_Insn * pc,char * sp,RE_Submatches * subs)617 static void re_thread_schedule(struct re *re, RE_ThreadList *threads, RE_Insn *pc, char *sp, RE_Submatches *subs) {
618 if (pc->stamp == sp) {
619 return;
620 }
621 pc->stamp = sp;
622
623 switch (pc->opcode) {
624 case RE_Jump:
625 re_thread_schedule(re, threads, pc + 1 + pc->x, sp, subs);
626 return;
627 case RE_Fork:
628 re_thread_schedule(re, threads, pc + 1 + pc->x, sp, subs);
629 re_thread_schedule(re, threads, pc + 1 + pc->y, sp, subs);
630 return;
631 case RE_Begin:
632 subs = re_submatches_copy(re, subs);
633 re_array_append(re, subs->beginnings, sp); // -V522
634 re_thread_schedule(re, threads, pc + 1, sp, subs);
635 if (!subs->refs) {
636 re_submatches_free(re, subs);
637 }
638 return;
639 case RE_End: {
640 subs = re_submatches_copy(re, subs);
641 # if 0 /* non-nesting groups: ab{cd{ef}gh}ij => {cdef} {efgh} */
642 re_array_append(re, subs->endings, sp);
643 # else /* nesting groups: ab{cd{ef}gh}ij => {cdefgh} {ef} */
644 while (subs->endings.size < subs->beginnings.size) re_array_append(re, subs->endings, 0);
645 int i;
646 for (i = subs->endings.size; i--; ) {
647 if (!subs->endings.at[i]) {
648 subs->endings.at[i] = sp;
649 break;
650 }
651 }
652 # endif
653 re_thread_schedule(re, threads, pc + 1, sp, subs);
654 if (!subs->refs) {
655 re_submatches_free(re, subs);
656 }
657 return;
658 }
659 }
660 threads->at[threads->size++] = re_thread(pc, re_submatches_link(subs));
661 }
662
re_program_run(struct re * re,char * input,char *** saved,int * nsaved)663 static int re_program_run(struct re *re, char *input, char ***saved, int *nsaved) {
664 int matched = RE_ERROR_NOMATCH;
665 if (!re) {
666 return matched;
667 }
668
669 RE_Submatches *submatches = 0;
670 RE_ThreadList a = { 0, 0 }, b = { 0, 0 }, *here = &a, *next = &b;
671
672 char *sp = input;
673 re->position = 0;
674
675 jmp_buf env;
676 re->error_env = &env;
677
678 if (setjmp(env)) { /* out of memory */
679 matched = re->error_code;
680 goto bailout;
681 }
682
683 a.at = RE_CALLOC(re, re->code.size, sizeof(RE_Thread));
684 b.at = RE_CALLOC(re, re->code.size, sizeof(RE_Thread));
685
686 re_thread_schedule(re, here, re->code.first, input, 0);
687
688 {
689 int i;
690 for (i = 0; i < re->code.size; ++i)
691 re->code.first[i].stamp = 0;
692 }
693
694 for (sp = input; here->size; ++sp) {
695 int i;
696 for (i = 0; i < here->size; ++i) {
697 RE_Thread t = here->at[i];
698 switch (t.pc->opcode) {
699 case RE_Any: {
700 if (*sp) {
701 re_thread_schedule(re, next, t.pc + 1, sp + 1, t.submatches);
702 }
703 break;
704 }
705 case RE_Char: {
706 if (*sp == t.pc->x) {
707 re_thread_schedule(re, next, t.pc + 1, sp + 1, t.submatches);
708 }
709 break;
710 }
711 case RE_Class: {
712 if (re_bitset_includes(t.pc->c, *sp)) {
713 re_thread_schedule(re, next, t.pc + 1, sp + 1, t.submatches);
714 }
715 break;
716 }
717 case RE_Accept: {
718 matched = sp - input;
719 re_submatches_unlink(re, submatches);
720 submatches = re_submatches_link(t.submatches);
721 while (i < here->size) re_submatches_unlink(re, here->at[i++].submatches);
722 goto nextchar;
723 }
724 default:
725 RE_ERROR(re, ENGINE, "illegal instruction in compiled regular expression (please report this bug)");
726 }
727 re_submatches_unlink(re, t.submatches);
728 }
729 nextchar:
730 ;
731 RE_ThreadList *tmp = here;
732 here = next;
733 next = tmp;
734 next->size = 0;
735 if (!*sp) {
736 break;
737 }
738 }
739
740 bailout:
741 re->position = sp;
742
743 {
744 int i;
745 for (i = 0; i < here->size; ++i)
746 re_submatches_unlink(re, here->at[i].submatches);
747 }
748
749 RE_FREE(re, a.at);
750 RE_FREE(re, b.at);
751
752 if (submatches) {
753 if (saved && nsaved && (matched >= 0)) {
754 assert(submatches->beginnings.size == submatches->endings.size);
755 *nsaved = submatches->beginnings.size * 2;
756 *saved = RE_CALLOC(re, *nsaved, sizeof(char*));
757 int i;
758 for (i = 0; i < *nsaved; i += 2) {
759 (*saved)[i + 0] = submatches->beginnings.at[i / 2];
760 (*saved)[i + 1] = submatches->endings.at[i / 2];
761 }
762 }
763 re_submatches_unlink(re, submatches);
764 }
765
766 return matched;
767 }
768
769 /* public interface */
770
lwre_new(const char * expr)771 struct re *lwre_new(const char *expr) {
772 struct re *re = RE_CALLOC(0, 1, sizeof(struct re));
773 if (re) {
774 re->expression = expr;
775 }
776 return re;
777 }
778
lwre_match(struct re * re,char * input)779 int lwre_match(struct re *re, char *input) {
780 RE_FREE(re, re->matches);
781 re->matches = 0;
782 re->nmatches = 0;
783 if (!re->expression) {
784 return 0;
785 }
786 if (!re->code.size) {
787 re->position = re->expression;
788 re->error_code = 0;
789 re->error_message = 0;
790 re->code = re_compile(re);
791 if (re->error_code) {
792 return re->error_code;
793 }
794 re->position = 0;
795 }
796 return re_program_run(re, input, &re->matches, &re->nmatches);
797 }
798
lwre_release(struct re * re)799 void lwre_release(struct re *re) {
800 RE_FREE(re, re->matches);
801 if (re->code.first) {
802 re_program_free(re, &re->code);
803 }
804 memset(re, 0, sizeof(*re));
805 }
806
lwre_reset(struct re * re,const char * expression)807 void lwre_reset(struct re *re, const char *expression) {
808 lwre_release(re);
809 re->expression = expression;
810 }
811
lwre_free(struct re * re)812 void lwre_free(struct re *re) {
813 lwre_release(re);
814 RE_FREE(0, re);
815 }
816
817 /* utility */
818
re_digit(int c,int base)819 static int re_digit(int c, int base) {
820 if ((c >= '0') && (c <= '9')) {
821 c -= '0';
822 } else if ((c >= 'A') && (c <= 'Z')) {
823 c -= ('A' - 10);
824 } else if ((c >= 'a') && (c <= 'z')) {
825 c -= ('a' - 10);
826 } else {
827 return -1;
828 }
829 if (c >= base) {
830 return -1;
831 }
832 return c;
833 }
834
re_byte(char ** sp,int least,int most,int base,int liberal)835 static int re_byte(char **sp, int least, int most, int base, int liberal) {
836 int c = 0;
837 char *s = *sp;
838 while (s - *sp < most) {
839 int d = re_digit(*s, base);
840 if (d < 0) {
841 break;
842 }
843 ++s;
844 c = c * base + d;
845 }
846 if (s - *sp < least) {
847 if (liberal) {
848 return (*sp)[-1];
849 }
850 --*sp;
851 return '\\';
852 }
853 *sp = s;
854 return c;
855 }
856
re_log2floor(unsigned int n)857 static int re_log2floor(unsigned int n) {
858 if (!n) {
859 return 0;
860 }
861 int b = 1;
862 # define _do(x) if (n >= (1U << x)) (b += x), (n >>= x)
863 _do(16);
864 _do(8);
865 _do(4);
866 _do(2);
867 _do(1);
868 # undef _do
869 return b;
870 }
871
re_escape_utf8(char ** sp,unsigned int c)872 static void re_escape_utf8(char **sp, unsigned int c) {
873 char *s = *sp;
874 if (c < 128) {
875 *s++ = c;
876 } else { /* this is good for up to 36 bits of c, which proves that Gordon Bell was right all along */
877 int n = re_log2floor((unsigned) c) / 6;
878 int m = 6 * n;
879 *s++ = (0xff << (7 - n)) + (c >> m);
880 while ((m -= 6) >= 0) *s++ = 0x80 + ((c >> m) & 0x3F);
881 }
882 *sp = s;
883 }
884
lwre_escape(char * s,int liberal)885 char *lwre_escape(char *s, int liberal) {
886 char *in = s, *out = s;
887 int c;
888 while ((c = *in++)) {
889 int u = 0;
890 if ('\\' == c) {
891 c = *in++;
892 switch (c) {
893 case '0':
894 case '1':
895 c = re_byte(&in, 1, 3, 8, liberal);
896 break;
897 case '\\':
898 //c = '\\';
899 break;
900 case 'a':
901 c = '\a';
902 break;
903 case 'b':
904 c = '\b';
905 break;
906 case 'f':
907 c = '\f';
908 break;
909 case 'n':
910 c = '\n';
911 break;
912 case 'r':
913 c = '\r';
914 break;
915 case 't':
916 c = '\t';
917 break;
918 case 'U':
919 c = re_byte(&in, 8, 8, 16, liberal);
920 u = 1;
921 break;
922 case 'u':
923 c = re_byte(&in, 4, 4, 16, liberal);
924 u = 1;
925 break;
926 case 'v':
927 c = '\v';
928 break;
929 case 'x':
930 c = re_byte(&in, 1, 2, 16, liberal);
931 break;
932 default: {
933 if (!liberal) { /* pass escape character through unharmed */
934 --in;
935 c = '\\';
936 }
937 break;
938 }
939 }
940 }
941 if (u) {
942 re_escape_utf8(&out, c);
943 } else {
944 *out++ = c;
945 }
946 }
947 assert(out <= in);
948 *out = 0;
949 return s;
950 }
951
952 /* testing */
953
954 #ifdef LWRE_TEST
955
956 #include <stdio.h>
957
958 /* echo stdin to stout with ANSI terminal escapes to turn every number red */
959
main(int argc,char ** argv)960 int main(int argc, char **argv) {
961 static struct re re = RE_INITIALISER("(.*?{[0-9]+})*");
962 char buf[1024];
963 while (fgets(buf, sizeof(buf), stdin)) {
964 int n;
965 if (((n = lwre_match(&re, buf)) < 0) && (RE_ERROR_NOMATCH != n)) {
966 fprintf(stderr, "%i %ss: %s\n", n, re.error_message, re.position);
967 break;
968 } else {
969 char *p = buf;
970 int n = 0;
971 while (*p) {
972 if ((n < re.nmatches) && (p == re.matches[n])) {
973 if (n & 1) {
974 printf("\033[0m"); /* end of match: clear all attributes */
975 } else {
976 printf("\033[1;31m"); /* start of match: bold and foreground red */
977 }
978 ++n;
979 }
980 putchar(*p++);
981 }
982 }
983 }
984 lwre_release(&re);
985 return 0;
986 }
987
988 #endif /* REGEXP_TEST */
989