• Home
  • Raw
  • Download

Lines Matching +full:ls +full:- +full:tree

2   regcomp.c - TRE POSIX compatible regex compilation functions.
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
44 from tre-compile.h
60 from tre-ast.c and tre-ast.h
72 #define EMPTY -1 /* Empty leaf (denotes empty string). */
73 #define ASSERTION -2 /* Assertion leaf. */
74 #define TAG -3 /* Tag leaf. */
75 #define BACKREF -4 /* Back reference leaf. */
77 #define IS_SPECIAL(x) ((x)->code_min < 0)
78 #define IS_EMPTY(x) ((x)->code_min == EMPTY)
79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80 #define IS_TAG(x) ((x)->code_min == TAG)
81 #define IS_BACKREF(x) ((x)->code_min == BACKREF)
146 node->obj = obj; in tre_ast_new_node()
147 node->type = type; in tre_ast_new_node()
148 node->nullable = -1; in tre_ast_new_node()
149 node->submatch_id = -1; in tre_ast_new_node()
163 lit->code_min = code_min; in tre_ast_new_literal()
164 lit->code_max = code_max; in tre_ast_new_literal()
165 lit->position = position; in tre_ast_new_literal()
179 iter->arg = arg; in tre_ast_new_iter()
180 iter->min = min; in tre_ast_new_iter()
181 iter->max = max; in tre_ast_new_iter()
182 iter->minimal = minimal; in tre_ast_new_iter()
183 node->num_submatches = arg->num_submatches; in tre_ast_new_iter()
199 un->left = left; in tre_ast_new_union()
200 un->right = right; in tre_ast_new_union()
201 node->num_submatches = left->num_submatches + right->num_submatches; in tre_ast_new_union()
217 cat->left = left; in tre_ast_new_catenation()
218 cat->right = right; in tre_ast_new_catenation()
219 node->num_submatches = left->num_submatches + right->num_submatches; in tre_ast_new_catenation()
225 from tre-stack.c and tre-stack.h
309 s->stack = xmalloc(sizeof(*s->stack) * size); in tre_stack_new()
310 if (s->stack == NULL) in tre_stack_new()
315 s->size = size; in tre_stack_new()
316 s->max_size = max_size; in tre_stack_new()
317 s->increment = increment; in tre_stack_new()
318 s->ptr = 0; in tre_stack_new()
326 xfree(s->stack); in tre_stack_destroy()
333 return s->ptr; in tre_stack_num_objects()
339 if (s->ptr < s->size) in tre_stack_push()
341 s->stack[s->ptr] = value; in tre_stack_push()
342 s->ptr++; in tre_stack_push()
346 if (s->size >= s->max_size) in tre_stack_push()
354 new_size = s->size + s->increment; in tre_stack_push()
355 if (new_size > s->max_size) in tre_stack_push()
356 new_size = s->max_size; in tre_stack_push()
357 new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size); in tre_stack_push()
362 assert(new_size > s->size); in tre_stack_push()
363 s->size = new_size; in tre_stack_push()
364 s->stack = new_buffer; in tre_stack_push()
383 return s->stack[--s->ptr].typetag ## _value; \
391 from tre-parse.c and tre-parse.h
410 /* The highest back reference or -1 if none seen so far. */
444 return la[0]->code_min - lb[0]->code_min; in tre_compare_lit()
457 if (p->len >= p->cap) { in tre_new_lit()
458 if (p->cap >= 1<<15) in tre_new_lit()
460 p->cap *= 2; in tre_new_lit()
461 a = xrealloc(p->a, p->cap * sizeof *p->a); in tre_new_lit()
464 p->a = a; in tre_new_lit()
466 a = p->a + p->len++; in tre_new_lit()
467 *a = tre_mem_calloc(p->mem, sizeof **a); in tre_new_lit()
471 static int add_icase_literals(struct literals *ls, int min, int max) in add_icase_literals() argument
491 lit = tre_new_lit(ls); in add_icase_literals()
493 return -1; in add_icase_literals()
494 lit->code_min = b; in add_icase_literals()
495 lit->code_max = e-1; in add_icase_literals()
496 lit->position = -1; in add_icase_literals()
511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
518 Range = Char '-' Char | Char '-' '-'
520 Meta = ']' | '-'
526 '-' only at the beginning or end of a List and
531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, … in parse_bracket_terms() argument
541 len = mbtowc(&wc, s, -1); in parse_bracket_terms()
545 ctx->s = s+1; in parse_bracket_terms()
548 if (*s == '-' && s != start && s[1] != ']' && in parse_bracket_terms()
549 /* extension: [a-z--@] is accepted as [a-z]|[--@] */ in parse_bracket_terms()
550 (s[1] != '-' || s[2] == ']')) in parse_bracket_terms()
574 if (*s == '-' && s[1] != ']') { in parse_bracket_terms()
576 len = mbtowc(&wc, s, -1); in parse_bracket_terms()
578 /* XXX - Should use collation order instead of in parse_bracket_terms()
586 if (class && neg->negate) { in parse_bracket_terms()
587 if (neg->len >= MAX_NEG_CLASSES) in parse_bracket_terms()
589 neg->a[neg->len++] = class; in parse_bracket_terms()
591 tre_literal_t *lit = tre_new_lit(ls); in parse_bracket_terms()
594 lit->code_min = min; in parse_bracket_terms()
595 lit->code_max = max; in parse_bracket_terms()
596 lit->class = class; in parse_bracket_terms()
597 lit->position = -1; in parse_bracket_terms()
599 /* Add opposite-case codepoints if REG_ICASE is present. in parse_bracket_terms()
601 should happen before case-folding, but most practical in parse_bracket_terms()
604 case-fold ranges and bracket range sets even with in parse_bracket_terms()
606 if (ctx->cflags & REG_ICASE && !class) in parse_bracket_terms()
607 if (add_icase_literals(ls, min, max)) in parse_bracket_terms()
619 struct literals ls; in parse_bracket() local
623 ls.mem = ctx->mem; in parse_bracket()
624 ls.len = 0; in parse_bracket()
625 ls.cap = 32; in parse_bracket()
626 ls.a = xmalloc(ls.cap * sizeof *ls.a); in parse_bracket()
627 if (!ls.a) in parse_bracket()
634 err = parse_bracket_terms(ctx, s, &ls, &neg); in parse_bracket()
641 * any form of a non-matching list. in parse_bracket()
643 if (ctx->cflags & REG_NEWLINE) { in parse_bracket()
644 lit = tre_new_lit(&ls); in parse_bracket()
649 lit->code_min = '\n'; in parse_bracket()
650 lit->code_max = '\n'; in parse_bracket()
651 lit->position = -1; in parse_bracket()
654 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit); in parse_bracket()
656 lit = tre_new_lit(&ls); in parse_bracket()
661 lit->code_min = TRE_CHAR_MAX+1; in parse_bracket()
662 lit->code_max = TRE_CHAR_MAX+1; in parse_bracket()
663 lit->position = -1; in parse_bracket()
666 nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a); in parse_bracket()
678 for (i = 0; i < ls.len; i++) { in parse_bracket()
679 lit = ls.a[i]; in parse_bracket()
680 min = lit->code_min; in parse_bracket()
681 max = lit->code_max; in parse_bracket()
688 negmax = min - 1; in parse_bracket()
689 lit->code_min = negmin; in parse_bracket()
690 lit->code_max = negmax; in parse_bracket()
693 lit->position = ctx->position; in parse_bracket()
694 lit->neg_classes = nc; in parse_bracket()
695 n = tre_ast_new_node(ctx->mem, LITERAL, lit); in parse_bracket()
696 node = tre_ast_new_union(ctx->mem, node, n); in parse_bracket()
704 xfree(ls.a); in parse_bracket()
705 ctx->position++; in parse_bracket()
706 ctx->n = node; in parse_bracket()
712 *n = -1; in parse_dup_count()
717 *n = 10 * *n + (*s - '0'); in parse_dup_count()
751 if (c-'0'<10) return c-'0'; in hexval()
753 if (c-'a'<6) return c-'a'+10; in hexval()
754 return -1; in hexval()
759 if (node->submatch_id >= 0) { in marksub()
760 tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); in marksub()
763 n = tre_ast_new_catenation(ctx->mem, n, node); in marksub()
766 n->num_submatches = node->num_submatches; in marksub()
769 node->submatch_id = subid; in marksub()
770 node->num_submatches++; in marksub()
771 ctx->n = node; in marksub()
795 int len, ere = ctx->cflags & REG_EXTENDED; in parse_atom()
807 ctx->s = s+2; in parse_atom()
815 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1); in parse_atom()
818 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1); in parse_atom()
821 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1); in parse_atom()
824 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1); in parse_atom()
845 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++); in parse_atom()
846 s--; in parse_atom()
858 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); in parse_atom()
859 s--; in parse_atom()
864 if (!ere && (unsigned)*s-'1' < 9) { in parse_atom()
866 int val = *s - '0'; in parse_atom()
867 node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++); in parse_atom()
868 ctx->max_backref = MAX(val, ctx->max_backref); in parse_atom()
878 if (ctx->cflags & REG_NEWLINE) { in parse_atom()
880 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++); in parse_atom()
881 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++); in parse_atom()
883 node = tre_ast_new_union(ctx->mem, tmp1, tmp2); in parse_atom()
887 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++); in parse_atom()
893 if (!ere && s != ctx->start) in parse_atom()
895 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1); in parse_atom()
902 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1); in parse_atom()
916 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); in parse_atom()
920 len = mbtowc(&wc, s, -1); in parse_atom()
923 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) { in parse_atom()
926 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position); in parse_atom()
927 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position); in parse_atom()
929 node = tre_ast_new_union(ctx->mem, tmp1, tmp2); in parse_atom()
933 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position); in parse_atom()
935 ctx->position++; in parse_atom()
942 ctx->n = node; in parse_atom()
943 ctx->s = s; in parse_atom()
960 int ere = ctx->cflags & REG_EXTENDED; in tre_parse()
961 const char *s = ctx->start; in tre_parse()
965 tre_stack_t *stack = ctx->stack; in tre_parse()
979 ctx->start = s; in tre_parse()
984 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); in tre_parse()
985 if (!ctx->n) in tre_parse()
991 s = ctx->s; in tre_parse()
1013 if (!ere && s==ctx->start+1 && s[-1]=='^') in tre_parse()
1026 max=-1; in tre_parse()
1034 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); in tre_parse()
1036 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0); in tre_parse()
1037 if (!ctx->n) in tre_parse()
1041 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n); in tre_parse()
1051 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch); in tre_parse()
1056 ctx->start = s; in tre_parse()
1059 ctx->start = s; in tre_parse()
1066 depth--; in tre_parse()
1071 ctx->submatch_id = subid; in tre_parse()
1086 from tre-compile.c
1092 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1101 /* Inserts a catenation node to the root of the tree given in `node'.
1112 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1); in tre_add_tag_left()
1113 if (c->left == NULL) in tre_add_tag_left()
1115 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); in tre_add_tag_left()
1116 if (c->right == NULL) in tre_add_tag_left()
1119 c->right->obj = node->obj; in tre_add_tag_left()
1120 c->right->type = node->type; in tre_add_tag_left()
1121 c->right->nullable = -1; in tre_add_tag_left()
1122 c->right->submatch_id = -1; in tre_add_tag_left()
1123 c->right->firstpos = NULL; in tre_add_tag_left()
1124 c->right->lastpos = NULL; in tre_add_tag_left()
1125 c->right->num_tags = 0; in tre_add_tag_left()
1126 c->right->num_submatches = 0; in tre_add_tag_left()
1127 node->obj = c; in tre_add_tag_left()
1128 node->type = CATENATION; in tre_add_tag_left()
1132 /* Inserts a catenation node to the root of the tree given in `node'.
1143 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1); in tre_add_tag_right()
1144 if (c->right == NULL) in tre_add_tag_right()
1146 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); in tre_add_tag_right()
1147 if (c->left == NULL) in tre_add_tag_right()
1150 c->left->obj = node->obj; in tre_add_tag_right()
1151 c->left->type = node->type; in tre_add_tag_right()
1152 c->left->nullable = -1; in tre_add_tag_right()
1153 c->left->submatch_id = -1; in tre_add_tag_right()
1154 c->left->firstpos = NULL; in tre_add_tag_right()
1155 c->left->lastpos = NULL; in tre_add_tag_right()
1156 c->left->num_tags = 0; in tre_add_tag_right()
1157 c->left->num_submatches = 0; in tre_add_tag_right()
1158 node->obj = c; in tre_add_tag_right()
1159 node->type = CATENATION; in tre_add_tag_right()
1192 tnfa->submatch_data[id].so_tag = tag; in tre_purge_regset()
1194 tnfa->submatch_data[id].eo_tag = tag; in tre_purge_regset()
1196 regset[0] = -1; in tre_purge_regset()
1200 /* Adds tags to appropriate locations in the parse tree in `tree', so that
1203 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree, in tre_add_tags() argument
1208 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */ in tre_add_tags()
1219 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */ in tre_add_tags()
1225 tnfa->end_tag = 0; in tre_add_tags()
1226 tnfa->minimal_tags[0] = -1; in tre_add_tags()
1229 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2)); in tre_add_tags()
1232 regset[0] = -1; in tre_add_tags()
1235 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1)); in tre_add_tags()
1241 parents[0] = -1; in tre_add_tags()
1243 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1)); in tre_add_tags()
1253 for (i = 0; i <= tnfa->num_submatches; i++) in tre_add_tags()
1254 saved_states[i].tag = -1; in tre_add_tags()
1277 regset[i + 1] = -1; in tre_add_tags()
1281 parents[i - 1] = -1; in tre_add_tags()
1288 if (node->submatch_id >= 0) in tre_add_tags()
1290 int id = node->submatch_id; in tre_add_tags()
1297 regset[i + 1] = -1; in tre_add_tags()
1302 tnfa->submatch_data[id].parents = NULL; in tre_add_tags()
1311 assert(tnfa->submatch_data[id].parents == NULL); in tre_add_tags()
1312 tnfa->submatch_data[id].parents = p; in tre_add_tags()
1315 p[i] = -1; in tre_add_tags()
1321 STACK_PUSHX(stack, int, node->submatch_id); in tre_add_tags()
1325 switch (node->type) in tre_add_tags()
1329 tre_literal_t *lit = node->obj; in tre_add_tags()
1341 tnfa->tag_directions[tag] = direction; in tre_add_tags()
1344 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); in tre_add_tags()
1345 tnfa->minimal_tags[i] = tag; in tre_add_tags()
1346 tnfa->minimal_tags[i + 1] = minimal_tag; in tre_add_tags()
1347 tnfa->minimal_tags[i + 2] = -1; in tre_add_tags()
1348 minimal_tag = -1; in tre_add_tags()
1355 node->num_tags = 1; in tre_add_tags()
1358 regset[0] = -1; in tre_add_tags()
1372 tre_catenation_t *cat = node->obj; in tre_add_tags()
1373 tre_ast_node_t *left = cat->left; in tre_add_tags()
1374 tre_ast_node_t *right = cat->right; in tre_add_tags()
1375 int reserved_tag = -1; in tre_add_tags()
1387 STACK_PUSHX(stack, int, next_tag + left->num_tags); in tre_add_tags()
1388 if (left->num_tags > 0 && right->num_tags > 0) in tre_add_tags()
1405 tre_iteration_t *iter = node->obj; in tre_add_tags()
1409 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal); in tre_add_tags()
1414 STACK_PUSHX(stack, int, iter->minimal); in tre_add_tags()
1419 STACK_PUSHX(stack, voidptr, iter->arg); in tre_add_tags()
1423 if (regset[0] >= 0 || iter->minimal) in tre_add_tags()
1429 if (iter->minimal) in tre_add_tags()
1430 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE; in tre_add_tags()
1432 tnfa->tag_directions[tag] = direction; in tre_add_tags()
1435 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); in tre_add_tags()
1436 tnfa->minimal_tags[i] = tag; in tre_add_tags()
1437 tnfa->minimal_tags[i + 1] = minimal_tag; in tre_add_tags()
1438 tnfa->minimal_tags[i + 2] = -1; in tre_add_tags()
1439 minimal_tag = -1; in tre_add_tags()
1445 regset[0] = -1; in tre_add_tags()
1455 tre_union_t *uni = node->obj; in tre_add_tags()
1456 tre_ast_node_t *left = uni->left; in tre_add_tags()
1457 tre_ast_node_t *right = uni->right; in tre_add_tags()
1500 tnfa->tag_directions[tag] = direction; in tre_add_tags()
1503 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); in tre_add_tags()
1504 tnfa->minimal_tags[i] = tag; in tre_add_tags()
1505 tnfa->minimal_tags[i + 1] = minimal_tag; in tre_add_tags()
1506 tnfa->minimal_tags[i + 2] = -1; in tre_add_tags()
1507 minimal_tag = -1; in tre_add_tags()
1513 regset[0] = -1; in tre_add_tags()
1519 if (node->num_submatches > 0) in tre_add_tags()
1531 if (node->submatch_id >= 0) in tre_add_tags()
1536 parents[i] = node->submatch_id; in tre_add_tags()
1537 parents[i + 1] = -1; in tre_add_tags()
1549 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags in tre_add_tags()
1551 minimal_tag = -1; in tre_add_tags()
1585 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags in tre_add_tags()
1586 + ((tre_catenation_t *)node->obj)->right->num_tags; in tre_add_tags()
1607 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags in tre_add_tags()
1608 + ((tre_union_t *)node->obj)->right->num_tags + added_tags in tre_add_tags()
1609 + ((node->num_submatches > 0) ? 2 : 0); in tre_add_tags()
1618 /* XXX - This is not always necessary (if the children have in tre_add_tags()
1620 /* XXX - Check if this is the only place where tre_add_tag_right in tre_add_tags()
1624 if (node->num_submatches > 0) in tre_add_tags()
1629 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE; in tre_add_tags()
1632 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE; in tre_add_tags()
1653 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); in tre_add_tags()
1654 tnfa->minimal_tags[i] = tag; in tre_add_tags()
1655 tnfa->minimal_tags[i + 1] = minimal_tag; in tre_add_tags()
1656 tnfa->minimal_tags[i + 2] = -1; in tre_add_tags()
1657 minimal_tag = -1; in tre_add_tags()
1661 assert(tree->num_tags == num_tags); in tre_add_tags()
1662 tnfa->end_tag = num_tags; in tre_add_tags()
1663 tnfa->num_tags = num_tags; in tre_add_tags()
1664 tnfa->num_minimals = num_minimals; in tre_add_tags()
1715 switch (node->type) in tre_copy_ast()
1719 tre_literal_t *lit = node->obj; in tre_copy_ast()
1720 int pos = lit->position; in tre_copy_ast()
1721 int min = lit->code_min; in tre_copy_ast()
1722 int max = lit->code_max; in tre_copy_ast()
1725 /* XXX - e.g. [ab] has only one position but two in tre_copy_ast()
1735 max = pos = -1; in tre_copy_ast()
1748 tre_literal_t *p = (*result)->obj; in tre_copy_ast()
1749 p->class = lit->class; in tre_copy_ast()
1750 p->neg_classes = lit->neg_classes; in tre_copy_ast()
1759 tre_union_t *uni = node->obj; in tre_copy_ast()
1761 *result = tre_ast_new_union(mem, uni->left, uni->right); in tre_copy_ast()
1767 tmp = (*result)->obj; in tre_copy_ast()
1768 result = &tmp->left; in tre_copy_ast()
1769 STACK_PUSHX(stack, voidptr, uni->right); in tre_copy_ast()
1771 STACK_PUSHX(stack, voidptr, &tmp->right); in tre_copy_ast()
1773 STACK_PUSHX(stack, voidptr, uni->left); in tre_copy_ast()
1779 tre_catenation_t *cat = node->obj; in tre_copy_ast()
1781 *result = tre_ast_new_catenation(mem, cat->left, cat->right); in tre_copy_ast()
1787 tmp = (*result)->obj; in tre_copy_ast()
1788 tmp->left = NULL; in tre_copy_ast()
1789 tmp->right = NULL; in tre_copy_ast()
1790 result = &tmp->left; in tre_copy_ast()
1792 STACK_PUSHX(stack, voidptr, cat->right); in tre_copy_ast()
1794 STACK_PUSHX(stack, voidptr, &tmp->right); in tre_copy_ast()
1796 STACK_PUSHX(stack, voidptr, cat->left); in tre_copy_ast()
1802 tre_iteration_t *iter = node->obj; in tre_copy_ast()
1803 STACK_PUSHX(stack, voidptr, iter->arg); in tre_copy_ast()
1805 *result = tre_ast_new_iter(mem, iter->arg, iter->min, in tre_copy_ast()
1806 iter->max, iter->minimal); in tre_copy_ast()
1812 iter = (*result)->obj; in tre_copy_ast()
1813 result = &iter->arg; in tre_copy_ast()
1860 switch (node->type) in tre_expand_ast()
1864 tre_literal_t *lit= node->obj; in tre_expand_ast()
1867 lit->position += pos_add; in tre_expand_ast()
1868 if (lit->position > max_pos) in tre_expand_ast()
1869 max_pos = lit->position; in tre_expand_ast()
1875 tre_union_t *uni = node->obj; in tre_expand_ast()
1876 STACK_PUSHX(stack, voidptr, uni->right); in tre_expand_ast()
1878 STACK_PUSHX(stack, voidptr, uni->left); in tre_expand_ast()
1884 tre_catenation_t *cat = node->obj; in tre_expand_ast()
1885 STACK_PUSHX(stack, voidptr, cat->right); in tre_expand_ast()
1887 STACK_PUSHX(stack, voidptr, cat->left); in tre_expand_ast()
1893 tre_iteration_t *iter = node->obj; in tre_expand_ast()
1897 STACK_PUSHX(stack, voidptr, iter->arg); in tre_expand_ast()
1902 if (iter->min > 1 || iter->max > 1) in tre_expand_ast()
1914 tre_iteration_t *iter = node->obj; in tre_expand_ast()
1918 if (iter->min > 1 || iter->max > 1) in tre_expand_ast()
1925 for (j = 0; j < iter->min; j++) in tre_expand_ast()
1929 int flags = ((j + 1 < iter->min) in tre_expand_ast()
1933 status = tre_copy_ast(mem, stack, iter->arg, flags, in tre_expand_ast()
1946 if (iter->max == -1) in tre_expand_ast()
1950 status = tre_copy_ast(mem, stack, iter->arg, 0, in tre_expand_ast()
1954 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0); in tre_expand_ast()
1960 for (j = iter->min; j < iter->max; j++) in tre_expand_ast()
1964 status = tre_copy_ast(mem, stack, iter->arg, 0, in tre_expand_ast()
1974 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1); in tre_expand_ast()
1990 node->obj = seq1->obj; in tre_expand_ast()
1991 node->type = seq1->type; in tre_expand_ast()
1994 iter_depth--; in tre_expand_ast()
1995 pos_add_total += pos_add - pos_add_last; in tre_expand_ast()
2028 new_set[0].position = -1; in tre_set_empty()
2029 new_set[0].code_min = -1; in tre_set_empty()
2030 new_set[0].code_max = -1; in tre_set_empty()
2051 new_set[1].position = -1; in tre_set_one()
2052 new_set[1].code_min = -1; in tre_set_one()
2053 new_set[1].code_max = -1; in tre_set_one()
2096 new_tags[j + i] = -1; in tre_set_union()
2106 /* XXX - why not | assertions here as well? */ in tre_set_union()
2121 new_tags[j] = -1; in tre_set_union()
2125 new_set[s1 + s2].position = -1; in tre_set_union()
2149 /* Walk through the tree recursively. */ in tre_match_empty()
2154 switch (node->type) in tre_match_empty()
2157 lit = (tre_literal_t *)node->obj; in tre_match_empty()
2158 switch (lit->code_min) in tre_match_empty()
2161 if (lit->code_max >= 0) in tre_match_empty()
2167 if (tags[i] == lit->code_max) in tre_match_empty()
2171 tags[i] = lit->code_max; in tre_match_empty()
2172 tags[i + 1] = -1; in tre_match_empty()
2180 assert(lit->code_max >= 1 in tre_match_empty()
2181 || lit->code_max <= ASSERT_LAST); in tre_match_empty()
2183 *assertions |= lit->code_max; in tre_match_empty()
2197 uni = (tre_union_t *)node->obj; in tre_match_empty()
2198 if (uni->left->nullable) in tre_match_empty()
2199 STACK_PUSHX(stack, voidptr, uni->left) in tre_match_empty()
2200 else if (uni->right->nullable) in tre_match_empty()
2201 STACK_PUSHX(stack, voidptr, uni->right) in tre_match_empty()
2208 cat = (tre_catenation_t *)node->obj; in tre_match_empty()
2209 assert(cat->left->nullable); in tre_match_empty()
2210 assert(cat->right->nullable); in tre_match_empty()
2211 STACK_PUSHX(stack, voidptr, cat->left); in tre_match_empty()
2212 STACK_PUSHX(stack, voidptr, cat->right); in tre_match_empty()
2218 iter = (tre_iteration_t *)node->obj; in tre_match_empty()
2219 if (iter->arg->nullable) in tre_match_empty()
2220 STACK_PUSHX(stack, voidptr, iter->arg); in tre_match_empty()
2242 the nodes of the AST `tree'. */
2244 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree) in tre_compute_nfl() argument
2248 STACK_PUSHR(stack, voidptr, tree); in tre_compute_nfl()
2261 switch (node->type) in tre_compute_nfl()
2265 tre_literal_t *lit = (tre_literal_t *)node->obj; in tre_compute_nfl()
2270 node->nullable = 0; in tre_compute_nfl()
2271 node->firstpos = tre_set_one(mem, lit->position, 0, in tre_compute_nfl()
2272 TRE_CHAR_MAX, 0, NULL, -1); in tre_compute_nfl()
2273 if (!node->firstpos) in tre_compute_nfl()
2275 node->lastpos = tre_set_one(mem, lit->position, 0, in tre_compute_nfl()
2277 (int)lit->code_max); in tre_compute_nfl()
2278 if (!node->lastpos) in tre_compute_nfl()
2281 else if (lit->code_min < 0) in tre_compute_nfl()
2285 node->nullable = 1; in tre_compute_nfl()
2286 node->firstpos = tre_set_empty(mem); in tre_compute_nfl()
2287 if (!node->firstpos) in tre_compute_nfl()
2289 node->lastpos = tre_set_empty(mem); in tre_compute_nfl()
2290 if (!node->lastpos) in tre_compute_nfl()
2297 node->nullable = 0; in tre_compute_nfl()
2298 node->firstpos = in tre_compute_nfl()
2299 tre_set_one(mem, lit->position, (int)lit->code_min, in tre_compute_nfl()
2300 (int)lit->code_max, 0, NULL, -1); in tre_compute_nfl()
2301 if (!node->firstpos) in tre_compute_nfl()
2303 node->lastpos = tre_set_one(mem, lit->position, in tre_compute_nfl()
2304 (int)lit->code_min, in tre_compute_nfl()
2305 (int)lit->code_max, in tre_compute_nfl()
2306 lit->class, lit->neg_classes, in tre_compute_nfl()
2307 -1); in tre_compute_nfl()
2308 if (!node->lastpos) in tre_compute_nfl()
2319 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right); in tre_compute_nfl()
2321 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left); in tre_compute_nfl()
2330 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right); in tre_compute_nfl()
2332 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left); in tre_compute_nfl()
2341 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg); in tre_compute_nfl()
2349 tre_union_t *uni = (tre_union_t *)node->obj; in tre_compute_nfl()
2350 node->nullable = uni->left->nullable || uni->right->nullable; in tre_compute_nfl()
2351 node->firstpos = tre_set_union(mem, uni->left->firstpos, in tre_compute_nfl()
2352 uni->right->firstpos, NULL, 0); in tre_compute_nfl()
2353 if (!node->firstpos) in tre_compute_nfl()
2355 node->lastpos = tre_set_union(mem, uni->left->lastpos, in tre_compute_nfl()
2356 uni->right->lastpos, NULL, 0); in tre_compute_nfl()
2357 if (!node->lastpos) in tre_compute_nfl()
2364 tre_iteration_t *iter = (tre_iteration_t *)node->obj; in tre_compute_nfl()
2366 if (iter->min == 0 || iter->arg->nullable) in tre_compute_nfl()
2367 node->nullable = 1; in tre_compute_nfl()
2369 node->nullable = 0; in tre_compute_nfl()
2370 node->firstpos = iter->arg->firstpos; in tre_compute_nfl()
2371 node->lastpos = iter->arg->lastpos; in tre_compute_nfl()
2379 tre_catenation_t *cat = node->obj; in tre_compute_nfl()
2380 node->nullable = cat->left->nullable && cat->right->nullable; in tre_compute_nfl()
2383 if (cat->left->nullable) in tre_compute_nfl()
2388 status = tre_match_empty(stack, cat->left, in tre_compute_nfl()
2396 tags[0] = -1; in tre_compute_nfl()
2400 status = tre_match_empty(stack, cat->left, tags, in tre_compute_nfl()
2407 node->firstpos = in tre_compute_nfl()
2408 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos, in tre_compute_nfl()
2411 if (!node->firstpos) in tre_compute_nfl()
2416 node->firstpos = cat->left->firstpos; in tre_compute_nfl()
2420 if (cat->right->nullable) in tre_compute_nfl()
2425 status = tre_match_empty(stack, cat->right, in tre_compute_nfl()
2433 tags[0] = -1; in tre_compute_nfl()
2437 status = tre_match_empty(stack, cat->right, tags, in tre_compute_nfl()
2444 node->lastpos = in tre_compute_nfl()
2445 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos, in tre_compute_nfl()
2448 if (!node->lastpos) in tre_compute_nfl()
2453 node->lastpos = cat->right->lastpos; in tre_compute_nfl()
2479 while (p1->position >= 0) in tre_make_trans()
2482 prev_p2_pos = -1; in tre_make_trans()
2483 while (p2->position >= 0) in tre_make_trans()
2486 if (p2->position == prev_p2_pos) in tre_make_trans()
2491 prev_p2_pos = p2->position; in tre_make_trans()
2493 position `p1->position'. */ in tre_make_trans()
2494 trans = transitions + offs[p1->position]; in tre_make_trans()
2495 while (trans->state != NULL) in tre_make_trans()
2498 /* If we find a previous transition from `p1->position' to in tre_make_trans()
2499 `p2->position', it is overwritten. This can happen only in tre_make_trans()
2505 /* XXX - The same position is used for all nodes in a bracket in tre_make_trans()
2509 if (trans->state_id == p2->position) in tre_make_trans()
2517 if (trans->state == NULL) in tre_make_trans()
2518 (trans + 1)->state = NULL; in tre_make_trans()
2521 trans->code_min = p1->code_min; in tre_make_trans()
2522 trans->code_max = p1->code_max; in tre_make_trans()
2523 trans->state = transitions + offs[p2->position]; in tre_make_trans()
2524 trans->state_id = p2->position; in tre_make_trans()
2525 trans->assertions = p1->assertions | p2->assertions in tre_make_trans()
2526 | (p1->class ? ASSERT_CHAR_CLASS : 0) in tre_make_trans()
2527 | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0); in tre_make_trans()
2528 if (p1->backref >= 0) in tre_make_trans()
2530 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0); in tre_make_trans()
2531 assert(p2->backref < 0); in tre_make_trans()
2532 trans->u.backref = p1->backref; in tre_make_trans()
2533 trans->assertions |= ASSERT_BACKREF; in tre_make_trans()
2536 trans->u.class = p1->class; in tre_make_trans()
2537 if (p1->neg_classes != NULL) in tre_make_trans()
2539 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++); in tre_make_trans()
2540 trans->neg_classes = in tre_make_trans()
2541 xmalloc(sizeof(*trans->neg_classes) * (i + 1)); in tre_make_trans()
2542 if (trans->neg_classes == NULL) in tre_make_trans()
2544 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++) in tre_make_trans()
2545 trans->neg_classes[i] = p1->neg_classes[i]; in tre_make_trans()
2546 trans->neg_classes[i] = (tre_ctype_t)0; in tre_make_trans()
2549 trans->neg_classes = NULL; in tre_make_trans()
2553 if (p1->tags != NULL) in tre_make_trans()
2554 while(p1->tags[i] >= 0) in tre_make_trans()
2557 if (p2->tags != NULL) in tre_make_trans()
2558 while(p2->tags[j] >= 0) in tre_make_trans()
2562 if (trans->tags != NULL) in tre_make_trans()
2563 xfree(trans->tags); in tre_make_trans()
2564 trans->tags = NULL; in tre_make_trans()
2569 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1)); in tre_make_trans()
2570 if (!trans->tags) in tre_make_trans()
2573 if (p1->tags != NULL) in tre_make_trans()
2574 while(p1->tags[i] >= 0) in tre_make_trans()
2576 trans->tags[i] = p1->tags[i]; in tre_make_trans()
2581 if (p2->tags != NULL) in tre_make_trans()
2582 while (p2->tags[j] >= 0) in tre_make_trans()
2587 if (trans->tags[k] == p2->tags[j]) in tre_make_trans()
2593 trans->tags[l++] = p2->tags[j]; in tre_make_trans()
2596 trans->tags[l] = -1; in tre_make_trans()
2606 while (p1->position >= 0) in tre_make_trans()
2609 while (p2->position >= 0) in tre_make_trans()
2611 counts[p1->position]++; in tre_make_trans()
2619 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2632 /* XXX - recurse using a stack!. */ in tre_ast_to_tnfa()
2633 switch (node->type) in tre_ast_to_tnfa()
2638 uni = (tre_union_t *)node->obj; in tre_ast_to_tnfa()
2639 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs); in tre_ast_to_tnfa()
2642 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs); in tre_ast_to_tnfa()
2646 cat = (tre_catenation_t *)node->obj; in tre_ast_to_tnfa()
2647 /* Add a transition from each position in cat->left->lastpos in tre_ast_to_tnfa()
2648 to each position in cat->right->firstpos. */ in tre_ast_to_tnfa()
2649 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos, in tre_ast_to_tnfa()
2653 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs); in tre_ast_to_tnfa()
2656 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs); in tre_ast_to_tnfa()
2660 iter = (tre_iteration_t *)node->obj; in tre_ast_to_tnfa()
2661 assert(iter->max == -1 || iter->max == 1); in tre_ast_to_tnfa()
2663 if (iter->max == -1) in tre_ast_to_tnfa()
2665 assert(iter->min == 0 || iter->min == 1); in tre_ast_to_tnfa()
2668 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos, in tre_ast_to_tnfa()
2673 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs); in tre_ast_to_tnfa()
2694 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r; in regcomp() local
2727 parse_ctx.max_backref = -1; in regcomp()
2731 preg->re_nsub = parse_ctx.submatch_id - 1; in regcomp()
2732 tree = parse_ctx.n; in regcomp()
2735 tre_ast_print(tree); in regcomp()
2739 if (parse_ctx.max_backref > (int)preg->re_nsub) in regcomp()
2746 tnfa->have_backrefs = parse_ctx.max_backref >= 0; in regcomp()
2747 tnfa->have_approx = 0; in regcomp()
2748 tnfa->num_submatches = parse_ctx.submatch_id; in regcomp()
2752 if (tnfa->have_backrefs || !(cflags & REG_NOSUB)) in regcomp()
2756 errcode = tre_add_tags(NULL, stack, tree, tnfa); in regcomp()
2760 if (tnfa->num_tags > 0) in regcomp()
2763 * (tnfa->num_tags + 1)); in regcomp()
2766 tnfa->tag_directions = tag_directions; in regcomp()
2767 memset(tag_directions, -1, in regcomp()
2768 sizeof(*tag_directions) * (tnfa->num_tags + 1)); in regcomp()
2770 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1, in regcomp()
2771 sizeof(*tnfa->minimal_tags)); in regcomp()
2772 if (tnfa->minimal_tags == NULL) in regcomp()
2779 tnfa->submatch_data = submatch_data; in regcomp()
2781 errcode = tre_add_tags(mem, stack, tree, tnfa); in regcomp()
2788 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position, in regcomp()
2794 XXX - For certain patterns this dummy node can be optimized away, in regcomp()
2797 tmp_ast_l = tree; in regcomp()
2802 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r); in regcomp()
2803 if (tree == NULL) in regcomp()
2806 errcode = tre_compute_nfl(mem, stack, tree); in regcomp()
2820 tre_ast_to_tnfa(tree, NULL, counts, NULL); in regcomp()
2832 tnfa->transitions = transitions; in regcomp()
2833 tnfa->num_transitions = add; in regcomp()
2835 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs); in regcomp()
2839 tnfa->firstpos_chars = NULL; in regcomp()
2841 p = tree->firstpos; in regcomp()
2843 while (p->position >= 0) in regcomp()
2852 tnfa->initial = initial; in regcomp()
2855 for (p = tree->firstpos; p->position >= 0; p++) in regcomp()
2857 initial[i].state = transitions + offs[p->position]; in regcomp()
2858 initial[i].state_id = p->position; in regcomp()
2860 /* Copy the arrays p->tags, and p->params, they are allocated in regcomp()
2862 if (p->tags) in regcomp()
2865 for (j = 0; p->tags[j] >= 0; j++); in regcomp()
2866 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1)); in regcomp()
2869 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1)); in regcomp()
2871 initial[i].assertions = p->assertions; in regcomp()
2876 tnfa->num_transitions = add; in regcomp()
2877 tnfa->final = transitions + offs[tree->lastpos[0].position]; in regcomp()
2878 tnfa->num_states = parse_ctx.position; in regcomp()
2879 tnfa->cflags = cflags; in regcomp()
2886 preg->TRE_REGEX_T_FIELD = (void *)tnfa; in regcomp()
2898 preg->TRE_REGEX_T_FIELD = (void *)tnfa; in regcomp()
2913 tnfa = (void *)preg->TRE_REGEX_T_FIELD; in regfree()
2917 for (i = 0; i < tnfa->num_transitions; i++) in regfree()
2918 if (tnfa->transitions[i].state) in regfree()
2920 if (tnfa->transitions[i].tags) in regfree()
2921 xfree(tnfa->transitions[i].tags); in regfree()
2922 if (tnfa->transitions[i].neg_classes) in regfree()
2923 xfree(tnfa->transitions[i].neg_classes); in regfree()
2925 if (tnfa->transitions) in regfree()
2926 xfree(tnfa->transitions); in regfree()
2928 if (tnfa->initial) in regfree()
2930 for (trans = tnfa->initial; trans->state; trans++) in regfree()
2932 if (trans->tags) in regfree()
2933 xfree(trans->tags); in regfree()
2935 xfree(tnfa->initial); in regfree()
2938 if (tnfa->submatch_data) in regfree()
2940 for (i = 0; i < tnfa->num_submatches; i++) in regfree()
2941 if (tnfa->submatch_data[i].parents) in regfree()
2942 xfree(tnfa->submatch_data[i].parents); in regfree()
2943 xfree(tnfa->submatch_data); in regfree()
2946 if (tnfa->tag_directions) in regfree()
2947 xfree(tnfa->tag_directions); in regfree()
2948 if (tnfa->firstpos_chars) in regfree()
2949 xfree(tnfa->firstpos_chars); in regfree()
2950 if (tnfa->minimal_tags) in regfree()
2951 xfree(tnfa->minimal_tags); in regfree()