• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Secret Labs' Regular Expression Engine
3  *
4  * regular expression matching engine
5  *
6  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7  *
8  * See the sre.c file for information on usage and redistribution.
9  */
10 
11 #ifndef SRE_INCLUDED
12 #define SRE_INCLUDED
13 
14 #include "sre_constants.h"
15 
/* SRE_CODE is the type of a single code word.  It has to be at least as
   wide as unsigned short and wide enough to store any UCS4 character. */
#define SRE_CODE Py_UCS4

/* Limits on repeat counts and group counts; the definitions chosen depend
   on the width of size_t. */
#if SIZEOF_SIZE_T <= 4
/* size_t is at most 4 bytes: both limits are derived from PY_SSIZE_T_MAX */
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_VOID_P / 2)
#else
/* size_t is wider than 4 bytes: SRE_MAXREPEAT is the all-ones SRE_CODE
   value and SRE_MAXGROUPS is half of INT32_MAX */
# define SRE_MAXREPEAT (~(SRE_CODE)0)
# define SRE_MAXGROUPS ((SRE_CODE)INT32_MAX / 2)
#endif
26 
27 typedef struct {
28     PyObject_VAR_HEAD
29     Py_ssize_t groups; /* must be first! */
30     PyObject* groupindex; /* dict */
31     PyObject* indexgroup; /* tuple */
32     /* compatibility */
33     PyObject* pattern; /* pattern source (or None) */
34     int flags; /* flags used when compiling pattern source */
35     PyObject *weakreflist; /* List of weak references */
36     int isbytes; /* pattern type (1 - bytes, 0 - string, -1 - None) */
37 #ifdef Py_DEBUG
38     /* for simulation of user interruption */
39     int fail_after_count;
40     PyObject *fail_after_exc;
41 #endif
42     /* pattern code */
43     Py_ssize_t codesize;
44     SRE_CODE code[1];
45 } PatternObject;
46 
47 #define PatternObject_GetCode(o) (((PatternObject*)(o))->code)
48 
49 typedef struct {
50     PyObject_VAR_HEAD
51     PyObject* string; /* link to the target string (must be first) */
52     PyObject* regs; /* cached list of matching spans */
53     PatternObject* pattern; /* link to the regex (pattern) object */
54     Py_ssize_t pos, endpos; /* current target slice */
55     Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */
56     Py_ssize_t groups; /* number of groups (start/end marks) */
57     Py_ssize_t mark[1];
58 } MatchObject;
59 
60 typedef struct {
61     PyObject_VAR_HEAD
62     Py_ssize_t chunks;  /* the number of group references and non-NULL literals
63                          * self->chunks <= 2*Py_SIZE(self) + 1 */
64     PyObject *literal;
65     struct {
66         Py_ssize_t index;
67         PyObject *literal;  /* NULL if empty */
68     } items[0];
69 } TemplateObject;
70 
71 typedef struct SRE_REPEAT_T {
72     Py_ssize_t count;
73     const SRE_CODE* pattern; /* points to REPEAT operator arguments */
74     const void* last_ptr; /* helper to check for infinite loops */
75     struct SRE_REPEAT_T *prev; /* points to previous repeat context */
76     /* for SRE_REPEAT pool */
77     struct SRE_REPEAT_T *pool_prev;
78     struct SRE_REPEAT_T *pool_next;
79 } SRE_REPEAT;
80 
81 typedef struct {
82     /* string pointers */
83     const void* ptr; /* current position (also end of current slice) */
84     const void* beginning; /* start of original string */
85     const void* start; /* start of current slice */
86     const void* end; /* end of original string */
87     /* attributes for the match object */
88     PyObject* string;
89     Py_buffer buffer;
90     Py_ssize_t pos, endpos;
91     int isbytes;
92     int charsize; /* character size */
93     int match_all;
94     int must_advance;
95     int debug;
96     /* marks */
97     int lastmark;
98     int lastindex;
99     const void** mark;
100     /* dynamically allocated stuff */
101     char* data_stack;
102     size_t data_stack_size;
103     size_t data_stack_base;
104     /* current repeat context */
105     SRE_REPEAT *repeat;
106     /* SRE_REPEAT pool */
107     SRE_REPEAT *repeat_pool_used;
108     SRE_REPEAT *repeat_pool_unused;
109     unsigned int sigcount;
110 #ifdef Py_DEBUG
111     int fail_after_count;
112     PyObject *fail_after_exc;
113 #endif
114 } SRE_STATE;
115 
116 typedef struct {
117     PyObject_HEAD
118     PatternObject* pattern;
119     SRE_STATE state;
120     int executing;
121 } ScannerObject;
122 
123 #endif
124