• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Secret Labs' Regular Expression Engine
3  *
4  * regular expression matching engine
5  *
6  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7  *
8  * See the _sre.c file for information on usage and redistribution.
9  */
10 
11 #ifndef SRE_INCLUDED
12 #define SRE_INCLUDED
13 
14 #include "sre_constants.h"
15 
/*
 * SRE_CODE is the type of a single code word.  It must be unsigned short
 * or larger, and large enough to hold a UCS4 character.
 */
#if defined(Py_USING_UNICODE)
#define SRE_CODE Py_UCS4
#else
#define SRE_CODE unsigned int
#endif

/*
 * SRE_MAXREPEAT is an all-ones SRE_CODE value when SIZEOF_SIZE_T exceeds
 * 4 (Unicode builds) or SIZEOF_INT (non-Unicode builds); otherwise it is
 * PY_SSIZE_T_MAX converted to SRE_CODE.
 */
#if (defined(Py_USING_UNICODE) && SIZEOF_SIZE_T > 4) || \
    (!defined(Py_USING_UNICODE) && SIZEOF_SIZE_T > SIZEOF_INT)
#define SRE_MAXREPEAT (~(SRE_CODE)0)
#else
#define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
#endif
33 
34 typedef struct {
35     PyObject_VAR_HEAD
36     Py_ssize_t groups; /* must be first! */
37     PyObject* groupindex;
38     PyObject* indexgroup;
39     /* compatibility */
40     PyObject* pattern; /* pattern source (or None) */
41     int flags; /* flags used when compiling pattern source */
42     PyObject *weakreflist; /* List of weak references */
43     /* pattern code */
44     Py_ssize_t codesize;
45     SRE_CODE code[1];
46 } PatternObject;
47 
48 #define PatternObject_GetCode(o) (((PatternObject*)(o))->code)
49 
50 typedef struct {
51     PyObject_VAR_HEAD
52     PyObject* string; /* link to the target string (must be first) */
53     PyObject* regs; /* cached list of matching spans */
54     PatternObject* pattern; /* link to the regex (pattern) object */
55     Py_ssize_t pos, endpos; /* current target slice */
56     Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */
57     Py_ssize_t groups; /* number of groups (start/end marks) */
58     Py_ssize_t mark[1];
59 } MatchObject;
60 
/*
 * Type of the hook stored in SRE_STATE.lower: a function taking one
 * unsigned character code and returning an unsigned value (presumably the
 * lowercase form -- confirm against the hook implementations).
 */
typedef unsigned (*SRE_TOLOWER_HOOK)(unsigned ch);

/*
 * Number of slots in the SRE_STATE mark[] array.
 * FIXME: <fl> shouldn't be a constant, really...
 */
#define SRE_MARK_SIZE 200
65 
66 typedef struct SRE_REPEAT_T {
67     Py_ssize_t count;
68     SRE_CODE* pattern; /* points to REPEAT operator arguments */
69     void* last_ptr; /* helper to check for infinite loops */
70     struct SRE_REPEAT_T *prev; /* points to previous repeat context */
71 } SRE_REPEAT;
72 
73 typedef struct {
74     /* string pointers */
75     void* ptr; /* current position (also end of current slice) */
76     void* beginning; /* start of original string */
77     void* start; /* start of current slice */
78     void* end; /* end of original string */
79     /* attributes for the match object */
80     PyObject* string;
81     Py_ssize_t pos, endpos;
82     /* character size */
83     int charsize;
84     /* registers */
85     Py_ssize_t lastindex;
86     Py_ssize_t lastmark;
87     void* mark[SRE_MARK_SIZE];
88     /* dynamically allocated stuff */
89     char* data_stack;
90     size_t data_stack_size;
91     size_t data_stack_base;
92     /* current repeat context */
93     SRE_REPEAT *repeat;
94     /* hooks */
95     SRE_TOLOWER_HOOK lower;
96 } SRE_STATE;
97 
98 typedef struct {
99     PyObject_HEAD
100     PyObject* pattern;
101     SRE_STATE state;
102 } ScannerObject;
103 
104 #endif
105