1 #ifndef Py_TOKENIZER_H 2 #define Py_TOKENIZER_H 3 #ifdef __cplusplus 4 extern "C" { 5 #endif 6 7 #include "object.h" 8 9 /* Tokenizer interface */ 10 11 #include "token.h" /* For token types */ 12 13 #define MAXINDENT 100 /* Max indentation level */ 14 #define MAXLEVEL 200 /* Max parentheses level */ 15 16 enum decoding_state { 17 STATE_INIT, 18 STATE_RAW, 19 STATE_NORMAL /* have a codec associated with input */ 20 }; 21 22 /* Tokenizer state */ 23 struct tok_state { 24 /* Input state; buf <= cur <= inp <= end */ 25 /* NB an entire line is held in the buffer */ 26 char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ 27 char *cur; /* Next character in buffer */ 28 char *inp; /* End of data in buffer */ 29 const char *end; /* End of input buffer if buf != NULL */ 30 const char *start; /* Start of current token if not NULL */ 31 int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ 32 /* NB If done != E_OK, cur must be == inp!!! */ 33 FILE *fp; /* Rest of input; NULL if tokenizing a string */ 34 int tabsize; /* Tab spacing */ 35 int indent; /* Current indentation index */ 36 int indstack[MAXINDENT]; /* Stack of indents */ 37 int atbol; /* Nonzero if at begin of new line */ 38 int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ 39 const char *prompt, *nextprompt; /* For interactive prompting */ 40 int lineno; /* Current line number */ 41 int first_lineno; /* First line of a single line or multi line string 42 expression (cf. issue 16806) */ 43 int level; /* () [] {} Parentheses nesting level */ 44 /* Used to allow free continuations inside them */ 45 char parenstack[MAXLEVEL]; 46 int parenlinenostack[MAXLEVEL]; 47 PyObject *filename; 48 /* Stuff for checking on different tab sizes */ 49 int altindstack[MAXINDENT]; /* Stack of alternate indents */ 50 /* Stuff for PEP 0263 */ 51 enum decoding_state decoding_state; 52 int decoding_erred; /* whether erred in decoding */ 53 int read_coding_spec; /* whether 'coding:...' has been read */ 54 char *encoding; /* Source encoding. */ 55 int cont_line; /* whether we are in a continuation line. */ 56 const char* line_start; /* pointer to start of current line */ 57 const char* multi_line_start; /* pointer to start of first line of 58 a single line or multi line string 59 expression (cf. issue 16806) */ 60 PyObject *decoding_readline; /* open(...).readline */ 61 PyObject *decoding_buffer; 62 const char* enc; /* Encoding for the current str. */ 63 char* str; 64 char* input; /* Tokenizer's newline translated copy of the string. */ 65 66 int type_comments; /* Whether to look for type comments */ 67 68 /* async/await related fields (still needed depending on feature_version) */ 69 int async_hacks; /* =1 if async/await aren't always keywords */ 70 int async_def; /* =1 if tokens are inside an 'async def' body. */ 71 int async_def_indent; /* Indentation level of the outermost 'async def'. */ 72 int async_def_nl; /* =1 if the outermost 'async def' had at least one 73 NEWLINE token after it. */ 74 }; 75 76 extern struct tok_state *PyTokenizer_FromString(const char *, int); 77 extern struct tok_state *PyTokenizer_FromUTF8(const char *, int); 78 extern struct tok_state *PyTokenizer_FromFile(FILE *, const char*, 79 const char *, const char *); 80 extern void PyTokenizer_Free(struct tok_state *); 81 extern int PyTokenizer_Get(struct tok_state *, const char **, const char **); 82 83 #define tok_dump _Py_tok_dump 84 85 #ifdef __cplusplus 86 } 87 #endif 88 #endif /* !Py_TOKENIZER_H */ 89