1 #ifndef Py_TOKENIZER_H 2 #define Py_TOKENIZER_H 3 #ifdef __cplusplus 4 extern "C" { 5 #endif 6 7 #include "object.h" 8 9 /* Tokenizer interface */ 10 11 #include "token.h" /* For token types */ 12 13 #define MAXINDENT 100 /* Max indentation level */ 14 #define MAXLEVEL 200 /* Max parentheses level */ 15 16 enum decoding_state { 17 STATE_INIT, 18 STATE_SEEK_CODING, 19 STATE_NORMAL 20 }; 21 22 enum interactive_underflow_t { 23 /* Normal mode of operation: return a new token when asked in interactie mode */ 24 IUNDERFLOW_NORMAL, 25 /* Forcefully return ENDMARKER when asked for a new token in interactive mode. This 26 * can be used to prevent the tokenizer to prompt the user for new tokens */ 27 IUNDERFLOW_STOP, 28 }; 29 30 /* Tokenizer state */ 31 struct tok_state { 32 /* Input state; buf <= cur <= inp <= end */ 33 /* NB an entire line is held in the buffer */ 34 char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ 35 char *cur; /* Next character in buffer */ 36 char *inp; /* End of data in buffer */ 37 int fp_interactive; /* If the file descriptor is interactive */ 38 char *interactive_src_start; /* The start of the source parsed so far in interactive mode */ 39 char *interactive_src_end; /* The end of the source parsed so far in interactive mode */ 40 const char *end; /* End of input buffer if buf != NULL */ 41 const char *start; /* Start of current token if not NULL */ 42 int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ 43 /* NB If done != E_OK, cur must be == inp!!! */ 44 FILE *fp; /* Rest of input; NULL if tokenizing a string */ 45 int tabsize; /* Tab spacing */ 46 int indent; /* Current indentation index */ 47 int indstack[MAXINDENT]; /* Stack of indents */ 48 int atbol; /* Nonzero if at begin of new line */ 49 int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ 50 const char *prompt, *nextprompt; /* For interactive prompting */ 51 int lineno; /* Current line number */ 52 int first_lineno; /* First line of a single line or multi line string 53 expression (cf. issue 16806) */ 54 int level; /* () [] {} Parentheses nesting level */ 55 /* Used to allow free continuations inside them */ 56 char parenstack[MAXLEVEL]; 57 int parenlinenostack[MAXLEVEL]; 58 int parencolstack[MAXLEVEL]; 59 PyObject *filename; 60 /* Stuff for checking on different tab sizes */ 61 int altindstack[MAXINDENT]; /* Stack of alternate indents */ 62 /* Stuff for PEP 0263 */ 63 enum decoding_state decoding_state; 64 int decoding_erred; /* whether erred in decoding */ 65 char *encoding; /* Source encoding. */ 66 int cont_line; /* whether we are in a continuation line. */ 67 const char* line_start; /* pointer to start of current line */ 68 const char* multi_line_start; /* pointer to start of first line of 69 a single line or multi line string 70 expression (cf. issue 16806) */ 71 PyObject *decoding_readline; /* open(...).readline */ 72 PyObject *decoding_buffer; 73 const char* enc; /* Encoding for the current str. */ 74 char* str; /* Source string being tokenized (if tokenizing from a string)*/ 75 char* input; /* Tokenizer's newline translated copy of the string. */ 76 77 int type_comments; /* Whether to look for type comments */ 78 79 /* async/await related fields (still needed depending on feature_version) */ 80 int async_hacks; /* =1 if async/await aren't always keywords */ 81 int async_def; /* =1 if tokens are inside an 'async def' body. */ 82 int async_def_indent; /* Indentation level of the outermost 'async def'. */ 83 int async_def_nl; /* =1 if the outermost 'async def' had at least one 84 NEWLINE token after it. */ 85 /* How to proceed when asked for a new token in interactive mode */ 86 enum interactive_underflow_t interactive_underflow; 87 }; 88 89 extern struct tok_state *PyTokenizer_FromString(const char *, int); 90 extern struct tok_state *PyTokenizer_FromUTF8(const char *, int); 91 extern struct tok_state *PyTokenizer_FromFile(FILE *, const char*, 92 const char *, const char *); 93 extern void PyTokenizer_Free(struct tok_state *); 94 extern int PyTokenizer_Get(struct tok_state *, const char **, const char **); 95 96 #define tok_dump _Py_tok_dump 97 98 #ifdef __cplusplus 99 } 100 #endif 101 #endif /* !Py_TOKENIZER_H */ 102