#include "Python.h"
#include "errcode.h"

#include "helpers.h"
#include "../lexer/state.h"

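/* Make the next line of the string buffer available to TOK.
   Returns 0 and sets E_EOF once the input is exhausted. */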
static int
tok_underflow_string(struct tok_state *tok) {
    char *end = strchr(tok->inp, '\n');
    if (end != NULL) {
        end++;
    }
    else {
        end = strchr(tok->inp, '\0');
        if (end == tok->inp) {
            tok->done = E_EOF;
            return 0;
        }
    }
    if (tok->start == NULL) {
        tok->buf = tok->cur;
    }
    tok->line_start = tok->cur;
    ADVANCE_LINENO();
    tok->inp = end;
    return 1;
}

/* Fetch a byte from TOK, using the string buffer. */
static int
buf_getc(struct tok_state *tok) {
    return Py_CHARMASK(*tok->str++);
}

/* Unfetch a byte from TOK, using the string buffer. */
static void
buf_ungetc(int c, struct tok_state *tok) {
    tok->str--;
    assert(Py_CHARMASK(*tok->str) == c);        /* tok->cur may point to read-only segment */
}

/* Set the readline function for TOK to ENC. For the string-based
   tokenizer, this means to just record the encoding. */
static int
buf_setreadl(struct tok_state *tok, const char* enc) {
    tok->enc = enc;
    return 1;
}

/* Decode a byte string STR for use as the buffer of TOK.
   Look for encoding declarations inside STR, and record them
   inside TOK.  */
static char *
decode_str(const char *input, int single, struct tok_state *tok, int preserve_crlf)
{
    PyObject* utf8 = NULL;
    char *str;
    const char *s;
    const char *newl[2] = {NULL, NULL};
    int lineno = 0;
    tok->input = str = _PyTokenizer_translate_newlines(input, single, preserve_crlf, tok);
    if (str == NULL)
        return NULL;
    tok->enc = NULL;
    tok->str = str;
    if (!_PyTokenizer_check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
        return _PyTokenizer_error_ret(tok);
    str = tok->str;             /* string after BOM if any */
    assert(str);
    if (tok->enc != NULL) {
        utf8 = _PyTokenizer_translate_into_utf8(str, tok->enc);
        if (utf8 == NULL)
            return _PyTokenizer_error_ret(tok);
        str = PyBytes_AsString(utf8);
    }
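    /* Find the first two newlines: only lines 1 and 2 can carry a
       coding declaration. */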
    for (s = str;; s++) {
        if (*s == '\0') break;
        else if (*s == '\n') {
            assert(lineno < 2);
            newl[lineno] = s;
            lineno++;
            if (lineno == 2) break;
        }
    }
    tok->enc = NULL;
    /* need to check line 1 and 2 separately since check_coding_spec
       assumes a single line as input */
    if (newl[0]) {
        if (!_PyTokenizer_check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) {
            return NULL;
        }
        if (tok->enc == NULL && tok->decoding_state != STATE_NORMAL && newl[1]) {
            if (!_PyTokenizer_check_coding_spec(newl[0]+1, newl[1] - newl[0],
                                   tok, buf_setreadl))
                return NULL;
        }
    }
    if (tok->enc != NULL) {
        assert(utf8 == NULL);
        utf8 = _PyTokenizer_translate_into_utf8(str, tok->enc);
        if (utf8 == NULL)
            return _PyTokenizer_error_ret(tok);
        str = PyBytes_AS_STRING(utf8);
    }
    assert(tok->decoding_buffer == NULL);
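    /* str points into the bytes object's internal buffer, so keep a
       reference on the tok_state to keep that memory alive. */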
    tok->decoding_buffer = utf8; /* CAUTION */
    return str;
}

/* Set up tokenizer for string */
struct tok_state *
_PyTokenizer_FromString(const char *str, int exec_input, int preserve_crlf)
{
    struct tok_state *tok = _PyTokenizer_tok_new();
    char *decoded;

    if (tok == NULL)
        return NULL;
    decoded = decode_str(str, exec_input, tok, preserve_crlf);
    if (decoded == NULL) {
        _PyTokenizer_Free(tok);
        return NULL;
    }

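    /* The whole decoded string is the buffer; tok_underflow_string
       advances tok->inp through it one line at a time. */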
    tok->buf = tok->cur = tok->inp = decoded;
    tok->end = decoded;
    tok->underflow = &tok_underflow_string;
    return tok;
}