#include "Python.h"
#include "errcode.h"

#include "helpers.h"
#include "../lexer/lexer.h"
#include "../lexer/state.h"
#include "../lexer/buffer.h"

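/* Read one line from the user-supplied readline() callable and append it,
   as UTF-8, to the tokenizer buffer. Returns 1 on success or at EOF
   (StopIteration), 0 on error. */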
static int
tok_readline_string(struct tok_state* tok) {
    PyObject* line = NULL;
    PyObject* raw_line = PyObject_CallNoArgs(tok->readline);
    if (raw_line == NULL) {
        if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
            PyErr_Clear();
            return 1;
        }
        _PyTokenizer_error_ret(tok);
        goto error;
    }
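    /* With an explicit encoding, readline() must yield bytes that we decode
       here; without one, it must yield str objects directly. */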
    if (tok->encoding != NULL) {
        if (!PyBytes_Check(raw_line)) {
            PyErr_Format(PyExc_TypeError, "readline() returned a non-bytes object");
            _PyTokenizer_error_ret(tok);
            goto error;
        }
        line = PyUnicode_Decode(PyBytes_AS_STRING(raw_line), PyBytes_GET_SIZE(raw_line),
                                tok->encoding, "replace");
        Py_CLEAR(raw_line);
        if (line == NULL) {
            _PyTokenizer_error_ret(tok);
            goto error;
        }
    } else {
        if (!PyUnicode_Check(raw_line)) {
            PyErr_Format(PyExc_TypeError, "readline() returned a non-string object");
            _PyTokenizer_error_ret(tok);
            goto error;
        }
        line = raw_line;
        raw_line = NULL;
    }
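    /* The tokenizer buffer always holds UTF-8, so take a UTF-8 view of the
       decoded line before copying it in. */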
    Py_ssize_t buflen;
    const char* buf = PyUnicode_AsUTF8AndSize(line, &buflen);
    if (buf == NULL) {
        _PyTokenizer_error_ret(tok);
        goto error;
    }

    // Make room for the null terminator *and* potentially
    // an extra newline character that we may need to artificially
    // add.
    size_t buffer_size = buflen + 2;
    if (!_PyLexer_tok_reserve_buf(tok, buffer_size)) {
        goto error;
    }
    memcpy(tok->inp, buf, buflen);
    tok->inp += buflen;
    *tok->inp = '\0';

    tok->line_start = tok->cur;
    Py_DECREF(line);
    return 1;
error:
    Py_XDECREF(raw_line);
    Py_XDECREF(line);
    return 0;
}

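/* Buffer-underflow handler for readline-based tokenization: refill the
   buffer with the next line, append an implicit newline if the input does
   not end in one, and advance the line counter. Returns 1 while more input
   is available, 0 on error or at EOF (setting tok->done to E_EOF). */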
static int
tok_underflow_readline(struct tok_state* tok) {
    assert(tok->decoding_state == STATE_NORMAL);
    assert(tok->fp == NULL && tok->input == NULL && tok->decoding_readline == NULL);
    if (tok->start == NULL && !INSIDE_FSTRING(tok)) {
        tok->cur = tok->inp = tok->buf;
    }
    if (!tok_readline_string(tok)) {
        return 0;
    }
    if (tok->inp == tok->cur) {
        tok->done = E_EOF;
        return 0;
    }
    tok->implicit_newline = 0;
    if (tok->inp[-1] != '\n') {
        assert(tok->inp + 1 < tok->end);
        /* Last line does not end in \n, fake one */
        *tok->inp++ = '\n';
        *tok->inp = '\0';
        tok->implicit_newline = 1;
    }

    if (tok->tok_mode_stack_index && !_PyLexer_update_fstring_expr(tok, 0)) {
        return 0;
    }

    ADVANCE_LINENO();
    /* The default encoding is UTF-8, so make sure we don't have any
       non-UTF-8 sequences in it. */
    if (!tok->encoding && !_PyTokenizer_ensure_utf8(tok->cur, tok)) {
        _PyTokenizer_error_ret(tok);
        return 0;
    }
    assert(tok->done == E_OK);
    return tok->done == E_OK;
}

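/* Create a tok_state whose input is pulled from a Python readline()
   callable instead of a file or an in-memory string. If `enc` is given,
   readline() is expected to return bytes in that encoding; otherwise it
   must return str objects. */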
struct tok_state *
_PyTokenizer_FromReadline(PyObject* readline, const char* enc,
                          int exec_input, int preserve_crlf)
{
    struct tok_state *tok = _PyTokenizer_tok_new();
    if (tok == NULL)
        return NULL;
    if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) {
        _PyTokenizer_Free(tok);
        return NULL;
    }
    tok->cur = tok->inp = tok->buf;
    tok->end = tok->buf + BUFSIZ;
    tok->fp = NULL;
    if (enc != NULL) {
        tok->encoding = _PyTokenizer_new_string(enc, strlen(enc), tok);
        if (!tok->encoding) {
            _PyTokenizer_Free(tok);
            return NULL;
        }
    }
    tok->decoding_state = STATE_NORMAL;
    tok->underflow = &tok_underflow_readline;
    Py_INCREF(readline);
    tok->readline = readline;
    return tok;
}