1 #include "Python.h"
2 #include "errcode.h"
3
4 #include "helpers.h"
5 #include "../lexer/lexer.h"
6 #include "../lexer/state.h"
7 #include "../lexer/buffer.h"
8
9 static int
tok_readline_string(struct tok_state * tok)10 tok_readline_string(struct tok_state* tok) {
11 PyObject* line = NULL;
12 PyObject* raw_line = PyObject_CallNoArgs(tok->readline);
13 if (raw_line == NULL) {
14 if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
15 PyErr_Clear();
16 return 1;
17 }
18 _PyTokenizer_error_ret(tok);
19 goto error;
20 }
21 if(tok->encoding != NULL) {
22 if (!PyBytes_Check(raw_line)) {
23 PyErr_Format(PyExc_TypeError, "readline() returned a non-bytes object");
24 _PyTokenizer_error_ret(tok);
25 goto error;
26 }
27 line = PyUnicode_Decode(PyBytes_AS_STRING(raw_line), PyBytes_GET_SIZE(raw_line),
28 tok->encoding, "replace");
29 Py_CLEAR(raw_line);
30 if (line == NULL) {
31 _PyTokenizer_error_ret(tok);
32 goto error;
33 }
34 } else {
35 if(!PyUnicode_Check(raw_line)) {
36 PyErr_Format(PyExc_TypeError, "readline() returned a non-string object");
37 _PyTokenizer_error_ret(tok);
38 goto error;
39 }
40 line = raw_line;
41 raw_line = NULL;
42 }
43 Py_ssize_t buflen;
44 const char* buf = PyUnicode_AsUTF8AndSize(line, &buflen);
45 if (buf == NULL) {
46 _PyTokenizer_error_ret(tok);
47 goto error;
48 }
49
50 // Make room for the null terminator *and* potentially
51 // an extra newline character that we may need to artificially
52 // add.
53 size_t buffer_size = buflen + 2;
54 if (!_PyLexer_tok_reserve_buf(tok, buffer_size)) {
55 goto error;
56 }
57 memcpy(tok->inp, buf, buflen);
58 tok->inp += buflen;
59 *tok->inp = '\0';
60
61 tok->line_start = tok->cur;
62 Py_DECREF(line);
63 return 1;
64 error:
65 Py_XDECREF(raw_line);
66 Py_XDECREF(line);
67 return 0;
68 }
69
static int
tok_underflow_readline(struct tok_state* tok) {
    /* Buffer-underflow handler for the readline-based tokenizer: refill
       the buffer with one more line from tok->readline.  Returns 1 on
       success, 0 on error or EOF (tok->done is set accordingly). */
    assert(tok->decoding_state == STATE_NORMAL);
    /* This tokenizer is driven purely by tok->readline — no file pointer,
       raw input string, or decoding_readline may be in use. */
    assert(tok->fp == NULL && tok->input == NULL && tok->decoding_readline == NULL);
    /* Unless we are in the middle of a token (tok->start set) or inside
       an f-string, the consumed data is no longer needed and the buffer
       can be rewound and reused from the start. */
    if (tok->start == NULL && !INSIDE_FSTRING(tok)) {
        tok->cur = tok->inp = tok->buf;
    }
    if (!tok_readline_string(tok)) {
        return 0;
    }
    /* readline() produced no new characters (StopIteration): end of input. */
    if (tok->inp == tok->cur) {
        tok->done = E_EOF;
        return 0;
    }
    tok->implicit_newline = 0;
    if (tok->inp[-1] != '\n') {
        /* tok_readline_string reserved room for this extra character. */
        assert(tok->inp + 1 < tok->end);
        /* Last line does not end in \n, fake one */
        *tok->inp++ = '\n';
        *tok->inp = '\0';
        tok->implicit_newline = 1;
    }

    /* If an f-string is being tokenized, keep its recorded expression
       source in sync with the newly read line. */
    if (tok->tok_mode_stack_index && !_PyLexer_update_fstring_expr(tok, 0)) {
        return 0;
    }

    ADVANCE_LINENO();
    /* The default encoding is UTF-8, so make sure we don't have any
       non-UTF-8 sequences in it. */
    if (!tok->encoding && !_PyTokenizer_ensure_utf8(tok->cur, tok)) {
        _PyTokenizer_error_ret(tok);
        return 0;
    }
    /* The assert documents the expectation; the expression form keeps
       release builds honest if tok->done was set above. */
    assert(tok->done == E_OK);
    return tok->done == E_OK;
}
107
108 struct tok_state *
_PyTokenizer_FromReadline(PyObject * readline,const char * enc,int exec_input,int preserve_crlf)109 _PyTokenizer_FromReadline(PyObject* readline, const char* enc,
110 int exec_input, int preserve_crlf)
111 {
112 struct tok_state *tok = _PyTokenizer_tok_new();
113 if (tok == NULL)
114 return NULL;
115 if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) {
116 _PyTokenizer_Free(tok);
117 return NULL;
118 }
119 tok->cur = tok->inp = tok->buf;
120 tok->end = tok->buf + BUFSIZ;
121 tok->fp = NULL;
122 if (enc != NULL) {
123 tok->encoding = _PyTokenizer_new_string(enc, strlen(enc), tok);
124 if (!tok->encoding) {
125 _PyTokenizer_Free(tok);
126 return NULL;
127 }
128 }
129 tok->decoding_state = STATE_NORMAL;
130 tok->underflow = &tok_underflow_readline;
131 Py_INCREF(readline);
132 tok->readline = readline;
133 return tok;
134 }
135