• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 #include "frameobject.h"
3 
4 #include "pycore_pyerrors.h"
5 
6 #define MAX_CANDIDATE_ITEMS 750
7 #define MAX_STRING_SIZE 40
8 
9 #define MOVE_COST 2
10 #define CASE_COST 1
11 
12 #define LEAST_FIVE_BITS(n) ((n) & 31)
13 
14 static inline int
substitution_cost(char a,char b)15 substitution_cost(char a, char b)
16 {
17     if (LEAST_FIVE_BITS(a) != LEAST_FIVE_BITS(b)) {
18         // Not the same, not a case flip.
19         return MOVE_COST;
20     }
21     if (a == b) {
22         return 0;
23     }
24     if ('A' <= a && a <= 'Z') {
25         a += ('a' - 'A');
26     }
27     if ('A' <= b && b <= 'Z') {
28         b += ('a' - 'A');
29     }
30     if (a == b) {
31         return CASE_COST;
32     }
33     return MOVE_COST;
34 }
35 
36 /* Calculate the Levenshtein distance between string1 and string2 */
37 static Py_ssize_t
levenshtein_distance(const char * a,size_t a_size,const char * b,size_t b_size,size_t max_cost)38 levenshtein_distance(const char *a, size_t a_size,
39                      const char *b, size_t b_size,
40                      size_t max_cost)
41 {
42     static size_t buffer[MAX_STRING_SIZE];
43 
44     // Both strings are the same (by identity)
45     if (a == b) {
46         return 0;
47     }
48 
49     // Trim away common affixes.
50     while (a_size && b_size && a[0] == b[0]) {
51         a++; a_size--;
52         b++; b_size--;
53     }
54     while (a_size && b_size && a[a_size-1] == b[b_size-1]) {
55         a_size--;
56         b_size--;
57     }
58     if (a_size == 0 || b_size == 0) {
59         return (a_size + b_size) * MOVE_COST;
60     }
61     if (a_size > MAX_STRING_SIZE || b_size > MAX_STRING_SIZE) {
62         return max_cost + 1;
63     }
64 
65     // Prefer shorter buffer
66     if (b_size < a_size) {
67         const char *t = a; a = b; b = t;
68         size_t t_size = a_size; a_size = b_size; b_size = t_size;
69     }
70 
71     // quick fail when a match is impossible.
72     if ((b_size - a_size) * MOVE_COST > max_cost) {
73         return max_cost + 1;
74     }
75 
76     // Instead of producing the whole traditional len(a)-by-len(b)
77     // matrix, we can update just one row in place.
78     // Initialize the buffer row
79     for (size_t i = 0; i < a_size; i++) {
80         // cost from b[:0] to a[:i+1]
81         buffer[i] = (i + 1) * MOVE_COST;
82     }
83 
84     size_t result = 0;
85     for (size_t b_index = 0; b_index < b_size; b_index++) {
86         char code = b[b_index];
87         // cost(b[:b_index], a[:0]) == b_index * MOVE_COST
88         size_t distance = result = b_index * MOVE_COST;
89         size_t minimum = SIZE_MAX;
90         for (size_t index = 0; index < a_size; index++) {
91 
92             // cost(b[:b_index+1], a[:index+1]) = min(
93             //     // 1) substitute
94             //     cost(b[:b_index], a[:index])
95             //         + substitution_cost(b[b_index], a[index]),
96             //     // 2) delete from b
97             //     cost(b[:b_index], a[:index+1]) + MOVE_COST,
98             //     // 3) delete from a
99             //     cost(b[:b_index+1], a[index]) + MOVE_COST
100             // )
101 
102             // 1) Previous distance in this row is cost(b[:b_index], a[:index])
103             size_t substitute = distance + substitution_cost(code, a[index]);
104             // 2) cost(b[:b_index], a[:index+1]) from previous row
105             distance = buffer[index];
106             // 3) existing result is cost(b[:b_index+1], a[index])
107 
108             size_t insert_delete = Py_MIN(result, distance) + MOVE_COST;
109             result = Py_MIN(insert_delete, substitute);
110 
111             // cost(b[:b_index+1], a[:index+1])
112             buffer[index] = result;
113             if (result < minimum) {
114                 minimum = result;
115             }
116         }
117         if (minimum > max_cost) {
118             // Everything in this row is too big, so bail early.
119             return max_cost + 1;
120         }
121     }
122     return result;
123 }
124 
125 static inline PyObject *
calculate_suggestions(PyObject * dir,PyObject * name)126 calculate_suggestions(PyObject *dir,
127                       PyObject *name)
128 {
129     assert(!PyErr_Occurred());
130     assert(PyList_CheckExact(dir));
131 
132     Py_ssize_t dir_size = PyList_GET_SIZE(dir);
133     if (dir_size >= MAX_CANDIDATE_ITEMS) {
134         return NULL;
135     }
136 
137     Py_ssize_t suggestion_distance = PY_SSIZE_T_MAX;
138     PyObject *suggestion = NULL;
139     Py_ssize_t name_size;
140     const char *name_str = PyUnicode_AsUTF8AndSize(name, &name_size);
141     if (name_str == NULL) {
142         return NULL;
143     }
144 
145     for (int i = 0; i < dir_size; ++i) {
146         PyObject *item = PyList_GET_ITEM(dir, i);
147         Py_ssize_t item_size;
148         const char *item_str = PyUnicode_AsUTF8AndSize(item, &item_size);
149         if (item_str == NULL) {
150             return NULL;
151         }
152         if (PyUnicode_CompareWithASCIIString(name, item_str) == 0) {
153             continue;
154         }
155         // No more than 1/3 of the involved characters should need changed.
156         Py_ssize_t max_distance = (name_size + item_size + 3) * MOVE_COST / 6;
157         // Don't take matches we've already beaten.
158         max_distance = Py_MIN(max_distance, suggestion_distance - 1);
159         Py_ssize_t current_distance =
160             levenshtein_distance(name_str, name_size,
161                                  item_str, item_size, max_distance);
162         if (current_distance > max_distance) {
163             continue;
164         }
165         if (!suggestion || current_distance < suggestion_distance) {
166             suggestion = item;
167             suggestion_distance = current_distance;
168         }
169     }
170     Py_XINCREF(suggestion);
171     return suggestion;
172 }
173 
174 static PyObject *
offer_suggestions_for_attribute_error(PyAttributeErrorObject * exc)175 offer_suggestions_for_attribute_error(PyAttributeErrorObject *exc)
176 {
177     PyObject *name = exc->name; // borrowed reference
178     PyObject *obj = exc->obj; // borrowed reference
179 
180     // Abort if we don't have an attribute name or we have an invalid one
181     if (name == NULL || obj == NULL || !PyUnicode_CheckExact(name)) {
182         return NULL;
183     }
184 
185     PyObject *dir = PyObject_Dir(obj);
186     if (dir == NULL) {
187         return NULL;
188     }
189 
190     PyObject *suggestions = calculate_suggestions(dir, name);
191     Py_DECREF(dir);
192     return suggestions;
193 }
194 
195 
196 static PyObject *
offer_suggestions_for_name_error(PyNameErrorObject * exc)197 offer_suggestions_for_name_error(PyNameErrorObject *exc)
198 {
199     PyObject *name = exc->name; // borrowed reference
200     PyTracebackObject *traceback = (PyTracebackObject *) exc->traceback; // borrowed reference
201     // Abort if we don't have a variable name or we have an invalid one
202     // or if we don't have a traceback to work with
203     if (name == NULL || !PyUnicode_CheckExact(name) ||
204         traceback == NULL || !Py_IS_TYPE(traceback, &PyTraceBack_Type)
205     ) {
206         return NULL;
207     }
208 
209     // Move to the traceback of the exception
210     while (1) {
211         PyTracebackObject *next = traceback->tb_next;
212         if (next == NULL || !Py_IS_TYPE(next, &PyTraceBack_Type)) {
213             break;
214         }
215         else {
216             traceback = next;
217         }
218     }
219 
220     PyFrameObject *frame = traceback->tb_frame;
221     assert(frame != NULL);
222     PyCodeObject *code = frame->f_code;
223     assert(code != NULL && code->co_varnames != NULL);
224     PyObject *dir = PySequence_List(code->co_varnames);
225     if (dir == NULL) {
226         return NULL;
227     }
228 
229     PyObject *suggestions = calculate_suggestions(dir, name);
230     Py_DECREF(dir);
231     if (suggestions != NULL) {
232         return suggestions;
233     }
234 
235     dir = PySequence_List(frame->f_globals);
236     if (dir == NULL) {
237         return NULL;
238     }
239     suggestions = calculate_suggestions(dir, name);
240     Py_DECREF(dir);
241     if (suggestions != NULL) {
242         return suggestions;
243     }
244 
245     dir = PySequence_List(frame->f_builtins);
246     if (dir == NULL) {
247         return NULL;
248     }
249     suggestions = calculate_suggestions(dir, name);
250     Py_DECREF(dir);
251 
252     return suggestions;
253 }
254 
255 // Offer suggestions for a given exception. Returns a python string object containing the
256 // suggestions. This function returns NULL if no suggestion was found or if an exception happened,
257 // users must call PyErr_Occurred() to disambiguate.
258 PyObject *
_Py_Offer_Suggestions(PyObject * exception)259 _Py_Offer_Suggestions(PyObject *exception)
260 {
261     PyObject *result = NULL;
262     assert(!PyErr_Occurred());
263     if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_AttributeError)) {
264         result = offer_suggestions_for_attribute_error((PyAttributeErrorObject *) exception);
265     } else if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_NameError)) {
266         result = offer_suggestions_for_name_error((PyNameErrorObject *) exception);
267     }
268     return result;
269 }
270 
271 Py_ssize_t
_Py_UTF8_Edit_Cost(PyObject * a,PyObject * b,Py_ssize_t max_cost)272 _Py_UTF8_Edit_Cost(PyObject *a, PyObject *b, Py_ssize_t max_cost)
273 {
274     assert(PyUnicode_Check(a) && PyUnicode_Check(b));
275     Py_ssize_t size_a, size_b;
276     const char *utf8_a = PyUnicode_AsUTF8AndSize(a, &size_a);
277     if (utf8_a == NULL) {
278         return -1;
279     }
280     const char *utf8_b = PyUnicode_AsUTF8AndSize(b, &size_b);
281     if (utf8_b == NULL) {
282         return -1;
283     }
284     if (max_cost == -1) {
285         max_cost = MOVE_COST * Py_MAX(size_a, size_b);
286     }
287     return levenshtein_distance(utf8_a, size_a, utf8_b, size_b, max_cost);
288 }
289 
290