• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# The code here consists of hacks for pre-populating the known.tsv file.
2
3from c_analyzer.parser.preprocessor import _iter_clean_lines
4from c_analyzer.parser.naive import (
5        iter_variables, parse_variable_declaration, find_variables,
6        )
7from c_analyzer.common.known import HEADER as KNOWN_HEADER
8from c_analyzer.common.info import UNKNOWN, ID
9from c_analyzer.variables import Variable
10from c_analyzer.util import write_tsv
11
12from . import SOURCE_DIRS, REPO_ROOT
13from .known import DATA_FILE as KNOWN_FILE
14from .files import iter_cpython_files
15
16
# Type prefixes (each with its trailing space) that _parse_global() treats
# as the start of an implicitly static declaration.  Every prefix is also
# accepted with a leading "const ".
POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ')
POTS = POTS + tuple(f'const {pot}' for pot in POTS)
# Type-name prefixes that _parse_global() only accepts together with an
# initializer suffix (" = {" or " = NULL;").
STRUCTS = (
    'PyTypeObject',
    'PyObject',
    'PyMethodDef',
    'PyModuleDef',
    'grammar',
)
20
21
22def _parse_global(line, funcname=None):
23    line = line.strip()
24    if line.startswith('static '):
25        if '(' in line and '[' not in line and ' = ' not in line:
26            return None, None
27        name, decl = parse_variable_declaration(line)
28    elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
29        name, decl = parse_variable_declaration(line)
30    elif line.startswith('_Py_static_string('):
31        decl = line.strip(';').strip()
32        name = line.split('(')[1].split(',')[0].strip()
33    elif line.startswith('_Py_IDENTIFIER('):
34        decl = line.strip(';').strip()
35        name = 'PyId_' + line.split('(')[1].split(')')[0].strip()
36    elif funcname:
37        return None, None
38
39    # global-only
40    elif line.startswith('PyAPI_DATA('):  # only in .h files
41        name, decl = parse_variable_declaration(line)
42    elif line.startswith('extern '):  # only in .h files
43        name, decl = parse_variable_declaration(line)
44    elif line.startswith('PyDoc_VAR('):
45        decl = line.strip(';').strip()
46        name = line.split('(')[1].split(')')[0].strip()
47    elif line.startswith(POTS):  # implied static
48        if '(' in line and '[' not in line and ' = ' not in line:
49            return None, None
50        name, decl = parse_variable_declaration(line)
51    elif line.startswith(STRUCTS) and line.endswith(' = {'):  # implied static
52        name, decl = parse_variable_declaration(line)
53    elif line.startswith(STRUCTS) and line.endswith(' = NULL;'):  # implied static
54        name, decl = parse_variable_declaration(line)
55    elif line.startswith('struct '):
56        if not line.endswith(' = {'):
57            return None, None
58        if not line.partition(' ')[2].startswith(STRUCTS):
59            return None, None
60        # implied static
61        name, decl = parse_variable_declaration(line)
62
63    # file-specific
64    elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
65        # Objects/typeobject.c
66        funcname = line.split('(')[1].split(',')[0]
67        return [
68                ('op_id', funcname, '_Py_static_string(op_id, OPSTR)'),
69                ('rop_id', funcname, '_Py_static_string(op_id, OPSTR)'),
70                ]
71    elif line.startswith('WRAP_METHOD('):
72        # Objects/weakrefobject.c
73        funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(','))
74        return [
75                ('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'),
76                ]
77
78    else:
79        return None, None
80    return name, decl
81
82
def _pop_cached(varcache, filename, funcname, name, *,
                _iter_variables=iter_variables,
                ):
    """Remove and return the cached variable matching the given ID parts.

    varcache maps each filename to a dict of that file's not-yet-claimed
    variables, keyed by variable ID.  The first request for a file parses
    it (using _parse_global) and fills in that dict.

    Returns the matching variable, or None if the file has none left.
    """
    # Look for the file.
    try:
        pending = varcache[filename]
    except KeyError:
        # Register the (still empty) per-file dict before parsing.
        pending = {}
        varcache[filename] = pending
        parsed = _iter_variables(filename, parse_variable=_parse_global)
        for var in parsed:
            var._isglobal = True
            pending[var.id] = var
        # Report everything that was found in the file.
        for found_id in pending:
            print(' ', found_id)

    # Look for the variable.
    if funcname == UNKNOWN:
        # Without a function name, take the first entry whose name matches.
        match = next(
            (candidate for candidate in pending if candidate.name == name),
            None,
        )
        if match is None:
            return None
        return pending.pop(match)
    return pending.pop((filename, funcname, name), None)
109
110
def find_matching_variable(varid, varcache, allfilenames, *,
                           _pop_cached=_pop_cached,
                           ):
    """Return the parsed variable corresponding to varid, or None.

    If varid has a known filename then only that file is searched;
    otherwise every file in allfilenames is tried, in order.  If that
    fails for a global (funcname is None) that does have a known filename,
    the ".h" files in allfilenames are searched as a fallback.

    The lookup goes through _pop_cached(), using varcache.
    """
    has_filename = bool(varid.filename and varid.filename != UNKNOWN)
    candidates = [varid.filename] if has_filename else allfilenames

    for candidate in candidates:
        found = _pop_cached(varcache, candidate, varid.funcname, varid.name)
        if found is not None:
            return found

    # The fallback applies only to globals with a known filename.
    if not has_filename or varid.funcname is not None:
        return None
    for header in allfilenames:
        if header.endswith('.h'):
            found = _pop_cached(varcache, header, None, varid.name)
            if found is not None:
                return found
    return None
131
132
# Hard-coded declarations for globals, keyed by variable name.  A value that
# does not contain the variable's name gets the name appended by _known().
# (Presumably these are declarations that span multiple lines in the C
# source -- TODO confirm.)
MULTILINE = {
    # Python/Python-ast.c
    # NOTE(review): unlike the other singletons, these eight have no
    # "static" -- confirm whether that is intentional.
    **dict.fromkeys(
        (
            'Load_singleton',
            'Store_singleton',
            'Del_singleton',
            'AugLoad_singleton',
            'AugStore_singleton',
            'Param_singleton',
            'And_singleton',
            'Or_singleton',
        ),
        'PyObject *',
    ),
    **dict.fromkeys(
        (
            'Add_singleton',
            'Sub_singleton',
            'Mult_singleton',
            'MatMult_singleton',
            'Div_singleton',
            'Mod_singleton',
            'Pow_singleton',
            'LShift_singleton',
            'RShift_singleton',
            'BitOr_singleton',
            'BitXor_singleton',
            'BitAnd_singleton',
            'FloorDiv_singleton',
            'Invert_singleton',
            'Not_singleton',
            'UAdd_singleton',
            'USub_singleton',
            'Eq_singleton',
            'NotEq_singleton',
            'Lt_singleton',
            'LtE_singleton',
            'Gt_singleton',
            'GtE_singleton',
            'Is_singleton',
            'IsNot_singleton',
            'In_singleton',
            'NotIn_singleton',
        ),
        'static PyObject *',
    ),
    # Python/symtable.c
    **dict.fromkeys(
        (
            'top',
            'lambda',
            'genexpr',
            'listcomp',
            'setcomp',
            'dictcomp',
            '__class__',
        ),
        'static identifier ',
    ),
    # Python/compile.c
    **dict.fromkeys(('__doc__', '__annotations__'), 'static PyObject *'),
    # Objects/floatobject.c
    **dict.fromkeys(
        (
            'double_format',
            'float_format',
            'detected_double_format',
            'detected_float_format',
        ),
        'static float_format_type ',
    ),
    # Parser/listnode.c
    **dict.fromkeys(('level', 'atbol'), 'static int '),
    # Python/dtoa.c
    'private_mem': 'static double private_mem[PRIVATE_mem]',
    'pmem_next': 'static double *',
    # Modules/_weakref.c
    'weakref_functions': 'static PyMethodDef ',
}
# Hard-coded declarations for globals whose type is an anonymous struct
# written inline, keyed by variable name.
INLINE = {
    # Modules/_tracemalloc.c
    'allocators': (
        'static struct {'
        ' PyMemAllocatorEx mem;'
        ' PyMemAllocatorEx raw;'
        ' PyMemAllocatorEx obj;'
        ' } '
    ),
    # Modules/faulthandler.c
    'fatal_error': (
        'static struct {'
        ' int enabled;'
        ' PyObject *file;'
        ' int fd;'
        ' int all_threads;'
        ' PyInterpreterState *interp;'
        ' void *exc_handler;'
        ' } '
    ),
    'thread': (
        'static struct {'
        ' PyObject *file;'
        ' int fd;'
        ' PY_TIMEOUT_T timeout_us;'
        ' int repeat;'
        ' PyInterpreterState *interp;'
        ' int exit;'
        ' char *header;'
        ' size_t header_len;'
        ' PyThread_type_lock cancel_event;'
        ' PyThread_type_lock running;'
        ' } '
    ),
    # Modules/signalmodule.c
    'Handlers': (
        'static volatile struct {'
        ' _Py_atomic_int tripped;'
        ' PyObject *func;'
        ' } Handlers[NSIG]'
    ),
    'wakeup': (
        'static volatile struct {'
        ' SOCKET_T fd;'
        ' int warn_on_full_buffer;'
        ' int use_send;'
        ' } '
    ),
    # Python/dynload_shlib.c
    'handles': (
        'static struct {'
        ' dev_t dev;'
        ' ino_t ino;'
        ' void *handle;'
        ' } handles[128]'
    ),
    # Objects/obmalloc.c
    '_PyMem_Debug': (
        'static struct {'
        ' debug_alloc_api_t raw;'
        ' debug_alloc_api_t mem;'
        ' debug_alloc_api_t obj;'
        ' } '
    ),
    # Python/bootstrap_hash.c
    'urandom_cache': (
        'static struct {'
        ' int fd;'
        ' dev_t st_dev;'
        ' ino_t st_ino;'
        ' } '
    ),
    }
# Hard-coded declarations for globals that are function pointers, keyed by
# variable name.  Each is spelled "<returns>(*<name>)(<params>)".
FUNC = {
    name: f'{returns}(*{name})({params})'
    for name, returns, params in (
        # Objects/object.c
        ('_Py_abstract_hack', 'Py_ssize_t ', 'PyObject *'),
        # Parser/myreadline.c
        ('PyOS_InputHook', 'int ', 'void'),
        # Python/pylifecycle.c
        ('_PyOS_mystrnicmp_hack', 'int ',
         'const char *, const char *, Py_ssize_t'),
        # Parser/myreadline.c
        ('PyOS_ReadlineFunctionPointer', 'char *',
         'FILE *, FILE *, const char *'),
    )
    }
# More hard-coded declarations for globals, keyed by variable name.
IMPLIED = {
    # Objects/boolobject.c
    **dict.fromkeys(
        ('_Py_FalseStruct', '_Py_TrueStruct'),
        'static struct _longobject ',
    ),
    # Modules/config.c
    '_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]',
    }
# Every hard-coded global declaration in one lookup table (used by
# _known()).  If a name appeared in more than one table, the later table
# would win.
GLOBALS = {
    **MULTILINE,
    **INLINE,
    **FUNC,
    **IMPLIED,
}
233
# Hard-coded function-local variables, keyed by variable name.  Each value
# is (filename, funcname, declaration).
LOCALS = {
    'buildinfo': (
        'Modules/getbuildinfo.c',
        'Py_GetBuildInfo',
        'static char buildinfo[50 + sizeof(GITVERSION) + '
        '((sizeof(GITTAG) > sizeof(GITBRANCH)) ?  '
        'sizeof(GITTAG) : sizeof(GITBRANCH))]',
    ),
    'methods': (
        'Python/codecs.c',
        '_PyCodecRegistry_Init',
        'static struct { char *name; PyMethodDef def; } methods[]',
    ),
    }
242
243
244def _known(symbol):
245    if symbol.funcname:
246        if symbol.funcname != UNKNOWN or symbol.filename != UNKNOWN:
247            raise KeyError(symbol.name)
248        filename, funcname, decl = LOCALS[symbol.name]
249        varid = ID(filename, funcname, symbol.name)
250    elif not symbol.filename or symbol.filename == UNKNOWN:
251        raise KeyError(symbol.name)
252    else:
253        varid = symbol.id
254        try:
255            decl = GLOBALS[symbol.name]
256        except KeyError:
257
258            if symbol.name.endswith('_methods'):
259                decl = 'static PyMethodDef '
260            elif symbol.filename == 'Objects/exceptions.c' and symbol.name.startswith(('PyExc_', '_PyExc_')):
261                decl = 'static PyTypeObject '
262            else:
263                raise
264    if symbol.name not in decl:
265        decl = decl + symbol.name
266    return Variable(varid, 'static', decl)
267
268
def known_row(varid, decl):
    """Return the row tuple for a variable with the given ID and declaration.

    The row is (filename, funcname, name, kind, declaration), where the
    kind is always "variable" and a missing funcname is written as "-".
    """
    funcname = varid.funcname if varid.funcname else '-'
    return (varid.filename, funcname, varid.name, 'variable', decl)
277
278
def known_rows(symbols, *,
               cached=True,
               _get_filenames=iter_cpython_files,
               _find_match=find_matching_variable,
               _find_symbols=find_variables,
               _as_known=known_row,
               ):
    """Yield one row (see known_row()) for each of the given symbols.

    Each symbol is first looked up in the hard-coded tables (via _known())
    and then, failing that, in the source files.  A symbol found in
    neither is still reported, with UNKNOWN in place of its declaration.

    Only cached=True is supported; otherwise NotImplementedError is
    raised once iteration starts.
    """
    filenames = list(_get_filenames())
    cache = {}

    if not cached:
        raise NotImplementedError  # XXX incorporate KNOWN
        # Unreachable for now: a sketch of the uncached approach.
        variables = _find_symbols(symbols, filenames,
                                  srccache=cache,
                                  parse_variable=_parse_global,
                                  )
        for variable in variables:
            #variable = variable._replace(
            #    filename=os.path.relpath(variable.filename, REPO_ROOT))
            if variable.funcname == UNKNOWN:
                print(variable)
            if variable.vartype == UNKNOWN:
                print(variable)
            yield _as_known(variable.id, variable.vartype)

    for symbol in symbols:
        try:
            match = _known(symbol)
        except KeyError:
            # Not hard-coded, so search the source files.
            match = _find_match(symbol, cache, filenames)
            if match is None:
                match = Variable(symbol.id, UNKNOWN, UNKNOWN)
        yield _as_known(match.id, match.vartype)
310
311
def generate(symbols, filename=None, *,
             _generate_rows=known_rows,
             _write_tsv=write_tsv,
             ):
    """Write the rows generated for the given symbols to a file.

    If no filename is given then KNOWN_FILE plus a ".new" suffix is used.
    The rows are written, under KNOWN_HEADER, by write_tsv().
    """
    target = filename or KNOWN_FILE + '.new'
    _write_tsv(target, KNOWN_HEADER, _generate_rows(symbols))
321
322
if __name__ == '__main__':
    # c_symbols is only imported when this module is run as a script.
    from c_symbols import binary

    # Generate the file from the symbols of the binary.PYTHON binary.
    generate(binary.iter_symbols(binary.PYTHON, find_local_symbol=None))
330