# The code here consists of hacks for pre-populating the known.tsv file.

from c_analyzer.parser.preprocessor import _iter_clean_lines
from c_analyzer.parser.naive import (
        iter_variables, parse_variable_declaration, find_variables,
        )
from c_analyzer.common.known import HEADER as KNOWN_HEADER
from c_analyzer.common.info import UNKNOWN, ID
from c_analyzer.variables import Variable
from c_analyzer.util import write_tsv

from . import SOURCE_DIRS, REPO_ROOT
from .known import DATA_FILE as KNOWN_FILE
from .files import iter_cpython_files


# Line prefixes ("plain old types") that are treated as implied-static
# global declarations by _parse_global().
POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ')
POTS += tuple('const ' + v for v in POTS)
# Struct type names that are treated as implied-static global declarations
# when the line ends with " = {" or " = NULL;".
STRUCTS = ('PyTypeObject', 'PyObject', 'PyMethodDef', 'PyModuleDef', 'grammar')


def _parse_global(line, funcname=None):
    """Return the variable declared on the given source line, if any.

    The result is normally a (name, decl) pair, or (None, None) when the
    line is not recognized as a variable declaration.  For a few
    file-specific macros the result is instead a list of
    (name, funcname, decl) triples, one per variable the macro declares.

    If "funcname" is given (i.e. the line is inside a function) then only
    the forms checked before the "elif funcname" branch are recognized.
    """
    line = line.strip()
    if line.startswith('static '):
        # A "(" with neither "[" nor " = " is taken to be a function.
        if '(' in line and '[' not in line and ' = ' not in line:
            return None, None
        name, decl = parse_variable_declaration(line)
    elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
        name, decl = parse_variable_declaration(line)
    elif line.startswith('_Py_static_string('):
        decl = line.strip(';').strip()
        name = line.split('(')[1].split(',')[0].strip()
    elif line.startswith('_Py_IDENTIFIER('):
        decl = line.strip(';').strip()
        name = 'PyId_' + line.split('(')[1].split(')')[0].strip()
    elif funcname:
        # Everything below applies only outside of functions.
        return None, None

    # global-only
    elif line.startswith('PyAPI_DATA('):  # only in .h files
        name, decl = parse_variable_declaration(line)
    elif line.startswith('extern '):  # only in .h files
        name, decl = parse_variable_declaration(line)
    elif line.startswith('PyDoc_VAR('):
        decl = line.strip(';').strip()
        name = line.split('(')[1].split(')')[0].strip()
    elif line.startswith(POTS):  # implied static
        if '(' in line and '[' not in line and ' = ' not in line:
            return None, None
        name, decl = parse_variable_declaration(line)
    elif line.startswith(STRUCTS) and line.endswith(' = {'):  # implied static
        name, decl = parse_variable_declaration(line)
    elif line.startswith(STRUCTS) and line.endswith(' = NULL;'):  # implied static
        name, decl = parse_variable_declaration(line)
    elif line.startswith('struct '):
        if not line.endswith(' = {'):
            return None, None
        if not line.partition(' ')[2].startswith(STRUCTS):
            return None, None
        # implied static
        name, decl = parse_variable_declaration(line)

    # file-specific
    elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
        # Objects/typeobject.c
        funcname = line.split('(')[1].split(',')[0]
        # Each entry's declaration must name its own variable; "rop_id"
        # previously reused the "op_id" declaration text.
        return [
                ('op_id', funcname, '_Py_static_string(op_id, OPSTR)'),
                ('rop_id', funcname, '_Py_static_string(rop_id, ROPSTR)'),
                ]
    elif line.startswith('WRAP_METHOD('):
        # Objects/weakrefobject.c
        funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(','))
        return [
                ('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'),
                ]

    else:
        return None, None
    return name, decl


def _pop_cached(varcache, filename, funcname, name, *,
                _iter_variables=iter_variables,
                ):
    """Remove and return the matching variable from the per-file cache.

    "varcache" maps each filename to a dict of {variable.id: variable}.
    The entry for "filename" is populated (via _iter_variables() with
    _parse_global() as the parser) the first time the file is requested.

    If "funcname" is UNKNOWN then the first cached variable with the
    given name is used, regardless of function.  None is returned when
    there is no match.
    """
    # Look for the file.
    try:
        cached = varcache[filename]
    except KeyError:
        cached = varcache[filename] = {}
        for variable in _iter_variables(filename,
                                        parse_variable=_parse_global,
                                        ):
            variable._isglobal = True
            cached[variable.id] = variable
        # Show what was found in the newly scanned file.
        for var in cached:
            print(' ', var)

    # Look for the variable.
    if funcname == UNKNOWN:
        for varid in cached:
            if varid.name == name:
                break
        else:
            return None
        return cached.pop(varid)
    else:
        return cached.pop((filename, funcname, name), None)


def find_matching_variable(varid, varcache, allfilenames, *,
                           _pop_cached=_pop_cached,
                           ):
    """Return the parsed variable that corresponds to the given ID.

    Only "varid.filename" is searched when it is known; otherwise every
    file in "allfilenames" is tried in order.  If nothing is found and
    the ID has a known filename but no function, the ".h" files in
    "allfilenames" are searched as a fallback.  None is returned when
    there is no match.  A matched variable is removed from "varcache".
    """
    if varid.filename and varid.filename != UNKNOWN:
        filenames = [varid.filename]
    else:
        filenames = allfilenames
    for filename in filenames:
        variable = _pop_cached(varcache, filename, varid.funcname, varid.name)
        if variable is not None:
            return variable
    else:
        # The loop has no "break", so this runs whenever nothing matched.
        if varid.filename and varid.filename != UNKNOWN and varid.funcname is None:
            for filename in allfilenames:
                if not filename.endswith('.h'):
                    continue
                variable = _pop_cached(varcache, filename, None, varid.name)
                if variable is not None:
                    return variable
        return None


# Hard-coded declarations, keyed by variable name, consulted by _known().
# A value that does not contain the variable's name gets the name appended.
MULTILINE = {
    # Python/Python-ast.c
    'Load_singleton': 'PyObject *',
    'Store_singleton': 'PyObject *',
    'Del_singleton': 'PyObject *',
    'AugLoad_singleton': 'PyObject *',
    'AugStore_singleton': 'PyObject *',
    'Param_singleton': 'PyObject *',
    'And_singleton': 'PyObject *',
    'Or_singleton': 'PyObject *',
    'Add_singleton': 'static PyObject *',
    'Sub_singleton': 'static PyObject *',
    'Mult_singleton': 'static PyObject *',
    'MatMult_singleton': 'static PyObject *',
    'Div_singleton': 'static PyObject *',
    'Mod_singleton': 'static PyObject *',
    'Pow_singleton': 'static PyObject *',
    'LShift_singleton': 'static PyObject *',
    'RShift_singleton': 'static PyObject *',
    'BitOr_singleton': 'static PyObject *',
    'BitXor_singleton': 'static PyObject *',
    'BitAnd_singleton': 'static PyObject *',
    'FloorDiv_singleton': 'static PyObject *',
    'Invert_singleton': 'static PyObject *',
    'Not_singleton': 'static PyObject *',
    'UAdd_singleton': 'static PyObject *',
    'USub_singleton': 'static PyObject *',
    'Eq_singleton': 'static PyObject *',
    'NotEq_singleton': 'static PyObject *',
    'Lt_singleton': 'static PyObject *',
    'LtE_singleton': 'static PyObject *',
    'Gt_singleton': 'static PyObject *',
    'GtE_singleton': 'static PyObject *',
    'Is_singleton': 'static PyObject *',
    'IsNot_singleton': 'static PyObject *',
    'In_singleton': 'static PyObject *',
    'NotIn_singleton': 'static PyObject *',
    # Python/symtable.c
    'top': 'static identifier ',
    'lambda': 'static identifier ',
    'genexpr': 'static identifier ',
    'listcomp': 'static identifier ',
    'setcomp': 'static identifier ',
    'dictcomp': 'static identifier ',
    '__class__': 'static identifier ',
    # Python/compile.c
    '__doc__': 'static PyObject *',
    '__annotations__': 'static PyObject *',
    # Objects/floatobject.c
    'double_format': 'static float_format_type ',
    'float_format': 'static float_format_type ',
    'detected_double_format': 'static float_format_type ',
    'detected_float_format': 'static float_format_type ',
    # Parser/listnode.c
    'level': 'static int ',
    'atbol': 'static int ',
    # Python/dtoa.c
    'private_mem': 'static double private_mem[PRIVATE_mem]',
    'pmem_next': 'static double *',
    # Modules/_weakref.c
    'weakref_functions': 'static PyMethodDef ',
}
INLINE = {
    # Modules/_tracemalloc.c
    'allocators': 'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } ',
    # Modules/faulthandler.c
    'fatal_error': 'static struct { int enabled; PyObject *file; int fd; int all_threads; PyInterpreterState *interp; void *exc_handler; } ',
    'thread': 'static struct { PyObject *file; int fd; PY_TIMEOUT_T timeout_us; int repeat; PyInterpreterState *interp; int exit; char *header; size_t header_len; PyThread_type_lock cancel_event; PyThread_type_lock running; } ',
    # Modules/signalmodule.c
    'Handlers': 'static volatile struct { _Py_atomic_int tripped; PyObject *func; } Handlers[NSIG]',
    'wakeup': 'static volatile struct { SOCKET_T fd; int warn_on_full_buffer; int use_send; } ',
    # Python/dynload_shlib.c
    'handles': 'static struct { dev_t dev; ino_t ino; void *handle; } handles[128]',
    # Objects/obmalloc.c
    '_PyMem_Debug': 'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; debug_alloc_api_t obj; } ',
    # Python/bootstrap_hash.c
    'urandom_cache': 'static struct { int fd; dev_t st_dev; ino_t st_ino; } ',
    }
FUNC = {
    # Objects/object.c
    '_Py_abstract_hack': 'Py_ssize_t (*_Py_abstract_hack)(PyObject *)',
    # Parser/myreadline.c
    'PyOS_InputHook': 'int (*PyOS_InputHook)(void)',
    # Python/pylifecycle.c
    '_PyOS_mystrnicmp_hack': 'int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t)',
    # Parser/myreadline.c
    'PyOS_ReadlineFunctionPointer': 'char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *)',
    }
IMPLIED = {
    # Objects/boolobject.c
    '_Py_FalseStruct': 'static struct _longobject ',
    '_Py_TrueStruct': 'static struct _longobject ',
    # Modules/config.c
    '_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]',
    }
# The combined lookup table for globals; later updates win on key clashes.
GLOBALS = {}
GLOBALS.update(MULTILINE)
GLOBALS.update(INLINE)
GLOBALS.update(FUNC)
GLOBALS.update(IMPLIED)

# Hard-coded function-local statics: name -> (filename, funcname, decl).
LOCALS = {
    'buildinfo': ('Modules/getbuildinfo.c',
                  'Py_GetBuildInfo',
                  'static char buildinfo[50 + sizeof(GITVERSION) + ((sizeof(GITTAG) > sizeof(GITBRANCH)) ? sizeof(GITTAG) : sizeof(GITBRANCH))]'),
    'methods': ('Python/codecs.c',
                '_PyCodecRegistry_Init',
                'static struct { char *name; PyMethodDef def; } methods[]'),
    }


def _known(symbol):
    """Return a Variable for the symbol, built from the hard-coded tables.

    Symbols with a function name are looked up in LOCALS, but only when
    both their function and file are UNKNOWN.  Symbols without a function
    name must have a known filename and are looked up in GLOBALS, with
    fallbacks for "*_methods" names and for the exception types in
    Objects/exceptions.c.

    KeyError is raised if the symbol is not covered.
    """
    if symbol.funcname:
        if symbol.funcname != UNKNOWN or symbol.filename != UNKNOWN:
            raise KeyError(symbol.name)
        filename, funcname, decl = LOCALS[symbol.name]
        varid = ID(filename, funcname, symbol.name)
    elif not symbol.filename or symbol.filename == UNKNOWN:
        raise KeyError(symbol.name)
    else:
        varid = symbol.id
        try:
            decl = GLOBALS[symbol.name]
        except KeyError:
            if symbol.name.endswith('_methods'):
                decl = 'static PyMethodDef '
            elif symbol.filename == 'Objects/exceptions.c' and symbol.name.startswith(('PyExc_', '_PyExc_')):
                decl = 'static PyTypeObject '
            else:
                raise
    # Table values that hold only the type prefix get the name appended.
    if symbol.name not in decl:
        decl = decl + symbol.name
    return Variable(varid, 'static', decl)


def known_row(varid, decl):
    """Return the row tuple for the given variable ID and declaration.

    The row is (filename, funcname or "-", name, "variable", decl).
    """
    return (
            varid.filename,
            varid.funcname or '-',
            varid.name,
            'variable',
            decl,
            )


def known_rows(symbols, *,
               cached=True,
               _get_filenames=iter_cpython_files,
               _find_match=find_matching_variable,
               _find_symbols=find_variables,
               _as_known=known_row,
               ):
    """Yield one row (see known_row()) for each of the given symbols.

    Each symbol is first resolved through the hard-coded tables
    (_known()), then by scanning the source files (_find_match).  A
    symbol that cannot be resolved produces a row whose declaration is
    UNKNOWN.

    Only "cached=True" is supported; otherwise NotImplementedError is
    raised once the generator is first advanced.  The underscore-prefixed
    keyword arguments default to the module-level helpers.
    """
    filenames = list(_get_filenames())
    cache = {}
    if cached:
        for symbol in symbols:
            try:
                found = _known(symbol)
            except KeyError:
                found = _find_match(symbol, cache, filenames)
                if found is None:
                    found = Variable(symbol.id, UNKNOWN, UNKNOWN)
            yield _as_known(found.id, found.vartype)
    else:
        raise NotImplementedError  # XXX incorporate KNOWN
        # NOTE: everything below is unreachable until the raise is removed.
        for variable in _find_symbols(symbols, filenames,
                                      srccache=cache,
                                      parse_variable=_parse_global,
                                      ):
            #variable = variable._replace(
            #    filename=os.path.relpath(variable.filename, REPO_ROOT))
            if variable.funcname == UNKNOWN:
                print(variable)
            if variable.vartype == UNKNOWN:
                print(variable)
            yield _as_known(variable.id, variable.vartype)


def generate(symbols, filename=None, *,
             _generate_rows=known_rows,
             _write_tsv=write_tsv,
             ):
    """Write the rows for the given symbols to a TSV file.

    If no filename is given then KNOWN_FILE + ".new" is used.  The file
    is written with KNOWN_HEADER as its header.
    """
    if not filename:
        filename = KNOWN_FILE + '.new'

    rows = _generate_rows(symbols)
    _write_tsv(filename, KNOWN_HEADER, rows)


if __name__ == '__main__':
    from c_symbols import binary
    symbols = binary.iter_symbols(
            binary.PYTHON,
            find_local_symbol=None,
            )
    generate(symbols)