import os.path
import re

from c_analyzer.common.info import ID
from c_analyzer.common.util import read_tsv, write_tsv

from . import DATA_DIR

# XXX need tests:
# * generate / script


IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')

IGNORED_COLUMNS = ('filename', 'funcname', 'name', 'kind', 'reason')
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)

# Maps a variable name to the reason that variable is ignored.
# _is_ignored() consults this only for variables whose funcname is None.
# XXX Move these to ignored.tsv.
IGNORED = {
    # global
    'PyImport_FrozenModules': 'process-global',
    'M___hello__': 'process-global',
    'inittab_copy': 'process-global',
    'PyHash_Func': 'process-global',
    '_Py_HashSecret_Initialized': 'process-global',
    '_TARGET_LOCALES': 'process-global',

    # startup (only changed before/during)
    '_PyRuntime': 'runtime startup',
    'runtime_initialized': 'runtime startup',
    'static_arg_parsers': 'runtime startup',
    'orig_argv': 'runtime startup',
    'opt_ptr': 'runtime startup',
    '_preinit_warnoptions': 'runtime startup',
    '_Py_StandardStreamEncoding': 'runtime startup',
    'Py_FileSystemDefaultEncoding': 'runtime startup',
    '_Py_StandardStreamErrors': 'runtime startup',
    'Py_FileSystemDefaultEncodeErrors': 'runtime startup',
    'Py_BytesWarningFlag': 'runtime startup',
    'Py_DebugFlag': 'runtime startup',
    'Py_DontWriteBytecodeFlag': 'runtime startup',
    'Py_FrozenFlag': 'runtime startup',
    'Py_HashRandomizationFlag': 'runtime startup',
    'Py_IgnoreEnvironmentFlag': 'runtime startup',
    'Py_InspectFlag': 'runtime startup',
    'Py_InteractiveFlag': 'runtime startup',
    'Py_IsolatedFlag': 'runtime startup',
    'Py_NoSiteFlag': 'runtime startup',
    'Py_NoUserSiteDirectory': 'runtime startup',
    'Py_OptimizeFlag': 'runtime startup',
    'Py_QuietFlag': 'runtime startup',
    'Py_UTF8Mode': 'runtime startup',
    'Py_UnbufferedStdioFlag': 'runtime startup',
    'Py_VerboseFlag': 'runtime startup',
    '_Py_path_config': 'runtime startup',
    '_PyOS_optarg': 'runtime startup',
    '_PyOS_opterr': 'runtime startup',
    '_PyOS_optind': 'runtime startup',
    '_Py_HashSecret': 'runtime startup',

    # REPL
    '_PyOS_ReadlineLock': 'repl',
    '_PyOS_ReadlineTState': 'repl',

    # effectively const
    'tracemalloc_empty_traceback': 'const',
    '_empty_bitmap_node': 'const',
    'posix_constants_pathconf': 'const',
    'posix_constants_confstr': 'const',
    'posix_constants_sysconf': 'const',
    '_PySys_ImplCacheTag': 'const',
    '_PySys_ImplName': 'const',
    'PyImport_Inittab': 'const',
    '_PyImport_DynLoadFiletab': 'const',
    '_PyParser_Grammar': 'const',
    'Py_hexdigits': 'const',
    '_PyImport_Inittab': 'const',
    '_PyByteArray_empty_string': 'const',
    '_PyLong_DigitValue': 'const',
    '_Py_SwappedOp': 'const',
    'PyStructSequence_UnnamedField': 'const',

    # signals are main-thread only
    'faulthandler_handlers': 'signals are main-thread only',
    'user_signals': 'signals are main-thread only',
    'wakeup': 'signals are main-thread only',

    # hacks
    '_PySet_Dummy': 'only used as a placeholder',
}

# Shared reason string for the per-file special cases in _is_ignored().
BENIGN = 'races here are benign and unlikely'


def is_supported(variable, ignored=None, known=None, *,
                 _ignored=(lambda *a, **k: _is_ignored(*a, **k)),
                 _vartype_okay=(lambda *a, **k: _is_vartype_okay(*a, **k)),
                 ):
    """Return True if the given global variable is okay in CPython.

    "variable" must provide the attributes read by the two checks
    ("id", "name", "funcname", "filename" and "vartype").

    "ignored", if given, is a mapping like the one returned by
    ignored_from_file(); its "variables" and "types" entries are
    handed to the respective checks.  It may be None.

    "known" is accepted but not used by this function.

    The underscore-prefixed keyword arguments exist so the checks can
    be replaced (e.g. in tests).  The defaults are lambdas so that the
    module-level functions are looked up when called, not at
    definition time.
    """
    if _ignored(variable,
                ignored and ignored.get('variables')):
        return True
    # "ignored" defaults to None, so it must be guarded here just like
    # it is for the "variables" lookup above (and in _get_rows()).
    return bool(_vartype_okay(variable.vartype,
                              ignored and ignored.get('types')))


def _is_ignored(variable, ignoredvars=None, *,
                _IGNORED=IGNORED,
                ):
    """Return the reason if the variable is a supported global.

    Return None if the variable is not a supported global.

    "ignoredvars", if given, maps variable IDs to reasons and is
    checked first.  After that the name-based "_IGNORED" table is
    consulted (only for variables with no funcname), followed by
    a fixed set of per-file special cases.
    """
    if ignoredvars and (reason := ignoredvars.get(variable.id)):
        return reason

    if variable.funcname is None:
        if reason := _IGNORED.get(variable.name):
            return reason

    filename = variable.filename
    name = variable.name

    # compiler
    if filename == 'Python/graminit.c':
        if variable.vartype.startswith('static state '):
            return 'compiler'
    if filename == 'Python/symtable.c':
        if variable.vartype.startswith('static identifier '):
            return 'compiler'
    if filename == 'Python/Python-ast.c':
        # These should be const.
        if name.endswith(('_field', '_attribute')):
            return 'compiler'

    # other
    if filename == 'Python/dtoa.c':
        # guarded by lock?
        if name in ('p5s', 'freelist', 'private_mem', 'pmem_next'):
            return 'dtoa is thread-safe?'
    if filename == 'Python/thread.c':
        # Threads do not become an issue until after these have been set
        # and these never get changed after that.
        if name in ('initialized', 'thread_debug'):
            return 'thread-safe'
    if filename == 'Python/getversion.c':
        if name == 'version':
            # Races are benign here, as well as unlikely.
            return BENIGN
    if filename == 'Python/fileutils.c':
        if name in ('force_ascii', 'ioctl_works', '_Py_open_cloexec_works'):
            return BENIGN
    if filename == 'Python/codecs.c':
        if name == 'ucnhash_CAPI':
            return BENIGN
    if filename == 'Python/bootstrap_hash.c':
        if name == 'getrandom_works':
            return BENIGN
    if filename == 'Objects/unicodeobject.c':
        if name == 'ucnhash_CAPI':
            return BENIGN
        if name == 'bloom_linebreak':
            # *mostly* benign
            return BENIGN
    if filename == 'Modules/getbuildinfo.c':
        if name == 'buildinfo':
            # The static is used for pre-allocation.
            return BENIGN
    if filename == 'Modules/posixmodule.c':
        if name in ('ticks_per_second', 'dup3_works'):
            return BENIGN
    if filename == 'Modules/timemodule.c':
        if name == 'ticks_per_second':
            return BENIGN
    if filename == 'Objects/longobject.c':
        if name in ('log_base_BASE', 'convwidth_base', 'convmultmax_base'):
            return BENIGN

    return None


def _is_vartype_okay(vartype, ignoredtypes=None):
    """Return the reason if the given variable type is acceptable.

    Return None if the type is matched by _is_object() or if none of
    the checks below apply.

    "vartype" is the type text of the variable's declaration.
    "ignoredtypes" is accepted but not currently used.
    """
    if _is_object(vartype):
        return None

    if vartype.startswith('static const '):
        return 'const'
    if vartype.startswith('const '):
        return 'const'

    # components for TypeObject definitions
    for name in ('PyMethodDef', 'PyGetSetDef', 'PyMemberDef'):
        if name in vartype:
            return 'const'
    for name in ('PyNumberMethods', 'PySequenceMethods', 'PyMappingMethods',
                 'PyBufferProcs', 'PyAsyncMethods'):
        if name in vartype:
            return 'const'
    for name in ('slotdef', 'newfunc'):
        if name in vartype:
            return 'const'

    # structseq
    for name in ('PyStructSequence_Desc', 'PyStructSequence_Field'):
        if name in vartype:
            return 'const'

    # other definitions
    if 'PyModuleDef' in vartype:
        return 'const'

    # thread-safe
    if '_Py_atomic_int' in vartype:
        return 'thread-safe'
    if 'pthread_condattr_t' in vartype:
        return 'thread-safe'

    # startup
    if '_Py_PreInitEntry' in vartype:
        return 'startup'

    # global
#    if 'PyMemAllocatorEx' in vartype:
#        return True

    # others
#    if 'PyThread_type_lock' in vartype:
#        return True

    # XXX ???
    # _Py_tss_t
    # _Py_hashtable_t
    # stack_t
    # _PyUnicode_Name_CAPI

    # functions
    if '(' in vartype and '[' not in vartype:
        return 'function pointer'

    # XXX finish!
    # * allow const values?
    #raise NotImplementedError
    return None


# Matches (from the start of the text) a type that _is_object() treats
# as an object.  The pattern is compiled with re.VERBOSE, so the
# whitespace and "#" comments inside it are not part of the match.
PYOBJECT_RE = re.compile(r'''
        ^
        (
            # must start with "static "
            static \s+
            (
                identifier
            )
            \b
        ) |
        (
            # may start with "static "
            ( static \s+ )?
            (
                .*
                (
                    PyObject |
                    PyTypeObject |
                    _? Py \w+ Object |
                    _PyArg_Parser |
                    _Py_Identifier |
                    traceback_t |
                    PyAsyncGenASend |
                    _PyAsyncGenWrappedValue |
                    PyContext |
                    method_cache_entry
                )
                \b
            ) |
            (
                (
                    _Py_IDENTIFIER |
                    _Py_static_string
                )
                [(]
            )
        )
        ''', re.VERBOSE)


def _is_object(vartype):
    """Return True if the given type text looks like an object type.

    A type mentioning "PyDictKeysObject" is never treated as an object.
    Otherwise the type counts if PYOBJECT_RE matches it or if it ends
    with " _Py_FalseStruct" or " _Py_TrueStruct".
    """
    # Checked first because PYOBJECT_RE would otherwise match it
    # (via the "Py...Object" alternative).
    if 'PyDictKeysObject' in vartype:
        return False
    if PYOBJECT_RE.match(vartype):
        return True
    if vartype.endswith((' _Py_FalseStruct', ' _Py_TrueStruct')):
        return True

    # XXX Add more?

    #for part in vartype.split():
    #    # XXX const is automatic True?
    #    if part == 'PyObject' or part.startswith('PyObject['):
    #        return True
    return False


def ignored_from_file(infile, *,
                      _read_tsv=read_tsv,
                      ):
    """Return the ignored globals listed in the given TSV file.

    The result is a dict with a single "variables" entry, which maps
    the ID of each ignored variable to the reason given in the file.

    Each row must have the columns named in IGNORED_COLUMNS.  A funcname
    that is empty or "-" is treated as None.  ValueError is raised for
    a row whose kind is anything other than "variable".
    """
    ignored = {
        'variables': {},
        #'types': {},
        #'constants': {},
        #'macros': {},
    }
    for row in _read_tsv(infile, IGNORED_HEADER):
        filename, funcname, name, kind, reason = row
        if not funcname or funcname == '-':
            funcname = None
        varid = ID(filename, funcname, name)
        if kind == 'variable':
            values = ignored['variables']
        else:
            raise ValueError(f'unsupported kind in row {row}')
        values[varid] = reason
    return ignored


##################################
# generate

def _get_row(varid, reason):
    """Return the TSV row (matching IGNORED_COLUMNS) for the given ID."""
    return (
        varid.filename,
        # A missing funcname is written as "-" (see ignored_from_file()).
        varid.funcname or '-',
        varid.name,
        'variable',
        str(reason),
    )


def _get_rows(variables, ignored=None, *,
              _as_row=_get_row,
              _is_ignored=_is_ignored,
              _vartype_okay=_is_vartype_okay,
              ):
    """Yield a TSV row for each of the variables that has a reason.

    The reason comes from _is_ignored() or, failing that, from
    _is_vartype_okay().  Variables with no reason are skipped.

    Each reported variable is also printed, and the total number of
    rows is printed once all the variables have been processed.
    """
    count = 0
    for variable in variables:
        reason = _is_ignored(variable,
                             ignored and ignored.get('variables'),
                             )
        if not reason:
            reason = _vartype_okay(variable.vartype,
                                   ignored and ignored.get('types'))
        if not reason:
            continue

        print('   ', variable, repr(reason))
        yield _as_row(variable.id, reason)
        count += 1
    print(f'total: {count}')


def _generate_ignored_file(variables, filename=None, *,
                           _generate_rows=_get_rows,
                           _write_tsv=write_tsv,
                           ):
    """Write the rows for the given variables to a TSV file.

    If no filename is given then IGNORED_FILE + ".new" is used.
    """
    if not filename:
        filename = IGNORED_FILE + '.new'
    rows = _generate_rows(variables)
    _write_tsv(filename, IGNORED_HEADER, rows)


if __name__ == '__main__':
    from cpython import SOURCE_DIRS
    from cpython.known import (
        from_file as known_from_file,
        DATA_FILE as KNOWN_FILE,
        )
    # XXX This is wrong!
    from . import find
    known = known_from_file(KNOWN_FILE)
    knownvars = (known or {}).get('variables')
    variables = find.globals_from_binary(knownvars=knownvars,
                                         dirnames=SOURCE_DIRS)

    _generate_ignored_file(variables)