• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1
2from collections import namedtuple
3import glob
4import os.path
5import re
6import shutil
7import sys
8import subprocess
9
10
# Baseline verbosity; parse_args() shifts it by the -v/-q counts.
VERBOSITY = 2

# Directory layout, derived from where this script lives.
C_GLOBALS_DIR = os.path.dirname(os.path.abspath(__file__))
TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)
GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')

# Sub-directories that find_capi_vars() scans for .h/.c files.
SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']

# Matches a PyAPI_DATA(...) declaration line; the single group captures the
# declared name, or a ", "-separated list of names.
CAPI_REGEX = re.compile(
    r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$')
21
22
# Symbol names that find_vars() always drops from its results.
IGNORED_VARS = {
    '_DYNAMIC',
    '_GLOBAL_OFFSET_TABLE_',
    '_edata',
    '_end',
    '__JCR_END__',
    '__JCR_LIST__',
    '__TMC_END__',
    '__bss_start',
    '__data_start',
    '__dso_handle',
}
35
36
def find_capi_vars(root):
    """Map each PyAPI_DATA variable name to the file that declares it.

    Every ``.h`` and ``.c`` file found (recursively) under the SOURCE_DIRS
    sub-directories of *root* is scanned with _find_capi_vars().

    Returns a dict of ``{name: filename}``.  When a name shows up a second
    time, the new file must not be a ``.c`` file and the previously
    recorded one must be; the new filename then replaces the old one.
    Any other duplicate trips an assertion.
    """
    capi_vars = {}
    for dirname in SOURCE_DIRS:
        # Search below the caller-supplied root, not the module-level
        # ROOT_DIR, so the function honors its argument.
        pattern = os.path.join(root, dirname, '**/*.[hc]')
        for filename in glob.glob(pattern, recursive=True):
            with open(filename) as file:
                for name in _find_capi_vars(file):
                    if name in capi_vars:
                        assert not filename.endswith('.c')
                        assert capi_vars[name].endswith('.c')
                    capi_vars[name] = filename
    return capi_vars
49
50
def _find_capi_vars(lines):
    """Yield every variable name declared with PyAPI_DATA in *lines*.

    Only lines that start with ``PyAPI_DATA`` are considered.  Each such
    line must not contain ``{`` and must match CAPI_REGEX (both enforced
    with assertions); a line declaring several ", "-separated names yields
    each of them.
    """
    for text in lines:
        if text.startswith('PyAPI_DATA'):
            assert '{' not in text
            found = CAPI_REGEX.match(text)
            assert found
            (declared,) = found.groups()
            yield from declared.split(', ')
61
62
63def _read_global_names(filename):
64    # These variables are shared between all interpreters in the process.
65    with open(filename) as file:
66        return {line.partition('#')[0].strip()
67                for line in file
68                if line.strip() and not line.startswith('#')}
69
70
def _is_global_var(name, globalnames):
    """Return True if *name* should be treated as a known global.

    A name qualifies when any of the naming heuristics accepts it
    (autogen, type, module, exception, compiler -- tried in that order),
    or when it is listed in *globalnames*.
    """
    heuristics = (
        _is_autogen_var,
        _is_type_var,
        _is_module,
        _is_exception,
        _is_compiler,
    )
    if any(accepts(name) for accepts in heuristics):
        return True
    return name in globalnames
83
84
85def _is_autogen_var(name):
86    return (
87        name.startswith('PyId_') or
88        '.' in name or
89        # Objects/typeobject.c
90        name.startswith('op_id.') or
91        name.startswith('rop_id.') or
92        # Python/graminit.c
93        name.startswith('arcs_') or
94        name.startswith('states_')
95        )
96
97
98def _is_type_var(name):
99    if name.endswith(('Type', '_Type', '_type')):  # XXX Always a static type?
100        return True
101    if name.endswith('_desc'):  # for structseq types
102        return True
103    return (
104        name.startswith('doc_') or
105        name.endswith(('_doc', '__doc__', '_docstring')) or
106        name.endswith('_methods') or
107        name.endswith('_fields') or
108        name.endswith(('_memberlist', '_members')) or
109        name.endswith('_slots') or
110        name.endswith(('_getset', '_getsets', '_getsetlist')) or
111        name.endswith('_as_mapping') or
112        name.endswith('_as_number') or
113        name.endswith('_as_sequence') or
114        name.endswith('_as_buffer') or
115        name.endswith('_as_async')
116        )
117
118
119def _is_module(name):
120    if name.endswith(('_functions', 'Methods', '_Methods')):
121        return True
122    if name == 'module_def':
123        return True
124    if name == 'initialized':
125        return True
126    return name.endswith(('module', '_Module'))
127
128
129def _is_exception(name):
130    # Other vars are enumerated in globals-core.txt.
131    if not name.startswith(('PyExc_', '_PyExc_')):
132        return False
133    return name.endswith(('Error', 'Warning'))
134
135
136def _is_compiler(name):
137    return (
138        # Python/Python-ast.c
139        name.endswith('_type') or
140        name.endswith('_singleton') or
141        name.endswith('_attributes')
142        )
143
144
class Var(namedtuple('Var', 'name kind scope capi filename')):
    """A variable symbol parsed from one line of ``nm`` output.

    Fields:
      name     -- the symbol name
      kind     -- the single-character nm symbol type
      scope    -- 'global' when _is_global_var() accepts the name, else None
      capi     -- True when the name is in the given PyAPI_DATA mapping
      filename -- relative path of the file named on the nm line, or
                  '~???~' when the line carries no file information
    """

    @classmethod
    def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
        """Parse one line of ``nm --line-numbers`` output.

        The line is expected to look like ``<address> <kind> <name>``,
        optionally followed by a tab and ``<filename>:<lineno>``.

        *expected* and *ignored* are collections of nm kinds; either may
        be None, which is treated as empty.  *capi_vars* (may be None) is
        a container of PyAPI_DATA names and *globalnames* is passed on to
        _is_global_var().

        Returns a Var, or None when the kind is in *ignored* or the name
        is an autogen var.  Raises RuntimeError for a kind that is in
        neither *ignored* nor *expected*.
        """
        _, _, line = line.partition(' ')  # strip off the address
        line = line.strip()
        kind, _, line = line.partition(' ')
        # The parentheses matter: "kind in ignored or ()" would parse as
        # "(kind in ignored) or ()" and blow up on a None collection.
        if kind in (ignored or ()):
            return None
        if kind not in (expected or ()):
            raise RuntimeError('unsupported NM type {!r}'.format(kind))

        name, _, filename = line.partition('\t')
        name = name.strip()
        if _is_autogen_var(name):
            return None
        scope = 'global' if _is_global_var(name, globalnames) else None
        # Always a real bool (previously an empty tuple when not found).
        capi = name in (capi_vars or ())
        if filename:
            # Drop the ":<lineno>" suffix and make the path relative.
            filename = os.path.relpath(filename.partition(':')[0])
        return cls(name, kind, scope, capi, filename or '~???~')

    @property
    def external(self):
        """True when the nm kind is an uppercase letter."""
        return self.kind.isupper()
173
174
def find_vars(root, globals_filename=GLOBALS_FILE):
    """Yield a Var for each variable symbol in the ``python`` binary
    located directly under *root*, skipping names in IGNORED_VARS.

    *globals_filename* is read with _read_global_names() to decide which
    variables get the 'global' scope.

    Being a generator, nothing runs until iteration starts.  Raises
    RuntimeError when the binary does not exist and NotImplementedError
    when no ``nm`` executable can be found.
    """
    binary = os.path.join(root, 'python')
    if not os.path.exists(binary):
        raise RuntimeError('python binary missing (need to build it first?)')
    capi_vars = find_capi_vars(root)
    globalnames = _read_global_names(globals_filename)

    nm = shutil.which('nm')
    if nm is None:
        # XXX Use dumpbin.exe /SYMBOLS on Windows.
        raise NotImplementedError

    for var in _find_var_symbols(binary, nm, capi_vars, globalnames):
        if var.name not in IGNORED_VARS:
            yield var
191
192
# Single-character symbol kinds from nm output, grouped by how
# Var.parse_nm() is told to treat them (see _find_var_symbols()).
NM_FUNCS = {'T', 't'}
NM_PUBLIC_VARS = {'B', 'D'}
NM_PRIVATE_VARS = {'b', 'd'}
NM_VARS = NM_PUBLIC_VARS.union(NM_PRIVATE_VARS)
NM_DATA = {'R', 'r'}
NM_OTHER = set('ACGgiINpSsuUVvWw-?')
NM_IGNORED = NM_FUNCS.union(NM_DATA, NM_OTHER)
200
201
def _find_var_symbols(python, nm, capi_vars, globalnames):
    """Run *nm* with ``--line-numbers`` on the *python* binary and yield
    a Var for every output line that Var.parse_nm() does not discard.
    """
    output = subprocess.check_output([nm, '--line-numbers', python])
    text = output.decode('utf-8')
    for line in text.splitlines():
        var = Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars, globalnames)
        if var is not None:
            yield var
212
213
214#######################################
215
class Filter(namedtuple('Filter', 'name op value action')):
    """A single include/exclude rule applied to an attribute of a var.

    *action* is '+' (keep vars that match) or '-' (keep vars that do not).
    *op* is '' (match when the attribute is truthy) or '=' (match when the
    attribute equals *value*).
    """

    @classmethod
    def parse(cls, raw):
        """Build a Filter from text of the form ``[+|-]NAME[=VALUE]``.

        The action defaults to '+' when no sign is given.
        """
        if raw.startswith(('+', '-')):
            action, raw = raw[0], raw[1:]
        else:
            action = '+'
        # XXX Support < and >?
        name, op, value = raw.partition('=')
        return cls(name, op, value, action)

    def check(self, var):
        """Return True if *var* passes this filter.

        A missing attribute is treated as None.  Raises NotImplementedError
        for an unknown op or action.
        """
        actual = getattr(var, self.name, None)
        if self.op == '=':
            matched = (actual == self.value)
        elif not self.op:
            matched = bool(actual)
        else:
            raise NotImplementedError

        if self.action not in ('+', '-'):
            raise NotImplementedError
        return matched if self.action == '+' else not matched
243
244
def filter_var(var, filters):
    """Return True if *var* passes every filter in *filters*.

    An empty *filters* accepts everything; checking stops at the first
    filter that rejects *var*.
    """
    return all(rule.check(var) for rule in filters)
250
251
def make_sort_key(spec):
    """Return a key function that sorts vars by the columns in *spec*.

    *spec* is a sequence of column (attribute) names.  A column written
    with a leading underscore (e.g. ``_name``) is compared with leading
    underscores stripped from its value.

    The key is a tuple of strings, one per column.  Values that are not
    strings are converted: falsy ones (None, False, ``()``) become '' and
    anything else becomes ``str(value)``, so columns such as ``scope`` or
    ``capi`` can be sorted on without raising.
    """
    columns = [(col.strip('_'), '_' if col.startswith('_') else '')
               for col in spec]

    def normalize(value, prefix):
        if isinstance(value, str):
            # lstrip('') is a no-op, so only "_col" specs strip anything.
            return value.lstrip(prefix)
        # Non-string values have no lstrip(); map them onto strings so
        # every key component is comparable with every other.
        return str(value or '')

    def sort_key(var):
        return tuple(normalize(getattr(var, col), prefix)
                     for col, prefix in columns)
    return sort_key
259
260
def make_groups(allvars, spec):
    """Bucket *allvars* by the value of the attribute named *spec*.

    Returns a dict mapping ``'<spec>: <value>'`` labels to lists of vars,
    with both labels and list members in first-seen order.
    """
    buckets = {}
    for var in allvars:
        label = '{}: {}'.format(spec, getattr(var, spec))
        buckets.setdefault(label, []).append(var)
    return buckets
273
274
def format_groups(groups, columns, fmts, widths):
    """Yield ``(text, count)`` pairs rendering each group as its own table.

    Groups are emitted in sorted label order.  Each one starts with a
    blank line and a ``  # <label>`` heading (both with count 0), followed
    by whatever format_vars() yields for the group's vars.
    """
    for label, members in sorted(groups.items(), key=lambda item: item[0]):
        yield '', 0
        yield '  # {}'.format(label), 0
        yield from format_vars(members, columns, fmts, widths)
281
282
def format_vars(allvars, columns, fmts, widths):
    """Yield ``(text, count)`` pairs rendering *allvars* as a text table.

    *columns* lists the attributes to show, *fmts* maps each column to a
    ``str.format`` field such as ``'{:50}'`` and *widths* maps each column
    to its width (used only for the divider lines).

    The output is: a header row of upper-cased, centered column names, a
    divider, one row per var, and a closing divider.  Only var rows carry
    a count of 1; everything else has count 0.  In a row, a value that is
    exactly True is shown as ``X`` and any other falsy value as blank.
    """
    fmt = ' '.join(fmts[col] for col in columns)
    fmt = ' ' + fmt.replace(' ', '   ') + ' '  # for div margin
    # Center every header cell.  Existing '^' markers are removed first;
    # otherwise '{:^8}' would become '{:^^8}', i.e. "pad with '^'".
    # NOTE(review): fields using another explicit alignment ('<', '>')
    # are not handled here -- none of the visible callers pass one.
    header_fmt = fmt.replace(':^', ':').replace(':', ':^')
    header = header_fmt.format(*(col.upper() for col in columns))
    yield header, 0
    div = ' '.join('-'*(widths[col]+2) for col in columns)
    yield div, 0
    for var in allvars:
        values = (getattr(var, col) for col in columns)
        row = fmt.format(*('X' if val is True else val or ''
                           for val in values))
        yield row, 1
    yield div, 0
296
297
298#######################################
299
# All supported columns, in their default display order.
COLUMNS = 'name,external,capi,scope,filename'
COLUMN_NAMES = COLUMNS.split(',')

# Display width per column: the length of the column's own name unless
# overridden below.
COLUMN_WIDTHS = dict(
    {col: len(col) for col in COLUMN_NAMES},
    name=50,
    scope=7,
    filename=40,
)

# str.format() field per column.  Columns that are exactly as wide as
# their name are centered; the rest use the default alignment.
COLUMN_FORMATS = {}
for col in COLUMN_WIDTHS:
    COLUMN_FORMATS[col] = (
        '{:^%s}' if COLUMN_WIDTHS[col] == len(col) else '{:%s}'
    ) % COLUMN_WIDTHS[col]
315
316
def _parse_filters_arg(raw, error):
    """Parse a comma-separated ``--filters`` value into Filter objects.

    Entries are stripped and empty ones skipped.  *error* is called with
    a message for every entry that fails to parse or that names a column
    not in COLUMN_NAMES; if *error* returns instead of raising/exiting,
    that entry is simply left out of the result.

    Returns the list of valid filters, in order.
    """
    filters = []
    for value in raw.split(','):
        value = value.strip()
        if not value:
            continue
        try:
            parsed = Filter.parse(value)
            if parsed.name not in COLUMN_NAMES:
                raise ValueError(
                    'unsupported column {!r}'.format(parsed.name))
        except Exception as e:
            # Name the offending entry rather than the whole argument.
            error('bad filter {!r}: {}'.format(value, e))
        else:
            # Only reached for a good entry, so a rejected (or never
            # bound) filter can no longer leak into the result.
            filters.append(parsed)
    return filters
331
332
def _parse_columns_arg(raw, error):
    """Split a comma-separated ``--columns`` value into a list.

    *error* is called once for each entry that is not in COLUMN_NAMES;
    the full list (including any such entries) is returned.
    """
    columns = raw.split(',')
    unknown = (name for name in columns if name not in COLUMN_NAMES)
    for name in unknown:
        error('unsupported column {!r}'.format(name))
    return columns
339
340
def _parse_sort_arg(raw, error):
    """Split a comma-separated ``--sort`` value into a list.

    Entries may carry leading underscores, which are ignored when the
    name is validated.  *error* is called once for each entry whose name
    is not in COLUMN_NAMES; the full list is returned unchanged.
    """
    sort = raw.split(',')
    unknown = (entry for entry in sort
               if entry.lstrip('_') not in COLUMN_NAMES)
    for entry in unknown:
        error('unsupported column {!r}'.format(entry))
    return sort
347
348
349def _parse_group_arg(raw, error):
350    if not raw:
351        return raw
352    group = raw
353    if group not in COLUMN_NAMES:
354        error('unsupported column {!r}'.format(group))
355    if group != 'filename':
356        error('unsupported group {!r}'.format(group))
357    return group
358
359
def parse_args(argv=None):
    """Parse the command line into a namespace suitable for ``main(**vars(ns))``.

    *argv* defaults to ``sys.argv[1:]``.  After parsing:

    * ``verbose``/``quiet`` are replaced by a single ``verbosity`` value
      (VERBOSITY + verbose - quiet, never below 0);
    * ``group`` defaults to 'filename' when the sort spec starts with
      'filename' and no group was given;
    * ``rc`` defaults to 1 only when the raw filter text mentions 'core'
      without containing '-scope=core', and to 0 otherwise;
    * ``filters``, ``columns``, ``sort`` and ``group`` are validated and
      converted by the ``_parse_*_arg()`` helpers, which report problems
      through ``parser.error``.
    """
    import argparse

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    add = parser.add_argument

    add('-v', '--verbose', action='count', default=0)
    add('-q', '--quiet', action='count', default=0)

    add('--filters', default='-scope',
        help='[[-]<COLUMN>[=<GLOB>]] ...')

    add('--columns', default=COLUMNS,
        help='a comma-separated list of columns to show')
    add('--sort', default='filename,_name',
        help='a comma-separated list of columns to sort')
    add('--group',
        help='group by the given column name (- to not group)')

    add('--rc-on-match', dest='rc', type=int)

    add('filename', nargs='?', default=GLOBALS_FILE)

    args = parser.parse_args(argv)

    # Fold the two counters into one setting and remove them, since the
    # namespace is later splatted into main(), which takes neither.
    args.verbosity = max(0, VERBOSITY + args.verbose - args.quiet)
    del args.verbose
    del args.quiet

    if not args.group and args.sort.startswith('filename'):
        args.group = 'filename'

    if args.rc is None:
        # Checked against the raw filter string, before it is parsed.
        wants_core = ('core' in args.filters and
                      '-scope=core' not in args.filters)
        args.rc = 1 if wants_core else 0

    error = parser.error
    args.filters = _parse_filters_arg(args.filters, error)
    args.columns = _parse_columns_arg(args.columns, error)
    args.sort = _parse_sort_arg(args.sort, error)
    args.group = _parse_group_arg(args.group, error)

    return args
405
406
def main(root=ROOT_DIR, filename=GLOBALS_FILE,
         filters=None, columns=COLUMN_NAMES, sort=None, group=None,
         verbosity=VERBOSITY, rc=1):
    """Report the variables found in the built binary under *root*.

    Vars come from find_vars(root, filename), are narrowed by *filters*
    (None means no filtering), optionally sorted by the *sort* column
    spec and optionally grouped by the *group* column.  The resulting
    table is printed only when *verbosity* is at least 2.

    Returns *rc* (after printing an error to stderr) when at least one
    var was reported and *rc* is non-zero; otherwise returns 0.
    """
    if verbosity >= 2:
        log = print
    else:
        def log(msg):
            pass

    # The default of None must behave like "no filters", not crash.
    if filters is None:
        filters = ()
    # Work on a private copy: removing the group column below must not
    # alter the caller's list or the shared COLUMN_NAMES default.
    columns = list(columns)

    allvars = (var
               for var in find_vars(root, filename)
               if filter_var(var, filters))
    if sort:
        allvars = sorted(allvars, key=make_sort_key(sort))

    if group:
        # The group value is shown in each group's heading, so it is not
        # repeated as a column.
        if group in columns:
            columns.remove(group)
        grouped = make_groups(allvars, group)
        lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
    else:
        lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)

    # Each yielded line carries a count: 1 for a var row, 0 for chrome.
    total = 0
    for line, count in lines:
        total += count
        log(line)
    log('\ntotal: {}'.format(total))

    if total and rc:
        print('ERROR: found unsafe globals', file=sys.stderr)
        return rc
    return 0
441
442
if __name__ == '__main__':
    # Run as a script: the parsed options map 1:1 onto main()'s keywords,
    # and main()'s return value becomes the process exit status.
    options = vars(parse_args())
    sys.exit(main(**options))
447