#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#   Doc/library/token-list.inc
#   Include/token.h
#   Parser/token.c
#   Lib/token.py


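# Token values below NT_OFFSET are terminal symbols; the grammar's
# non-terminals start at NT_OFFSET (see ISTERMINAL/ISNONTERMINAL below).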
NT_OFFSET = 256

def load_tokens(path):
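    """Parse a Grammar/Tokens-style file.

    Return (tok_names, ERRORTOKEN, string_to_tok): the token names in
    declaration order, the value of the ERRORTOKEN token, and a mapping
    of exact token strings (e.g. '+=') to their token values.
    """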
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok


def update_file(file, content):
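    """Write content to file only if it differs from the current contents.

    Return True if the file was (re)written, False if it was already
    up to date.
    """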
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""

def make_h(infile, outfile='Include/token.h'):
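    """Regenerate the C header (Include/token.h by default) with a
    #define for each token up to and including ERRORTOKEN."""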
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
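    """Render a (possibly nested) mapping of characters to token names as a
    C switch statement on c<n>; nested dicts become switches on c<n+1>, etc.
    """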
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)

def make_c(infile, outfile='Parser/token.c'):
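    """Regenerate Parser/token.c (by default): the token name table and the
    PyToken_OneChar/TwoChars/ThreeChars lookup functions."""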
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
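    # '<>' is accepted by the C tokenizer as an alias for '!=', so the
    # generated lookup tables must cover it as well.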
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
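    """Regenerate the reST token list (Doc/library/token-list.inc by default)."""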
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))


token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''

def make_py(infile, outfile='Lib/token.py'):
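    """Regenerate Lib/token.py (by default) with the token constants and
    the EXACT_TOKEN_TYPES mapping of token strings to token types."""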
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
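    """Dispatch to make_<op> ('h', 'c', 'rst' or 'py') with infile and any
    extra arguments (typically an output path)."""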
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])