#! /usr/bin/env python3
# This script generates token-related files from Grammar/Tokens:
#
#   Doc/library/token-list.inc
#   Include/token.h
#   Parser/token.c
#   Lib/token.py
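#
# Usage sketch (the op name selects one of the make_* generators defined
# below; infile and outfile fall back to the defaults shown in the code):
#
#   python3 Tools/scripts/generate_token.py h  [Grammar/Tokens [Include/token.h]]
#   python3 Tools/scripts/generate_token.py py [Grammar/Tokens [Lib/token.py]]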


NT_OFFSET = 256

def load_tokens(path):
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            # A token's value is its position in the file.
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            # The optional second field is a quoted string literal giving the
            # token's exact text, e.g. LPAR '('.
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok
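
# For illustration, assume a Grammar/Tokens fragment in the
# "NAME [quoted-string]" format parsed above:
#
#   ENDMARKER
#   NAME
#   LPAR        '('
#   NOTEQUAL    '!='
#   ERRORTOKEN
#
# load_tokens() would then return
#   tok_names     == ['ENDMARKER', 'NAME', 'LPAR', 'NOTEQUAL', 'ERRORTOKEN']
#   ERRORTOKEN    == 4
#   string_to_tok == {'(': 2, '!=': 3}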


def update_file(file, content):
    # Only rewrite the file when the generated content actually changed;
    # leaving an up-to-date file untouched keeps its timestamp, so build
    # tools don't see a spurious modification.
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)
#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                 (x) == NEWLINE   || \\
                                 (x) == INDENT    || \\
                                 (x) == DEDENT)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""

def make_h(infile, outfile='Include/token.h'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    # Only tokens up to and including ERRORTOKEN get C #defines; the
    # remaining names exist solely for tokenize.py.
    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))


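# With the illustrative five-token fragment shown after load_tokens(), the
# generated defines would read roughly:
#
#   #define ENDMARKER       0
#   #define NAME            1
#   #define LPAR            2
#   #define NOTEQUAL        3
#   #define ERRORTOKEN      4
#   #define N_TOKENS        5
#   #define NT_OFFSET       256
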
token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
    # Render a (possibly nested) {char: token-or-dict} mapping as a C switch
    # on c<n>; nested dicts become nested switches on the next character.
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)

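# For illustration, generate_chars_to_token({'!': {'=': 'NOTEQUAL'}}) returns
# C source shaped like:
#
#     switch (c1) {
#     case '!':
#         switch (c2) {
#         case '=': return NOTEQUAL;
#         }
#         break;
#     }
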
def make_c(infile, outfile='Parser/token.c'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # '<>' is an alias for '!=' (it is accepted by the tokenizer for the
    # barry_as_FLUFL future import).
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    # Token names at or above ERRORTOKEN are bracketed, e.g. "<ERRORTOKEN>".
    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


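# With the illustrative fragment, the generated _PyParser_TokenNames array
# would end with:
#
#     "NOTEQUAL",
#     "<ERRORTOKEN>",
#     "<N_TOKENS>",
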
token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # Invert the mapping so each token value can be annotated with its
    # exact string, when it has one.
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))


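# Illustrative output for a token that has an exact string, e.g. NOTEQUAL:
#
#   .. data:: NOTEQUAL
#
#      Token value for ``"!="``.
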
213token_py_template = '''\
214"""Token constants."""
215# Auto-generated by Tools/scripts/generate_token.py
216
217__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
218
219%s
220N_TOKENS = %d
221# Special definitions for cooperation with parser
222NT_OFFSET = %d
223
224tok_name = {value: name
225            for name, value in globals().items()
226            if isinstance(value, int) and not name.startswith('_')}
227__all__.extend(tok_name.values())
228
229EXACT_TOKEN_TYPES = {
230%s
231}
232
233def ISTERMINAL(x):
234    return x < NT_OFFSET
235
236def ISNONTERMINAL(x):
237    return x >= NT_OFFSET
238
239def ISEOF(x):
240    return x == ENDMARKER
241'''
242
def make_py(infile, outfile='Lib/token.py'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    # Slot the explanatory comment in just before the ERRORTOKEN definition.
    constants.insert(ERRORTOKEN,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))


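# With the illustrative fragment, the generated constants section would read:
#
#   ENDMARKER = 0
#   NAME = 1
#   LPAR = 2
#   NOTEQUAL = 3
#   # These aren't used by the C tokenizer but are needed for tokenize.py
#   ERRORTOKEN = 4
#
# and EXACT_TOKEN_TYPES would contain '!=' and '(' mapped to NOTEQUAL and LPAR.
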
def main(op, infile='Grammar/Tokens', *args):
    # op selects one of the generators above: 'h', 'c', 'rst' or 'py'.
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])