#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#   Doc/library/token-list.inc
#   Include/internal/pycore_token.h
#   Parser/token.c
#   Lib/token.py


SCRIPT_NAME = 'Tools/build/generate_token.py'
AUTO_GENERATED_BY_SCRIPT = f'Auto-generated by {SCRIPT_NAME}'
NT_OFFSET = 256

def load_tokens(path):
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok
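
# For illustration only: a Grammar/Tokens entry is either a bare name, e.g.
#     NAME
# or a name plus a quoted fixed spelling, e.g. (assuming a typical entry)
#     LPAR                    '('
# load_tokens() returns the ordered token names, the numeric value of
# ERRORTOKEN, and a mapping from each fixed spelling (e.g. '(') to its
# token value.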


def update_file(file, content):
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True
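
# Note: update_file() rewrites the target only when the generated content
# differs, so unchanged outputs keep their timestamps and the make_* callers
# below only report files that were actually regenerated.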


token_h_template = f"""\
// {AUTO_GENERATED_BY_SCRIPT}
"""
token_h_template += """\

/* Token types */
#ifndef Py_INTERNAL_TOKEN_H
#define Py_INTERNAL_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)
#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                 (x) == NEWLINE   || \\
                                 (x) == INDENT    || \\
                                 (x) == DEDENT)
#define ISSTRINGLIT(x)          ((x) == STRING           || \\
                                 (x) == FSTRING_MIDDLE)


// Export these 4 symbols for 'test_peg_generator'
PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) _PyToken_OneChar(int);
PyAPI_FUNC(int) _PyToken_TwoChars(int, int);
PyAPI_FUNC(int) _PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif  // !Py_INTERNAL_TOKEN_H
"""

def make_h(infile, outfile='Include/internal/pycore_token.h'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))
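
# For illustration (assuming ENDMARKER is the first entry in Grammar/Tokens),
# each emitted define looks like:
#     #define ENDMARKER       0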


token_c_template = f"""\
/* {AUTO_GENERATED_BY_SCRIPT} */
"""
token_c_template += """\

#include "Python.h"
#include "pycore_token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
_PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
_PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
_PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)
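
# For illustration only: a two-character mapping such as
# {'!': {'=': 'NOTEQUAL'}} (assuming '!=' is named NOTEQUAL in Grammar/Tokens)
# would produce nested C switches along these lines:
#     switch (c1) {
#     case '!':
#         switch (c2) {
#         case '=': return NOTEQUAL;
#         }
#         break;
#     }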

def make_c(infile, outfile='Parser/token.c'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # Accept '<>' as an alternative spelling of '!=' (same token value).
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = f"""\
.. {AUTO_GENERATED_BY_SCRIPT}
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))
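
# For illustration (assuming Grammar/Tokens contains an entry like "LPAR '('"),
# the generated reST for a token with a fixed spelling looks like:
#     .. data:: LPAR
#
#        Token value for ``"("``.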


token_py_template = f'''\
"""Token constants."""
# {AUTO_GENERATED_BY_SCRIPT}
'''
token_py_template += '''
__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF',
           'EXACT_TOKEN_TYPES']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''

def make_py(infile, outfile='Lib/token.py'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))
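
# For illustration (assuming Grammar/Tokens begins with ENDMARKER and NAME and
# contains "LPAR '('"), the generated Lib/token.py holds constants such as
#     ENDMARKER = 0
#     NAME = 1
# and EXACT_TOKEN_TYPES entries such as
#     '(': LPAR,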


def main(op, infile='Grammar/Tokens', *args):
    make = globals()['make_' + op]
    make(infile, *args)


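# A typical invocation, run from the CPython source tree, would be something like
#     python3 Tools/build/generate_token.py py Grammar/Tokens Lib/token.py
# where the first argument selects one of the make_* functions above
# ('h', 'c', 'rst' or 'py') and the remaining arguments are optional.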
if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])