#! /usr/bin/env python3
# This script generates token-related files from Grammar/Tokens:
#
#   Doc/library/token-list.inc
#   Include/token.h
#   Parser/token.c
#   Lib/token.py


NT_OFFSET = 256

def load_tokens(path):
    """Parse Grammar/Tokens; return (names, ERRORTOKEN value, string->value map)."""
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok


def update_file(file, content):
    """Write content to file, but only if it differs from what is already there."""
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""

def make_h(infile, outfile='Include/token.h'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
    """Render a nested char->token-name mapping as nested C switch statements."""
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)

def make_c(infile, outfile='Parser/token.c'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    string_to_tok['<>'] = string_to_tok['!=']
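    # Build one trie per token-string length, keyed character by character;
    # e.g. the three-char token '**=' ends up as
    # chars_to_token[3]['*']['*']['='] -> 'DOUBLESTAREQUAL'.
    # generate_chars_to_token() above unrolls each trie into nested C switches.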
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))


token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''

def make_py(infile, outfile='Lib/token.py'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
                     "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
    """Dispatch to make_<op>(); op is one of 'h', 'c', 'rst' or 'py'."""
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])
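
# Example invocation (a sketch, run from a CPython checkout; the op argument
# 'h', 'c', 'rst' or 'py' selects the matching make_* function via main()):
#
#   $ python3 Tools/scripts/generate_token.py py Grammar/Tokens Lib/token.py
#
# update_file() compares old and new content first, so an output file is only
# rewritten (and its timestamp bumped) when the generated text actually changes.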