#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#   Doc/library/token-list.inc
#   Include/token.h
#   Parser/token.c
#   Lib/token.py


NT_OFFSET = 256

def load_tokens(path):
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok


def update_file(file, content):
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)
#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                 (x) == NEWLINE || \\
                                 (x) == INDENT || \\
                                 (x) == DEDENT)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""

def make_h(infile, outfile='Include/token.h'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)

def make_c(infile, outfile='Parser/token.c'):
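    """Regenerate Parser/token.c from the Tokens file.

    Builds nested one-, two- and three-character lookup tables from the
    operator strings so that the PyToken_OneChar/TwoChars/ThreeChars switch
    statements in token_c_template can be filled in, then rewrites the
    output file only if its content changed.
    """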
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))


token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''

def make_py(infile, outfile='Lib/token.py'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
                     "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])
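
# Illustrative invocations, inferred from main() and the default output paths
# above (the build system may pass explicit paths instead):
#
#   python3 Tools/scripts/generate_token.py h   Grammar/Tokens Include/token.h
#   python3 Tools/scripts/generate_token.py c   Grammar/Tokens Parser/token.c
#   python3 Tools/scripts/generate_token.py rst Grammar/Tokens Doc/library/token-list.inc
#   python3 Tools/scripts/generate_token.py py  Grammar/Tokens Lib/token.py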