#!/usr/bin/python
"""Utility to generate files to benchmark"""

# Copyright Abel Sinkovics (abel@sinkovics.hu) 2016.
# Distributed under the Boost Software License, Version 1.0.
#    (See accompanying file LICENSE_1_0.txt or copy at
#          http://www.boost.org/LICENSE_1_0.txt)

import argparse
import os
import string
import random
import re
import json

import Cheetah.Template
import chars


def regex_to_error_msg(regex):
    """Format a human-readable error message from a regex

    Strips the regex syntax produced by in_comment() and by the property
    patterns of Template (anchors, groups, whitespace and digit classes) and
    unescapes the escaped punctuation, so the result resembles the comment
    line the regex was looking for.
    """
    # Drop unescaped grouping parentheses (keeping the character before them).
    without_groups = re.sub('([^\\\\])[()]', '\\1', regex)
    return (
        without_groups
        .replace('[ \t]*$', '')
        .replace('^', '')
        .replace('$', '')
        .replace('[ \t]*', ' ')
        .replace('[ \t]+', ' ')
        .replace('[0-9]+', 'X')
        # Unescape the punctuation last, so the replacements above still see
        # the character classes in their original, escaped context.
        .replace('\\[', '[')
        .replace('\\]', ']')
        .replace('\\(', '(')
        .replace('\\)', ')')
        .replace('\\.', '.')
    )


def mkdir_p(path):
    """mkdir -p path

    Creates path together with any missing parent directories. An already
    existing directory is not an error; every other failure (e.g. missing
    permissions, or path being an existing regular file) is re-raised
    instead of being silently ignored.
    """
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise


def in_comment(regex):
    """Builds a regex matching "regex" in a comment

    The result matches a whole line consisting of a // comment whose content
    (surrounding spaces and tabs ignored) matches regex.
    """
    return '^[ \t]*//[ \t]*' + regex + '[ \t]*$'


def random_chars(number):
    """Generate random characters

    Returns a lazy generator of "number" C-formatted characters. Characters
    are drawn from chars.CHARS weighted by their occurrence counts;
    characters that could only be written as a \\x escape are excluded.
    """
    char_map = {
        k: v for k, v in chars.CHARS.items()
        if not format_character(k).startswith('\\x')
    }

    char_num = sum(char_map.values())
    return (
        format_character(nth_char(char_map, random.randint(0, char_num - 1)))
        for _ in range(number)
    )


def random_string(length):
    """Generate a random string or character list depending on the mode

    Returns a BOOST_METAPARSE_STRING("...") expression containing "length"
    random characters.
    """
    return \
        'BOOST_METAPARSE_STRING("{0}")'.format(''.join(random_chars(length)))


class Mode(object):
    """Represents a generation mode"""

    def __init__(self, name):
        """Set up the mode called name

        Raises Exception when name is neither 'BOOST_METAPARSE_STRING' nor
        'manual'.
        """
        self.name = name
        if name == 'BOOST_METAPARSE_STRING':
            self.identifier = 'bmp'
        elif name == 'manual':
            self.identifier = 'man'
        else:
            raise Exception('Invalid mode: {0}'.format(name))

    def description(self):
        """The description of the mode"""
        if self.identifier == 'bmp':
            return 'Using BOOST_METAPARSE_STRING'
        elif self.identifier == 'man':
            return 'Generating strings manually'

    def convert_from(self, base):
        """Convert a BOOST_METAPARSE_STRING mode document into one with
        this mode

        In 'bmp' mode base is returned unchanged. In 'man' mode every
        BOOST_METAPARSE_STRING("...") in base is replaced with
        ::boost::metaparse::string<'.', '.', ...> listing the characters of
        the string literal one by one.

        Only single-character escape sequences (backslash followed by one
        character) are understood inside the string literals.
        """
        if self.identifier == 'bmp':
            return base
        elif self.identifier == 'man':
            result = []
            prefix = 'BOOST_METAPARSE_STRING("'
            while True:
                bmp_at = base.find(prefix)
                if bmp_at == -1:
                    # No more macro calls: the rest is copied verbatim.
                    return ''.join(result) + base

                result.append(
                    base[0:bmp_at] + '::boost::metaparse::string<'
                )
                new_base = ''
                was_backslash = False
                comma = ''
                pos = bmp_at + len(prefix)
                while pos < len(base):
                    char = base[pos]
                    if was_backslash:
                        result.append(
                            '{0}\'\\{1}\''.format(comma, char)
                        )
                        was_backslash = False
                        comma = ','
                    elif char == '"':
                        # Skip the closing quote and the closing parenthesis
                        # of the macro call.
                        new_base = base[pos + 2:]
                        break
                    elif char == '\\':
                        was_backslash = True
                    else:
                        if char == '\'':
                            # A bare apostrophe is fine in a string literal
                            # but has to be escaped in a character literal.
                            char = '\\\''
                        result.append('{0}\'{1}\''.format(comma, char))
                        comma = ','
                    pos += 1
                base = new_base
                result.append('>')


class Template(object):
    """Represents a loaded template"""

    def __init__(self, name, content):
        """Store the name (without extension) and the text of the template"""
        self.name = name
        self.content = content

    def instantiate(self, value_of_n):
        """Instantiates the template

        Renders the content with Cheetah; the template sees value_of_n as
        "n" and can call random_string().
        """
        template = Cheetah.Template.Template(
            self.content,
            searchList={'n': value_of_n}
        )
        template.random_string = random_string
        return str(template)

    def range(self):
        """Returns the range for N

        The range is read from a comment line of the form
        "// n in [<first>..<last>), step <step>". Raises Exception when the
        template has no such line.
        """
        match = self._match(in_comment(
            'n[ \t]+in[ \t]*\\[([0-9]+)\\.\\.([0-9]+)\\),[ \t]+'
            'step[ \t]+([0-9]+)'
        ))
        return range(
            int(match.group(1)),
            int(match.group(2)),
            int(match.group(3))
        )

    def property(self, name):
        """Parses and returns a property

        A property is a comment line of the form "// <name>: <value>".
        Raises Exception when the template has no such line.
        """
        return self._get_line(in_comment(name + ':[ \t]*(.*)'))

    def modes(self):
        """Returns the list of generation modes

        The modes are listed, separated by commas, in the "modes" property.
        """
        return [Mode(s.strip()) for s in self.property('modes').split(',')]

    def _match(self, regex):
        """Find the first line matching regex and return the match object

        Raises Exception when no line of the template matches.
        """
        cregex = re.compile(regex)
        for line in self.content.splitlines():
            match = cregex.match(line)
            if match:
                return match
        raise Exception('No "{0}" line in {1}.cpp'.format(
            regex_to_error_msg(regex),
            self.name
        ))

    def _get_line(self, regex):
        """Get a line based on a regex

        Returns the first capture group of the first matching line.
        """
        return self._match(regex).group(1)


def load_file(path):
    """Returns the content of the file"""
    with open(path, 'rb') as in_file:
        return in_file.read()


def templates_in(path):
    """Enumerate the templates found in path

    Yields a Template for every .cpp file directly in path. The files are
    visited in sorted order: os.listdir() returns them in arbitrary order,
    which would make the assignment of the seeded random values to the
    templates depend on the file system.
    """
    ext = '.cpp'
    return (
        Template(f[0:-len(ext)], load_file(os.path.join(path, f)))
        for f in sorted(os.listdir(path)) if f.endswith(ext)
    )


def nth_char(char_map, index):
    """Returns the nth character of a character->occurrence map

    The map is treated as a sequence in which every character is repeated
    as many times as its occurrence count; characters follow each other in
    the iteration order of char_map. Returns None when index is beyond the
    end of that sequence.
    """
    for char in char_map:
        if index < char_map[char]:
            return char
        index = index - char_map[char]
    return None


def format_character(char):
    """Returns the C-formatting of the character

    Letters, digits and the listed punctuation are returned as they are,
    quotes and the backslash get a backslash prefix, newline, carriage
    return and tab use their usual escapes and everything else is formatted
    as a \\xNN escape.
    """
    if \
            char in string.ascii_letters \
            or char in string.digits \
            or char in [
                    '_', '.', ':', ';', ' ', '!', '?', '+', '-', '/', '=', '<',
                    '>', '$', '(', ')', '@', '~', '`', '|', '#', '[', ']', '{',
                    '}', '&', '*', '^', '%']:
        return char
    elif char in ['"', '\'', '\\']:
        return '\\{0}'.format(char)
    elif char == '\n':
        return '\\n'
    elif char == '\r':
        return '\\r'
    elif char == '\t':
        return '\\t'
    else:
        return '\\x{:02x}'.format(ord(char))


def write_file(filename, content):
    """Create the file with the given content

    Reports the name of the file on the standard output before writing it.
    """
    print('Generating {0}'.format(filename))
    with open(filename, 'wb') as out_f:
        out_f.write(content)


def out_filename(template, n_val, mode):
    """Determine the output filename"""
    return '{0}_{1}_{2}.cpp'.format(template.name, n_val, mode.identifier)


def main():
    """The main function of the script

    For every template in the source directory it generates one .cpp file
    for each value of n and each mode, plus a <template name>.json file
    describing the generated files.
    """
    desc = 'Generate files to benchmark'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        '--src',
        dest='src_dir',
        default='src',
        help='The directory containing the templates'
    )
    parser.add_argument(
        '--out',
        dest='out_dir',
        default='generated',
        help='The output directory'
    )
    parser.add_argument(
        '--seed',
        dest='seed',
        type=int,
        default=13,
        help='The random seed (to ensure consistent regeneration)'
    )

    args = parser.parse_args()

    random.seed(args.seed)

    mkdir_p(args.out_dir)

    for template in templates_in(args.src_dir):
        modes = template.modes()

        n_range = template.range()
        for n_value in n_range:
            # Every mode is derived from the same instantiation, so the
            # files of the different modes contain the same random strings.
            base = template.instantiate(n_value)
            for mode in modes:
                write_file(
                    os.path.join(
                        args.out_dir,
                        out_filename(template, n_value, mode)
                    ),
                    mode.convert_from(base)
                )
        write_file(
            os.path.join(args.out_dir, '{0}.json'.format(template.name)),
            json.dumps({
                'files': {
                    n: {
                        m.identifier: out_filename(template, n, m)
                        for m in modes
                    } for n in n_range
                },
                'name': template.name,
                'x_axis_label': template.property('x_axis_label'),
                'desc': template.property('desc'),
                'modes': {m.identifier: m.description() for m in modes}
            })
        )


if __name__ == '__main__':
    main()