#!/usr/bin/env python
# coding=utf-8

# amalgamate.py - Amalgamate C source and header files.
# Copyright (c) 2012, Erik Edlund <erik.edlund@32767.se>
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
#  * Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#
#  * Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
#  * Neither the name of Erik Edlund, nor the names of its contributors may
#    be used to endorse or promote products derived from this software without
#    specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import datetime
import json
import os
import re


# Holds the settings of one amalgamation run and drives the generation
# of the target file. Every key of the JSON config file becomes an
# attribute of the instance; generate() reads "target", "sources" and
# "include_paths" from there.
class Amalgamation(object):

    # Prepends self.source_path to file_path if needed.
    # Absolute paths are returned unchanged.
    def actual_path(self, file_path):
        if not os.path.isabs(file_path):
            file_path = os.path.join(self.source_path, file_path)
        return file_path

    # Search included file_path in self.include_paths and
    # in source_dir if specified. source_dir is tried first.
    # Returns the first candidate path that exists as a file (still
    # relative to self.source_path unless a search dir was absolute),
    # or None if there is no such file.
    # NOTE: an empty source_dir (a file located directly in the source
    # root) is treated the same as "not specified".
    def find_included_file(self, file_path, source_dir):
        search_dirs = self.include_paths[:]
        if source_dir:
            search_dirs.insert(0, source_dir)

        for search_dir in search_dirs:
            search_path = os.path.join(search_dir, file_path)
            if os.path.isfile(self.actual_path(search_path)):
                return search_path
        return None

    # Resolves file_path to the real location on disk so that different
    # spellings of one file ("inc/x.h", "./inc/x.h", "src/../inc/x.h")
    # compare equal. Results are memoized per spelling.
    def _canonical_path(self, file_path):
        try:
            return self._canonical_paths[file_path]
        except KeyError:
            canonical = os.path.normcase(
                os.path.realpath(self.actual_path(file_path)))
            self._canonical_paths[file_path] = canonical
            return canonical

    # Returns True if file_path refers to a file which has already been
    # recorded in self.included_files, regardless of how it was spelled.
    def is_included(self, file_path):
        if file_path in self.included_files:
            return True
        canonical = self._canonical_path(file_path)
        return any(self._canonical_path(seen) == canonical
                   for seen in self.included_files)

    # args is the namespace produced by the argument parser; it must
    # provide config, verbose, prologue and source_path.
    def __init__(self, args):
        # A config file without "include_paths" simply has no extra
        # directories to search.
        self.include_paths = []

        with open(args.config, 'r') as f:
            config = json.load(f)
            for key in config:
                setattr(self, key, config[key])

        self.verbose = args.verbose == "yes"
        self.prologue = args.prologue
        self.source_path = args.source_path
        self.included_files = []
        self._canonical_paths = {}

    # Generate the amalgamation and write it to the target file.
    def generate(self):
        parts = []

        if self.prologue:
            with open(self.prologue, 'r') as f:
                # The prologue file is used as a strftime() template, so
                # it may contain date directives such as %Y.
                parts.append(datetime.datetime.now().strftime(f.read()))

        if self.verbose:
            print("Config:")
            print(" target = {0}".format(self.target))
            print(" working_dir = {0}".format(os.getcwd()))
            print(" include_paths = {0}".format(self.include_paths))
        print("Creating amalgamation:")
        for file_path in self.sources:
            # Do not check the include paths while processing the source
            # list, all given source paths must be correct.
            print(" - processing \"{0}\"".format(file_path))
            t = TranslationUnit(file_path, self, True)
            parts.append(t.content)

        with open(self.target, 'w') as f:
            f.write("".join(parts))

        print("...done!\n")
        if self.verbose:
            print("Files processed: {0}".format(self.sources))
            print("Files included: {0}".format(self.included_files))
        print("")


# Returns True if the match is within list of other matches, i.e. it
# lies strictly inside the span of at least one of them.
def _is_within(match, matches):
    return any(match.start() > m.start() and match.end() < m.end()
               for m in matches)


# One source or header file whose content has been read and had its
# resolvable #include directives replaced by the included content.
class TranslationUnit(object):
    # // C++ comment.
    cpp_comment_pattern = re.compile(r"//.*?\n")

    # /* C comment. */
    c_comment_pattern = re.compile(r"/\*.*?\*/", re.S)

    # "complex \"stri\\\ng\" value".
    string_pattern = re.compile("[^']" r'".*?(?<=[^\\])"', re.S)

    # Handle simple include directives. Support for advanced
    # directives where macros and defines needs to expanded is
    # not a concern right now.
    include_pattern = re.compile(
        r'#\s*include\s+(<|")(?P<path>.*?)("|>)', re.S)

    # #pragma once
    pragma_once_pattern = re.compile(r'#\s*pragma\s+once', re.S)

    # Content shorter than the shortest possible include directive is
    # not scanned for directives at all.
    _min_directive_len = len("#include <x>")

    # Search for pattern in self.content, add the match to
    # contexts if found and update the index accordingly.
    # Returns the index at which the scan should continue.
    def _search_content(self, index, pattern, contexts):
        match = pattern.search(self.content, index)
        if match:
            contexts.append(match)
            return match.end()
        return index + 2

    # Return all the skippable contexts, i.e., comments and strings
    # in which a found directive should not be processed.
    def _find_skippable_contexts(self):
        skippable_contexts = []

        # Walk through the content char by char, and try to grab
        # skippable contexts using regular expressions when found.
        i = 1
        content_len = len(self.content)
        while i < content_len:
            j = i - 1
            current = self.content[i]
            previous = self.content[j]

            if current == '"':
                # String value.
                i = self._search_content(j, self.string_pattern,
                                         skippable_contexts)
            elif current == '*' and previous == '/':
                # C style comment.
                i = self._search_content(j, self.c_comment_pattern,
                                         skippable_contexts)
            elif current == '/' and previous == '/':
                # C++ style comment.
                i = self._search_content(j, self.cpp_comment_pattern,
                                         skippable_contexts)
            else:
                # Skip to the next char.
                i += 1

        return skippable_contexts

    # Return, in order of appearance, the matches of pattern in
    # self.content which are not inside a comment or a string.
    def _active_matches(self, pattern):
        skippable_contexts = self._find_skippable_contexts()
        return [match for match in pattern.finditer(self.content)
                if not _is_within(match, skippable_contexts)]

    # Removes pragma once from content.
    # Returns the number of removed directives.
    def _process_pragma_once(self):
        if len(self.content) < self._min_directive_len:
            return 0

        pragmas = self._active_matches(self.pragma_once_pattern)

        # Keep everything between the collected pragma once directives.
        pieces = []
        prev_end = 0
        for pragma_match in pragmas:
            pieces.append(self.content[prev_end:pragma_match.start()])
            prev_end = pragma_match.end()
        pieces.append(self.content[prev_end:])
        self.content = "".join(pieces)

        return len(pragmas)

    # Include all trivial #include directives into self.content.
    # A directive whose file can be found is turned into a comment and
    # followed by the processed content of that file, unless the file
    # was already included. Directives which cannot be resolved are
    # left untouched. Returns the number of resolved directives.
    def _process_includes(self):
        if len(self.content) < self._min_directive_len:
            return 0

        # Search for include directives in the content, collect those
        # which should be included into the content.
        includes = []
        for include_match in self._active_matches(self.include_pattern):
            include_path = include_match.group("path")
            # Only the "quoted" form also searches the directory of the
            # including file.
            search_same_dir = include_match.group(1) == '"'
            found_included_path = self.amalgamation.find_included_file(
                include_path, self.file_dir if search_same_dir else None)
            if found_included_path:
                includes.append((include_match, found_included_path))

        # Handle all collected include directives.
        pieces = []
        prev_end = 0
        for include_match, found_included_path in includes:
            pieces.append(self.content[prev_end:include_match.start()])
            pieces.append("// {0}\n".format(include_match.group(0)))
            # Compare by real location, the same file may be reachable
            # through several search dirs.
            if not self.amalgamation.is_included(found_included_path):
                t = TranslationUnit(found_included_path,
                                    self.amalgamation, False)
                pieces.append(t.content)
            prev_end = include_match.end()
        pieces.append(self.content[prev_end:])
        self.content = "".join(pieces)

        return len(includes)

    # Make all content processing. Files from the source list (roots)
    # keep their pragma once directives, included files lose them.
    def _process(self):
        if not self.is_root:
            self._process_pragma_once()
        self._process_includes()

    # Reads file_path (relative to the source path of amalgamation
    # unless absolute), records it as included and processes it.
    # Raises IOError if the file does not exist.
    def __init__(self, file_path, amalgamation, is_root):
        self.file_path = file_path
        self.file_dir = os.path.dirname(file_path)
        self.amalgamation = amalgamation
        self.is_root = is_root

        # Registered before processing so that a file which includes
        # itself (directly or indirectly) is not expanded again.
        self.amalgamation.included_files.append(self.file_path)

        actual_path = self.amalgamation.actual_path(file_path)
        if not os.path.isfile(actual_path):
            raise IOError("File not found: \"{0}\"".format(file_path))
        with open(actual_path, 'r') as f:
            self.content = f.read()
            self._process()


# Build the command line parser of the script.
def build_argument_parser():
    description = "Amalgamate C source and header files."
    usage = " ".join([
        "amalgamate.py",
        "[-v]",
        "-c path/to/config.json",
        "-s path/to/source/dir",
        "[-p path/to/prologue.(c|h)]"
    ])
    argsparser = argparse.ArgumentParser(
        description=description, usage=usage)

    # A bare "-v" means "yes", as advertised by the usage string; the
    # explicit "-v yes" and "-v no" forms are still accepted.
    argsparser.add_argument("-v", "--verbose", dest="verbose",
        nargs="?", const="yes", default="no",
        choices=["yes", "no"], metavar="", help="be verbose")

    argsparser.add_argument("-c", "--config", dest="config",
        required=True, metavar="", help="path to a JSON config file")

    argsparser.add_argument("-s", "--source", dest="source_path",
        required=True, metavar="", help="source code path")

    argsparser.add_argument("-p", "--prologue", dest="prologue",
        required=False, metavar="", help="path to a C prologue file")

    return argsparser


# Script entry point. argv is the list of command line arguments
# without the program name; None means sys.argv[1:].
def main(argv=None):
    amalgamation = Amalgamation(build_argument_parser().parse_args(argv))
    amalgamation.generate()


if __name__ == "__main__":
    main()