• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2# coding=utf-8
3
4# amalgamate.py - Amalgamate C source and header files.
5# Copyright (c) 2012, Erik Edlund <erik.edlund@32767.se>
6#
7# Redistribution and use in source and binary forms, with or without modification,
8# are permitted provided that the following conditions are met:
9#
10#  * Redistributions of source code must retain the above copyright notice,
11#  this list of conditions and the following disclaimer.
12#
13#  * Redistributions in binary form must reproduce the above copyright notice,
14#  this list of conditions and the following disclaimer in the documentation
15#  and/or other materials provided with the distribution.
16#
17#  * Neither the name of Erik Edlund, nor the names of its contributors may
18#  be used to endorse or promote products derived from this software without
19#  specific prior written permission.
20#
21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
25# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
28# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32from __future__ import division
33from __future__ import print_function
34from __future__ import unicode_literals
35
36import argparse
37import datetime
38import json
39import os
40import re
41
42
class Amalgamation(object):
    """Configuration and driver for building one amalgamated source file.

    Every key of the JSON config file becomes an attribute of the instance.
    ``generate()`` reads ``target`` and ``sources`` and include resolution
    reads ``include_paths``; ``include_paths`` is optional and defaults to
    an empty list, the other two must be supplied by the config.
    """

    def actual_path(self, file_path):
        """Return ``file_path``, prefixed with ``self.source_path`` if relative."""
        if os.path.isabs(file_path):
            return file_path
        return os.path.join(self.source_path, file_path)

    def find_included_file(self, file_path, source_dir):
        """Locate an included file and return its path, or None if not found.

        ``source_dir`` (when truthy) is searched first, followed by every
        entry of ``self.include_paths`` in order. The returned path is the
        search directory joined with ``file_path``; it is *not* passed
        through ``actual_path``, which is only used for the existence check.
        """
        search_dirs = list(self.include_paths)
        if source_dir:
            search_dirs.insert(0, source_dir)

        for search_dir in search_dirs:
            search_path = os.path.join(search_dir, file_path)
            if os.path.isfile(self.actual_path(search_path)):
                return search_path
        return None

    def __init__(self, args):
        """Load the JSON config named by ``args.config`` and apply ``args``.

        ``args`` must provide ``config``, ``verbose``, ``prologue`` and
        ``source_path``. Values taken from ``args`` are assigned after the
        config keys, so they win over same-named config entries.
        """
        # Only the read needs the file; release the handle right away.
        with open(args.config, 'r') as f:
            config = json.load(f)

        # A config without "include_paths" used to fail with AttributeError
        # on the first include lookup; treat it as "no extra search dirs".
        self.include_paths = []
        for key in config:
            setattr(self, key, config[key])

        self.verbose = args.verbose == "yes"
        self.prologue = args.prologue
        self.source_path = args.source_path
        self.included_files = []

    def generate(self):
        """Generate the amalgamation and write it to ``self.target``.

        If a prologue file is configured, its text is used as a
        ``strftime`` format string for the current local time and placed
        at the top of the output.
        """
        parts = []

        if self.prologue:
            with open(self.prologue, 'r') as f:
                parts.append(datetime.datetime.now().strftime(f.read()))

        if self.verbose:
            print("Config:")
            print(" target        = {0}".format(self.target))
            print(" working_dir   = {0}".format(os.getcwd()))
            print(" include_paths = {0}".format(self.include_paths))
        print("Creating amalgamation:")
        for file_path in self.sources:
            # Do not check the include paths while processing the source
            # list, all given source paths must be correct.
            print(" - processing \"{0}\"".format(file_path))
            t = TranslationUnit(file_path, self, True)
            parts.append(t.content)

        with open(self.target, 'w') as f:
            f.write("".join(parts))

        print("...done!\n")
        if self.verbose:
            print("Files processed: {0}".format(self.sources))
            print("Files included: {0}".format(self.included_files))
        print("")
105
106
def _is_within(match, matches):
    """Tell whether ``match`` lies strictly inside one of ``matches``.

    Both ends must be strictly interior: a span that shares its start or
    its end with a candidate does not count as being within it.
    """
    return any(
        outer.start() < match.start() and match.end() < outer.end()
        for outer in matches
    )
113
114
class TranslationUnit(object):
    """A source or header file whose resolvable includes are expanded inline.

    Constructing an instance registers the file in
    ``amalgamation.included_files``, reads it and processes it right away;
    the processed text is then available as ``self.content``.
    """

    # // C++ comment.
    cpp_comment_pattern = re.compile(r"//.*?\n")

    # /* C comment. */
    c_comment_pattern = re.compile(r"/\*.*?\*/", re.S)

    # "complex \"stri\\\ng\" value".
    # The leading [^'] consumes the character in front of the opening quote
    # and rejects a quote that directly follows a single quote ('"').
    string_pattern = re.compile("[^']" r'".*?(?<=[^\\])"', re.S)

    # Handle simple include directives. Support for advanced
    # directives where macros and defines need to be expanded is
    # not a concern right now.
    include_pattern = re.compile(
        r'#\s*include\s+(<|")(?P<path>.*?)("|>)', re.S)

    # #pragma once
    pragma_once_pattern = re.compile(r'#\s*pragma\s+once', re.S)

    def _search_content(self, index, pattern, contexts):
        """Match ``pattern`` in ``self.content`` exactly at ``index``.

        On success the match is appended to ``contexts`` and the index just
        past it is returned. Otherwise ``index + 2`` is returned, i.e. the
        character after the one that triggered the attempt.

        The match is anchored on purpose: an unanchored search could latch
        onto a later string (for instance after the char literal '"') and
        silently jump over comments in between, so that includes inside
        those comments were treated as live code.
        """
        match = pattern.match(self.content, index)
        if match:
            contexts.append(match)
            return match.end()
        return index + 2

    def _find_skippable_contexts(self):
        """Return the matches for all comments and strings in the content.

        These are the contexts in which a found directive must not be
        processed.
        """
        skippable_contexts = []

        # Walk through the content char by char, and try to grab
        # skippable contexts using regular expressions when found.
        # Starting at 1 guarantees that a previous character exists.
        i = 1
        content_len = len(self.content)
        while i < content_len:
            j = i - 1
            current = self.content[i]
            previous = self.content[j]

            if current == '"':
                # String value.
                i = self._search_content(j, self.string_pattern,
                                         skippable_contexts)
            elif current == '*' and previous == '/':
                # C style comment.
                i = self._search_content(j, self.c_comment_pattern,
                                         skippable_contexts)
            elif current == '/' and previous == '/':
                # C++ style comment.
                i = self._search_content(j, self.cpp_comment_pattern,
                                         skippable_contexts)
            else:
                # Skip to the next char.
                i += 1

        return skippable_contexts

    def _process_pragma_once(self):
        """Remove ``#pragma once`` directives from ``self.content``.

        Directives inside comments or strings are left alone. Returns the
        number of directives removed.
        """
        # Too short to hold a directive ("#pragma once" is exactly as long
        # as the sample include used for the threshold).
        if len(self.content) < len("#include <x>"):
            return 0

        skippable_contexts = self._find_skippable_contexts()
        pragmas = [
            pragma_match
            for pragma_match in self.pragma_once_pattern.finditer(self.content)
            if not _is_within(pragma_match, skippable_contexts)
        ]

        # Stitch together everything between the removed directives.
        pieces = []
        prev_end = 0
        for pragma_match in pragmas:
            pieces.append(self.content[prev_end:pragma_match.start()])
            prev_end = pragma_match.end()
        pieces.append(self.content[prev_end:])
        self.content = "".join(pieces)

        return len(pragmas)

    def _process_includes(self):
        """Include all trivial #include directives into ``self.content``.

        Every directive that is outside comments/strings and whose file can
        be located is replaced by a ``// <directive>`` comment line,
        followed by the processed content of that file unless the file is
        already listed in ``amalgamation.included_files``. Directives that
        cannot be resolved stay untouched. Returns the number of directives
        replaced.
        """
        if len(self.content) < len("#include <x>"):
            return 0

        skippable_contexts = self._find_skippable_contexts()

        # Collect the directives which should be included into the content.
        includes = []
        for include_match in self.include_pattern.finditer(self.content):
            if _is_within(include_match, skippable_contexts):
                continue
            # Only the quoted form also searches the including file's dir.
            search_same_dir = include_match.group(1) == '"'
            found_included_path = self.amalgamation.find_included_file(
                include_match.group("path"),
                self.file_dir if search_same_dir else None)
            if found_included_path:
                includes.append((include_match, found_included_path))

        # Handle all collected include directives.
        pieces = []
        prev_end = 0
        for include_match, found_included_path in includes:
            pieces.append(self.content[prev_end:include_match.start()])
            pieces.append("// {0}\n".format(include_match.group(0)))
            # Checked at expansion time: nested units register themselves
            # while they are being constructed.
            if found_included_path not in self.amalgamation.included_files:
                t = TranslationUnit(found_included_path, self.amalgamation, False)
                pieces.append(t.content)
            prev_end = include_match.end()
        pieces.append(self.content[prev_end:])
        self.content = "".join(pieces)

        return len(includes)

    def _process(self):
        """Run all content processing steps.

        ``#pragma once`` is only stripped from non-root (included) files.
        """
        if not self.is_root:
            self._process_pragma_once()
        self._process_includes()

    def __init__(self, file_path, amalgamation, is_root):
        """Read ``file_path`` and process its content.

        ``amalgamation`` supplies ``actual_path``, ``find_included_file``
        and the shared ``included_files`` list. ``is_root`` is True for
        files taken from the source list and False for included files.

        Raises IOError when the resolved path is not an existing file.
        """
        self.file_path = file_path
        self.file_dir = os.path.dirname(file_path)
        self.amalgamation = amalgamation
        self.is_root = is_root

        # Register before processing so that nested includes of this very
        # file are recognised as already included.
        self.amalgamation.included_files.append(self.file_path)

        actual_path = self.amalgamation.actual_path(file_path)
        if not os.path.isfile(actual_path):
            raise IOError("File not found: \"{0}\"".format(file_path))
        with open(actual_path, 'r') as f:
            self.content = f.read()
        # Processing recurses into included files; do it after the handle
        # is closed so that a deep include chain does not pile up open files.
        self._process()
268
269
def main():
    """Parse the command line, then build and write the amalgamation."""
    usage = (
        "amalgamate.py [-v] -c path/to/config.json "
        "-s path/to/source/dir [-p path/to/prologue.(c|h)]"
    )
    parser = argparse.ArgumentParser(
        description="Amalgamate C source and header files.", usage=usage)

    # (flags, keyword settings) for each option, in help-output order.
    option_table = (
        (("-v", "--verbose"),
         dict(dest="verbose", choices=["yes", "no"], metavar="",
              help="be verbose")),
        (("-c", "--config"),
         dict(dest="config", required=True, metavar="",
              help="path to a JSON config file")),
        (("-s", "--source"),
         dict(dest="source_path", required=True, metavar="",
              help="source code path")),
        (("-p", "--prologue"),
         dict(dest="prologue", required=False, metavar="",
              help="path to a C prologue file")),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    parsed = parser.parse_args()
    Amalgamation(parsed).generate()
296
297
# Run the command line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
300