• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""
2modulegraph.find_modules - High-level module dependency finding interface
3=========================================================================
4
5History
6........
7
8Originally (loosely) based on code in py2exe's build_exe.py by Thomas Heller.
9"""
10from __future__ import absolute_import
11
12import sys
13import os
14import imp
15import warnings
16
17import modulegraph.modulegraph as modulegraph
18from modulegraph.modulegraph import Alias, Script, Extension
19from modulegraph.util import imp_find_module
20
# Public names of this module (what a star-import exposes).
__all__ = ['find_modules', 'parse_mf_results']
24
def get_implies():
    """
    Build the table of "implied" imports that is handed to the module graph.

    The keys are module names.  The values are one of:

    * a list of module names that should be treated as imported whenever
      the key module is used (imports performed from C code or through
      ``__import__`` that cannot be discovered by scanning bytecode),
    * an ``Alias`` naming the module the key really refers to,
    * ``None`` (used for ``_frozen_importlib``, which is part of the
      interpreter itself; exactly how the graph treats ``None`` is decided
      by ``modulegraph.ModuleGraph`` and is not visible here).

    The table depends on the running interpreter: several entries are only
    added (or are replaced) for specific Python versions, and the
    ``_elementtree`` entry is completed by listing the ``xml.etree`` package
    directory on disk.

    :returns: a new ``dict`` on every call
    """
    result = {
        # imports done from builtin modules in C code (untrackable by modulegraph)
        "_curses":      ["curses"],
        "posix":        ["resource"],
        "gc":           ["time"],
        "time":         ["_strptime"],
        "datetime":     ["time"],
        "MacOS":        ["macresource"],
        "cPickle":      ["copy_reg", "cStringIO"],
        "parser":       ["copy_reg"],
        "codecs":       ["encodings"],
        "cStringIO":    ["copy_reg"],
        "_sre":         ["copy", "string", "sre"],
        "zipimport":    ["zlib"],

        # Python 3.2:
        "_datetime":    ["time", "_strptime"],
        "_json":        ["json.decoder"],
        "_pickle":      ["codecs", "copyreg", "_compat_pickle"],
        "_posixsubprocess": ["gc"],
        "_ssl":         ["socket"],

        # Python 3.3:
        "_elementtree": ["copy", "xml.etree.ElementPath"],

        # mactoolboxglue can do a bunch more of these
        # that are far harder to predict, these should be tracked
        # manually for now.

        # this isn't C, but it uses __import__
        "anydbm":       ["dbhash", "gdbm", "dbm", "dumbdbm", "whichdb"],
        # package aliases
        "wxPython.wx":  Alias('wx'),
    }

    if sys.version_info[0] == 3:
        # Python 3 spellings of the modules used by these extensions.
        result["_sre"] = ["copy", "re"]
        result["parser"] = ["copyreg"]

        # _frozen_importlib is part of the interpreter itself
        result["_frozen_importlib"] = None

    if sys.version_info[0] == 2 and sys.version_info[1] >= 5:
        # Python 2.5 renamed the email submodules to lowercase names and
        # kept the old spellings as lazily imported aliases.
        result.update({
            "email.base64MIME":         Alias("email.base64mime"),
            "email.Charset":            Alias("email.charset"),
            "email.Encoders":           Alias("email.encoders"),
            "email.Errors":             Alias("email.errors"),
            # The legacy module is spelled "FeedParser" and its replacement
            # is all lowercase.
            "email.FeedParser":         Alias("email.feedparser"),
            "email.Generator":          Alias("email.generator"),
            "email.Header":             Alias("email.header"),
            "email.Iterators":          Alias("email.iterators"),
            "email.Message":            Alias("email.message"),
            "email.Parser":             Alias("email.parser"),
            "email.quopriMIME":         Alias("email.quoprimime"),
            "email.Utils":              Alias("email.utils"),
            "email.MIMEAudio":          Alias("email.mime.audio"),
            "email.MIMEBase":           Alias("email.mime.base"),
            "email.MIMEImage":          Alias("email.mime.image"),
            "email.MIMEMessage":        Alias("email.mime.message"),
            "email.MIMEMultipart":      Alias("email.mime.multipart"),
            "email.MIMENonMultipart":   Alias("email.mime.nonmultipart"),
            "email.MIMEText":           Alias("email.mime.text"),
        })

    if sys.version_info[:2] >= (2, 5):
        # Extend the entry from the table above instead of replacing it,
        # otherwise the "copy" dependency listed there would be lost.
        implied = result["_elementtree"]
        implied.append("pyexpat")

        # Every pure-python module that sits next to xml/etree/__init__.py
        # is treated as a dependency of the C accelerator.
        import xml.etree
        for fn in sorted(os.listdir(xml.etree.__path__[0])):
            if fn.endswith('.py') and fn != "__init__.py":
                name = "xml.etree.%s" % (fn[:-3],)
                if name not in implied:
                    implied.append(name)

    if sys.version_info[:2] >= (2, 6):
        result['future_builtins'] = ['itertools']

    # os.path is an alias for a platform specific submodule,
    # ensure that the graph shows this.
    result['os.path'] = Alias(os.path.__name__)

    return result
110
def parse_mf_results(mf):
    """
    Split the nodes of a module graph into python files and C extensions.

    Nodes whose identifier is ``"__main__"`` are skipped, as are nodes whose
    ``filename`` is empty, ``None`` or ``'-'``.

    :param mf: a :class:`modulegraph.modulegraph.ModuleGraph` instance
    :returns: a ``(py_files, extensions)`` pair of lists of graph nodes,
        each sorted by the node's ``filename``
    """
    def _by_filename(node):
        return node.filename

    py_files = []
    extensions = []

    for node in mf.flatten():
        # There may be __main__ modules (from mf.run_script), but
        # they are not wanted in the zipfile that gets built.
        if node.identifier == "__main__":
            continue

        filename = node.filename
        if not filename or filename == '-':
            continue

        # Scripts are python files; only non-script Extension nodes are
        # C extensions, everything else counts as a python file.
        is_extension = (not isinstance(node, Script)
                        and isinstance(node, Extension))
        bucket = extensions if is_extension else py_files
        bucket.append(node)

    # sort on the file names, the output is nicer to read
    py_files.sort(key=_by_filename)
    extensions.sort(key=_by_filename)
    return py_files, extensions
148
149
def plat_prepare(includes, packages, excludes):
    """
    Seed *includes* and *excludes* with platform dependent module names.

    Modules that only exist on a platform other than the current one
    (judged by ``sys.platform`` and ``sys.builtin_module_names``) are added
    to *excludes*; a few modules used by Python itself are added to
    *includes*.  Both sets are updated in place and nothing is returned.

    :param includes: ``set`` of module names, updated in place
    :param packages: not used by this function
    :param excludes: ``set`` of module names, updated in place
    """
    platform = sys.platform

    # used by Python itself
    includes.update(["warnings", "unicodedata", "weakref"])

    # Jython specific imports in the stdlib ('java.lang',
    # 'org.python.core') are deliberately not excluded here.

    if not platform.startswith('irix'):
        excludes.update([
            'AL',
            'sgi',
            'vms_lib',
        ])

    if platform not in ('mac', 'darwin'):
        # XXX - this doesn't look nearly complete
        # ('macpath' is deliberately not excluded)
        excludes.update([
            'Audio_mac',
            'Carbon.File',
            'Carbon.Folder',
            'Carbon.Folders',
            'EasyDialogs',
            'MacOS',
            'macfs',
            'macostools',
            '_scproxy',
        ])

    if platform != 'win32':
        # only win32 ('ntpath' is deliberately not excluded)
        excludes.update([
            'nturl2path',
            'win32api',
            'win32con',
            'win32event',
            'win32evtlogutil',
            'win32evtlog',
            'win32file',
            'win32gui',
            'win32pipe',
            'win32process',
            'win32security',
            'pywintypes',
            'winsound',
            'win32',
            '_winreg',
            '_winapi',
            'msvcrt',
            'winreg',
            '_subprocess',
        ])

    if platform != 'riscos':
        # ('riscospath' is deliberately not excluded)
        excludes.update([
            'riscosenviron',
            'rourl2path',
        ])

    # The whole "is this DOS?" test has to be negated: without the
    # parentheses ``not`` only applies to the first comparison and 'dos'
    # would be excluded on ms-dos platforms as well.
    if not (platform == 'dos' or platform.startswith('ms-dos')):
        excludes.update([
            'dos',
        ])

    if platform != 'os2emx':
        # ('os2emxpath' is deliberately not excluded)
        excludes.update([
            '_emx_link',
        ])

    # Platform modules that are not compiled into this interpreter.
    excludes.update(
        set(['posix', 'nt', 'os2', 'mac', 'ce', 'riscos'])
        - set(sys.builtin_module_names))

    # Carbon.Res depends on this, but the module hasn't been present
    # for a while...
    excludes.add('OverrideFrom23')
    excludes.add('OverrideFrom23._Res')

    # import trickery in the dummy_threading module (stdlib)
    excludes.add('_dummy_threading')

    # Only keep 'poll' when it can actually be located.
    try:
        imp_find_module('poll')
    except ImportError:
        excludes.update([
            'poll',
        ])
242
def find_needed_modules(mf=None, scripts=(), includes=(), packages=(), warn=warnings.warn):
    """
    Feed scripts, modules and whole packages into a module graph.

    :param mf: the graph to populate; a new ``modulegraph.ModuleGraph()``
        is created when this is ``None``
    :param scripts: iterable of script paths, each passed to
        ``mf.run_script``
    :param includes: iterable of module names, each passed to
        ``mf.import_hook``; a name ending in ``.*`` is imported with a
        ``['*']`` from-list
    :param packages: iterable of package names; the package directory is
        walked and every directory containing an ``__init__.py`` is
        imported with a ``['*']`` from-list
    :param warn: callable taking a message string, used to report modules
        and packages that cannot be found
    :returns: the populated graph (*mf*, or the newly created one)
    """
    if mf is None:
        mf = modulegraph.ModuleGraph()
    # feed Modulefinder with everything, and return it.

    for path in scripts:
        mf.run_script(path)

    for mod in includes:
        try:
            if mod[-2:] == '.*':
                mf.import_hook(mod[:-2], None, ['*'])
            else:
                mf.import_hook(mod)
        except ImportError:
            warn("No module named %s"%(mod,))

    for f in packages:
        # If modulegraph has seen a reference to the package, then
        # we prefer to believe that (imp_find_module doesn't seem to locate
        # sub-packages)
        m = mf.findNode(f)
        # A node can exist without a usable package path (assumption: e.g.
        # a plain module or a missing module); treat that like an unknown
        # package instead of failing on ``None[0]``.
        packagepath = getattr(m, 'packagepath', None)
        if packagepath:
            path = packagepath[0]
        else:
            # Find path of package
            # TODO: use imp_find_module_or_importer
            try:
                path = imp_find_module(f, mf.path)[1]
            except ImportError:
                warn("No package named %s" % f)
                continue

        # Walk the package directory and import every sub-directory that
        # is itself a package.
        # FIXME:
        # 1) Needs to be adjusted for namespace packages in python 3.3
        # 2) Code is fairly dodgy and needs better tests
        for (dirpath, dirnames, filenames) in os.walk(path):
            if '__init__.py' in filenames:
                # relpath (rather than slicing off len(path)+1 characters)
                # stays correct when *path* ends with a path separator.
                relative = os.path.relpath(dirpath, path)
                if relative == os.curdir:
                    package = f
                else:
                    package = f + '.' + relative.replace(os.sep, '.')
                mf.import_hook(package, None, ["*"])
            else:
                # Exclude subtrees that aren't packages
                dirnames[:] = []

    return mf
296
297#
298# resource constants
299#
# File name suffixes of python source / bytecode files.
PY_SUFFIXES = ['.py', '.pyw', '.pyo', '.pyc']

# File name suffixes of C extension modules for the running interpreter.
try:
    # Python 3.3+: the list imp.get_suffixes() reports for C_EXTENSION,
    # available without the deprecated (and, since 3.12, removed) imp module.
    from importlib.machinery import EXTENSION_SUFFIXES as _EXTENSION_SUFFIXES
except ImportError:
    # Older interpreters: fall back to the imp module.
    C_SUFFIXES = [
        _triple[0] for _triple in imp.get_suffixes()
        if _triple[2] == imp.C_EXTENSION
    ]
else:
    C_SUFFIXES = list(_EXTENSION_SUFFIXES)
305
306#
307# side-effects
308#
309
def _replacePackages():
    """
    Register the package renames this module relies on.

    Every ``(old name, new name)`` pair is passed to
    ``modulegraph.replacePackage``; currently that is only
    ``'_xmlplus'`` -> ``'xml'``.
    """
    replacements = (
        ('_xmlplus', 'xml'),
    )
    for old_name, new_name in replacements:
        modulegraph.replacePackage(old_name, new_name)

# Import-time side effect: the renames are registered as soon as this
# module is loaded.
_replacePackages()
318
def find_modules(scripts=(), includes=(), packages=(), excludes=(), path=None, debug=0):
    """
    High-level interface: build a module graph and populate it.

    Takes iterables for:
        scripts, includes, packages, excludes

    The iterables are copied into sets, the include and exclude sets are
    adjusted for the current platform by ``plat_prepare``, and the graph is
    created with ``excludes - includes`` as its exclusion list and the
    table from ``get_implies`` as its implied imports.  *path* and *debug*
    are passed through to the graph constructor unchanged.

    Returns the populated :class:`modulegraph.modulegraph.ModuleGraph`
    instance.  Use ``parse_mf_results`` on it to obtain the lists of python
    files and C extensions.
    """
    script_set = set(scripts)
    include_set = set(includes)
    package_set = set(packages)
    exclude_set = set(excludes)

    # Updates include_set / exclude_set in place.
    plat_prepare(include_set, package_set, exclude_set)

    graph = modulegraph.ModuleGraph(
        path=path,
        # An explicit include wins over an exclude of the same name.
        excludes=(exclude_set - include_set),
        implies=get_implies(),
        debug=debug,
    )
    find_needed_modules(graph, script_set, include_set, package_set)
    return graph
343
def test():
    """
    Command line driver: analyse the scripts named on the command line.

    ``-g`` selects ``graphreport()`` and ``-x`` selects ``create_xref()``
    (``-x`` wins when both are given); without either flag ``report()`` is
    called.  The flags are removed from ``sys.argv``.  When no script is
    given this file itself is analysed.
    """
    def _take_flag(flag):
        # Report whether *flag* was passed and strip it from sys.argv.
        if flag in sys.argv[1:]:
            sys.argv.remove(flag)
            return True
        return False

    dograph = _take_flag('-g')
    doxref = _take_flag('-x')

    scripts = sys.argv[1:] or [__file__]
    mf = find_modules(scripts=scripts)

    if doxref:
        mf.create_xref()
        return
    if dograph:
        mf.graphreport()
        return
    mf.report()


if __name__ == '__main__':
    test()
367