1""" 2modulegraph.find_modules - High-level module dependency finding interface 3========================================================================= 4 5History 6........ 7 8Originally (loosely) based on code in py2exe's build_exe.py by Thomas Heller. 9""" 10from __future__ import absolute_import 11 12import sys 13import os 14import imp 15import warnings 16 17import modulegraph.modulegraph as modulegraph 18from modulegraph.modulegraph import Alias, Script, Extension 19from modulegraph.util import imp_find_module 20 21__all__ = [ 22 'find_modules', 'parse_mf_results' 23] 24 25def get_implies(): 26 result = { 27 # imports done from builtin modules in C code (untrackable by modulegraph) 28 "_curses": ["curses"], 29 "posix": ["resource"], 30 "gc": ["time"], 31 "time": ["_strptime"], 32 "datetime": ["time"], 33 "MacOS": ["macresource"], 34 "cPickle": ["copy_reg", "cStringIO"], 35 "parser": ["copy_reg"], 36 "codecs": ["encodings"], 37 "cStringIO": ["copy_reg"], 38 "_sre": ["copy", "string", "sre"], 39 "zipimport": ["zlib"], 40 41 # Python 3.2: 42 "_datetime": ["time", "_strptime"], 43 "_json": ["json.decoder"], 44 "_pickle": ["codecs", "copyreg", "_compat_pickle"], 45 "_posixsubprocess": ["gc"], 46 "_ssl": ["socket"], 47 48 # Python 3.3: 49 "_elementtree": ["copy", "xml.etree.ElementPath" ], 50 51 # mactoolboxglue can do a bunch more of these 52 # that are far harder to predict, these should be tracked 53 # manually for now. 54 55 # this isn't C, but it uses __import__ 56 "anydbm": ["dbhash", "gdbm", "dbm", "dumbdbm", "whichdb"], 57 # package aliases 58 "wxPython.wx": Alias('wx'), 59 60 } 61 62 if sys.version_info[0] == 3: 63 result["_sre"] = ["copy", "re"] 64 result["parser"] = ["copyreg"] 65 66 # _frozen_importlib is part of the interpreter itself 67 result["_frozen_importlib"] = None 68 69 if sys.version_info[0] == 2 and sys.version_info[1] >= 5: 70 result.update({ 71 "email.base64MIME": Alias("email.base64mime"), 72 "email.Charset": Alias("email.charset"), 73 "email.Encoders": Alias("email.encoders"), 74 "email.Errors": Alias("email.errors"), 75 "email.Feedparser": Alias("email.feedParser"), 76 "email.Generator": Alias("email.generator"), 77 "email.Header": Alias("email.header"), 78 "email.Iterators": Alias("email.iterators"), 79 "email.Message": Alias("email.message"), 80 "email.Parser": Alias("email.parser"), 81 "email.quopriMIME": Alias("email.quoprimime"), 82 "email.Utils": Alias("email.utils"), 83 "email.MIMEAudio": Alias("email.mime.audio"), 84 "email.MIMEBase": Alias("email.mime.base"), 85 "email.MIMEImage": Alias("email.mime.image"), 86 "email.MIMEMessage": Alias("email.mime.message"), 87 "email.MIMEMultipart": Alias("email.mime.multipart"), 88 "email.MIMENonMultipart": Alias("email.mime.nonmultipart"), 89 "email.MIMEText": Alias("email.mime.text"), 90 }) 91 92 if sys.version_info[:2] >= (2, 5): 93 result["_elementtree"] = ["pyexpat"] 94 95 import xml.etree 96 files = os.listdir(xml.etree.__path__[0]) 97 for fn in files: 98 if fn.endswith('.py') and fn != "__init__.py": 99 result["_elementtree"].append("xml.etree.%s"%(fn[:-3],)) 100 101 if sys.version_info[:2] >= (2, 6): 102 result['future_builtins'] = ['itertools'] 103 104 # os.path is an alias for a platform specific submodule, 105 # ensure that the graph shows this. 106 result['os.path'] = Alias(os.path.__name__) 107 108 109 return result 110 111def parse_mf_results(mf): 112 """ 113 Return two lists: the first one contains the python files in the graph, 114 the second the C extensions. 115 116 :param mf: a :class:`modulegraph.modulegraph.ModuleGraph` instance 117 """ 118 #for name, imports in get_hidden_imports().items(): 119 # if name in mf.modules.keys(): 120 # for mod in imports: 121 # mf.import_hook(mod) 122 123 # Retrieve modules from modulegraph 124 py_files = [] 125 extensions = [] 126 127 for item in mf.flatten(): 128 # There may be __main__ modules (from mf.run_script), but 129 # we don't need it in the zipfile we build. 130 if item.identifier == "__main__": 131 continue 132 src = item.filename 133 if src and src != '-': 134 if isinstance(item, Script): 135 # Scripts are python files 136 py_files.append(item) 137 138 elif isinstance(item, Extension): 139 extensions.append(item) 140 141 else: 142 py_files.append(item) 143 144 # sort on the file names, the output is nicer to read 145 py_files.sort(key=lambda v: v.filename) 146 extensions.sort(key=lambda v: v.filename) 147 return py_files, extensions 148 149 150def plat_prepare(includes, packages, excludes): 151 # used by Python itself 152 includes.update(["warnings", "unicodedata", "weakref"]) 153 154 #if os.uname()[0] != 'java': 155 # Jython specific imports in the stdlib: 156 #excludes.update([ 157 # 'java.lang', 158 # 'org.python.core', 159 #]) 160 161 if not sys.platform.startswith('irix'): 162 excludes.update([ 163 'AL', 164 'sgi', 165 'vms_lib', 166 ]) 167 168 if not sys.platform in ('mac', 'darwin'): 169 # XXX - this doesn't look nearly complete 170 excludes.update([ 171 'Audio_mac', 172 'Carbon.File', 173 'Carbon.Folder', 174 'Carbon.Folders', 175 'EasyDialogs', 176 'MacOS', 177 'macfs', 178 'macostools', 179 #'macpath', 180 '_scproxy', 181 ]) 182 183 if not sys.platform == 'win32': 184 # only win32 185 excludes.update([ 186 #'ntpath', 187 'nturl2path', 188 'win32api', 189 'win32con', 190 'win32event', 191 'win32evtlogutil', 192 'win32evtlog', 193 'win32file', 194 'win32gui', 195 'win32pipe', 196 'win32process', 197 'win32security', 198 'pywintypes', 199 'winsound', 200 'win32', 201 '_winreg', 202 '_winapi', 203 'msvcrt', 204 'winreg', 205 '_subprocess', 206 ]) 207 208 if not sys.platform == 'riscos': 209 excludes.update([ 210 'riscosenviron', 211 #'riscospath', 212 'rourl2path', 213 ]) 214 215 if not sys.platform == 'dos' or sys.platform.startswith('ms-dos'): 216 excludes.update([ 217 'dos', 218 ]) 219 220 if not sys.platform == 'os2emx': 221 excludes.update([ 222 #'os2emxpath', 223 '_emx_link', 224 ]) 225 226 excludes.update(set(['posix', 'nt', 'os2', 'mac', 'ce', 'riscos']) - set(sys.builtin_module_names)) 227 228 # Carbon.Res depends on this, but the module hasn't been present 229 # for a while... 230 excludes.add('OverrideFrom23') 231 excludes.add('OverrideFrom23._Res') 232 233 # import trickery in the dummy_threading module (stdlib) 234 excludes.add('_dummy_threading') 235 236 try: 237 imp_find_module('poll') 238 except ImportError: 239 excludes.update([ 240 'poll', 241 ]) 242 243def find_needed_modules(mf=None, scripts=(), includes=(), packages=(), warn=warnings.warn): 244 if mf is None: 245 mf = modulegraph.ModuleGraph() 246 # feed Modulefinder with everything, and return it. 247 248 for path in scripts: 249 mf.run_script(path) 250 251 for mod in includes: 252 try: 253 if mod[-2:] == '.*': 254 mf.import_hook(mod[:-2], None, ['*']) 255 else: 256 mf.import_hook(mod) 257 except ImportError: 258 warn("No module named %s"%(mod,)) 259 260 for f in packages: 261 # If modulegraph has seen a reference to the package, then 262 # we prefer to believe that (imp_find_module doesn't seem to locate 263 # sub-packages) 264 m = mf.findNode(f) 265 if m is not None: 266 path = m.packagepath[0] 267 else: 268 # Find path of package 269 # TODO: use imp_find_module_or_importer 270 try: 271 path = imp_find_module(f, mf.path)[1] 272 except ImportError: 273 warn("No package named %s" % f) 274 continue 275 276 # walk the path to find subdirs containing __init__.py files 277 # scan the results (directory of __init__.py files) 278 # first trim the path (of the head package), 279 # then convert directory name in package name, 280 # finally push into modulegraph. 281 # FIXME: 282 # 1) Needs to be adjusted for namespace packages in python 3.3 283 # 2) Code is fairly dodgy and needs better tests 284 for (dirpath, dirnames, filenames) in os.walk(path): 285 if '__init__.py' in filenames and dirpath.startswith(path): 286 package = f + '.' + dirpath[len(path)+1:].replace(os.sep, '.') 287 if package.endswith('.'): 288 package = package[:-1] 289 m = mf.import_hook(package, None, ["*"]) 290 else: 291 # Exclude subtrees that aren't packages 292 dirnames[:] = [] 293 294 295 return mf 296 297# 298# resource constants 299# 300PY_SUFFIXES = ['.py', '.pyw', '.pyo', '.pyc'] 301C_SUFFIXES = [ 302 _triple[0] for _triple in imp.get_suffixes() 303 if _triple[2] == imp.C_EXTENSION 304] 305 306# 307# side-effects 308# 309 310def _replacePackages(): 311 REPLACEPACKAGES = { 312 '_xmlplus': 'xml', 313 } 314 for k,v in REPLACEPACKAGES.items(): 315 modulegraph.replacePackage(k, v) 316 317_replacePackages() 318 319def find_modules(scripts=(), includes=(), packages=(), excludes=(), path=None, debug=0): 320 """ 321 High-level interface, takes iterables for: 322 scripts, includes, packages, excludes 323 324 And returns a :class:`modulegraph.modulegraph.ModuleGraph` instance, 325 python_files, and extensions 326 327 python_files is a list of pure python dependencies as modulegraph.Module objects, 328 extensions is a list of platform-specific C extension dependencies as modulegraph.Module objects 329 """ 330 scripts = set(scripts) 331 includes = set(includes) 332 packages = set(packages) 333 excludes = set(excludes) 334 plat_prepare(includes, packages, excludes) 335 mf = modulegraph.ModuleGraph( 336 path=path, 337 excludes=(excludes - includes), 338 implies=get_implies(), 339 debug=debug, 340 ) 341 find_needed_modules(mf, scripts, includes, packages) 342 return mf 343 344def test(): 345 if '-g' in sys.argv[1:]: 346 sys.argv.remove('-g') 347 dograph = True 348 else: 349 dograph = False 350 if '-x' in sys.argv[1:]: 351 sys.argv.remove('-x') 352 doxref = True 353 else: 354 doxref= False 355 356 scripts = sys.argv[1:] or [__file__] 357 mf = find_modules(scripts=scripts) 358 if doxref: 359 mf.create_xref() 360 elif dograph: 361 mf.graphreport() 362 else: 363 mf.report() 364 365if __name__ == '__main__': 366 test() 367