1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Generate docs for the TensorFlow Python API.""" 16 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import argparse 22import fnmatch 23import os 24import shutil 25import tempfile 26 27import six 28 29from tensorflow.python.util import tf_inspect 30from tensorflow.tools.common import public_api 31from tensorflow.tools.common import traverse 32from tensorflow.tools.docs import doc_controls 33from tensorflow.tools.docs import doc_generator_visitor 34from tensorflow.tools.docs import parser 35from tensorflow.tools.docs import pretty_docs 36from tensorflow.tools.docs import py_guide_parser 37 38 39def write_docs(output_dir, 40 parser_config, 41 yaml_toc, 42 root_title='TensorFlow', 43 search_hints=True, 44 site_api_path=''): 45 """Write previously extracted docs to disk. 46 47 Write a docs page for each symbol included in the indices of parser_config to 48 a tree of docs at `output_dir`. 49 50 Symbols with multiple aliases will have only one page written about 51 them, which is referenced for all aliases. 52 53 Args: 54 output_dir: Directory to write documentation markdown files to. Will be 55 created if it doesn't exist. 56 parser_config: A `parser.ParserConfig` object, containing all the necessary 57 indices. 58 yaml_toc: Set to `True` to generate a "_toc.yaml" file. 59 root_title: The title name for the root level index.md. 60 search_hints: (bool) include meta-data search hints at the top of each 61 output file. 62 site_api_path: The output path relative to the site root. Used in the 63 `_toc.yaml` and `_redirects.yaml` files. 64 65 Raises: 66 ValueError: if `output_dir` is not an absolute path 67 """ 68 # Make output_dir. 69 if not os.path.isabs(output_dir): 70 raise ValueError("'output_dir' must be an absolute path.\n" 71 " output_dir='%s'" % output_dir) 72 73 if not os.path.exists(output_dir): 74 os.makedirs(output_dir) 75 76 # These dictionaries are used for table-of-contents generation below 77 # They will contain, after the for-loop below:: 78 # - module name(string):classes and functions the module contains(list) 79 module_children = {} 80 # - symbol name(string):pathname (string) 81 symbol_to_file = {} 82 83 # Collect redirects for an api _redirects.yaml file. 84 redirects = [] 85 86 # Parse and write Markdown pages, resolving cross-links (@{symbol}). 87 for full_name, py_object in six.iteritems(parser_config.index): 88 parser_config.reference_resolver.current_doc_full_name = full_name 89 90 if full_name in parser_config.duplicate_of: 91 continue 92 93 # Methods and some routines are documented only as part of their class. 94 if not (tf_inspect.ismodule(py_object) or tf_inspect.isclass(py_object) or 95 parser.is_free_function(py_object, full_name, parser_config.index)): 96 continue 97 98 sitepath = os.path.join('api_docs/python', 99 parser.documentation_path(full_name)[:-3]) 100 101 # For TOC, we need to store a mapping from full_name to the file 102 # we're generating 103 symbol_to_file[full_name] = sitepath 104 105 # For a module, remember the module for the table-of-contents 106 if tf_inspect.ismodule(py_object): 107 if full_name in parser_config.tree: 108 module_children.setdefault(full_name, []) 109 110 # For something else that's documented, 111 # figure out what module it lives in 112 else: 113 subname = str(full_name) 114 while True: 115 subname = subname[:subname.rindex('.')] 116 if tf_inspect.ismodule(parser_config.index[subname]): 117 module_children.setdefault(subname, []).append(full_name) 118 break 119 120 # Generate docs for `py_object`, resolving references. 121 page_info = parser.docs_for_object(full_name, py_object, parser_config) 122 123 path = os.path.join(output_dir, parser.documentation_path(full_name)) 124 directory = os.path.dirname(path) 125 try: 126 if not os.path.exists(directory): 127 os.makedirs(directory) 128 # This function returns raw bytes in PY2 or unicode in PY3. 129 if search_hints: 130 content = [page_info.get_metadata_html()] 131 else: 132 content = [''] 133 134 content.append(pretty_docs.build_md_page(page_info)) 135 text = '\n'.join(content) 136 if six.PY3: 137 text = text.encode('utf-8') 138 with open(path, 'wb') as f: 139 f.write(text) 140 except OSError: 141 raise OSError( 142 'Cannot write documentation for %s to %s' % (full_name, directory)) 143 144 duplicates = parser_config.duplicates.get(full_name, []) 145 if not duplicates: 146 continue 147 148 duplicates = [item for item in duplicates if item != full_name] 149 150 for dup in duplicates: 151 from_path = os.path.join(site_api_path, dup.replace('.', '/')) 152 to_path = os.path.join(site_api_path, full_name.replace('.', '/')) 153 redirects.append(( 154 os.path.join('/', from_path), 155 os.path.join('/', to_path))) 156 157 if redirects: 158 redirects = sorted(redirects) 159 template = ('- from: {}\n' 160 ' to: {}\n') 161 redirects = [template.format(f, t) for f, t in redirects] 162 api_redirects_path = os.path.join(output_dir, '_redirects.yaml') 163 with open(api_redirects_path, 'w') as redirect_file: 164 redirect_file.write('redirects:\n') 165 redirect_file.write(''.join(redirects)) 166 167 if yaml_toc: 168 # Generate table of contents 169 170 # Put modules in alphabetical order, case-insensitive 171 modules = sorted(module_children.keys(), key=lambda a: a.upper()) 172 173 leftnav_path = os.path.join(output_dir, '_toc.yaml') 174 with open(leftnav_path, 'w') as f: 175 176 # Generate header 177 f.write('# Automatically generated file; please do not edit\ntoc:\n') 178 for module in modules: 179 indent_num = module.count('.') 180 # Don't list `tf.submodule` inside `tf` 181 indent_num = max(indent_num, 1) 182 indent = ' '*indent_num 183 184 if indent_num > 1: 185 # tf.contrib.baysflow.entropy will be under 186 # tf.contrib->baysflow->entropy 187 title = module.split('.')[-1] 188 else: 189 title = module 190 191 header = [ 192 '- title: ' + title, 193 ' section:', 194 ' - title: Overview', 195 ' path: ' + os.path.join('/', site_api_path, 196 symbol_to_file[module])] 197 header = ''.join([indent+line+'\n' for line in header]) 198 f.write(header) 199 200 symbols_in_module = module_children.get(module, []) 201 # Sort case-insensitive, if equal sort case sensitive (upper first) 202 symbols_in_module.sort(key=lambda a: (a.upper(), a)) 203 204 for full_name in symbols_in_module: 205 item = [ 206 ' - title: ' + full_name[len(module) + 1:], 207 ' path: ' + os.path.join('/', site_api_path, 208 symbol_to_file[full_name])] 209 item = ''.join([indent+line+'\n' for line in item]) 210 f.write(item) 211 212 # Write a global index containing all full names with links. 213 with open(os.path.join(output_dir, 'index.md'), 'w') as f: 214 f.write( 215 parser.generate_global_index(root_title, parser_config.index, 216 parser_config.reference_resolver)) 217 218 219def add_dict_to_dict(add_from, add_to): 220 for key in add_from: 221 if key in add_to: 222 add_to[key].extend(add_from[key]) 223 else: 224 add_to[key] = add_from[key] 225 226 227# Exclude some libraries in contrib from the documentation altogether. 228def _get_default_private_map(): 229 return { 230 'tf.contrib.autograph': ['utils', 'operators'], 231 'tf.test': ['mock'], 232 'tf.compat': ['v1', 'v2'], 233 'tf.contrib.estimator': ['python'], 234 } 235 236 237# Exclude members of some libraries. 238def _get_default_do_not_descend_map(): 239 # TODO(markdaoust): Use docs_controls decorators, locally, instead. 240 return { 241 'tf': ['cli', 'lib', 'wrappers'], 242 'tf.contrib': [ 243 'compiler', 244 'grid_rnn', 245 # Block contrib.keras to de-clutter the docs 246 'keras', 247 'labeled_tensor', 248 'quantization', 249 'session_bundle', 250 'slim', 251 'solvers', 252 'specs', 253 'tensor_forest', 254 'tensorboard', 255 'testing', 256 'tfprof', 257 ], 258 'tf.contrib.bayesflow': [ 259 'special_math', 'stochastic_gradient_estimators', 260 'stochastic_variables' 261 ], 262 'tf.contrib.ffmpeg': ['ffmpeg_ops'], 263 'tf.contrib.graph_editor': [ 264 'edit', 'match', 'reroute', 'subgraph', 'transform', 'select', 'util' 265 ], 266 'tf.contrib.keras': ['api', 'python'], 267 'tf.contrib.layers': ['feature_column', 'summaries'], 268 'tf.contrib.learn': [ 269 'datasets', 270 'head', 271 'graph_actions', 272 'io', 273 'models', 274 'monitors', 275 'ops', 276 'preprocessing', 277 'utils', 278 ], 279 'tf.contrib.util': ['loader'], 280 } 281 282 283class DocControlsAwareCrawler(public_api.PublicAPIVisitor): 284 """A `docs_controls` aware API-crawler.""" 285 286 def _is_private(self, path, name, obj): 287 if doc_controls.should_skip(obj): 288 return True 289 return super(DocControlsAwareCrawler, self)._is_private(path, name, obj) 290 291 292def extract(py_modules, 293 private_map, 294 do_not_descend_map, 295 visitor_cls=doc_generator_visitor.DocGeneratorVisitor): 296 """Extract docs from tf namespace and write them to disk.""" 297 # Traverse the first module. 298 visitor = visitor_cls(py_modules[0][0]) 299 api_visitor = DocControlsAwareCrawler(visitor) 300 api_visitor.set_root_name(py_modules[0][0]) 301 add_dict_to_dict(private_map, api_visitor.private_map) 302 add_dict_to_dict(do_not_descend_map, api_visitor.do_not_descend_map) 303 304 traverse.traverse(py_modules[0][1], api_visitor) 305 306 # Traverse all py_modules after the first: 307 for module_name, module in py_modules[1:]: 308 visitor.set_root_name(module_name) 309 api_visitor.set_root_name(module_name) 310 traverse.traverse(module, api_visitor) 311 312 return visitor 313 314 315class _GetMarkdownTitle(py_guide_parser.PyGuideParser): 316 """Extract the title from a .md file.""" 317 318 def __init__(self): 319 self.title = None 320 py_guide_parser.PyGuideParser.__init__(self) 321 322 def process_title(self, _, title): 323 if self.title is None: # only use the first title 324 self.title = title 325 326 327class _DocInfo(object): 328 """A simple struct for holding a doc's url and title.""" 329 330 def __init__(self, url, title): 331 self.url = url 332 self.title = title 333 334 335def build_doc_index(src_dir): 336 """Build an index from a keyword designating a doc to _DocInfo objects.""" 337 doc_index = {} 338 if not os.path.isabs(src_dir): 339 raise ValueError("'src_dir' must be an absolute path.\n" 340 " src_dir='%s'" % src_dir) 341 342 if not os.path.exists(src_dir): 343 raise ValueError("'src_dir' path must exist.\n" 344 " src_dir='%s'" % src_dir) 345 346 for dirpath, _, filenames in os.walk(src_dir): 347 suffix = os.path.relpath(path=dirpath, start=src_dir) 348 for base_name in filenames: 349 if not base_name.endswith('.md'): 350 continue 351 title_parser = _GetMarkdownTitle() 352 title_parser.process(os.path.join(dirpath, base_name)) 353 if title_parser.title is None: 354 msg = ('`{}` has no markdown title (# title)'.format( 355 os.path.join(dirpath, base_name))) 356 raise ValueError(msg) 357 key_parts = os.path.join(suffix, base_name[:-3]).split('/') 358 if key_parts[-1] == 'index': 359 key_parts = key_parts[:-1] 360 doc_info = _DocInfo(os.path.join(suffix, base_name), title_parser.title) 361 doc_index[key_parts[-1]] = doc_info 362 if len(key_parts) > 1: 363 doc_index['/'.join(key_parts[-2:])] = doc_info 364 365 return doc_index 366 367 368class _GuideRef(object): 369 370 def __init__(self, base_name, title, section_title, section_tag): 371 self.url = 'api_guides/python/' + (('%s#%s' % (base_name, section_tag)) 372 if section_tag else base_name) 373 self.link_text = (('%s > %s' % (title, section_title)) 374 if section_title else title) 375 376 def make_md_link(self, url_prefix): 377 return '[%s](%s%s)' % (self.link_text, url_prefix, self.url) 378 379 380class _GenerateGuideIndex(py_guide_parser.PyGuideParser): 381 """Turn guide files into an index from symbol name to a list of _GuideRefs.""" 382 383 def __init__(self): 384 self.index = {} 385 py_guide_parser.PyGuideParser.__init__(self) 386 387 def process(self, full_path, base_name): 388 """Index a file, reading from `full_path`, with `base_name` as the link.""" 389 self.full_path = full_path 390 self.base_name = base_name 391 self.title = None 392 self.section_title = None 393 self.section_tag = None 394 py_guide_parser.PyGuideParser.process(self, full_path) 395 396 def process_title(self, _, title): 397 if self.title is None: # only use the first title 398 self.title = title 399 400 def process_section(self, _, section_title, tag): 401 self.section_title = section_title 402 self.section_tag = tag 403 404 def process_line(self, _, line): 405 """Index the file and section of each `symbol` reference.""" 406 for match in parser.AUTO_REFERENCE_RE.finditer(line): 407 val = self.index.get(match.group(1), []) 408 val.append( 409 _GuideRef(self.base_name, self.title, self.section_title, 410 self.section_tag)) 411 self.index[match.group(1)] = val 412 413 414def _build_guide_index(guide_src_dir): 415 """Return dict: symbol name -> _GuideRef from the files in `guide_src_dir`.""" 416 index_generator = _GenerateGuideIndex() 417 if os.path.exists(guide_src_dir): 418 for full_path, base_name in py_guide_parser.md_files_in_dir(guide_src_dir): 419 index_generator.process(full_path, base_name) 420 return index_generator.index 421 422 423class _UpdateTags(py_guide_parser.PyGuideParser): 424 """Rewrites a Python guide so that each section has an explicit id tag. 425 426 "section" here refers to blocks delimited by second level headings. 427 """ 428 429 def process_section(self, line_number, section_title, tag): 430 self.replace_line(line_number, '<h2 id="%s">%s</h2>' % (tag, section_title)) 431 432 433def update_id_tags_inplace(src_dir): 434 """Set explicit ids on all second-level headings to ensure back-links work. 435 436 Args: 437 src_dir: The directory of md-files to convert (inplace). 438 """ 439 tag_updater = _UpdateTags() 440 441 for dirpath, _, filenames in os.walk(src_dir): 442 for base_name in filenames: 443 if not base_name.endswith('.md'): 444 continue 445 full_path = os.path.join(src_dir, dirpath, base_name) 446 447 # Tag updater loads the file, makes the replacements, and returns the 448 # modified file contents 449 content = tag_updater.process(full_path) 450 with open(full_path, 'w') as f: 451 f.write(content) 452 453 454EXCLUDED = set(['__init__.py', 'OWNERS', 'README.txt']) 455 456 457def replace_refs(src_dir, 458 output_dir, 459 reference_resolver, 460 file_pattern='*.md', 461 api_docs_relpath='api_docs'): 462 """Fix @{} references in all files under `src_dir` matching `file_pattern`. 463 464 A matching directory structure, with the modified files is 465 written to `output_dir`. 466 467 `{"__init__.py","OWNERS","README.txt"}` are skipped. 468 469 Files not matching `file_pattern` (using `fnmatch`) are copied with no change. 470 471 Also, files in the `api_guides/python` directory get explicit ids set on all 472 heading-2s to ensure back-links work. 473 474 Args: 475 src_dir: The directory to convert files from. 476 output_dir: The root directory to write the resulting files to. 477 reference_resolver: A `parser.ReferenceResolver` to make the replacements. 478 file_pattern: Only replace references in files matching file_patters, 479 using fnmatch. Non-matching files are copied unchanged. 480 api_docs_relpath: Relative-path string to the api_docs, from the src_dir. 481 """ 482 # Iterate through all the source files and process them. 483 for dirpath, _, filenames in os.walk(src_dir): 484 depth = os.path.relpath(src_dir, start=dirpath) 485 # How to get from `dirpath` to api_docs/python/ 486 relative_path_to_root = os.path.join(depth, api_docs_relpath, 'python') 487 488 # Make the directory under output_dir. 489 new_dir = os.path.join(output_dir, 490 os.path.relpath(path=dirpath, start=src_dir)) 491 if not os.path.exists(new_dir): 492 os.makedirs(new_dir) 493 494 for base_name in filenames: 495 if base_name in EXCLUDED: 496 continue 497 full_in_path = os.path.join(dirpath, base_name) 498 499 # Set the `current_doc_full_name` so bad files can be reported on errors. 500 reference_resolver.current_doc_full_name = full_in_path 501 502 suffix = os.path.relpath(path=full_in_path, start=src_dir) 503 full_out_path = os.path.join(output_dir, suffix) 504 # Copy files that do not match the file_pattern, unmodified. 505 if not fnmatch.fnmatch(base_name, file_pattern): 506 if full_in_path != full_out_path: 507 shutil.copyfile(full_in_path, full_out_path) 508 continue 509 510 with open(full_in_path, 'rb') as f: 511 content = f.read().decode('utf-8') 512 513 content = reference_resolver.replace_references(content, 514 relative_path_to_root) 515 with open(full_out_path, 'wb') as f: 516 f.write(content.encode('utf-8')) 517 518 519class DocGenerator(object): 520 """Main entry point for generating docs.""" 521 522 def __init__(self): 523 self.argument_parser = argparse.ArgumentParser() 524 self._py_modules = None 525 self._private_map = _get_default_private_map() 526 self._do_not_descend_map = _get_default_do_not_descend_map() 527 self.yaml_toc = True 528 529 self.argument_parser.add_argument( 530 '--no_search_hints', 531 dest='search_hints', 532 action='store_false', 533 default=True) 534 535 self.argument_parser.add_argument( 536 '--site_api_path', 537 type=str, default='', 538 help='The path from the site-root to api_docs' 539 'directory for this project') 540 541 self.argument_parser.add_argument( 542 '--api_cache_out_path', 543 type=str, 544 default=None, 545 help='Path to store a json-serialized api-index, so links can be ' 546 'inserted into docs without rebuilding the api_docs') 547 548 def add_output_dir_argument(self): 549 self.argument_parser.add_argument( 550 '--output_dir', 551 type=str, 552 default=None, 553 required=True, 554 help='Directory to write docs to.') 555 556 def add_src_dir_argument(self): 557 self.argument_parser.add_argument( 558 '--src_dir', 559 type=str, 560 default=tempfile.mkdtemp(), 561 required=False, 562 help='Optional directory of source docs to add api_docs links to') 563 564 def add_base_dir_argument(self, default_base_dir): 565 self.argument_parser.add_argument( 566 '--base_dir', 567 type=str, 568 default=default_base_dir, 569 help='Base directory to strip from file names referenced in docs.') 570 571 def parse_known_args(self): 572 flags, _ = self.argument_parser.parse_known_args() 573 return flags 574 575 def add_to_private_map(self, d): 576 add_dict_to_dict(d, self._private_map) 577 578 def add_to_do_not_descend_map(self, d): 579 add_dict_to_dict(d, self._do_not_descend_map) 580 581 def set_private_map(self, d): 582 self._private_map = d 583 584 def set_do_not_descend_map(self, d): 585 self._do_not_descend_map = d 586 587 def set_py_modules(self, py_modules): 588 self._py_modules = py_modules 589 590 def py_module_names(self): 591 if self._py_modules is None: 592 raise RuntimeError( 593 'Must call set_py_modules() before running py_module_names().') 594 return [name for (name, _) in self._py_modules] 595 596 def make_reference_resolver(self, visitor, doc_index): 597 return parser.ReferenceResolver.from_visitor( 598 visitor, doc_index, py_module_names=self.py_module_names()) 599 600 def make_parser_config(self, visitor, reference_resolver, guide_index, 601 base_dir): 602 return parser.ParserConfig( 603 reference_resolver=reference_resolver, 604 duplicates=visitor.duplicates, 605 duplicate_of=visitor.duplicate_of, 606 tree=visitor.tree, 607 index=visitor.index, 608 reverse_index=visitor.reverse_index, 609 guide_index=guide_index, 610 base_dir=base_dir) 611 612 def run_extraction(self): 613 return extract(self._py_modules, self._private_map, 614 self._do_not_descend_map) 615 616 def build(self, flags): 617 """Build all the docs. 618 619 This produces two outputs 620 621 python api docs: 622 623 * generated from modules set with `set_py_modules`. 624 * written to '{FLAGS.output_dir}/api_docs/python/' 625 626 non-api docs: 627 628 * Everything in '{FLAGS.src_dir}' is copied to '{FLAGS.output_dir}'. 629 * '@{}' references in '.md' files are replaced with links. 630 * '.md' files under 'api_guides/python' have explicit ids set for their 631 second level headings. 632 633 Args: 634 flags: 635 * src_dir: Where to fetch the non-api-docs. 636 * base_dir: Base of the docs directory (Used to build correct 637 relative links). 638 * output_dir: Where to write the resulting docs. 639 640 Returns: 641 The number of errors encountered while processing. 642 """ 643 # Extract the python api from the _py_modules 644 doc_index = build_doc_index(flags.src_dir) 645 visitor = self.run_extraction() 646 reference_resolver = self.make_reference_resolver(visitor, doc_index) 647 648 if getattr(flags, 'api_cache_out_path', None): 649 reference_resolver.to_json_file(flags.api_cache_out_path) 650 651 # Build the guide_index for the api_docs back links. 652 root_title = getattr(flags, 'root_title', 'TensorFlow') 653 guide_index = _build_guide_index( 654 os.path.join(flags.src_dir, 'api_guides/python')) 655 656 # Write the api docs. 657 parser_config = self.make_parser_config(visitor, reference_resolver, 658 guide_index, flags.base_dir) 659 output_dir = os.path.join(flags.output_dir, 'api_docs/python') 660 661 write_docs( 662 output_dir, 663 parser_config, 664 yaml_toc=self.yaml_toc, 665 root_title=root_title, 666 search_hints=getattr(flags, 'search_hints', True), 667 site_api_path=getattr(flags, 'site_api_path', '')) 668 669 # Replace all the @{} references in files under `FLAGS.src_dir` 670 replace_refs(flags.src_dir, flags.output_dir, reference_resolver, '*.md') 671 # Fix the tags in the guide dir. 672 guide_dir = os.path.join(flags.output_dir, 'api_guides/python') 673 if os.path.exists(guide_dir): 674 update_id_tags_inplace(guide_dir) 675 676 # Report all errors found by the reference resolver, and return the error 677 # code. 678 parser_config.reference_resolver.log_errors() 679 680 return parser_config.reference_resolver.num_errors() 681