• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Generate docs for the TensorFlow Python API."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import argparse
22import fnmatch
23import os
24import shutil
25import tempfile
26
27import six
28
29from tensorflow.python.util import tf_inspect
30from tensorflow.tools.common import public_api
31from tensorflow.tools.common import traverse
32from tensorflow.tools.docs import doc_controls
33from tensorflow.tools.docs import doc_generator_visitor
34from tensorflow.tools.docs import parser
35from tensorflow.tools.docs import pretty_docs
36from tensorflow.tools.docs import py_guide_parser
37
38
def write_docs(output_dir,
               parser_config,
               yaml_toc,
               root_title='TensorFlow',
               search_hints=True,
               site_api_path=''):
  """Write previously extracted docs to disk.

  Write a docs page for each symbol included in the indices of parser_config to
  a tree of docs at `output_dir`.

  Symbols with multiple aliases will have only one page written about
  them, which is referenced for all aliases.

  Besides the per-symbol pages, the following files are written directly
  under `output_dir`:

    * `_redirects.yaml`: only when at least one alias redirect was collected.
    * `_toc.yaml`: only when `yaml_toc` is true.
    * `index.md`: always; a global index of all full names.

  Args:
    output_dir: Directory to write documentation markdown files to. Will be
      created if it doesn't exist.
    parser_config: A `parser.ParserConfig` object, containing all the necessary
      indices.
    yaml_toc: Set to `True` to generate a "_toc.yaml" file.
    root_title: The title name for the root level index.md.
    search_hints: (bool) include meta-data search hints at the top of each
      output file.
    site_api_path: The output path relative to the site root. Used in the
      `_toc.yaml` and `_redirects.yaml` files.

  Raises:
    ValueError: if `output_dir` is not an absolute path
    OSError: if a page cannot be written. The message names the symbol, the
      target directory and the underlying error.
  """
  # Make output_dir.
  if not os.path.isabs(output_dir):
    raise ValueError("'output_dir' must be an absolute path.\n"
                     "    output_dir='%s'" % output_dir)

  if not os.path.exists(output_dir):
    os.makedirs(output_dir)

  # These dictionaries are used for table-of-contents generation below
  # They will contain, after the for-loop below::
  #  - module name(string):classes and functions the module contains(list)
  module_children = {}
  #  - symbol name(string):pathname (string)
  symbol_to_file = {}

  # Collect redirects for an api _redirects.yaml file.
  redirects = []

  # Parse and write Markdown pages, resolving cross-links (@{symbol}).
  for full_name, py_object in six.iteritems(parser_config.index):
    # Lets the resolver report which doc was being processed on errors.
    parser_config.reference_resolver.current_doc_full_name = full_name

    # Aliases get no page of their own; they are handled as redirects below.
    if full_name in parser_config.duplicate_of:
      continue

    # Methods and some routines are documented only as part of their class.
    if not (tf_inspect.ismodule(py_object) or tf_inspect.isclass(py_object) or
            parser.is_free_function(py_object, full_name, parser_config.index)):
      continue

    # Drop the last three characters of the documentation path (presumably
    # the '.md' extension) to get the site-relative page path.
    sitepath = os.path.join('api_docs/python',
                            parser.documentation_path(full_name)[:-3])

    # For TOC, we need to store a mapping from full_name to the file
    # we're generating
    symbol_to_file[full_name] = sitepath

    # For a module, remember the module for the table-of-contents
    if tf_inspect.ismodule(py_object):
      if full_name in parser_config.tree:
        module_children.setdefault(full_name, [])

    # For something else that's documented,
    # figure out what module it lives in
    else:
      subname = str(full_name)
      # Strip trailing name components until the remaining prefix is a module.
      while True:
        subname = subname[:subname.rindex('.')]
        if tf_inspect.ismodule(parser_config.index[subname]):
          module_children.setdefault(subname, []).append(full_name)
          break

    # Generate docs for `py_object`, resolving references.
    page_info = parser.docs_for_object(full_name, py_object, parser_config)

    path = os.path.join(output_dir, parser.documentation_path(full_name))
    directory = os.path.dirname(path)
    try:
      if not os.path.exists(directory):
        os.makedirs(directory)
      # This function returns raw bytes in PY2 or unicode in PY3.
      if search_hints:
        content = [page_info.get_metadata_html()]
      else:
        content = ['']

      content.append(pretty_docs.build_md_page(page_info))
      text = '\n'.join(content)
      # The file is opened in binary mode, so PY3 text must be encoded first.
      if six.PY3:
        text = text.encode('utf-8')
      with open(path, 'wb') as f:
        f.write(text)
    except OSError as e:
      # Keep the underlying reason in the message: Python 2 has no implicit
      # exception chaining, so it would otherwise be lost.
      raise OSError(
          'Cannot write documentation for %s to %s: %s' % (full_name,
                                                           directory, e))

    duplicates = parser_config.duplicates.get(full_name, [])
    if not duplicates:
      continue

    # The duplicates list may contain the canonical name itself.
    duplicates = [item for item in duplicates if item != full_name]

    for dup in duplicates:
      from_path = os.path.join(site_api_path, dup.replace('.', '/'))
      to_path = os.path.join(site_api_path, full_name.replace('.', '/'))
      redirects.append((
          os.path.join('/', from_path),
          os.path.join('/', to_path)))

  if redirects:
    redirects = sorted(redirects)
    template = ('- from: {}\n'
                '  to: {}\n')
    redirects = [template.format(f, t) for f, t in redirects]
    api_redirects_path = os.path.join(output_dir, '_redirects.yaml')
    with open(api_redirects_path, 'w') as redirect_file:
      redirect_file.write('redirects:\n')
      redirect_file.write(''.join(redirects))

  if yaml_toc:
    # Generate table of contents

    # Put modules in alphabetical order, case-insensitive
    modules = sorted(module_children.keys(), key=lambda a: a.upper())

    leftnav_path = os.path.join(output_dir, '_toc.yaml')
    with open(leftnav_path, 'w') as f:

      # Generate header
      f.write('# Automatically generated file; please do not edit\ntoc:\n')
      for module in modules:
        indent_num = module.count('.')
        # Don't list `tf.submodule` inside `tf`
        indent_num = max(indent_num, 1)
        indent = '  '*indent_num

        if indent_num > 1:
          # tf.contrib.bayesflow.entropy will be under
          #   tf.contrib->bayesflow->entropy
          title = module.split('.')[-1]
        else:
          title = module

        header = [
            '- title: ' + title,
            '  section:',
            '  - title: Overview',
            '    path: ' + os.path.join('/', site_api_path,
                                        symbol_to_file[module])]
        header = ''.join([indent+line+'\n' for line in header])
        f.write(header)

        symbols_in_module = module_children.get(module, [])
        # Sort case-insensitive, if equal sort case sensitive (upper first)
        symbols_in_module.sort(key=lambda a: (a.upper(), a))

        for full_name in symbols_in_module:
          item = [
              # Title is the symbol name relative to its module.
              '  - title: ' + full_name[len(module) + 1:],
              '    path: ' + os.path.join('/', site_api_path,
                                          symbol_to_file[full_name])]
          item = ''.join([indent+line+'\n' for line in item])
          f.write(item)

  # Write a global index containing all full names with links.
  with open(os.path.join(output_dir, 'index.md'), 'w') as f:
    f.write(
        parser.generate_global_index(root_title, parser_config.index,
                                     parser_config.reference_resolver))
217
218
def add_dict_to_dict(add_from, add_to):
  """Merge the list-valued dict `add_from` into `add_to`, in place.

  For a key already present in `add_to`, the items from `add_from` are
  appended to the existing list. For a new key a *copy* of the source list is
  stored, so that later merges into `add_to` never mutate the lists owned by
  `add_from`.

  Args:
    add_from: dict mapping keys to lists of items to add. Not modified.
    add_to: dict mapping keys to lists. Updated in place.
  """
  for key in add_from:
    if key in add_to:
      add_to[key].extend(add_from[key])
    else:
      # Copy rather than alias: a later extend() on `add_to[key]` must not
      # leak into the caller's `add_from` list.
      add_to[key] = list(add_from[key])
225
226
227# Exclude some libraries in contrib from the documentation altogether.
228def _get_default_private_map():
229  return {
230      'tf.contrib.autograph': ['utils', 'operators'],
231      'tf.test': ['mock'],
232      'tf.compat': ['v1', 'v2'],
233      'tf.contrib.estimator': ['python'],
234  }
235
236
237# Exclude members of some libraries.
238def _get_default_do_not_descend_map():
239  # TODO(markdaoust): Use docs_controls decorators, locally, instead.
240  return {
241      'tf': ['cli', 'lib', 'wrappers'],
242      'tf.contrib': [
243          'compiler',
244          'grid_rnn',
245          # Block contrib.keras to de-clutter the docs
246          'keras',
247          'labeled_tensor',
248          'quantization',
249          'session_bundle',
250          'slim',
251          'solvers',
252          'specs',
253          'tensor_forest',
254          'tensorboard',
255          'testing',
256          'tfprof',
257      ],
258      'tf.contrib.bayesflow': [
259          'special_math', 'stochastic_gradient_estimators',
260          'stochastic_variables'
261      ],
262      'tf.contrib.ffmpeg': ['ffmpeg_ops'],
263      'tf.contrib.graph_editor': [
264          'edit', 'match', 'reroute', 'subgraph', 'transform', 'select', 'util'
265      ],
266      'tf.contrib.keras': ['api', 'python'],
267      'tf.contrib.layers': ['feature_column', 'summaries'],
268      'tf.contrib.learn': [
269          'datasets',
270          'head',
271          'graph_actions',
272          'io',
273          'models',
274          'monitors',
275          'ops',
276          'preprocessing',
277          'utils',
278      ],
279      'tf.contrib.util': ['loader'],
280  }
281
282
class DocControlsAwareCrawler(public_api.PublicAPIVisitor):
  """An API crawler that also honors `doc_controls` skip markers."""

  def _is_private(self, path, name, obj):
    """Return whether `obj` should be treated as private.

    Returns `True` when `doc_controls.should_skip(obj)` is truthy; otherwise
    the decision is delegated to the parent class.
    """
    if not doc_controls.should_skip(obj):
      return super(DocControlsAwareCrawler, self)._is_private(path, name, obj)
    return True
290
291
def extract(py_modules,
            private_map,
            do_not_descend_map,
            visitor_cls=doc_generator_visitor.DocGeneratorVisitor):
  """Traverse `py_modules` and collect what the crawler visits in a visitor.

  Args:
    py_modules: A non-empty sequence of `(name, module)` pairs. The first
      pair's name is used to construct the visitor; each later pair re-roots
      both the visitor and the crawler before being traversed.
    private_map: dict merged into the crawler's `private_map`.
    do_not_descend_map: dict merged into the crawler's `do_not_descend_map`.
    visitor_cls: Callable taking the first root name and returning the
      visitor that is wrapped by the crawler.

  Returns:
    The visitor created by `visitor_cls`, after all modules were traversed.
  """
  # The first module determines how the visitor and crawler are set up.
  first_entry = py_modules[0]
  root_name = first_entry[0]

  visitor = visitor_cls(root_name)
  api_visitor = DocControlsAwareCrawler(visitor)
  api_visitor.set_root_name(root_name)

  for extra, target in ((private_map, api_visitor.private_map),
                        (do_not_descend_map, api_visitor.do_not_descend_map)):
    add_dict_to_dict(extra, target)

  traverse.traverse(first_entry[1], api_visitor)

  # Every remaining module is traversed under its own root name.
  for module_name, module in py_modules[1:]:
    visitor.set_root_name(module_name)
    api_visitor.set_root_name(module_name)
    traverse.traverse(module, api_visitor)

  return visitor
313
314
class _GetMarkdownTitle(py_guide_parser.PyGuideParser):
  """Guide parser that remembers the first title it is handed."""

  def __init__(self):
    # `None` means that no title has been seen yet.
    self.title = None
    py_guide_parser.PyGuideParser.__init__(self)

  def process_title(self, _, title):
    """Record `title` unless a title has already been recorded."""
    if self.title is not None:
      return
    self.title = title
325
326
327class _DocInfo(object):
328  """A simple struct for holding a doc's url and title."""
329
330  def __init__(self, url, title):
331    self.url = url
332    self.title = title
333
334
def build_doc_index(src_dir):
  """Index the markdown files under `src_dir` by short keyword.

  Every `.md` file found while walking `src_dir` is registered under the last
  component of its extension-less relative path (for an `index.md` file the
  trailing `index` component is dropped first). When more than one component
  remains, the file is additionally registered under the last two components
  joined by '/'.

  Args:
    src_dir: Absolute path of an existing directory to walk.

  Returns:
    A dict mapping keyword strings to `_DocInfo` objects whose `url` is the
    file's path relative to `src_dir`.

  Raises:
    ValueError: if `src_dir` is not absolute, does not exist, or a markdown
      file yields no title.
  """
  if not os.path.isabs(src_dir):
    raise ValueError("'src_dir' must be an absolute path.\n"
                     "    src_dir='%s'" % src_dir)

  if not os.path.exists(src_dir):
    raise ValueError("'src_dir' path must exist.\n"
                     "    src_dir='%s'" % src_dir)

  index = {}
  for current_dir, _, file_names in os.walk(src_dir):
    rel_dir = os.path.relpath(path=current_dir, start=src_dir)
    markdown_names = [name for name in file_names if name.endswith('.md')]

    for md_name in markdown_names:
      md_path = os.path.join(current_dir, md_name)

      title_parser = _GetMarkdownTitle()
      title_parser.process(md_path)
      title = title_parser.title
      if title is None:
        raise ValueError(
            '`{}` has no markdown title (# title)'.format(md_path))

      # Strip the '.md' suffix before splitting the path into components.
      key_parts = os.path.join(rel_dir, md_name[:-3]).split('/')
      if key_parts[-1] == 'index':
        # An index file is keyed by its directory.
        key_parts.pop()

      info = _DocInfo(os.path.join(rel_dir, md_name), title)
      index[key_parts[-1]] = info
      if len(key_parts) > 1:
        index['/'.join(key_parts[-2:])] = info

  return index
366
367
368class _GuideRef(object):
369
370  def __init__(self, base_name, title, section_title, section_tag):
371    self.url = 'api_guides/python/' + (('%s#%s' % (base_name, section_tag))
372                                       if section_tag else base_name)
373    self.link_text = (('%s > %s' % (title, section_title))
374                      if section_title else title)
375
376  def make_md_link(self, url_prefix):
377    return '[%s](%s%s)' % (self.link_text, url_prefix, self.url)
378
379
class _GenerateGuideIndex(py_guide_parser.PyGuideParser):
  """Builds an index from referenced symbol names to lists of `_GuideRef`s."""

  def __init__(self):
    # Maps a matched symbol name to the `_GuideRef`s that mention it.
    self.index = {}
    py_guide_parser.PyGuideParser.__init__(self)

  def process(self, full_path, base_name):
    """Index the file at `full_path`, using `base_name` in generated links."""
    self.full_path = full_path
    self.base_name = base_name
    # Reset the per-file state before parsing.
    self.title = self.section_title = self.section_tag = None
    py_guide_parser.PyGuideParser.process(self, full_path)

  def process_title(self, _, title):
    """Remember the first title reported for the current file."""
    if self.title is not None:
      return
    self.title = title

  def process_section(self, _, section_title, tag):
    """Remember the section that following lines belong to."""
    self.section_title, self.section_tag = section_title, tag

  def process_line(self, _, line):
    """Record a `_GuideRef` for every auto-reference found in `line`."""
    for match in parser.AUTO_REFERENCE_RE.finditer(line):
      symbol = match.group(1)
      guide_ref = _GuideRef(self.base_name, self.title, self.section_title,
                            self.section_tag)
      self.index.setdefault(symbol, []).append(guide_ref)
412
413
def _build_guide_index(guide_src_dir):
  """Return dict: symbol name -> list of `_GuideRef`s, from `guide_src_dir`.

  The returned index is empty when `guide_src_dir` does not exist.
  """
  index_generator = _GenerateGuideIndex()
  if not os.path.exists(guide_src_dir):
    return index_generator.index

  for full_path, base_name in py_guide_parser.md_files_in_dir(guide_src_dir):
    index_generator.process(full_path, base_name)
  return index_generator.index
421
422
class _UpdateTags(py_guide_parser.PyGuideParser):
  """Rewrites a Python guide so that each section has an explicit id tag.

  "section" here refers to blocks delimited by second level headings.
  """

  def process_section(self, line_number, section_title, tag):
    """Replace the section's line with an `<h2>` element carrying `tag`."""
    heading_html = '<h2 id="%s">%s</h2>' % (tag, section_title)
    self.replace_line(line_number, heading_html)
431
432
def update_id_tags_inplace(src_dir):
  """Set explicit ids on all second-level headings to ensure back-links work.

  Every file ending in `.md` found while walking `src_dir` is processed with
  an `_UpdateTags` parser and overwritten with the result.

  Args:
    src_dir: The directory of md-files to convert (inplace). May be absolute
      or relative to the current working directory.
  """
  tag_updater = _UpdateTags()

  for dirpath, _, filenames in os.walk(src_dir):
    for base_name in filenames:
      if not base_name.endswith('.md'):
        continue
      # `dirpath` as yielded by os.walk already starts with `src_dir`.
      # Joining `src_dir` in front of it again would duplicate the prefix
      # (and point at a non-existent file) whenever `src_dir` is relative.
      full_path = os.path.join(dirpath, base_name)

      # Tag updater loads the file, makes the replacements, and returns the
      # modified file contents
      content = tag_updater.process(full_path)
      with open(full_path, 'w') as f:
        f.write(content)
452
453
# File names that `replace_refs` never copies or rewrites.
EXCLUDED = {'__init__.py', 'OWNERS', 'README.txt'}
455
456
def replace_refs(src_dir,
                 output_dir,
                 reference_resolver,
                 file_pattern='*.md',
                 api_docs_relpath='api_docs'):
  """Resolve references in the files under `src_dir`, writing to `output_dir`.

  The directory structure of `src_dir` is mirrored under `output_dir`.

  Files whose name is in `EXCLUDED` are skipped entirely.

  Files whose name matches `file_pattern` (using `fnmatch`) are read as
  UTF-8, passed through `reference_resolver.replace_references` and written
  out as UTF-8. All other files are copied unchanged, unless the input and
  output paths are the same string, in which case they are left alone.

  Args:
    src_dir: The directory to convert files from.
    output_dir: The root directory to write the resulting files to.
    reference_resolver: A `parser.ReferenceResolver` to make the replacements.
    file_pattern: Only replace references in files matching this pattern,
      using fnmatch. Non-matching files are copied unchanged.
    api_docs_relpath: Relative-path string to the api_docs, from the src_dir.
  """
  for current_dir, _, file_names in os.walk(src_dir):
    # How to get from `current_dir` to api_docs/python/
    up_to_src = os.path.relpath(src_dir, start=current_dir)
    relative_path_to_root = os.path.join(up_to_src, api_docs_relpath, 'python')

    # Mirror the current directory under output_dir.
    rel_dir = os.path.relpath(path=current_dir, start=src_dir)
    mirrored_dir = os.path.join(output_dir, rel_dir)
    if not os.path.exists(mirrored_dir):
      os.makedirs(mirrored_dir)

    for base_name in file_names:
      if base_name in EXCLUDED:
        continue

      in_path = os.path.join(current_dir, base_name)
      # Set the `current_doc_full_name` so bad files can be reported on errors.
      reference_resolver.current_doc_full_name = in_path

      rel_file = os.path.relpath(path=in_path, start=src_dir)
      out_path = os.path.join(output_dir, rel_file)

      if fnmatch.fnmatch(base_name, file_pattern):
        with open(in_path, 'rb') as in_file:
          raw_text = in_file.read().decode('utf-8')

        resolved_text = reference_resolver.replace_references(
            raw_text, relative_path_to_root)
        with open(out_path, 'wb') as out_file:
          out_file.write(resolved_text.encode('utf-8'))
      elif in_path != out_path:
        # Non-matching files are copied verbatim.
        shutil.copyfile(in_path, out_path)
517
518
class DocGenerator(object):
  """Main entry point for generating docs.

  Typical use: register the command line arguments that are needed (the
  `add_*_argument` methods), call `set_py_modules`, parse the flags with
  `parse_known_args` and pass them to `build`.
  """

  def __init__(self):
    """Create the argument parser and the default exclusion maps."""
    self.argument_parser = argparse.ArgumentParser()
    self._py_modules = None
    self._private_map = _get_default_private_map()
    self._do_not_descend_map = _get_default_do_not_descend_map()
    # Passed to `write_docs` as `yaml_toc` by `build`.
    self.yaml_toc = True

    self.argument_parser.add_argument(
        '--no_search_hints',
        dest='search_hints',
        action='store_false',
        default=True,
        help='Do not include meta-data search hints at the top of each '
        'output file.')

    self.argument_parser.add_argument(
        '--site_api_path',
        type=str, default='',
        help='The path from the site-root to api_docs '
             'directory for this project')

    self.argument_parser.add_argument(
        '--api_cache_out_path',
        type=str,
        default=None,
        help='Path to store a json-serialized api-index, so links can be '
        'inserted into docs without rebuilding the api_docs')

  def add_output_dir_argument(self):
    """Register the required `--output_dir` flag."""
    self.argument_parser.add_argument(
        '--output_dir',
        type=str,
        default=None,
        required=True,
        help='Directory to write docs to.')

  def add_src_dir_argument(self):
    """Register the optional `--src_dir` flag."""
    # NOTE(review): the default is evaluated eagerly, so a temporary directory
    # is created every time this method is called, even when `--src_dir` is
    # given on the command line. It is never removed here.
    self.argument_parser.add_argument(
        '--src_dir',
        type=str,
        default=tempfile.mkdtemp(),
        required=False,
        help='Optional directory of source docs to add api_docs links to')

  def add_base_dir_argument(self, default_base_dir):
    """Register the `--base_dir` flag, defaulting to `default_base_dir`."""
    self.argument_parser.add_argument(
        '--base_dir',
        type=str,
        default=default_base_dir,
        help='Base directory to strip from file names referenced in docs.')

  def parse_known_args(self):
    """Parse the command line and return the flags, ignoring unknown args."""
    flags, _ = self.argument_parser.parse_known_args()
    return flags

  def add_to_private_map(self, d):
    """Merge `d` into the current private map."""
    add_dict_to_dict(d, self._private_map)

  def add_to_do_not_descend_map(self, d):
    """Merge `d` into the current do-not-descend map."""
    add_dict_to_dict(d, self._do_not_descend_map)

  def set_private_map(self, d):
    """Replace the private map with `d`."""
    self._private_map = d

  def set_do_not_descend_map(self, d):
    """Replace the do-not-descend map with `d`."""
    self._do_not_descend_map = d

  def set_py_modules(self, py_modules):
    """Set the `(name, module)` pairs to generate docs for."""
    self._py_modules = py_modules

  def py_module_names(self):
    """Return the names of the modules set with `set_py_modules`.

    Raises:
      RuntimeError: if `set_py_modules` has not been called.
    """
    if self._py_modules is None:
      raise RuntimeError(
          'Must call set_py_modules() before running py_module_names().')
    return [name for (name, _) in self._py_modules]

  def make_reference_resolver(self, visitor, doc_index):
    """Build a `parser.ReferenceResolver` from `visitor` and `doc_index`."""
    return parser.ReferenceResolver.from_visitor(
        visitor, doc_index, py_module_names=self.py_module_names())

  def make_parser_config(self, visitor, reference_resolver, guide_index,
                         base_dir):
    """Build a `parser.ParserConfig` from the visitor's indices."""
    return parser.ParserConfig(
        reference_resolver=reference_resolver,
        duplicates=visitor.duplicates,
        duplicate_of=visitor.duplicate_of,
        tree=visitor.tree,
        index=visitor.index,
        reverse_index=visitor.reverse_index,
        guide_index=guide_index,
        base_dir=base_dir)

  def run_extraction(self):
    """Run `extract` on the configured modules and return its visitor.

    Raises:
      RuntimeError: if `set_py_modules` has not been called.
    """
    # Fail with the same clear error as `py_module_names` instead of an
    # opaque TypeError from indexing `None` inside `extract`.
    if self._py_modules is None:
      raise RuntimeError(
          'Must call set_py_modules() before running run_extraction().')
    return extract(self._py_modules, self._private_map,
                   self._do_not_descend_map)

  def build(self, flags):
    """Build all the docs.

    This produces two outputs

    python api docs:

      * generated from modules set with `set_py_modules`.
      * written to '{FLAGS.output_dir}/api_docs/python/'

    non-api docs:

      * Everything in '{FLAGS.src_dir}' is copied to '{FLAGS.output_dir}'.
      * '@{}' references in '.md' files are replaced with links.
      * '.md' files under 'api_guides/python' have explicit ids set for their
        second level headings.

    Args:
      flags:
        * src_dir: Where to fetch the non-api-docs.
        * base_dir: Base of the docs directory (Used to build correct
          relative links).
        * output_dir: Where to write the resulting docs.
        * api_cache_out_path, root_title, search_hints, site_api_path:
          optional; read with `getattr` and a default when absent.

    Returns:
      The number of errors encountered while processing.
    """
    # Extract the python api from the _py_modules
    doc_index = build_doc_index(flags.src_dir)
    visitor = self.run_extraction()
    reference_resolver = self.make_reference_resolver(visitor, doc_index)

    if getattr(flags, 'api_cache_out_path', None):
      reference_resolver.to_json_file(flags.api_cache_out_path)

    root_title = getattr(flags, 'root_title', 'TensorFlow')

    # Build the guide_index for the api_docs back links.
    guide_index = _build_guide_index(
        os.path.join(flags.src_dir, 'api_guides/python'))

    # Write the api docs.
    parser_config = self.make_parser_config(visitor, reference_resolver,
                                            guide_index, flags.base_dir)
    output_dir = os.path.join(flags.output_dir, 'api_docs/python')

    write_docs(
        output_dir,
        parser_config,
        yaml_toc=self.yaml_toc,
        root_title=root_title,
        search_hints=getattr(flags, 'search_hints', True),
        site_api_path=getattr(flags, 'site_api_path', ''))

    # Replace all the @{} references in files under `FLAGS.src_dir`
    replace_refs(flags.src_dir, flags.output_dir, reference_resolver, '*.md')
    # Fix the tags in the guide dir.
    guide_dir = os.path.join(flags.output_dir, 'api_guides/python')
    if os.path.exists(guide_dir):
      update_id_tags_inplace(guide_dir)

    # Report all errors found by the reference resolver, and return the error
    # code.
    parser_config.reference_resolver.log_errors()

    return parser_config.reference_resolver.num_errors()
681