1#!/usr/bin/env python3
2
3from argparse import ArgumentParser
4import os
5from pathlib import Path
6from re import search
7from shutil import copyfile, rmtree
8from sys import exit
9
10"""
11Script which takes in Dackka docs and produces offline docs with CSS and relative links.
12Run `python3 offlinify_dackka_docs.py --help` for argument descriptions.
13"""
14
# Directory containing this script; the bundled stylesheet is resolved against it.
SCRIPT_PATH = Path(__file__).parent.absolute()

# Location of the docs build directory relative to SCRIPT_PATH, and the default input/output
# directories inside it.
REL_PATH_TO_DOCS = '../../../../out/androidx/docs-tip-of-tree/build'
_BUILD_DIR = os.path.join(SCRIPT_PATH, REL_PATH_TO_DOCS)
DEFAULT_INPUT = os.path.abspath(os.path.join(_BUILD_DIR, 'docs'))
DEFAULT_OUTPUT = os.path.abspath(os.path.join(_BUILD_DIR, 'offlineDocs'))

# Directory (relative to the docs root) which holds one subdirectory per library.
REL_PATH_TO_LIBRARIES = 'reference/kotlin/androidx'

# Stylesheet copied into the root of the offline docs and linked from every page.
STYLE_FILENAME = 'style.css'
CSS_SOURCE_PATH = os.path.join(SCRIPT_PATH, STYLE_FILENAME)

# Prefix used for links which cannot be resolved to a local page.
PUBLISHED_DOCS_URL = 'https://developer.android.com'

# Index pages living directly in REL_PATH_TO_LIBRARIES, outside of any single library directory.
INDEX_PAGES = ['classes.html', 'packages.html']
24
25"""
26Check environment and args, then create offline docs.
27"""
def main():
  """Script entry point: validate the environment and arguments, then build the offline docs.

  The library check runs last because it needs both the resolved input and output directories.
  """
  check_env()

  cli_args = parse_args()
  docs_dir = check_input_path(cli_args.input)
  offline_dir = check_output_path(cli_args.output)
  library_dir = check_library(cli_args.library, docs_dir, offline_dir)

  process_input(docs_dir, offline_dir, library_dir)
37
38"""
39Error early if any system setup is missing.
40"""
def check_env():
  """Verify that the third-party `bs4` module can be imported.

  Prints installation instructions and calls `exit(-1)` when the module is missing, so the problem
  is reported before any files are touched.
  """
  try:
    from bs4 import BeautifulSoup
  except ModuleNotFoundError:
    print('ERROR: This script requires beautifulsoup module `bs4` to run.')
    print('Please install with pip or another package manager.')
    exit(-1)
48
49"""
50Parses script args.
51"""
def parse_args():
  """Build the command line parser and parse the arguments of the current invocation.

  Returns:
    The argparse namespace with the attributes `input`, `output` and `library`. Each one is None
    when the corresponding flag was not passed.
  """
  description = (
    'Converts Dackka docs to an offline version by adding CSS, fixing links, and '
    'removing book.yaml templating.'
  )

  # All flags are optional and take a single value, so they only differ in their help text.
  help_by_flag = {
    '--input': (
      'Path to generated Dackka docs. This directory is expected to contain a `reference` '
      f'subdirectory. If no path is provided, {DEFAULT_INPUT} is used by default.'
    ),
    '--output': (
      'Path to store output offline docs. If a directory already exists at this path, it will'
      f' be deleted. If no path is provided, {DEFAULT_OUTPUT} is used by default.'
    ),
    '--library': (
      'Specific androidx library to convert docs for. Docs for this library are expected to be'
      f' in a subdirectory of `{REL_PATH_TO_LIBRARIES}` within the input path. '
      'If no library is provided, docs for all libraries are converted to offline mode.'
    ),
  }

  parser = ArgumentParser(description=description)
  for flag, help_text in help_by_flag.items():
    parser.add_argument(flag, required=False, help=help_text)
  return parser.parse_args()
73
74"""
75Verify the provided input arg is a valid directory.
76"""
def check_input_path(path):
  """Resolve and validate the directory containing the generated docs.

  Args:
    path: Value of the `--input` argument, or None to fall back to DEFAULT_INPUT.

  Returns:
    DEFAULT_INPUT when no path was given, otherwise the normalized form of `path`.

  Prints an error and calls `exit(-1)` when the selected path does not exist, or when an explicitly
  provided path is not a directory.
  """
  if path is None:
    if not os.path.exists(DEFAULT_INPUT):
      print(f'ERROR: Default input path `{DEFAULT_INPUT}` does not exist. Generate docs by running')
      print('    ./gradlew docs')
      exit(-1)
    return DEFAULT_INPUT

  path = os.path.normpath(path)
  if not os.path.exists(path):
    print(f'ERROR: Provided input path `{path}` does not exist.')
    exit(-1)

  if not os.path.isdir(path):
    print(f'ERROR: Provided input path `{path}` does not point to a directory.')
    exit(-1)

  return path
95
96"""
97Verifies the output arg by creating a directory at the path, removing existing directory if needed.
98"""
def check_output_path(path):
  """Prepare an empty output directory and return its path.

  Args:
    path: Value of the `--output` argument, or None to fall back to DEFAULT_OUTPUT.

  Returns:
    The path of the freshly created directory.

  An existing directory at the path is deleted with all of its contents first. If the path exists
  but is not a directory, an error is printed and `exit(-1)` is called.
  """
  target = DEFAULT_OUTPUT if path is None else path

  if os.path.exists(target):
    if not os.path.isdir(target):
      print(f'ERROR: output path {target} exists but is not a directory.')
      exit(-1)
    else:
      print(f'Removing existing directory at output path {target}')
      rmtree(target)

  os.makedirs(target)
  return target
113
114"""
115Verify the library arg by ensuring the input docs directory exists and making output directories.
116"""
def check_library(library, input_path, output_path):
  """Validate the `--library` argument and create the matching output directory.

  Args:
    library: Name of the library subdirectory inside REL_PATH_TO_LIBRARIES, or None when docs for
        all libraries are converted.
    input_path: Root directory of the generated docs.
    output_path: Root directory of the offline docs.

  Returns:
    None when no library was requested, otherwise the normalized path of the library docs relative
    to the docs root.

  Prints an error and calls `exit(-1)` when the library has no docs directory in the input.
  """
  if library is None:
    return None

  # Normalize so that input such as `compose/` (e.g. from shell tab completion) produces a path
  # without a trailing separator. os.path.commonpath never returns one, so the
  # `commonpath(...) == library` checks made while fixing links would otherwise never match.
  rel_library_path = os.path.normpath(os.path.join(REL_PATH_TO_LIBRARIES, library))
  input_library_path = os.path.join(input_path, rel_library_path)

  if not os.path.exists(input_library_path):
    print(f'ERROR: Docs directory for library {library} could not be found at')
    print(f'    {input_library_path}')
    exit(-1)

  os.makedirs(os.path.join(output_path, rel_library_path))

  return rel_library_path
132
133"""
134Fixes all HTML files in the input directory (or just the pages for the library if a specific one is
135provided) and puts the new pages in the output directory.
136"""
def process_input(input_path, output_path, library):
  """Convert the docs under `input_path` into offline pages under `output_path`.

  The stylesheet is copied to the root of the output directory, then every `.html` file found while
  walking the input (or only the library subdirectory when one is given) is passed to
  fix_html_file. Files with any other extension are not copied. When a library is given, the pages
  listed in INDEX_PAGES are converted as well if they exist in the input.

  Args:
    input_path: Root directory of the generated docs.
    output_path: Root directory of the offline docs. It must already exist, and so must the
        library subdirectory inside it when `library` is given.
    library: Path of the library docs relative to `input_path`, or None to convert everything.
  """
  css_path = os.path.join(output_path, STYLE_FILENAME)
  copyfile(CSS_SOURCE_PATH, css_path)

  # Go through just the subdirectory for the specific library, or for the entire input directory.
  path_to_walk = input_path if library is None else os.path.join(input_path, library)
  for root, dirs, files in os.walk(path_to_walk):
    output_root = os.path.join(output_path, os.path.relpath(root, start=input_path))

    # Mirror the directory structure; the walk is top-down, so each directory is created before
    # the files inside it are written.
    for name in dirs:
      os.mkdir(os.path.join(output_root, name))

    html_files = [name for name in files if os.path.splitext(name)[1] == '.html']
    if not html_files:
      continue

    # The relative stylesheet location is the same for every page of a directory.
    rel_css_path = os.path.relpath(css_path, start=output_root)
    for name in html_files:
      fix_html_file(
        os.path.join(root, name), input_path, os.path.join(output_root, name), rel_css_path,
        library, False)

  if library is not None:
    # In addition to the library pages, copy over package and class index pages.
    base_output_dir = os.path.join(output_path, REL_PATH_TO_LIBRARIES)
    rel_css_path = os.path.relpath(css_path, start=base_output_dir)
    for file in INDEX_PAGES:
      input_file_path = os.path.join(input_path, REL_PATH_TO_LIBRARIES, file)
      if os.path.exists(input_file_path):
        output_file_path = os.path.join(base_output_dir, file)
        fix_html_file(input_file_path, input_path, output_file_path, rel_css_path, library, True)
167
168"""
169Performs all fixes to the input HTML file and saves the resulting HTML at the output path.
170"""
def fix_html_file(file_path, root_input_path, output_file_path, css_path, library, index_page):
  """Apply all offline fixes to one HTML page and write the result to `output_file_path`.

  Args:
    file_path: Path of the page to read.
    root_input_path: Root directory of the generated docs, used to resolve absolute links.
    output_file_path: Path the converted page is written to.
    css_path: Stylesheet location relative to the directory of the output page.
    library: Path of the library docs relative to the docs root, or None when all libraries are
        converted.
    index_page: Whether the page is an index page whose rows are filtered down to `library`.
  """
  # Imported here instead of at module level, so the script can still be loaded and report a
  # missing bs4 installation through check_env().
  from bs4 import BeautifulSoup

  # Read and write with an explicit encoding; the default depends on the platform locale.
  # NOTE(review): assumes the generated pages are UTF-8 -- confirm.
  with open(file_path, 'r', encoding='utf-8') as f:
    parsed = BeautifulSoup(f, 'html.parser')

  if index_page:
    filter_index(parsed, library)

  remove_book_template_strings(parsed)
  add_css(parsed, css_path)
  fix_links(parsed, file_path, root_input_path, library)

  with open(output_file_path, 'w', encoding='utf-8') as f:
    f.write(str(parsed))
186
187"""
188Removes template strings containing book.yaml information for DAC.
189"""
def remove_book_template_strings(page):
  """Remove `{% ... %}` template strings from the direct children of the page head.

  Args:
    page: Parsed page; modified in place. Pages without a head are left untouched.
  """
  head = page.head
  if head is None:
    return

  # page.find_all wasn't working here because the template strings are not within HTML tags.
  # Iterate over a copy: extract() removes the element from `head.contents`, and mutating the list
  # being iterated would skip the element following each removed one.
  for element in list(head.contents):
    if search('{%.*%}', element.text):
      element.extract()
195
196"""
197Replace any CSS links with a correct link.
198"""
def add_css(page, relative_css):
  """Make the bundled stylesheet the only stylesheet linked by the page.

  Args:
    page: Parsed page; modified in place.
    relative_css: Stylesheet location relative to the page, used as the `href` of the new link.
  """
  stale_links = page.find_all('link', rel='stylesheet')
  for stale_link in stale_links:
    stale_link.extract()

  page.head.append(page.new_tag('link', rel='stylesheet', href=relative_css))
205
206"""
207Convert links to other pages in the generated docs into relative paths to work offline.
208If docs are being converted for just one library, links for docs outside the library are converted
209to a link to the published version.
210"""
def fix_links(page, page_path, root_input_path, library):
  """Rewrite the absolute links of a page so they work offline.

  Only `href` values starting with `/` are changed. Such a link becomes a path relative to the
  page when page_should_be_linked accepts it and its directory exists in the input docs; otherwise
  it is pointed at the published docs.

  Args:
    page: Parsed page; modified in place.
    page_path: Path of the page inside the input docs.
    root_input_path: Root directory of the generated docs.
    library: Path of the library docs relative to the docs root, or None when all libraries are
        converted.
  """
  page_dir = os.path.dirname(page_path)

  for anchor in page.find_all('a'):
    href = anchor.get('href')
    if href is None or not href.startswith('/'):
      continue

    site_relative = href.lstrip('/')
    local_target = os.path.join(root_input_path, site_relative)

    # Existence is checked on the directory, because the last path component may carry a
    # fragment such as `Class.html#function`.
    if (page_should_be_linked(site_relative, library)
        and os.path.exists(os.path.dirname(local_target))):
      anchor['href'] = os.path.relpath(local_target, start=page_dir)
    else:
      # The link isn't in this library or doesn't exist locally, use the published page.
      anchor['href'] = PUBLISHED_DOCS_URL + href
232
233"""
234Determines whether to link to the local version of the page at path.
235"""
def page_should_be_linked(path, library):
  """Decide whether a link should point to the local copy of a page.

  Args:
    path: Path of the linked page relative to the docs root.
    library: Path of the library docs relative to the docs root, or None/empty when docs for all
        libraries are generated.

  Returns:
    True when the local page should be linked, False when the published page should be used.
  """
  # All library docs are generated, so all pages are linked.
  if not library:
    return True

  # Check if the page is in the library dir. The library path is normalized first because
  # os.path.commonpath never returns a trailing separator, so `compose/` would never match.
  library_root = os.path.normpath(library)
  if os.path.commonpath([library_root, path]) == library_root:
    return True

  # The index pages are the only ones outside of the library dir that will exist.
  return os.path.basename(path) in INDEX_PAGES
248
249"""
250For the class and package index pages, removes all rows which link outside the library.
251"""
def filter_index(page, library):
  """Remove the table rows of an index page which link outside of the library.

  Rows without a link (no `<a>` element, or an `<a>` without `href`) are kept.

  Args:
    page: Parsed class or package index page; modified in place.
    library: Path of the library docs relative to the docs root.
  """
  rows = page.find_all('tr')
  if not rows:
    return

  # os.path.commonpath never returns a trailing separator, so compare against a normalized path.
  library_root = os.path.normpath(library)
  for row in rows:
    anchor = row.a
    link = anchor.get('href') if anchor is not None else None
    if link is None:
      continue
    if os.path.commonpath([link.lstrip('/'), library_root]) != library_root:
      row.extract()
258
# Run the conversion only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
  main()
261