#!/usr/bin/env python3
"""
Script which takes in Dackka docs and produces offline docs with CSS and relative links.
Run `python3 offlinify_dackka_docs.py --help` for argument descriptions.
"""

from argparse import ArgumentParser
import os
from pathlib import Path
from re import search
from shutil import copyfile, rmtree
from sys import exit

SCRIPT_PATH = Path(__file__).parent.absolute()
REL_PATH_TO_DOCS = '../../../../out/androidx/docs-tip-of-tree/build'
DEFAULT_INPUT = os.path.abspath(os.path.join(SCRIPT_PATH, REL_PATH_TO_DOCS, 'docs'))
DEFAULT_OUTPUT = os.path.abspath(os.path.join(SCRIPT_PATH, REL_PATH_TO_DOCS, 'offlineDocs'))
REL_PATH_TO_LIBRARIES = 'reference/kotlin/androidx'
STYLE_FILENAME = 'style.css'
CSS_SOURCE_PATH = os.path.join(SCRIPT_PATH, STYLE_FILENAME)
PUBLISHED_DOCS_URL = 'https://developer.android.com'
INDEX_PAGES = ['classes.html', 'packages.html']


def _fail(*lines):
    """Print each line of an error message, then exit the script with status -1."""
    for line in lines:
        print(line)
    exit(-1)


def _is_within(path, directory):
    """Return True if the relative `path` lies inside (or equals) `directory`.

    `directory` is normalized first so that a trailing slash or `./` prefix does not defeat the
    comparison against the result of `os.path.commonpath`, which never has a trailing separator.
    """
    directory = os.path.normpath(directory)
    try:
        return os.path.commonpath([directory, path]) == directory
    except ValueError:
        # commonpath refuses to compare absolute with relative paths (or different drives);
        # such a path cannot be inside the directory.
        return False


def main():
    """Check environment and args, then create offline docs."""
    check_env()

    args = parse_args()
    input_path = check_input_path(args.input)
    output_path = check_output_path(args.output)
    library = check_library(args.library, input_path, output_path)

    process_input(input_path, output_path, library)


def check_env():
    """Error early if any system setup is missing (currently: the `bs4` module)."""
    try:
        import bs4  # noqa: F401 -- imported only to verify that it is installed.
    except ModuleNotFoundError:
        _fail(
            'ERROR: This script requires beautifulsoup module `bs4` to run.',
            'Please install with pip or another package manager.',
        )


def parse_args():
    """Parse the script args (`--input`, `--output`, `--library`; all optional)."""
    parser = ArgumentParser(
        description='Converts Dackka docs to an offline version by adding CSS, fixing links, and '
                    'removing book.yaml templating.'
    )
    parser.add_argument(
        '--input', required=False,
        help='Path to generated Dackka docs. This directory is expected to contain a `reference` '
             f'subdirectory. If no path is provided, {DEFAULT_INPUT} is used by default.')
    parser.add_argument(
        '--output', required=False,
        help='Path to store output offline docs. If a directory already exists at this path, it will'
             f' be deleted. If no path is provided, {DEFAULT_OUTPUT} is used by default.'
    )
    parser.add_argument(
        '--library', required=False,
        help='Specific androidx library to convert docs for. Docs for this library are expected to be'
             f' in a subdirectory of `{REL_PATH_TO_LIBRARIES}` within the input path. '
             'If no library is provided, docs for all libraries are converted to offline mode.'
    )
    return parser.parse_args()


def check_input_path(path):
    """Verify the provided input arg is a valid directory.

    Returns DEFAULT_INPUT when `path` is None, otherwise the normalized `path`. Exits the script
    with an error message if the chosen path does not exist or is not a directory.
    """
    if path is None:
        if not os.path.exists(DEFAULT_INPUT):
            _fail(
                f'ERROR: Default input path `{DEFAULT_INPUT}` does not exist. Generate docs by running',
                '    ./gradlew docs',
            )
        return DEFAULT_INPUT

    path = os.path.normpath(path)
    if not os.path.exists(path):
        _fail(f'ERROR: Provided input path `{path}` does not exist.')

    if not os.path.isdir(path):
        _fail(f'ERROR: Provided input path `{path}` does not point to a directory.')

    return path


def check_output_path(path):
    """Verify the output arg by creating a directory at the path.

    Falls back to DEFAULT_OUTPUT when `path` is None. An existing directory at the path is deleted
    first; an existing non-directory is an error. Returns the path of the fresh directory.
    """
    if path is None:
        path = DEFAULT_OUTPUT

    if os.path.exists(path):
        if not os.path.isdir(path):
            _fail(f'ERROR: output path {path} exists but is not a directory.')
        print(f'Removing existing directory at output path {path}')
        rmtree(path)

    os.makedirs(path)
    return path


def check_library(library, input_path, output_path):
    """Verify the library arg by ensuring the input docs directory exists.

    Also creates the matching library directory under `output_path`. Returns the library path
    relative to the docs root, or None when no library was requested.
    """
    if library is None:
        return None

    # Normalize so a trailing slash in the arg (e.g. `--library foo/`) cannot break the later
    # prefix comparisons against this path.
    rel_library_path = os.path.normpath(os.path.join(REL_PATH_TO_LIBRARIES, library))
    input_library_path = os.path.join(input_path, rel_library_path)

    if not os.path.exists(input_library_path):
        _fail(
            f'ERROR: Docs directory for library {library} could not be found at',
            f'    {input_library_path}',
        )

    os.makedirs(os.path.join(output_path, rel_library_path))

    return rel_library_path


def process_input(input_path, output_path, library):
    """Fix all HTML files in the input directory and put the new pages in the output directory.

    If `library` (a path relative to `input_path`) is provided, only that subdirectory is walked,
    and the package and class index pages are additionally converted with their rows filtered
    down to the library. Non-HTML files are not copied.
    """
    css_path = os.path.join(output_path, STYLE_FILENAME)
    copyfile(CSS_SOURCE_PATH, css_path)

    # Go through just the subdirectory for the specific library, or for the entire input directory.
    path_to_walk = os.path.join(input_path, library) if library is not None else input_path
    for root, dirs, files in os.walk(path_to_walk):
        rel_root = os.path.relpath(root, start=input_path)
        output_root = os.path.join(output_path, rel_root)
        # Every file in this directory shares the same relative path to the stylesheet.
        rel_css_path = os.path.relpath(css_path, start=output_root)

        for name in dirs:
            os.makedirs(os.path.join(output_root, name), exist_ok=True)

        for name in files:
            if os.path.splitext(name)[1] != '.html':
                continue
            fix_html_file(
                os.path.join(root, name), input_path, os.path.join(output_root, name),
                rel_css_path, library, False)

    if library is None:
        return

    # In addition to the library pages, copy over package and class index pages.
    base_output_dir = os.path.join(output_path, REL_PATH_TO_LIBRARIES)
    os.makedirs(base_output_dir, exist_ok=True)
    rel_css_path = os.path.relpath(css_path, start=base_output_dir)
    for index_name in INDEX_PAGES:
        input_file_path = os.path.join(input_path, REL_PATH_TO_LIBRARIES, index_name)
        if os.path.exists(input_file_path):
            output_file_path = os.path.join(base_output_dir, index_name)
            fix_html_file(input_file_path, input_path, output_file_path, rel_css_path, library, True)


def fix_html_file(file_path, root_input_path, output_file_path, css_path, library, index_page):
    """Perform all fixes to the input HTML file and save the resulting HTML at the output path.

    `index_page` marks class/package index pages, whose rows are first filtered to `library`.
    """
    from bs4 import BeautifulSoup

    # Read and write as UTF-8 explicitly rather than relying on the platform default encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        parsed = BeautifulSoup(f, 'html.parser')

    if index_page:
        filter_index(parsed, library)

    remove_book_template_strings(parsed)
    add_css(parsed, css_path)
    fix_links(parsed, file_path, root_input_path, library)

    with open(output_file_path, 'w', encoding='utf-8') as f:
        f.write(str(parsed))


def remove_book_template_strings(page):
    """Remove template strings containing book.yaml information for DAC from the page head."""
    # page.find_all wasn't working here because the template strings are not within HTML tags.
    # Iterate over a copy: extract() mutates `page.head.contents`, and removing items from the
    # list being iterated would skip the element following each removed one.
    for element in list(page.head.contents):
        if search(r'{%.*%}', element.text):
            element.extract()


def add_css(page, relative_css):
    """Replace any stylesheet links with a single link to `relative_css`."""
    for tag in page.find_all('link', rel='stylesheet'):
        tag.extract()

    new_tag = page.new_tag('link', rel='stylesheet', href=relative_css)
    page.head.append(new_tag)


def fix_links(page, page_path, root_input_path, library):
    """Convert links to other pages in the generated docs into relative paths to work offline.

    Only hrefs starting with `/` are touched. If docs are being converted for just one library,
    links for docs outside the library are converted to a link to the published version, as are
    links whose target directory does not exist locally.
    """
    page_dir = os.path.dirname(page_path)
    for a_tag in page.find_all('a'):
        original_path = a_tag.get('href')
        if original_path is None or not original_path.startswith('/'):
            continue
        lstrip_original_path = original_path.lstrip('/')

        if page_should_be_linked(lstrip_original_path, library):
            abs_path = os.path.join(root_input_path, lstrip_original_path)
            # Make sure the link will work -- this uses the directory because the basename of the
            # path might end with something like `Class.html#function`
            if os.path.exists(os.path.dirname(abs_path)):
                a_tag['href'] = os.path.relpath(abs_path, start=page_dir)
                continue

        # The link isn't in this library or doesn't exist locally, use the published page.
        a_tag['href'] = PUBLISHED_DOCS_URL + original_path


def page_should_be_linked(path, library):
    """Determine whether to link to the local version of the page at `path`.

    `path` is relative to the docs root; `library` is the library's relative path, or None when
    docs for all libraries are being converted.
    """
    # All library docs are generated, so all pages are linked.
    if library is None:
        return True

    # The index pages are the only ones outside of the library dir that will exist.
    if os.path.basename(path) in INDEX_PAGES:
        return True

    # Check if the page is in the library dir.
    return _is_within(path, library)


def filter_index(page, library):
    """For the class and package index pages, remove all rows which link outside the library.

    Rows without a link (no `<a>` tag, or an `<a>` without `href`) are left in place.
    """
    for row in page.find_all('tr'):
        anchor = row.a
        if anchor is None:
            continue
        link = anchor.get('href')
        if link is None:
            continue
        if not _is_within(link.lstrip('/'), library):
            row.extract()


if __name__ == '__main__':
    main()