#!/usr/bin/env python

#
# updateDocumentToC.py
#
# Insert table of contents at top of Catch markdown documents.
#
# This script is distributed under the GNU General Public License v3.0
#
# It is based on markdown-toclify version 1.7.1 by Sebastian Raschka,
# https://github.com/rasbt/markdown-toclify
#

from __future__ import print_function

import argparse
import glob
import os
import re
import sys

from scriptCommon import catchPath

# Configuration:

minTocEntries = 4

# Heading levels listed here are left out of the table of contents.
# Both lists currently keep level 2 headings only.
headingExcludeDefault = [1,3,4,5]   # use level 2 headers by default
headingExcludeRelease = [1,3,4,5]   # use level 2 headers for release-notes.md too

documentsDefault = os.path.join(os.path.relpath(catchPath), 'docs/*.md')
releaseNotesName = 'release-notes.md'

contentTitle = '**Contents**'
contentLineNo = 4
contentLineNdx = contentLineNo - 1

# End configuration

VALIDS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-&'

# ATX heading openers: one to six '#' followed by a space.
_HEADLINE_PREFIXES = ('# ', '## ', '### ', '#### ', '##### ', '###### ')


def readLines(in_file):
    """Returns a list of lines from a input markdown file."""

    with open(in_file, 'r') as inf:
        return inf.read().split('\n')


def removeLines(lines, remove=('[[back to top]', '<a class="mk-toclify"')):
    """
    Removes existing [back to top] links and <a id> tags.

    Keyword arguments:
        lines: list of lines of a Markdown document.
        remove: prefix (string) or collection of prefixes; every line that
            starts with one of them is dropped. A falsy value keeps all lines.

    Returns a new list; the input list is never modified.
    """

    if not remove:
        return lines[:]

    # str.startswith() only accepts a string or a tuple of strings.
    if isinstance(remove, (list, set, frozenset)):
        remove = tuple(remove)

    return [l for l in lines if not l.startswith(remove)]


def removeToC(lines):
    """
    Removes existing table of contents starting at index contentLineNdx.

    The table of contents is recognised by a line starting with contentTitle
    at index contentLineNdx, followed by entry lines starting with '['.
    The single blank separator line after the entries is removed as well.

    Returns a new list; a copy of the input if no table of contents is found.
    """
    # Documents shorter than the ToC position cannot contain a ToC.
    if len(lines) <= contentLineNdx:
        return lines[:]

    if not lines[contentLineNdx].startswith(contentTitle):
        return lines[:]

    pos = contentLineNdx + 1
    # Bounds check: the entries may run up to the very end of the file.
    while pos < len(lines) and lines[pos].startswith('['):
        pos += 1

    # Only swallow the following line when it is the blank separator;
    # anything else is document content and must be preserved.
    if pos < len(lines) and not lines[pos].strip():
        pos += 1

    return lines[:contentLineNdx] + lines[pos:]


def dashifyHeadline(line):
    """
    Takes a header line from a Markdown document and
    returns a list of the
        '#'-stripped version of the head line,
        a string version for <a id=''></a> anchor tags,
        and the level of the headline as integer.
    E.g.,
    >>> dashifyHeadline('### some header lvl3')
    ['some header lvl3', 'some-header-lvl3', 3]

    The line is expected to start with its '#' markers (no indentation).
    A list (not a tuple) is returned because positioningHeadlines adjusts
    the level in place.
    """
    stripped_right = line.rstrip('#')
    stripped_both = stripped_right.lstrip('#')
    level = len(stripped_right) - len(stripped_both)
    stripped_wspace = stripped_both.strip()

    # GitHub's sluggification works in an interesting way
    # 1) '+', '/', '(', ')' and so on are just removed
    # 2) spaces are converted into '-' directly
    # 3) multiple -- are not collapsed

    dashified = ''.join(
        '-' if c.isspace() else c.lower()
        for c in stripped_wspace
        if c in VALIDS or c.isspace())

    return [stripped_wspace, dashified, level]


def tagAndCollect(lines, id_tag=True, back_links=False, exclude_h=None):
    """
    Gets headlines from the markdown document and creates anchor tags.

    Keyword arguments:
        lines: a list of strings, one per line of a Markdown document.
        id_tag: if true, inserts the <a id> tags (not req. by GitHub)
        back_links: if true, adds "back to top" links below each headline
        exclude_h: header levels to exclude. E.g., [2, 3]
            excludes level 2 and 3 headings.

    Returns a tuple of 2 lists:
        1st list:
            A copy of the input lines where
            <a id="some-header"></a> anchor tags were inserted
            above the collected header lines (if id_tag is true) and
            back-to-top links below every headline (if back_links is true).
            Every input line is kept.

        2nd list:
            A list of 3-value sublists, where the first value
            represents the heading, the second value the string
            that is used as ID in the anchor tags,
            and the third value is an integer that represents the headline level.
            E.g.,
            [['some header lvl3', 'some-header-lvl3', 3], ...]

    """
    out_contents = []
    headlines = []
    for l in lines:
        l_stripped = l.lstrip()
        # Indentation width with tabs counted as 4 columns.
        indent = len(l[:len(l) - len(l_stripped)].expandtabs(4))

        # A headline starts with 1-6 '#' followed by a space, is indented
        # by at most 3 columns and is not empty. Lines failing these
        # checks are ordinary content: they are kept, just not collected.
        saw_headline = (
            l_stripped.startswith(_HEADLINE_PREFIXES)
            and indent <= 3
            and bool(set(l_stripped) - {'#', ' '}))

        if saw_headline:
            # Use the unindented text so the '#' markers are counted.
            dashified = dashifyHeadline(l_stripped)

            if not exclude_h or dashified[-1] not in exclude_h:
                if id_tag:
                    out_contents.append(
                        '<a class="mk-toclify" id="%s"></a>' % (dashified[1]))
                headlines.append(dashified)

        out_contents.append(l)
        if back_links and saw_headline:
            out_contents.append('[[back to top](#table-of-contents)]')
    return out_contents, headlines


def positioningHeadlines(headlines):
    """
    Shifts all headline levels up by one if no level 1 headline is present.

    The rows are modified in place and the same list is returned.
    """
    if not any(row[-1] == 1 for row in headlines):
        for row in headlines:
            row[-1] -= 1
    return headlines


def createToc(headlines, hyperlink=True, top_link=False, no_toc_header=False):
    """
    Creates the table of contents from the headline list
    that was returned by the tagAndCollect function.

    Keyword Arguments:
        headlines: list of lists
            e.g., ['Some header lvl3', 'some-header-lvl3', 3]
        hyperlink: Creates hyperlinks in Markdown format if True,
            e.g., '[Some header lvl1](#some-header-lvl1)<br>'
        top_link: if True, add a id tag for linking the table
            of contents itself (for the back-to-top-links)
        no_toc_header: suppresses TOC header if True.

    Returns a list of lines for a table of contents
    in Markdown format, terminated by a '\\n' entry that yields the
    blank separator line after the table,
    e.g., ['**Contents**<br>', '[Some header lvl3](#some-header-lvl3)<br>', '\\n']

    """
    processed = []
    if not no_toc_header:
        if top_link:
            processed.append('<a class="mk-toclify" id="table-of-contents"></a>\n')
        processed.append(contentTitle + '<br>')

    for line in headlines:
        if hyperlink:
            item = '[%s](#%s)' % (line[0], line[1])
        else:
            item = '%s- %s' % ((line[2]-1)*' ', line[0])
        processed.append(item + '<br>')
    processed.append('\n')
    return processed


def buildMarkdown(toc_headlines, body, spacer=0, placeholder=None):
    """
    Returns a string with the Markdown output contents incl.
    the table of contents.

    Keyword arguments:
        toc_headlines: lines for the table of contents
            as created by the createToc function.
        body: contents of the Markdown file including
            ID-anchor tags as returned by the
            tagAndCollect function.
        spacer: Adds vertical space after the table
            of contents. Height in pixels.
        placeholder: If a placeholder string is provided, the placeholder
            will be replaced by the TOC instead of inserting the TOC at
            line contentLineNo of the document

    """
    if spacer:
        spacer_line = ['\n<div style="height:%spx;"></div>\n' % (spacer)]
        toc_markdown = "\n".join(toc_headlines + spacer_line)
    else:
        toc_markdown = "\n".join(toc_headlines)

    if placeholder:
        return "\n".join(body).replace(placeholder, toc_markdown)

    # Nothing to insert: return the body untouched. Splitting and re-joining
    # would append a stray newline to documents shorter than contentLineNo.
    if not toc_markdown:
        return "\n".join(body)

    body_markdown_p1 = "\n".join(body[:contentLineNdx]) + '\n'
    body_markdown_p2 = "\n".join(body[contentLineNdx:])
    return body_markdown_p1 + toc_markdown + body_markdown_p2


def outputMarkdown(markdown_cont, output_file):
    """
    Writes to an output file if `output_file` is a valid path.

    """
    if output_file:
        with open(output_file, 'w') as out:
            out.write(markdown_cont)


def markdownToclify(
        input_file,
        output_file=None,
        min_toc_len=2,
        github=False,
        back_to_top=False,
        nolink=False,
        no_toc_header=False,
        spacer=0,
        placeholder=None,
        exclude_h=None):
    """ Function to add table of contents to markdown files.

    Parameters
    -----------
      input_file: str
        Path to the markdown input file.

      output_file: str (default: None)
        Path to the markdown output file.

      min_toc_len: int (default: 2)
        Minimum number of entries to create a table of contents for.

      github: bool (default: False)
        Uses GitHub TOC syntax if True.

      back_to_top: bool (default: False)
        Inserts back-to-top links below headings if True.

      nolink: bool (default: False)
        Creates the table of contents without internal links if True.

      no_toc_header: bool (default: False)
        Suppresses the Table of Contents header if True

      spacer: int (default: 0)
        Inserts vertical space (in pixels) after the table of contents.

      placeholder: str (default: None)
        Inserts the TOC at the placeholder string instead
        of inserting the TOC at the top of the document.

      exclude_h: list (default None)
        Excludes header levels, e.g., if [2, 3], ignores header
        levels 2 and 3 in the TOC.

    Returns
    -----------
    changed: Boolean
      True if the generated content differs from the content of
      input_file, False otherwise.

    """
    original_lines = readLines(input_file)

    cleaned_contents = removeLines(
        removeToC(original_lines),
        remove=('[[back to top]', '<a class="mk-toclify"'))

    processed_contents, raw_headlines = tagAndCollect(
        cleaned_contents,
        id_tag=not github,
        back_links=back_to_top,
        exclude_h=exclude_h)

    # add table of contents?
    if len(raw_headlines) < min_toc_len:
        processed_headlines = []
    else:
        leftjustified_headlines = positioningHeadlines(raw_headlines)

        processed_headlines = createToc(
            leftjustified_headlines,
            hyperlink=not nolink,
            top_link=not nolink and not github,
            no_toc_header=no_toc_header)

    if nolink:
        processed_contents = cleaned_contents

    cont = buildMarkdown(
        toc_headlines=processed_headlines,
        body=processed_contents,
        spacer=spacer,
        placeholder=placeholder)

    if output_file:
        outputMarkdown(cont, output_file)

    # readLines splits on '\n', so re-joining reproduces the text as read.
    return cont != "\n".join(original_lines)


def isReleaseNotes(f):
    """Return True if the base name of path f is the release notes file."""
    return os.path.basename(f) == releaseNotesName


def excludeHeadingsFor(f):
    """Return the list of heading levels to exclude from the ToC of file f."""
    return headingExcludeRelease if isReleaseNotes(f) else headingExcludeDefault


def updateSingleDocumentToC(input_file, min_toc_len, verbose=False):
    """Add or update table of contents in specified file. Return 1 if file changed, 0 otherwise."""
    if verbose:
        print( 'file: {}'.format(input_file))

    # Generate into a temporary sibling file, then swap it in.
    output_file = input_file + '.tmp'

    changed = markdownToclify(
        input_file=input_file,
        output_file=output_file,
        min_toc_len=min_toc_len,
        github=True,
        back_to_top=False,
        nolink=False,
        no_toc_header=False,
        spacer=0,
        placeholder=None,
        exclude_h=excludeHeadingsFor(input_file))

    if not changed:
        # Leave the original untouched and discard the identical copy.
        os.remove(output_file)
        return 0

    # prevent race-condition (Python 3.3):
    if sys.version_info >= (3, 3):
        os.replace(output_file, input_file)
    else:
        os.remove(input_file)
        os.rename(output_file, input_file)

    return 1


def updateDocumentToC(paths, min_toc_len, verbose):
    """Add or update table of contents to specified paths. Return number of changed files"""
    n = 0
    for g in paths:
        # Each path may be a glob pattern; only regular files are processed.
        for f in glob.glob(g):
            if os.path.isfile(f):
                n = n + updateSingleDocumentToC(input_file=f, min_toc_len=min_toc_len, verbose=verbose)
    return n


def updateDocumentToCMain():
    """Add or update table of contents to specified paths."""

    parser = argparse.ArgumentParser(
        description='Add or update table of contents in markdown documents.',
        epilog="""""",
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument(
        'Input',
        metavar='file',
        type=str,
        nargs=argparse.REMAINDER,
        help='files to process, at default: docs/*.md')

    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='report the name of the file being processed')

    parser.add_argument(
        '--min-toc-entries',
        dest='minTocEntries',
        default=minTocEntries,
        type=int,
        metavar='N',
        help='the minimum number of entries to create a table of contents for [{default}]'.format(default=minTocEntries))

    # Removal is implemented as a minimum entry count no document can reach.
    parser.add_argument(
        '--remove-toc',
        action='store_const',
        dest='minTocEntries',
        const=sys.maxsize,
        help='remove all tables of contents')

    args = parser.parse_args()

    paths = args.Input if args.Input else [documentsDefault]

    changedFiles = updateDocumentToC(paths=paths, min_toc_len=args.minTocEntries, verbose=args.verbose)

    if changedFiles > 0:
        print( "Processed table of contents in " + str(changedFiles) + " file(s)" )
    else:
        print( "No table of contents added or updated" )


if __name__ == '__main__':
    updateDocumentToCMain()

# end of file