• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2
3#
4# updateDocumentToC.py
5#
6# Insert table of contents at top of Catch markdown documents.
7#
8# This script is distributed under the GNU General Public License v3.0
9#
10# It is based on markdown-toclify version 1.7.1 by Sebastian Raschka,
11# https://github.com/rasbt/markdown-toclify
12#
13
14from  __future__  import print_function
15
16import argparse
17import glob
18import os
19import re
20import sys
21
22from scriptCommon import catchPath
23
# Configuration:

# Default for --min-toc-entries: documents with fewer collected headings
# than this get no table of contents.
minTocEntries = 4

# Heading levels that are left out of the table of contents.
headingExcludeDefault = [1,3,4,5]  # default: levels 1, 3, 4 and 5 are skipped, so level 2 headers are listed
headingExcludeRelease = [1,3,4,5]  # release-notes.md: currently the same exclusions as the default
                                   # NOTE(review): this comment used to say "level 1 headers" - confirm intent

# Glob pattern used when no files are given on the command line.
documentsDefault = os.path.join(os.path.relpath(catchPath), 'docs/*.md')
# File name (without directory) that identifies the release notes document.
releaseNotesName = 'release-notes.md'

# Text that starts the table of contents, and the 1-based line it is placed on.
contentTitle = '**Contents**'
contentLineNo = 4
contentLineNdx = contentLineNo - 1  # same position as a 0-based list index

# End configuration

# Characters that dashifyHeadline keeps when it turns a heading into an anchor.
VALIDS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-&'
41
def readLines(in_file):
    """Read the markdown file `in_file` and return its text split on newlines.

    A file that ends with a newline therefore yields a trailing empty string.
    """
    with open(in_file, 'r') as handle:
        text = handle.read()
    return text.split('\n')
48
def removeLines(lines, remove=('[[back to top]', '<a class="mk-toclify"')):
    """Return a copy of `lines` without the lines that start with a `remove` prefix.

    With the default prefixes this drops previously generated
    "[back to top]" links and mk-toclify anchor tags.
    An empty `remove` keeps every line.
    """
    if remove:
        return [line for line in lines if not line.startswith(remove)]
    return lines[:]
61
def removeToC(lines):
    """Return a copy of `lines` without an existing table of contents.

    A table of contents is recognised by a line starting with contentTitle
    at index contentLineNdx. It extends over all directly following lines
    that start with '[' (the entries), plus the one line after them.

    Documents that are too short to contain the title line, or that do not
    have it at the expected position, are returned as an unchanged copy.
    """
    # Guard first: indexing a short document would raise IndexError.
    if len(lines) <= contentLineNdx:
        return lines[:]
    if not lines[contentLineNdx].startswith(contentTitle):
        return lines[:]

    result_top = lines[:contentLineNdx]

    # Walk over the entries; stop at the end of the document as well,
    # in case the entries are the last lines of the file.
    pos = contentLineNdx + 1
    while pos < len(lines) and lines[pos].startswith('['):
        pos += 1

    # lines[pos] is the first line after the entries (createToc ends the
    # table with an empty line); it is dropped together with the table.
    result_bottom = lines[pos + 1:]

    return result_top + result_bottom
76
def dashifyHeadline(line):
    """
    Takes a header line from a Markdown document and
    returns a list of
        the head line without its '#' markers and surrounding whitespace,
        a string version for <a id=''></a> anchor tags,
        and the level of the headline as integer.

    A list (not a tuple) is returned because positioningHeadlines
    adjusts the level in place.

    Leading and trailing whitespace of `line` is ignored, so indented
    headlines report their real level.

    E.g.,
    >>> dashifyHeadline('### some header lvl3')
    ['some header lvl3', 'some-header-lvl3', 3]
    >>> dashifyHeadline('  ## Indented ##  ')
    ['Indented', 'indented', 2]

    """
    # Strip whitespace first: otherwise the '#' runs are not at the ends of
    # the string, and neither the level count nor the marker removal works.
    trimmed = line.strip()
    without_marker = trimmed.lstrip('#')
    level = len(trimmed) - len(without_marker)
    title = without_marker.rstrip('#').strip()

    # GitHub's sluggification works in an interesting way
    # 1) '+', '/', '(', ')' and so on are just removed
    # 2) spaces are converted into '-' directly
    # 3) multiple -- are not collapsed

    slug_chars = []
    for c in title:
        if c in VALIDS:
            slug_chars.append(c.lower())
        elif c.isspace():
            slug_chars.append('-')
        # Unknown symbols are just removed

    return [title, ''.join(slug_chars), level]
110
def tagAndCollect(lines, id_tag=True, back_links=False, exclude_h=None):
    """
    Gets headlines from the markdown document and creates anchor tags.

    Every input line is kept in the output; lines that merely look like a
    headline (see below) are passed through unchanged.

    A line counts as a headline when it
        starts with 1 to 6 '#' characters followed by a space,
        is indented by at most 3 spaces (and by spaces only), and
        contains something other than '#' and ' ' characters.

    Keyword arguments:
        lines: a list of strings, one per line of the Markdown document.
        id_tag: if true, inserts an <a class="mk-toclify" id="..."></a> tag
            above each collected headline (not req. by GitHub)
        back_links: if true, adds "back to top" links below each headline
        exclude_h: header levels to exclude. E.g., [2, 3]
            excludes level 2 and 3 headings.

    Returns a tuple of 2 lists:
        1st list:
            A copy of the input lines with the anchor tags and
            back-to-top links inserted as requested.

        2nd list:
            A list of 3-value sublists, as returned by dashifyHeadline,
            for every headline whose level is not excluded: the heading
            text, the anchor id, and the headline level as an integer.
            E.g.,
            [['some header lvl3', 'some-header-lvl3', 3], ...]

    """
    heading_prefixes = ('# ', '## ', '### ', '#### ', '##### ', '###### ')

    out_contents = []
    headlines = []
    for l in lines:
        l_stripped = l.lstrip()
        indent = len(l) - len(l_stripped)

        # Deeper indentation (or a tab) is not a headline; neither is a
        # heading with no text.
        saw_headline = (
            l_stripped.startswith(heading_prefixes)
            and indent <= 3
            and l[:indent] == ' ' * indent
            and bool(set(l) - {'#', ' '})
        )

        if saw_headline:
            # Surrounding whitespace is removed so that the '#' runs sit at
            # the ends of the string, where dashifyHeadline expects them.
            dashified = dashifyHeadline(l.strip())

            if not exclude_h or dashified[-1] not in exclude_h:
                if id_tag:
                    out_contents.append(
                        '<a class="mk-toclify" id="%s"></a>' % (dashified[1]))
                headlines.append(dashified)

        # Always keep the line itself, headline or not.
        out_contents.append(l)
        if back_links and saw_headline:
            out_contents.append('[[back to top](#table-of-contents)]')
    return out_contents, headlines
178
def positioningHeadlines(headlines):
    """
    Shifts all headline levels up by one when no level 1 headline exists.

    The level is the last item of each entry. The entries are modified
    in place and the same list is returned.
    """
    has_top_level = any(entry[-1] == 1 for entry in headlines)
    if has_top_level:
        return headlines

    for entry in headlines:
        entry[-1] -= 1
    return headlines
192
def createToc(headlines, hyperlink=True, top_link=False, no_toc_header=False):
    """
    Builds the lines of the table of contents from the headline list
    that was returned by the tagAndCollect function.

    Keyword Arguments:
        headlines: list of lists
            e.g., ['Some header lvl3', 'some-header-lvl3', 3]
        hyperlink: if True, every entry is a Markdown link,
            e.g., '[Some header lvl1](#some-header-lvl1)<br>';
            otherwise a plain list item indented by 4 spaces per level,
            e.g., '    - Some header lvl2<br>'
        top_link: if True, add a id tag for linking the table
            of contents itself (for the back-to-top-links)
        no_toc_header: suppresses TOC header (and the top_link tag) if True.

    Returns a list of strings: the optional header, one entry per headline
    (each ending in '<br>'), and a final '\\n' element.

    """
    toc = []
    if not no_toc_header:
        if top_link:
            toc.append('<a class="mk-toclify" id="table-of-contents"></a>\n')
        toc.append(contentTitle + '<br>')

    if hyperlink:
        entries = ['[%s](#%s)' % (h[0], h[1]) for h in headlines]
    else:
        entries = ['%s- %s' % ((h[2]-1)*'    ', h[0]) for h in headlines]

    toc.extend(entry + '<br>' for entry in entries)
    toc.append('\n')
    return toc
226
def buildMarkdown(toc_headlines, body, spacer=0, placeholder=None):
    """
    Returns a string with the Markdown output contents incl.
    the table of contents.

    Keyword arguments:
        toc_headlines: lines for the table of contents
            as created by the createToc function.
        body: contents of the Markdown file including
            ID-anchor tags as returned by the
            tagAndCollect function.
        spacer: Adds vertical space after the table
            of contents. Height in pixels.
        placeholder: If a placeholder string is provided, the placeholder
            will be replaced by the TOC instead of inserting the TOC at
            line index contentLineNdx of the document.

    When there is nothing to insert and the body has no lines at or after
    contentLineNdx, the body is returned joined as-is, so that processing
    a short document repeatedly does not keep adding newlines to it.

    """
    toc_lines = list(toc_headlines)
    if spacer:
        toc_lines.append('\n<div style="height:%spx;"></div>\n' % (spacer))
    toc_markdown = "\n".join(toc_lines)

    if placeholder:
        return "\n".join(body).replace(placeholder, toc_markdown)

    head = body[:contentLineNdx]
    tail = body[contentLineNdx:]

    if not toc_markdown and not tail:
        # The separator newline below would otherwise be appended to the
        # end of the document on every run.
        return "\n".join(body)

    # The newline closes the last line of `head`; the table of contents
    # itself ends with newlines, so `tail` follows it directly.
    return "\n".join(head) + '\n' + toc_markdown + "\n".join(tail)
260
def outputMarkdown(markdown_cont, output_file):
    """
    Writes `markdown_cont` to `output_file`, replacing any existing content.

    Does nothing when `output_file` is empty or None.

    """
    if not output_file:
        return
    with open(output_file, 'w') as sink:
        sink.write(markdown_cont)
269
def markdownToclify(
    input_file,
    output_file=None,
    min_toc_len=2,
    github=False,
    back_to_top=False,
    nolink=False,
    no_toc_header=False,
    spacer=0,
    placeholder=None,
    exclude_h=None):
    """ Function to add table of contents to markdown files.

    Any existing table of contents, back-to-top links and mk-toclify
    anchor tags are removed from the input before the new ones are created.

    Parameters
    -----------
      input_file: str
        Path to the markdown input file.

      output_file: str (default: None)
        Path to the markdown output file. Nothing is written if empty.

      min_toc_len: int (default: 2)
        Minimum number of entries to create a table of contents for.
        With fewer collected headlines no table of contents is inserted.

      github: bool (default: False)
        Uses GitHub TOC syntax if True, i.e. no <a id> anchor tags are
        inserted into the document.

      back_to_top: bool (default: False)
        Inserts back-to-top links below headings if True.

      nolink: bool (default: False)
        Creates the table of contents without internal links if True.

      no_toc_header: bool (default: False)
        Suppresses the Table of Contents header if True

      spacer: int (default: 0)
        Inserts vertical space (in pixels) after the table of contents.

      placeholder: str (default: None)
        Inserts the TOC at the placeholder string instead
        of inserting the TOC at the top of the document.

      exclude_h: list (default None)
        Excludes header levels, e.g., if [2, 3], ignores header
        levels 2 and 3 in the TOC.

    Returns
    -----------
    changed: Boolean
      True if the generated document differs from the text read from
      input_file, False otherwise.

    """
    original_lines = readLines(input_file)

    cleaned_contents = removeLines(
        removeToC(original_lines),
        remove=('[[back to top]', '<a class="mk-toclify"'))

    processed_contents, raw_headlines = tagAndCollect(
        cleaned_contents,
        id_tag=not github,
        back_links=back_to_top,
        exclude_h=exclude_h)

    # add table of contents?
    if len(raw_headlines) < min_toc_len:
        processed_headlines = []
    else:
        leftjustified_headlines = positioningHeadlines(raw_headlines)

        processed_headlines = createToc(
            leftjustified_headlines,
            hyperlink=not nolink,
            top_link=not nolink and not github,
            no_toc_header=no_toc_header)

    # without links the anchor tags and back-to-top links are not wanted
    if nolink:
        processed_contents = cleaned_contents

    cont = buildMarkdown(
        toc_headlines=processed_headlines,
        body=processed_contents,
        spacer=spacer,
        placeholder=placeholder)

    if output_file:
        outputMarkdown(cont, output_file)

    # readLines splits the text on '\n', so joining the untouched list with
    # '\n' gives back exactly the text that was read.
    return cont != "\n".join(original_lines)
356
def isReleaseNotes(f):
    """Return True if the file name part of path `f` equals releaseNotesName."""
    _, file_name = os.path.split(f)
    return file_name == releaseNotesName
359
def excludeHeadingsFor(f):
    """Return the heading levels to exclude from the table of contents of file `f`."""
    if isReleaseNotes(f):
        return headingExcludeRelease
    return headingExcludeDefault
362
def updateSingleDocumentToC(input_file, min_toc_len, verbose=False):
    """Add or update table of contents in specified file. Return 1 if file changed, 0 otherwise.

    The new document is first written to `input_file + '.tmp'`. Only when
    its content differs from the current file does it replace the file;
    otherwise the temporary file is removed and the file is left untouched.
    """
    if verbose:
        print('file: {}'.format(input_file))

    output_file = input_file + '.tmp'

    markdownToclify(
        input_file=input_file,
        output_file=output_file,
        min_toc_len=min_toc_len,
        github=True,
        back_to_top=False,
        nolink=False,
        no_toc_header=False,
        spacer=False,
        placeholder=False,
        exclude_h=excludeHeadingsFor(input_file))

    # Compare the result with the current file, so that unchanged documents
    # are neither rewritten nor counted as changed.
    with open(input_file, 'r') as current:
        old_text = current.read()
    with open(output_file, 'r') as updated:
        new_text = updated.read()

    if old_text == new_text:
        os.remove(output_file)
        return 0

    # prevent race-condition (Python 3.3):
    if sys.version_info >= (3, 3):
        os.replace(output_file, input_file)
    else:
        os.remove(input_file)
        os.rename(output_file, input_file)

    return 1
390
def updateDocumentToC(paths, min_toc_len, verbose):
    """Add or update table of contents to specified paths. Return number of changed files

    Each item of `paths` is expanded as a glob pattern; matches that are
    not regular files are skipped.
    """
    return sum(
        updateSingleDocumentToC(input_file=candidate, min_toc_len=min_toc_len, verbose=verbose)
        for pattern in paths
        for candidate in glob.glob(pattern)
        if os.path.isfile(candidate))
399
def updateDocumentToCMain():
    """Add or update table of contents to specified paths.

    Parses the command line, processes the given files (or documentsDefault
    when none are given) and prints a one-line summary.
    """

    parser = argparse.ArgumentParser(
        description='Add or update table of contents in markdown documents.',
        epilog='',
        formatter_class=argparse.RawTextHelpFormatter)

    # REMAINDER: everything from the first file name on is taken as a file
    parser.add_argument(
        'Input',
        metavar='file',
        type=str,
        nargs=argparse.REMAINDER,
        help='files to process, at default: docs/*.md')

    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='report the name of the file being processed')

    parser.add_argument(
        '--min-toc-entries',
        dest='minTocEntries',
        default=minTocEntries,
        type=int,
        metavar='N',
        help='the minimum number of entries to create a table of contents for [{default}]'.format(default=minTocEntries))

    # Removal works by demanding more entries than any document can have.
    # sys.maxsize is used because a small fixed threshold would leave the
    # table in place for documents with that many headings or more.
    parser.add_argument(
        '--remove-toc',
        action='store_const',
        dest='minTocEntries',
        const=sys.maxsize,
        help='remove all tables of contents')

    args = parser.parse_args()

    paths = args.Input if args.Input else [documentsDefault]

    changedFiles = updateDocumentToC(paths=paths, min_toc_len=args.minTocEntries, verbose=args.verbose)

    if changedFiles > 0:
        print("Processed table of contents in " + str(changedFiles) + " file(s)")
    else:
        print("No table of contents added or updated")
445
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    updateDocumentToCMain()
448
449# end of file
450