• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2023 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""Generates content related to Pigweed module metadata on pigweed.dev.
15
16This file implements the following pigweed.dev features:
17
18* The `.. pigweed-module::` and `.. pigweed-module-subpage::` directives.
19* The auto-generated "Source code" and "Issues" URLs that appear in the site
20  nav for each module.
21
22Everything is implemented through the Sphinx Extension API.
23"""
24
25from dataclasses import dataclass
26import json
27import os
28import sys
29from typing import cast, TypeVar
30
31# We use BeautifulSoup for certain docs rendering features. It may not be
32# available in downstream projects. If so, no problem. We fall back to simpler
33# docs rendering.
34# pylint: disable=import-error
35try:
36    from bs4 import BeautifulSoup  # type: ignore
37    from bs4.element import Tag as HTMLTag  # type: ignore
38
39    bs_enabled = True
40except ModuleNotFoundError:
41    bs_enabled = False
42
43try:
44    import jsonschema  # type: ignore
45
46    jsonschema_enabled = True
47except ModuleNotFoundError:
48    jsonschema_enabled = False
49# pylint: enable=import-error
50
51import docutils
52from docutils import nodes
53from docutils.nodes import Element
54import docutils.statemachine
55
56# pylint: disable=consider-using-from-import
57import docutils.parsers.rst.directives as directives  # type: ignore
58
59# pylint: enable=consider-using-from-import
60from sphinx.addnodes import document as Document
61from sphinx.application import Sphinx
62from sphinx.environment import BuildEnvironment
63from sphinx.util.docutils import SphinxDirective
64
65from sphinx_design.cards import CardDirective
66
# Generic type of a value stored on the Sphinx environment. It lets
# `EnvMetadata._get_env_attr()` return the same type as its `default` argument.
EnvAttrT = TypeVar('EnvAttrT')
68
69
# The module metadata is exposed as a global because it's used as read-only
# data. Opening and reading the metadata file in one of the event handlers
# would cause hundreds of filesystem reads on each build because those event
# handlers fire once for each docs page.
#
# Both paths are relative to the directory named by the `PW_ROOT` environment
# variable; a missing variable raises `KeyError` when this module is imported.
metadata_file = 'docs/module_metadata.json'
schema_file = 'docs/module_metadata_schema.json'
# The files are decoded explicitly as UTF-8 so that the result does not depend
# on the platform's locale-specific default encoding.
with open(
    f'{os.environ["PW_ROOT"]}/{schema_file}', 'r', encoding='utf-8'
) as f:
    schema = json.load(f)
with open(
    f'{os.environ["PW_ROOT"]}/{metadata_file}', 'r', encoding='utf-8'
) as f:
    metadata = json.load(f)
# Make sure the metadata matches its schema. Raise an uncaught exception
# if not. Validation is skipped when `jsonschema` could not be imported.
if jsonschema_enabled:
    jsonschema.validate(metadata, schema)
84
85
@dataclass
class ParsedBody:
    """The result of splitting a rendered page body in `_parse_body()`."""

    # HTML of the extracted top navbar; `_parse_body()` sets this to an empty
    # string when it extracts nothing.
    topnav: str
    # The page body HTML that remains once the top navbar has been taken out
    # (or the body as given, when nothing was extracted).
    body_without_topnav: str
90
91
class EnvMetadata:
    """Typed, self-initializing access to custom data kept on the Sphinx env.

    Custom values are stored as attributes on the Sphinx `env` object. Doing
    that by hand means every access has to cope with the attribute not having
    been set yet, and has to cast the untyped value to the expected type.

    This class declares each custom attribute once, as a property, so callers
    can simply write: `value = EnvMetadata(env).my_value`

    The property takes care of creating the attribute with a default on first
    use and carries the type annotation for the value.
    """

    def __init__(self, env: BuildEnvironment):
        self._env = env

    def _get_env_attr(self, attr: str, default: EnvAttrT) -> EnvAttrT:
        """Returns `env.<attr>`, first storing `default` there if it is unset.

        Args:
            attr: Name of the attribute on the wrapped environment.
            default: Value to store and return when the attribute is missing.

        Returns:
            The attribute's existing value, or `default` if it was just set.
        """
        if hasattr(self._env, attr):
            return getattr(self._env, attr)

        setattr(self._env, attr, default)
        return default

    @property
    def pw_parsed_bodies(self) -> dict[str, ParsedBody]:
        """The dict stored on the env under the `pw_module_nav` attribute."""
        empty: dict[str, ParsedBody] = {}
        return self._get_env_attr('pw_module_nav', empty)
123
124
def get_languages(module_name: str) -> list[str] | None:
    """Looks up the programming languages that a module supports.

    Args:
        module_name: The module to look up.

    Returns:
        The module's ``languages`` entry from ``//docs/module_metadata.json``,
        or ``None`` when either the module or that entry is missing.
    """
    module_info = metadata[module_name] if module_name in metadata else {}
    return module_info['languages'] if 'languages' in module_info else None
140
141
def get_status(module_name: str) -> str:
    """Looks up the status of a module.

    Preconditions:
        The status must be defined in ``//docs/module_metadata.json``. If the
        module or its status is missing, the process exits via ``sys.exit()``
        with a message naming what could not be found.

    Args:
        module_name: The module to look up.

    Returns:
        The module's ``status`` entry.
    """
    if module_name in metadata:
        module_info = metadata[module_name]
        if 'status' in module_info:
            return module_info['status']
        sys.exit(f'{module_name}.status not found in {metadata_file}')
    sys.exit(f'{module_name} not found in {metadata_file}')
159
160
def get_tagline(module_name: str) -> str | None:
    """Looks up the tagline for a module.

    Args:
        module_name: The module to look up.

    Returns:
        The module's ``tagline`` entry from ``//docs/module_metadata.json``,
        or ``None`` when either the module or that entry is missing.
    """
    if module_name in metadata:
        module_info = metadata[module_name]
        if 'tagline' in module_info:
            return module_info['tagline']
    return None
176
177
def get_code_size(module_name: str) -> str | None:
    """Looks up the code size impact summary for a module.

    Args:
        module_name: The module to look up.

    Returns:
        The module's ``size`` entry from ``//docs/module_metadata.json``, or
        ``None`` when either the module or that entry is missing.
    """
    has_size = module_name in metadata and 'size' in metadata[module_name]
    if not has_size:
        return None
    return metadata[module_name]['size']
193
194
def status_badge(module_status: str) -> str:
    """Builds the `:bdg-primary:` role text for a title-cased module status."""
    label = module_status.title()
    return f':bdg-primary:`{label}`'
199
200
def cs_url(module_name: str) -> str:
    """Builds the codesearch URL of the given module's directory."""
    main_branch_root = 'https://cs.opensource.google/pigweed/pigweed/+/main:'
    return f'{main_branch_root}{module_name}/'
204
205
def issues_url(module_name: str) -> str:
    """Builds an issue tracker search URL for open issues naming the module."""
    query = f'{module_name}%20status:open'
    return f'https://issues.pigweed.dev/issues?q={query}'
209
210
def concat_tags(*tag_lists: list[str]) -> list[str]:
    """Joins several tag lists into one list that is ready for rendering.

    The first list is always copied into the result. Every later list that is
    non-empty is appended after a ``:octicon:`dot-fill``` separator entry;
    empty lists are skipped entirely.

    Args:
        *tag_lists: The tag lists to join, in display order. None of them is
            modified.

    Returns:
        A new list holding the joined tags, or an empty list when called
        with no arguments.
    """
    if not tag_lists:
        return []

    first, *rest = tag_lists
    # Copy the first list: extending it in place would silently modify the
    # caller's list.
    all_tags = list(first)

    for tag_list in rest:
        if tag_list:
            all_tags.append(':octicon:`dot-fill`')
            all_tags.extend(tag_list)

    return all_tags
222
223
def create_topnav(
    subtitle: str | None,
    extra_classes: list[str] | None = None,
) -> nodes.Node:
    """Create the nodes for the top title and navigation bar.

    Args:
        subtitle: Text for the subtitle paragraph. When empty or ``None``, no
            subtitle paragraph is added.
        extra_classes: Additional CSS classes for the container, placed after
            the ``pw-topnav`` class. The list is not modified.

    Returns:
        A container node carrying the ``pw-topnav`` class, with a
        ``pw-topnav-subtitle`` paragraph as its child if a subtitle was given.
    """
    # Build the class list step by step. A one-line
    # `[...] + extra if extra is not None else []` parses as
    # `([...] + extra) if ... else []`, which would drop `pw-topnav` whenever
    # no extra classes are passed.
    topnav_classes = ['pw-topnav']
    if extra_classes is not None:
        topnav_classes += extra_classes

    topnav_container = nodes.container(classes=topnav_classes)

    if subtitle:
        subtitle_node = nodes.paragraph(
            classes=['pw-topnav-subtitle'],
            text=subtitle,
        )
        topnav_container += subtitle_node

    return topnav_container
244
245
class PigweedModuleDirective(SphinxDirective):
    """Implements `.. pigweed-module::` for a module's main docs page.

    The directive looks up the module's metadata by its `:name:` option and
    renders a top navbar container, an info card, and the directive's own
    content.
    """

    required_arguments = 0
    final_argument_whitespace = True
    has_content = True
    option_spec = {'name': directives.unchanged_required}

    def _try_get_option(self, option: str):
        """Returns the value of a required option, raising if it is absent."""
        if option not in self.options:
            raise self.error(f' :{option}: option is required')
        return self.options[option]

    def _maybe_get_option(self, option: str):
        """Returns the value of an optional option, or None if it is absent."""
        return self.options.get(option)

    def run(self) -> list[nodes.Node]:
        """Builds the nodes that replace the directive in the doc.

        Returns:
            The top navbar container, the info card, and a paragraph holding
            the parsed directive content, in that order.
        """
        module_name = self._try_get_option('name')
        tagline = get_tagline(module_name)

        # One badge for the module status...
        status_tags: list[str] = [status_badge(get_status(module_name))]

        # ...one badge per supported language, if any are listed...
        language_tags = [
            f':bdg-info:`{language}`'
            for language in get_languages(module_name) or []
        ]

        # ...and the code size summary, if one is defined.
        code_size_text = get_code_size(module_name)
        code_size_impact = (
            [f'**Code Size Impact:** {code_size_text}']
            if code_size_text
            else []
        )

        # Parse the author-written directive content into its own node first,
        # so that it can be returned separately from the card.
        authored_lines = cast(list[str], self.content)  # type: ignore
        content = nodes.paragraph()
        self.state.nested_parse(authored_lines, 0, content)

        # The card is built from `self.content`. The authored content has
        # already been parsed above, so it can be swapped out for the tag
        # lines that belong in the card.
        self.content = docutils.statemachine.StringList(
            concat_tags(status_tags, language_tags, code_size_impact)
        )
        card = CardDirective.create_card(inst=self, arguments=[], options={})

        topbar = create_topnav(tagline, ['pw-module-index'])

        return [topbar, card, content]
312
313
class PigweedModuleSubpageDirective(PigweedModuleDirective):
    """Implements `.. pigweed-module-subpage::` for a module's other pages.

    Unlike the parent directive, this one renders only the top navbar
    container; it produces no info card and does not render its content.
    """

    required_arguments = 0
    final_argument_whitespace = True
    has_content = True
    option_spec = {
        'name': directives.unchanged_required,
        # Accepted so that pages can specify it, but not read by `run()`.
        'nav': directives.unchanged_required,
    }

    def run(self) -> list[nodes.Node]:
        """Builds the top navbar container for a module sub-page.

        Returns:
            A single-item list holding the top navbar container, whose
            subtitle is the module name followed by the tagline if the module
            has one.
        """
        module_name = self._try_get_option('name')
        tagline = get_tagline(module_name)
        # Prepend the module name on sub-pages so that it's very clear what
        # the tagline is referring to. Modules without a tagline show only
        # their name rather than the literal text "None".
        if tagline is None:
            subtitle = module_name
        else:
            subtitle = f'{module_name}: {tagline}'

        topbar = create_topnav(
            subtitle,
            ['pw-module-subpage'],
        )

        return [topbar]
338
339
def _parse_body(body: str) -> ParsedBody:
    """From the `body` HTML, return the topnav and the body without topnav.

    The fundamental idea is this: Our Sphinx directives can only render nodes
    *within* the docutils doc, but we want to elevate the top navbar *outside*
    of that doc into the web theme. Sphinx by itself provides no mechanism for
    this, since its model looks something like this:

      ┌──────────────────┐
      │ Theme            │
      │  ┌──────────────┐│    When Sphinx builds HTML, the output is plain HTML
      │  │ Sphinx HTML  ││    with a structure defined by docutils. Themes can
      │  │              ││    build *around* that and cascade styles down *into*
      │  │              ││    that HTML, but there's no mechanism in the Sphinx
      │  └──────────────┘│    build to render docutils nodes in the theme.
      └──────────────────┘

    The escape hatch is this:
    - Render things within the Sphinx HTML output (`body`)
    - Use Sphinx theme templates to run code during the final render phase
    - Extract the HTML from the `body` and insert it in the theme via templates

    So this function extracts the things that we rendered in the `body` but
    actually want in the theme (the top navbar), returns them for rendering in
    the template, and returns the `body` with those things removed.

    Args:
        body: The rendered HTML of a page body.

    Returns:
        A `ParsedBody` with the topnav HTML and the remaining body HTML. The
        topnav is empty and the body is returned as given when BeautifulSoup
        is unavailable, when the body has no `pw-topnav` div, or when a module
        index page has no `h1` to move into the topnav. If the topnav div
        carries neither the index nor the subpage class, the topnav is empty
        and the body is returned re-serialized.
    """
    if not bs_enabled:
        return ParsedBody('', body)

    html = BeautifulSoup(body, features='html.parser')

    # Render the doc unchanged, unless it has the module doc topnav
    topnav = html.find('div', attrs={'class': 'pw-topnav'})
    if not isinstance(topnav, HTMLTag):
        return ParsedBody('', body)

    # Find the topnav title and subtitle. Either one may be absent, e.g. the
    # subtitle is only rendered for modules that define a tagline.
    topnav_title = topnav.find('p', attrs={'class': 'pw-topnav-title'})
    topnav_subtitle = topnav.find('p', attrs={'class': 'pw-topnav-subtitle'})
    topnav_classes = topnav.get('class', [])

    if 'pw-module-index' in topnav_classes:
        # Find the single `h1` element, the doc's canonical title
        doc_title = html.find('h1')
        if not isinstance(doc_title, HTMLTag):
            return ParsedBody('', body)
        # Take the standard Sphinx/docutils title and add topnav styling
        doc_title['class'] = (
            doc_title.get('class', []) + ['pw-topnav-title']  # type: ignore
        )
        if isinstance(topnav_title, HTMLTag):
            # Replace the placeholder title in the topnav with the
            # "official" `h1`
            topnav_title.replace_with(doc_title)
        else:
            # No placeholder was rendered; put the `h1` first in the topnav
            topnav.insert(0, doc_title)

    elif 'pw-module-subpage' in topnav_classes:
        if isinstance(topnav_title, HTMLTag):
            # Take the title from the topnav (the module name), promote it
            # to `h1`
            topnav_title.name = 'h1'
            # Add the heading link, but pointed to the module index page
            heading_link = html.new_tag(
                'a',
                attrs={
                    'class': ['headerlink'],
                    'href': 'docs.html',
                    'title': 'Permalink to module index',
                },
            )
            heading_link.string = '#'
            topnav_title.append(heading_link)

    else:
        # Not a topnav variant that gets extracted; leave it in the document
        return ParsedBody('', str(html))

    # Promote the subtitle to `h2`
    if isinstance(topnav_subtitle, HTMLTag):
        topnav_subtitle.name = 'h2'
    # We're done mutating topnav; write it to string for rendering elsewhere
    topnav_str = str(topnav)
    # Destroy the instance that was rendered in the document
    topnav.decompose()

    return ParsedBody(topnav_str, str(html))
431
432
def setup_parse_body(_app, _pagename, _templatename, context, _doctree):
    """Stores a `parse_body` callable in `context` under that same key.

    The stored callable forwards its `body` argument to `_parse_body()` and
    returns the result. All arguments other than `context` are ignored.
    """

    def parse_body(body: str) -> ParsedBody:
        parsed = _parse_body(body)
        return parsed

    context['parse_body'] = parse_body
438
439
440def fix_canonical_url(canonical_url: str | None) -> str | None:
441    """Rewrites the canonical URL for `pigweed.dev/*/docs.html` pages.
442
443    Our server is configured to remove `docs.html` from URLs. E.g.
444    pigweed.dev/pw_string/docs.html` redirects to `pigweed.dev/pw_string`.
445    To improve our SEO, the `<link rel="canonical" href="..."/>` tag in our
446    HTML should match the URL that the server provides.
447
448    Args:
449        docname:
450            Basically the relative path to the doc, except `.rst` is omitted
451            from the filename. E.g. `pw_string/docs`.
452        canonical_url:
453            The default canonical URL that Sphinx has generated for the doc.
454
455    Returns:
456        The corrected canonical URL if the page would normally end with
457        `docs.html`, otherwise the original canonical URL value unmodified.
458    """
459    if canonical_url is None or not canonical_url.endswith('/docs.html'):
460        return canonical_url
461    canonical_url = canonical_url.replace('/docs.html', '/')
462    return canonical_url
463
464
def on_html_page_context(
    app: Sphinx,  # pylint: disable=unused-argument
    docname: str,  # pylint: disable=unused-argument
    templatename: str,  # pylint: disable=unused-argument
    context: dict[str, str | None] | None,
    doctree: Document,  # pylint: disable=unused-argument
) -> None:
    """Event handler that adjusts a page's HTML metadata.

    Currently the only adjustment is passing the `pageurl` entry through
    `fix_canonical_url()`. Nothing happens when `context` is `None` or has no
    `pageurl` entry.

    Args:
        docname:
            Basically the relative path to the doc, except `.rst` is omitted
            from the filename. E.g. `pw_string/docs`. Unused.
        context:
            A dict containing the HTML page's metadata.

    Returns:
        None. `context` is updated in-place.
    """
    canonical_url_key = 'pageurl'
    if context is not None and canonical_url_key in context:
        context[canonical_url_key] = fix_canonical_url(
            context[canonical_url_key]
        )
489
490
def add_links(module_name: str, toctree: Element) -> None:
    """Appends "Source code" and "Issues" links to a module's toctree.

    This function is how we auto-generate the source code and issues URLs
    that appear for each module in the pigweed.dev site nav.

    Args:
        module_name:
            The Pigweed module that we're creating links for.
        toctree:
            The table of contents tree from that module's homepage.

    Returns:
        `None`. `toctree` is modified in-place.
    """
    # Each link is a (title, URL) pair.
    links = [
        ('Source code', cs_url(module_name)),
        ('Issues', issues_url(module_name)),
    ]
    # Maintenance tip: the trick here is to create the `toctree` the same way
    # that Sphinx generates it. When in doubt, enable logging in this file,
    # manually modify the `.. toctree::` directive on a module's homepage, log
    # out `toctree` from somewhere in this script (you should see an XML-style
    # node), and then just make sure your code modifies the `toctree` the same
    # way that Sphinx generates it.
    toctree['entries'] += links
    toctree['rawentries'] += [title for title, _ in links]
516
517
def find_first_toctree(doctree: Document) -> Element | None:
    """Searches a `Document` for its first `toctree` node.

    Args:
        doctree:
            The content of a doc, represented as a tree of Docutils nodes.

    Returns:
        The first element yielded by traversing `doctree` whose tag name is
        `toctree`, or `None` if there is no such element.
    """
    toctrees = (
        element
        for element in doctree.traverse(nodes.Element)
        if element.tagname == 'toctree'
    )
    return next(toctrees, None)
533
534
def parse_module_name(docname: str) -> str:
    """Returns the first path component of a Sphinx docname.

    Preconditions:
        `docname` is assumed to start with `pw_`. I.e. the docs are assumed to
        have a flat directory structure, where the first directory is the name
        of a Pigweed module.

    Args:
        docname:
            Basically the relative path to the doc, except `.rst` is omitted
            from the filename. E.g. `pw_string/docs`.

    Returns:
        Everything before the first `/`, e.g. `pw_string`. A docname without
        any `/` is returned whole.
    """
    module_name, _, _ = docname.partition('/')
    return module_name
553
554
def on_doctree_read(app: Sphinx, doctree: Document) -> None:
    """Event handler that adds generated links to module homepages.

    Sphinx fires this listener after it has parsed a doc's reStructuredText
    into a tree of Docutils nodes. The listener fires once for each doc that's
    processed.

    In general, this stage of the Sphinx event lifecycle can only be used for
    content changes that do not affect the Sphinx build environment [1]. For
    example, creating a `toctree` node at this stage does not work, but
    inserting links into a pre-existing `toctree` node is OK.

    Docs that are not module homepages are left alone. If a module homepage
    has no `toctree` node, the process exits via `sys.exit()`.

    Args:
        app:
            Our Sphinx docs build system.
        doctree:
            The doc content, structured as a tree.

    Returns:
        `None`. The main modifications happen in-place in `doctree`.

    [1] See link in `on_source_read()`
    """
    docname = app.env.docname
    if is_module_homepage(docname):
        toctree = find_first_toctree(doctree)
        if toctree is None:
            # `add_toctree_to_module_homepage()` should ensure that every
            # `pw_*/docs.rst` file has a `toctree` node but if something went
            # wrong then we should bail.
            sys.exit(
                f'[module_metadata.py] error: toctree missing in {docname}'
            )
        add_links(parse_module_name(docname), toctree)
589
590
def is_module_homepage(docname: str) -> bool:
    """Checks whether a docname names a module homepage.

    A module homepage is any doc matching the pattern `pw_*/docs.rst`. Docs
    nested deeper, such as `pw_*/*/docs.rst`, do not count.

    Args:
        docname:
            Basically the relative path to the doc, except `.rst` is omitted
            from the filename.

    Returns:
        `True` if `docname` has exactly two `/`-separated parts, the first
        starting with `pw_` and the second equal to `docs`; else `False`.
    """
    parts = docname.split('/')
    return (
        len(parts) == 2
        and parts[0].startswith('pw_')
        and parts[1] == 'docs'
    )
618
619
def add_toctree_to_module_homepage(docname: str, source: str) -> str:
    """Ensures that a module homepage's source contains a `toctree`.

    Note that this function only needs to create the `toctree` node; it doesn't
    need to fully populate the `toctree`. Inserting links later via the more
    ergonomic Docutils API works fine.

    Args:
        docname:
            Basically the relative path to `source`, except `.rst` is omitted
            from the filename.
        source:
            The reStructuredText source code of `docname`.

    Returns:
        For module homepages whose source does not contain `.. toctree::`,
        the original `source` followed by a hidden, empty `toctree`
        directive. In every other case `source` is returned unmodified.
    """
    # Only module homepages (docnames matching `pw_*/docs`) that don't have a
    # `toctree` yet need one.
    needs_toctree = (
        is_module_homepage(docname) and '.. toctree::' not in source
    )
    if not needs_toctree:
        return source
    # Two newlines separate the directive from whatever the doc ends with.
    empty_toctree = '.. toctree::\n   :hidden:\n   :maxdepth: 1\n'
    return f'{source}\n\n{empty_toctree}'
658
659
# inclusive-language: disable
def on_source_read(
    app: Sphinx,  # pylint: disable=unused-argument
    docname: str,
    source: list[str],
) -> None:
    """Event handler that edits a doc's reStructuredText before parsing.

    Sphinx fires this event early in its event lifecycle [1], before it has
    converted a doc's reStructuredText (reST) into a tree of Docutils nodes.
    The listener fires once for each doc that's processed.

    This is the place to make docs changes that have to propagate across the
    site. Take our use case of adding a link in the site nav to each module's
    source code. To do this we need a `toctree` (table of contents tree) node
    on each module's homepage; the `toctree` is where we insert the source code
    link. If we try to dynamically insert the `toctree` node via the Docutils
    API later in the event lifecycle, e.g. during the `doctree-read` event, we
    have to do a bunch of complex and fragile logic to make the Sphinx build
    environment [2] aware of the new node. It's simpler and more reliable to
    just insert a `.. toctree::` directive into the doc source before Sphinx
    has processed the doc and then let Sphinx create its build environment as
    it normally does. We just have to make sure the reStructuredText we're
    injecting into the content is syntactically correct.

    Args:
        app:
            Our Sphinx docs build system. Unused.
        docname:
            Basically the relative path to `source`, except `.rst` is omitted
            from the filename.
        source:
            A list whose first item is the reStructuredText source code of
            `docname`.

    Returns:
        None. The first item of `source` is replaced in-place.

    [1] www.sphinx-doc.org/en/master/extdev/appapi.html#sphinx-core-events
    [2] www.sphinx-doc.org/en/master/extdev/envapi.html
    """
    # inclusive-language: enable
    # If a module homepage doesn't have a `toctree`, add one.
    original_rst = source[0]
    source[0] = add_toctree_to_module_homepage(docname, original_rst)
703
704
def setup(app: Sphinx) -> dict[str, bool]:
    """Registers this extension's directives and event listeners with Sphinx.

    This runs only once per docs build.

    Args:
        app:
            Our Sphinx docs build system.

    Returns:
        A dict that provides Sphinx info about our extension; it declares the
        extension safe for both parallel reading and parallel writing.
    """
    # Register the `.. pigweed-module::` and `.. pigweed-module-subpage::`
    # directives that are used on `pw_*/*.rst` pages.
    for directive_name, directive_class in (
        ('pigweed-module', PigweedModuleDirective),
        ('pigweed-module-subpage', PigweedModuleSubpageDirective),
    ):
        app.add_directive(directive_name, directive_class)
    # inclusive-language: disable
    # Register the Sphinx event listeners that automatically generate content
    # for `pw_*/*.rst` pages:
    # www.sphinx-doc.org/en/master/extdev/appapi.html#sphinx-core-events
    # inclusive-language: enable
    for event_name, listener in (
        ('source-read', on_source_read),
        ('doctree-read', on_doctree_read),
        ('html-page-context', on_html_page_context),
    ):
        app.connect(event_name, listener)
    extension_info = {
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }
    return extension_info
733