# Copyright 2023 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Generates content related to Pigweed module metadata on pigweed.dev.

This file implements the following pigweed.dev features:

* The `.. pigweed-module::` and `.. pigweed-module-subpage::` directives.
* The auto-generated "Source code" and "Issues" URLs that appear in the site
  nav for each module.

Everything is implemented through the Sphinx Extension API.
"""

from dataclasses import dataclass
import json
import os
import sys
from typing import cast, TypeVar

# We use BeautifulSoup for certain docs rendering features. It may not be
# available in downstream projects. If so, no problem. We fall back to simpler
# docs rendering.
# pylint: disable=import-error
try:
    from bs4 import BeautifulSoup  # type: ignore
    from bs4.element import Tag as HTMLTag  # type: ignore

    bs_enabled = True
except ModuleNotFoundError:
    bs_enabled = False

try:
    import jsonschema  # type: ignore

    jsonschema_enabled = True
except ModuleNotFoundError:
    jsonschema_enabled = False
# pylint: enable=import-error

import docutils
from docutils import nodes
from docutils.nodes import Element
import docutils.statemachine

# pylint: disable=consider-using-from-import
import docutils.parsers.rst.directives as directives  # type: ignore

# pylint: enable=consider-using-from-import
from sphinx.addnodes import document as Document
from sphinx.application import Sphinx
from sphinx.environment import BuildEnvironment
from sphinx.util.docutils import SphinxDirective

from sphinx_design.cards import CardDirective

EnvAttrT = TypeVar('EnvAttrT')


# The module metadata is exposed as a global because it's used as read-only
# data. Opening and reading the metadata file in one of the event handlers
# would cause hundreds of filesystem reads on each build because those event
# handlers fire once for each docs page.
metadata_file = 'docs/module_metadata.json'
schema_file = 'docs/module_metadata_schema.json'
# The encoding is spelled out so that parsing the JSON files does not depend
# on the locale of the machine running the docs build.
with open(
    f'{os.environ["PW_ROOT"]}/{schema_file}', 'r', encoding='utf-8'
) as f:
    schema = json.load(f)
with open(
    f'{os.environ["PW_ROOT"]}/{metadata_file}', 'r', encoding='utf-8'
) as f:
    metadata = json.load(f)
# Make sure the metadata matches its schema. Raise an uncaught exception
# if not.
if jsonschema_enabled:
    jsonschema.validate(metadata, schema)


@dataclass
class ParsedBody:
    """The two HTML strings produced by `_parse_body()`."""

    # HTML of the extracted top navbar, or '' if nothing was extracted.
    topnav: str
    # HTML of the page body after the top navbar has been removed.
    body_without_topnav: str


class EnvMetadata:
    """Easier access to the Sphinx `env` for custom metadata.

    You can store things on the Sphinx `env` as attributes. But each time you
    do, you have to handle the possibility that the attribute you want hasn't
    been set yet, and set it to a default. The `env` is also untyped, so you
    have to cast the value you get to whatever type you expect it to be.

    Or you can use this class to define your metadata keys up front, and just
    access them like: `value = EnvMetadata(env).my_value`

    ... which will handle initializing the value if it hasn't been yet and
    provide you a typed result.
    """

    def __init__(self, env: BuildEnvironment):
        self._env = env

    def _get_env_attr(self, attr: str, default: EnvAttrT) -> EnvAttrT:
        """Returns `env.<attr>`, first setting it to `default` if missing."""
        if not hasattr(self._env, attr):
            value: EnvAttrT = default
            setattr(self._env, attr, value)
        else:
            value = getattr(self._env, attr)

        return value

    @property
    def pw_parsed_bodies(self) -> dict[str, ParsedBody]:
        """The `ParsedBody` mapping stored on the Sphinx `env`."""
        # NOTE(review): the value is stored under the attribute name
        # `pw_module_nav`, which does not match this property's name. It
        # looks like a leftover from a rename -- confirm before changing it.
        default: dict[str, ParsedBody] = {}
        return self._get_env_attr('pw_module_nav', default)


def _get_optional_field(module_name: str, field: str):
    """Returns `metadata[module_name][field]`, or `None` if either is absent."""
    if module_name not in metadata:
        return None
    module_metadata = metadata[module_name]
    if field not in module_metadata:
        return None
    return module_metadata[field]


def get_languages(module_name: str) -> list[str] | None:
    """Returns the list of languages that a module supports.

    Args:
        module_name: The module to look up.

    Returns:
        A list of programming languages that the module supports, or ``None``
        if this has not been defined in ``//docs/module_metadata.json``.
    """
    return _get_optional_field(module_name, 'languages')


def get_status(module_name: str) -> str:
    """Returns the status of a module.

    Preconditions:
        The status must be defined in ``//docs/module_metadata.json``.
        If it is not, the process exits with an error message.

    Args:
        module_name: The module to look up.

    Returns:
        The status of the module as a string.
    """
    if module_name not in metadata:
        sys.exit(f'{module_name} not found in {metadata_file}')
    if 'status' not in metadata[module_name]:
        sys.exit(f'{module_name}.status not found in {metadata_file}')
    return metadata[module_name]['status']


def get_tagline(module_name: str) -> str | None:
    """Returns the tagline for a module.

    Args:
        module_name: The module to look up.

    Returns:
        The module's tagline or ``None`` if no tagline has been defined
        in ``//docs/module_metadata.json``.
    """
    return _get_optional_field(module_name, 'tagline')


def get_code_size(module_name: str) -> str | None:
    """Returns the code size impact summary for a module.

    Args:
        module_name: The module to look up.

    Returns:
        The code size impact summary as a string or ``None`` if no summary
        has been defined in ``//docs/module_metadata.json``.
    """
    return _get_optional_field(module_name, 'size')


def status_badge(module_status: str) -> str:
    """Given a module status, return the status badge for rendering."""
    role = ':bdg-primary:'
    return role + f'`{module_status.title()}`'


def cs_url(module_name: str) -> str:
    """Return the codesearch URL for the given module."""
    return f'https://cs.opensource.google/pigweed/pigweed/+/main:{module_name}/'


def issues_url(module_name: str) -> str:
    """Returns open issues that mention the given module name."""
    return f'https://issues.pigweed.dev/issues?q={module_name}%20status:open'


def concat_tags(*tag_lists: list[str]) -> list[str]:
    """Given a list of tag lists, return them concat'ed and ready for render.

    Non-empty lists after the first one are each preceded by a dot separator.
    The input lists are never modified.

    Args:
        *tag_lists: Zero or more lists of already-formatted tag strings.

    Returns:
        A new flat list of tags, or an empty list if no lists were given.
    """
    if not tag_lists:
        return []

    # Copy the first list so that appending the remaining tags does not
    # mutate the caller's list.
    all_tags = list(tag_lists[0])

    for tag_list in tag_lists[1:]:
        if tag_list:
            all_tags.append(':octicon:`dot-fill`')
            all_tags.extend(tag_list)

    return all_tags


def create_topnav(
    subtitle: str | None,
    extra_classes: list[str] | None = None,
) -> nodes.Node:
    """Create the nodes for the top title and navigation bar.

    Args:
        subtitle: Text for the subtitle paragraph. If falsy, no subtitle
            paragraph is added.
        extra_classes: CSS classes to add to the container in addition to
            `pw-topnav`.

    Returns:
        A container node with the `pw-topnav` class.
    """
    # The parentheses matter: without them the conditional expression applies
    # to the whole sum and `pw-topnav` is lost when `extra_classes` is None.
    topnav_classes = ['pw-topnav'] + (
        extra_classes if extra_classes is not None else []
    )

    topnav_container = nodes.container(classes=topnav_classes)

    if subtitle:
        subtitle_node = nodes.paragraph(
            classes=['pw-topnav-subtitle'],
            text=subtitle,
        )
        topnav_container += subtitle_node

    return topnav_container


class PigweedModuleDirective(SphinxDirective):
    """Directive registering module metadata, rendering title & info card."""

    required_arguments = 0
    final_argument_whitespace = True
    has_content = True
    option_spec = {'name': directives.unchanged_required}

    def _try_get_option(self, option: str):
        """Try to get an option by name and raise on failure."""

        try:
            return self.options[option]
        except KeyError:
            # The KeyError adds nothing to the directive error, so it is
            # suppressed rather than chained.
            raise self.error(f' :{option}: option is required') from None

    def _maybe_get_option(self, option: str):
        """Try to get an option by name and return None on failure."""
        return self.options.get(option, None)

    def run(self) -> list[nodes.Node]:
        """Renders the topnav, the info card, and the directive's content."""
        module_name = self._try_get_option('name')
        tagline = get_tagline(module_name)
        status = get_status(module_name)

        status_tags: list[str] = [
            status_badge(status),
        ]

        languages = get_languages(module_name)
        language_tags = [
            f':bdg-info:`{language}`' for language in (languages or [])
        ]

        code_size_impact = []

        code_size_text = get_code_size(module_name)
        if code_size_text:
            code_size_impact.append(f'**Code Size Impact:** {code_size_text}')

        # Move the directive content into a section that we can render wherever
        # we want.
        raw_content = cast(list[str], self.content)  # type: ignore
        content = nodes.paragraph()
        self.state.nested_parse(raw_content, 0, content)

        # The card inherits its content from this node's content, which we've
        # already pulled out. So we can replace this node's content with the
        # content we need in the card.
        self.content = docutils.statemachine.StringList(
            concat_tags(status_tags, language_tags, code_size_impact)
        )

        card = CardDirective.create_card(
            inst=self,
            arguments=[],
            options={},
        )

        topbar = create_topnav(
            tagline,
            ['pw-module-index'],
        )

        return [topbar, card, content]


class PigweedModuleSubpageDirective(PigweedModuleDirective):
    """Directive rendering the topnav for a module's sub-page.

    Unlike `PigweedModuleDirective`, this only renders the top navbar; no
    info card and none of the directive's content is returned.
    """

    required_arguments = 0
    final_argument_whitespace = True
    has_content = True
    option_spec = {
        'name': directives.unchanged_required,
        'nav': directives.unchanged_required,
    }

    def run(self) -> list[nodes.Node]:
        """Renders the topnav with a '<module name>: <tagline>' subtitle."""
        module_name = self._try_get_option('name')
        tagline = get_tagline(module_name)
        # Prepend the module name on sub-pages so that it's very clear what
        # the tagline is referring to. Modules without a tagline show just
        # the module name rather than the literal text "None".
        subtitle = f'{module_name}: {tagline}' if tagline else module_name

        topbar = create_topnav(
            subtitle,
            ['pw-module-subpage'],
        )

        return [topbar]


def _parse_body(body: str) -> ParsedBody:
    """From the `body` HTML, return the topnav and the body without topnav.

    The fundamental idea is this: Our Sphinx directives can only render nodes
    *within* the docutils doc, but we want to elevate the top navbar *outside*
    of that doc into the web theme. Sphinx by itself provides no mechanism for
    this, since its model looks something like this:

      ┌──────────────────┐
      │ Theme            │
      │  ┌──────────────┐│    When Sphinx builds HTML, the output is plain HTML
      │  │ Sphinx HTML  ││    with a structure defined by docutils. Themes can
      │  │              ││    build *around* that and cascade styles down *into*
      │  │              ││    that HTML, but there's no mechanism in the Sphinx
      │  └──────────────┘│    build to render docutils nodes in the theme.
      └──────────────────┘

    The escape hatch is this:
    - Render things within the Sphinx HTML output (`body`)
    - Use Sphinx theme templates to run code during the final render phase
    - Extract the HTML from the `body` and insert it in the theme via templates

    So this function extracts the things that we rendered in the `body` but
    actually want in the theme (the top navbar), returns them for rendering in
    the template, and returns the `body` with those things removed.

    If BeautifulSoup is unavailable, or `body` has no `pw-topnav` div, the
    body is returned unchanged with an empty topnav.
    """
    if not bs_enabled:
        return ParsedBody('', body)

    def _add_class_to_tag(tag: HTMLTag, classname: str) -> None:
        tag['class'] = tag.get('class', []) + [classname]  # type: ignore

    html = BeautifulSoup(body, features='html.parser')

    # Render the doc unchanged, unless it has the module doc topnav
    if (topnav := html.find('div', attrs={'class': 'pw-topnav'})) is None:
        return ParsedBody('', body)

    assert isinstance(topnav, HTMLTag)

    # Find the topnav title and subtitle
    # NOTE(review): `create_topnav()` in this file never emits a
    # `pw-topnav-title` paragraph, and it omits the subtitle when a module has
    # no tagline, so these assertions look like they would fail for pages
    # rendered by the directives above -- confirm whether this code path is
    # still in use.
    topnav_title = topnav.find('p', attrs={'class': 'pw-topnav-title'})
    topnav_subtitle = topnav.find('p', attrs={'class': 'pw-topnav-subtitle'})
    assert isinstance(topnav_title, HTMLTag)
    assert isinstance(topnav_subtitle, HTMLTag)

    # Find the single `h1` element, the doc's canonical title
    doc_title = html.find('h1')
    assert isinstance(doc_title, HTMLTag)

    topnav_classes = topnav['class']

    if 'pw-module-index' in topnav_classes:
        # Take the standard Sphinx/docutils title and add topnav styling
        _add_class_to_tag(doc_title, 'pw-topnav-title')
        # Replace the placeholder title in the topnav with the "official" `h1`
        topnav_title.replace_with(doc_title)
    elif 'pw-module-subpage' in topnav_classes:
        # Take the title from the topnav (the module name), promote it to `h1`
        topnav_title.name = 'h1'
        # Add the heading link, but pointed to the module index page
        heading_link = html.new_tag(
            'a',
            attrs={
                'class': ['headerlink'],
                'href': 'docs.html',
                'title': 'Permalink to module index',
            },
        )
        heading_link.string = '#'
        topnav_title.append(heading_link)
    else:
        # Neither kind of module page: leave the topnav where it is.
        return ParsedBody('', str(html))

    # Promote the subtitle to `h2`
    topnav_subtitle.name = 'h2'
    # We're done mutating topnav; write it to string for rendering elsewhere
    topnav_str = str(topnav)
    # Destroy the instance that was rendered in the document
    topnav.decompose()

    return ParsedBody(topnav_str, str(html))


def setup_parse_body(_app, _pagename, _templatename, context, _doctree):
    """Exposes `_parse_body()` to templates as `context['parse_body']`.

    The signature matches the other `html-page-context` handler in this file.
    """
    # NOTE(review): `setup()` below does not connect this handler to any
    # Sphinx event -- confirm whether it is registered elsewhere.
    context['parse_body'] = _parse_body


def fix_canonical_url(canonical_url: str | None) -> str | None:
    """Rewrites the canonical URL for `pigweed.dev/*/docs.html` pages.

    Our server is configured to remove `docs.html` from URLs. E.g.
    `pigweed.dev/pw_string/docs.html` redirects to `pigweed.dev/pw_string`.
    To improve our SEO, the `<link rel="canonical" href="..."/>` tag in our
    HTML should match the URL that the server provides.

    Args:
        canonical_url:
            The default canonical URL that Sphinx has generated for the doc.

    Returns:
        The corrected canonical URL if the page would normally end with
        `docs.html`, otherwise the original canonical URL value unmodified.
    """
    if canonical_url is None or not canonical_url.endswith('/docs.html'):
        return canonical_url
    # Strip only the trailing filename and keep the final slash. Using
    # `str.replace()` here would also rewrite any earlier `/docs.html`
    # occurrence inside the URL.
    return canonical_url.removesuffix('docs.html')


def on_html_page_context(
    app: Sphinx,  # pylint: disable=unused-argument
    docname: str,  # pylint: disable=unused-argument
    templatename: str,  # pylint: disable=unused-argument
    context: dict[str, str | None] | None,
    doctree: Document,  # pylint: disable=unused-argument
) -> None:
    """Handles modifications to HTML page metadata, e.g. canonical URLs.

    Args:
        docname:
            Basically the relative path to the doc, except `.rst` is omitted
            from the filename. E.g. `pw_string/docs`.
        context:
            A dict containing the HTML page's metadata.

    Returns:
        None. Modifications happen to the HTML metadata in-place.
    """
    canonical_url_key = 'pageurl'
    if context is None or canonical_url_key not in context:
        return
    canonical_url = context[canonical_url_key]
    context[canonical_url_key] = fix_canonical_url(canonical_url)


def add_links(module_name: str, toctree: Element) -> None:
    """Adds source code and issues URLs to a module's table of contents tree.

    This function is how we auto-generate the source code and issues URLs
    that appear for each module in the pigweed.dev site nav.

    Args:
        module_name:
            The Pigweed module that we're creating links for.
        toctree:
            The table of contents tree from that module's homepage.

    Returns:
        `None`. `toctree` is modified in-place.
    """
    src = ('Source code', cs_url(module_name))
    issues = ('Issues', issues_url(module_name))
    # Maintenance tip: the trick here is to create the `toctree` the same way
    # that Sphinx generates it. When in doubt, enable logging in this file,
    # manually modify the `.. toctree::` directive on a module's homepage, log
    # out `toctree` from somewhere in this script (you should see an XML-style
    # node), and then just make sure your code modifies the `toctree` the same
    # way that Sphinx generates it.
    toctree['entries'] += [src, issues]
    toctree['rawentries'] += [src[0], issues[0]]


def find_first_toctree(doctree: Document) -> Element | None:
    """Finds the first `toctree` (table of contents tree) node in a `Document`.

    Args:
        doctree:
            The content of a doc, represented as a tree of Docutils nodes.

    Returns:
        The first `toctree` node found in `doctree` or `None` if none was
        found.
    """
    for node in doctree.traverse(nodes.Element):
        if node.tagname == 'toctree':
            return node
    return None


def parse_module_name(docname: str) -> str:
    """Extracts a Pigweed module name from a Sphinx docname.

    Preconditions:
        `docname` is assumed to start with `pw_`. I.e. the docs are assumed to
        have a flat directory structure, where the first directory is the name
        of a Pigweed module.

    Args:
        docname:
            Basically the relative path to the doc, except `.rst` is omitted
            from the filename. E.g. `pw_string/docs`.

    Returns:
        Just the Pigweed module name, e.g. `pw_string`.
    """
    tokens = docname.split('/')
    return tokens[0]


def on_doctree_read(app: Sphinx, doctree: Document) -> None:
    """Event handler that enables manipulating a doc's Docutils tree.

    Sphinx fires this listener after it has parsed a doc's reStructuredText
    into a tree of Docutils nodes. The listener fires once for each doc that's
    processed.

    In general, this stage of the Sphinx event lifecycle can only be used for
    content changes that do not affect the Sphinx build environment [1]. For
    example, creating a `toctree` node at this stage does not work, but
    inserting links into a pre-existing `toctree` node is OK.

    Args:
        app:
            Our Sphinx docs build system.
        doctree:
            The doc content, structured as a tree.

    Returns:
        `None`. The main modifications happen in-place in `doctree`.

    [1] See link in `on_source_read()`
    """
    docname = app.env.docname
    if not is_module_homepage(docname):
        return
    toctree = find_first_toctree(doctree)
    if toctree is None:
        # `add_toctree_to_module_homepage()` should ensure that every
        # `pw_*/docs.rst` file has a `toctree` node but if something went wrong
        # then we should bail.
        sys.exit(f'[module_metadata.py] error: toctree missing in {docname}')
    module_name = parse_module_name(docname)
    add_links(module_name, toctree)


def is_module_homepage(docname: str) -> bool:
    """Determines if a doc is a module homepage.

    Any doc that matches the pattern `pw_*/docs.rst` is considered a module
    homepage. Watch out for the false positive of `pw_*/*/docs.rst`.

    Preconditions:
        `docname` is assumed to start with `pw_`. I.e. the docs are assumed to
        have a flat directory structure, where the first directory is the name
        of a Pigweed module.

    Args:
        docname:
            Basically the relative path to the doc, except `.rst` is omitted
            from the filename.

    Returns:
        `True` if the doc is a module homepage, else `False`.
    """
    tokens = docname.split('/')
    # Exactly two path components: `pw_<something>` and `docs`.
    return (
        len(tokens) == 2
        and tokens[0].startswith('pw_')
        and tokens[1] == 'docs'
    )


def add_toctree_to_module_homepage(docname: str, source: str) -> str:
    """Appends an empty `toctree` to a module homepage.

    Note that this function only needs to create the `toctree` node; it doesn't
    need to fully populate the `toctree`. Inserting links later via the more
    ergonomic Docutils API works fine.

    Args:
        docname:
            Basically the relative path to `source`, except `.rst` is omitted
            from the filename.
        source:
            The reStructuredText source code of `docname`.

    Returns:
        For module homepages that did not already have a `toctree`, the
        original contents of `source` plus an empty `toctree` is returned.
        For all other cases, the original contents of `source` are returned
        with no modification.
    """
    # Don't do anything if the page is not a module homepage, i.e. its
    # `docname` doesn't match the pattern `pw_*/docs`.
    if not is_module_homepage(docname):
        return source
    # Don't do anything if the module homepage already has a `toctree`.
    if '.. toctree::' in source:
        return source
    # Append an empty `toctree` to the content.
    # yapf: disable
    return (
        f'{source}\n\n'
        '.. toctree::\n'
        '   :hidden:\n'
        '   :maxdepth: 1\n'
    )
    # yapf: enable
    # Python formatting (yapf) is disabled in the return statement because the
    # formatter tries to change it to a less-readable single line string.


# inclusive-language: disable
def on_source_read(
    app: Sphinx,  # pylint: disable=unused-argument
    docname: str,
    source: list[str],
) -> None:
    """Event handler that enables manipulating a doc's reStructuredText.

    Sphinx fires this event early in its event lifecycle [1], before it has
    converted a doc's reStructuredText (reST) into a tree of Docutils nodes.
    The listener fires once for each doc that's processed.

    This is the place to make docs changes that have to propagate across the
    site. Take our use case of adding a link in the site nav to each module's
    source code. To do this we need a `toctree` (table of contents tree) node
    on each module's homepage; the `toctree` is where we insert the source code
    link. If we try to dynamically insert the `toctree` node via the Docutils
    API later in the event lifecycle, e.g. during the `doctree-read` event, we
    have to do a bunch of complex and fragile logic to make the Sphinx build
    environment [2] aware of the new node. It's simpler and more reliable to
    just insert a `.. toctree::` directive into the doc source before Sphinx
    has processed the doc and then let Sphinx create its build environment as
    it normally does. We just have to make sure the reStructuredText we're
    injecting into the content is syntactically correct.

    Args:
        app:
            Our Sphinx docs build system.
        docname:
            Basically the relative path to `source`, except `.rst` is omitted
            from the filename.
        source:
            The reStructuredText source code of `docname`.

    Returns:
        None. `source` is modified in-place.

    [1] www.sphinx-doc.org/en/master/extdev/appapi.html#sphinx-core-events
    [2] www.sphinx-doc.org/en/master/extdev/envapi.html
    """
    # inclusive-language: enable
    # If a module homepage doesn't have a `toctree`, add one.
    source[0] = add_toctree_to_module_homepage(docname, source[0])


def setup(app: Sphinx) -> dict[str, bool]:
    """Hooks the extension into our Sphinx docs build system.

    This runs only once per docs build.

    Args:
        app:
            Our Sphinx docs build system.

    Returns:
        A dict that provides Sphinx info about our extension.
    """
    # Register the `.. pigweed-module::` and `.. pigweed-module-subpage::`
    # directives that are used on `pw_*/*.rst` pages.
    app.add_directive('pigweed-module', PigweedModuleDirective)
    app.add_directive('pigweed-module-subpage', PigweedModuleSubpageDirective)
    # inclusive-language: disable
    # Register the Sphinx event listeners that automatically generate content
    # for `pw_*/*.rst` pages:
    # www.sphinx-doc.org/en/master/extdev/appapi.html#sphinx-core-events
    # inclusive-language: enable
    app.connect('source-read', on_source_read)
    app.connect('doctree-read', on_doctree_read)
    app.connect('html-page-context', on_html_page_context)
    return {
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }