• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""
2CodeHilite Extension for Python-Markdown
3========================================
4
5Adds code/syntax highlighting to standard Python-Markdown code blocks.
6
7See <https://Python-Markdown.github.io/extensions/code_hilite>
8for documentation.
9
10Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/).
11
12All changes Copyright 2008-2014 The Python Markdown Project
13
14License: [BSD](https://opensource.org/licenses/bsd-license.php)
15
16"""
17
18from . import Extension
19from ..treeprocessors import Treeprocessor
20from ..util import parseBoolValue
21
22try:  # pragma: no cover
23    from pygments import highlight
24    from pygments.lexers import get_lexer_by_name, guess_lexer
25    from pygments.formatters import get_formatter_by_name
26    from pygments.util import ClassNotFound
27    pygments = True
28except ImportError:  # pragma: no cover
29    pygments = False
30
31
def parse_hl_lines(expr):
    """Turn an `hl_lines` expression into a list of line numbers.

    `expr` is a whitespace-separated string of integers, e.g. `'1 2'` to
    emphasize lines 1 and 2 of a code block.

    Returns a list of ints. An empty/falsy `expr`, or one containing any
    token which is not an integer, yields an empty list.
    """
    tokens = expr.split() if expr else []
    try:
        return [int(token) for token in tokens]
    except ValueError:  # pragma: no cover
        # A single bad token invalidates the whole expression.
        return []
45
46
47# ------------------ The Main CodeHilite Class ----------------------
class CodeHilite:
    """
    Determine language of source code, and pass it on to the Pygments highlighter.

    Usage:
        code = CodeHilite(src=some_code, lang='python')
        html = code.hilite()

    Arguments:
    * src: Source string or any object with a .readline attribute.

    * lang: String name of Pygments lexer to use for highlighting. Default: `None`.

    * guess_lang: Auto-detect which lexer to use. Ignored if `lang` is set to a valid
      value. Default: `True`.

    * use_pygments: Pass code to pygments for code highlighting. If `False`, the code is
      instead wrapped for highlighting by a JavaScript library. Default: `True`.

    * pygments_formatter: The name of a Pygments formatter or a formatter class used for
      highlighting the code blocks. Default: `html`.

    * linenums: An alias to Pygments `linenos` formatter option. Default: `None`.

    * css_class: An alias to Pygments `cssclass` formatter option. Default: 'codehilite'.

    * lang_prefix: Prefix prepended to the language. Default: "language-".

    Other Options:
    Any other options are accepted and passed on to the lexer and formatter. Therefore,
    valid options include any options which are accepted by the `html` formatter or
    whichever lexer the code's language uses. Note that most lexers do not have any
    options. However, a few have very useful options, such as PHP's `startinline` option.
    Any invalid options are ignored without error.

    Formatter options: https://pygments.org/docs/formatters/#HtmlFormatter
    Lexer Options: https://pygments.org/docs/lexers/

    Additionally, when Pygments is enabled and `pygments_formatter` is a formatter
    class (rather than a formatter name), the code's language is passed to the
    formatter as an extra option `lang_str`, whose value is `{lang_prefix}{lang}`.
    This option has no effect on Pygments's builtin formatters.

    Advanced Usage:
        code = CodeHilite(
            src = some_code,
            lang = 'php',
            startinline = True,      # Lexer option. Snippet does not start with `<?php`.
            linenostart = 42,        # Formatter option. Snippet starts on line 42.
            hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50.
            linenos = 'inline'       # Formatter option. Avoid alignment problems.
        )
        html = code.hilite()

    """

    def __init__(self, src, **options):
        """
        Store the source and split `options` into the settings consumed by this
        class and the remainder, which is kept in `self.options` and handed to
        the lexer and formatter.
        """
        self.src = src
        self.lang = options.pop('lang', None)
        self.guess_lang = options.pop('guess_lang', True)
        self.use_pygments = options.pop('use_pygments', True)
        self.lang_prefix = options.pop('lang_prefix', 'language-')
        self.pygments_formatter = options.pop('pygments_formatter', 'html')

        # Translate our aliases into the Pygments option names. An explicitly
        # given Pygments name always wins over the alias.
        if 'linenos' not in options:
            options['linenos'] = options.pop('linenums', None)
        if 'cssclass' not in options:
            options['cssclass'] = options.pop('css_class', 'codehilite')
        if 'wrapcode' not in options:
            # Override pygments default
            options['wrapcode'] = True
        # Disallow use of `full` option
        options['full'] = False

        self.options = options

    def hilite(self, shebang=True):
        """
        Pass code to the [Pygments](http://pygments.pocoo.org/) highlighter with
        optional line numbers. The output should then be styled with css to
        your liking. No styles are applied by default - only styling hooks
        (i.e.: <span class="k">).

        Arguments:
        * shebang: When `True` and no language has been set, the first line of
          the source is examined for a (mock) shebang or `:::lang` header.

        returns : A string of html.

        Note that `self.src` (and possibly `self.lang` and `self.options`) are
        updated in place by this method.

        """

        self.src = self.src.strip('\n')

        if self.lang is None and shebang:
            self._parseHeader()

        if pygments and self.use_pygments:
            try:
                lexer = get_lexer_by_name(self.lang, **self.options)
            except ValueError:
                # No lexer for `self.lang`: either guess one from the source
                # or fall back to the plain `text` lexer.
                try:
                    if self.guess_lang:
                        lexer = guess_lexer(self.src, **self.options)
                    else:
                        lexer = get_lexer_by_name('text', **self.options)
                except ValueError:  # pragma: no cover
                    lexer = get_lexer_by_name('text', **self.options)
            if not self.lang:
                # Use the guessed lexer's language instead
                self.lang = lexer.aliases[0]
            lang_str = f'{self.lang_prefix}{self.lang}'
            if isinstance(self.pygments_formatter, str):
                # Formatter given by name; unknown names fall back to `html`.
                try:
                    formatter = get_formatter_by_name(self.pygments_formatter, **self.options)
                except ClassNotFound:
                    formatter = get_formatter_by_name('html', **self.options)
            else:
                # Formatter given as a class; only here is `lang_str` passed on.
                formatter = self.pygments_formatter(lang_str=lang_str, **self.options)
            return highlight(self.src, lexer, formatter)
        else:
            # just escape and build markup usable by JS highlighting libs
            # ('&' must be escaped first so the other entities are not doubled)
            txt = self.src.replace('&', '&amp;')
            txt = txt.replace('<', '&lt;')
            txt = txt.replace('>', '&gt;')
            txt = txt.replace('"', '&quot;')
            classes = []
            if self.lang:
                classes.append('{}{}'.format(self.lang_prefix, self.lang))
            if self.options['linenos']:
                classes.append('linenums')
            class_str = ''
            if classes:
                class_str = ' class="{}"'.format(' '.join(classes))
            return '<pre class="{}"><code{}>{}\n</code></pre>\n'.format(
                self.options['cssclass'],
                class_str,
                txt
            )

    def _parseHeader(self):
        """
        Determines language of a code block from shebang line and whether the
        said line should be removed or left in place. If the shebang line
        contains a path (even a single /) then it is assumed to be a real
        shebang line and left alone. However, if no path is given
        (i.e.: #!python or :::python) then it is assumed to be a mock shebang
        for language identification of a code fragment and removed from the
        code block prior to processing for code highlighting. When a mock
        shebang (i.e.: #!python) is found, line numbering is turned on. When
        colons are found in place of a shebang (i.e.: :::python), line
        numbering is left in the current state - off by default.

        Also parses optional list of highlight lines, like:

            :::python hl_lines="1 3"

        When the header carries no `hl_lines`, any `hl_lines` option already
        supplied by the caller is left untouched.
        """

        import re

        # split text into lines
        lines = self.src.split("\n")
        # pull first line to examine
        fl = lines.pop(0)

        c = re.compile(r'''
            (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons
            (?P<path>(?:/\w+)*[/ ])?        # Zero or 1 path
            (?P<lang>[\w#.+-]*)             # The language
            \s*                             # Arbitrary whitespace
            # Optional highlight lines, single- or double-quote-delimited
            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?
            ''',  re.VERBOSE)
        # search first line for shebang
        m = c.search(fl)
        if m:
            # we have a match; the `lang` group always participates in a
            # match (possibly as an empty string), so it is never `None`.
            self.lang = m.group('lang').lower()
            if m.group('path'):
                # path exists - restore first line
                lines.insert(0, fl)
            if self.options['linenos'] is None and m.group('shebang'):
                # Overridable and Shebang exists - use line numbers
                self.options['linenos'] = True

            hl_lines = m.group('hl_lines')
            if hl_lines is not None:
                # The header explicitly lists lines to highlight - it wins.
                self.options['hl_lines'] = parse_hl_lines(hl_lines)
            elif not self.options.get('hl_lines'):
                # Nothing in the header and nothing usable from the caller.
                self.options['hl_lines'] = []
        else:
            # No match
            lines.insert(0, fl)

        self.src = "\n".join(lines).strip("\n")
236
237
238# ------------------ The Markdown Extension -------------------------------
239
240
class HiliteTreeprocessor(Treeprocessor):
    """ Highlight source code in code blocks. """

    # NOTE: `self.config` is not set here; it is assigned by
    # `CodeHiliteExtension.extendMarkdown` after the instance is created.

    def code_unescape(self, text):
        """Unescape code.

        Reverses the `&lt;`, `&gt;` and `&amp;` entities in `text` and
        returns the resulting string.
        """
        text = text.replace("&lt;", "<")
        text = text.replace("&gt;", ">")
        # Escaped '&' should be replaced at the end to avoid
        # conflicting with < and >.
        text = text.replace("&amp;", "&")
        return text

    def run(self, root):
        """ Find code blocks and store in htmlStash.

        Every `<pre>` element of `root` which holds exactly one `<code>`
        child is highlighted with `CodeHilite`. The generated HTML is stored
        in the Markdown instance's `htmlStash` and the element is replaced
        with a `<p>` containing only the stash placeholder.
        """
        blocks = root.iter('pre')
        for block in blocks:
            if len(block) == 1 and block[0].tag == 'code':
                text = block[0].text
                if text is None:
                    # An empty `<code>` element has no text to unescape or
                    # highlight; leave the block as it is.
                    continue
                local_config = self.config.copy()
                # `pygments_style` is our name for the formatter's `style`.
                style = local_config.pop('pygments_style', 'default')
                code = CodeHilite(
                    self.code_unescape(text),
                    tab_length=self.md.tab_length,
                    style=style,
                    **local_config
                )
                placeholder = self.md.htmlStash.store(code.hilite())
                # Clear codeblock in etree instance
                block.clear()
                # Change to p element which will later
                # be removed when inserting raw html
                block.tag = 'p'
                block.text = placeholder
272
273
class CodeHiliteExtension(Extension):
    """ Add source code highlighting to markdown codeblocks. """

    def __init__(self, **kwargs):
        """
        Define the default configuration and apply `kwargs` on top of it.

        Known keys are set through `setConfig`. Unknown keys are stored as
        well (they end up being passed on with the rest of the configuration),
        with string values converted to a bool/`None` when they parse as such.
        """
        # define default configs
        self.config = {
            'linenums': [None,
                         "Use lines numbers. True|table|inline=yes, False=no, None=auto"],
            'guess_lang': [True,
                           "Automatic language detection - Default: True"],
            'css_class': ["codehilite",
                          "Set class name for wrapper <div> - "
                          "Default: codehilite"],
            'pygments_style': ['default',
                               'Pygments HTML Formatter Style '
                               '(Colorscheme) - Default: default'],
            'noclasses': [False,
                          'Use inline styles instead of CSS classes - '
                          'Default false'],
            'use_pygments': [True,
                             'Use Pygments to Highlight code blocks. '
                             'Disable if using a JavaScript library. '
                             'Default: True'],
            'lang_prefix': [
                'language-',
                'Prefix prepended to the language when use_pygments is false. Default: "language-"'
            ],
            # The two literals are concatenated, so the first one needs a
            # trailing space to keep the sentences apart.
            'pygments_formatter': ['html',
                                   'Use a specific formatter for Pygments highlighting. '
                                   'Default: "html"',
                                   ],
            }

        for key, value in kwargs.items():
            if key in self.config:
                self.setConfig(key, value)
            else:
                # manually set unknown keywords.
                if isinstance(value, str):
                    try:
                        # Attempt to parse str as a bool value
                        value = parseBoolValue(value, preserve_none=True)
                    except ValueError:
                        pass  # Assume it's not a bool value. Use as-is.
                self.config[key] = [value, '']

    def extendMarkdown(self, md):
        """ Add HiliteTreeprocessor to Markdown instance. """
        hiliter = HiliteTreeprocessor(md)
        # Hand the resolved configuration to the treeprocessor.
        hiliter.config = self.getConfigs()
        md.treeprocessors.register(hiliter, 'hilite', 30)

        md.registerExtension(self)
327
328
def makeExtension(**kwargs):  # pragma: no cover
    """Build a `CodeHiliteExtension`, forwarding all keyword arguments to it."""
    extension = CodeHiliteExtension(**kwargs)
    return extension
331