"""
CodeHilite Extension for Python-Markdown
========================================

Adds code/syntax highlighting to standard Python-Markdown code blocks.

See <https://Python-Markdown.github.io/extensions/code_hilite>
for documentation.

Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/).

All changes Copyright 2008-2014 The Python Markdown Project

License: [BSD](https://opensource.org/licenses/bsd-license.php)

"""

from . import Extension
from ..treeprocessors import Treeprocessor
from ..util import parseBoolValue

try:  # pragma: no cover
    from pygments import highlight
    from pygments.lexers import get_lexer_by_name, guess_lexer
    from pygments.formatters import get_formatter_by_name
    from pygments.util import ClassNotFound
    pygments = True
except ImportError:  # pragma: no cover
    # Pygments is optional; without it `CodeHilite.hilite` falls back to
    # emitting plain escaped markup.
    pygments = False


def parse_hl_lines(expr):
    """Support our syntax for emphasizing certain lines of code.

    expr should be like '1 2' to emphasize lines 1 and 2 of a code block.
    Returns a list of ints, the line numbers to emphasize. An empty/None
    expression, or one containing a non-integer token, yields an empty list.
    """
    if not expr:
        return []

    try:
        return list(map(int, expr.split()))
    except ValueError:  # pragma: no cover
        return []


# ------------------ The Main CodeHilite Class ----------------------
class CodeHilite:
    """
    Determine language of source code, and pass it on to the Pygments highlighter.

    Usage:
        code = CodeHilite(src=some_code, lang='python')
        html = code.hilite()

    Arguments:
    * src: Source string or any object with a .readline attribute.

    * lang: String name of Pygments lexer to use for highlighting. Default: `None`.

    * guess_lang: Auto-detect which lexer to use. Ignored if `lang` is set to a valid
      value. Default: `True`.

    * use_pygments: Pass code to pygments for code highlighting. If `False`, the code is
      instead wrapped for highlighting by a JavaScript library. Default: `True`.

    * pygments_formatter: The name of a Pygments formatter or a formatter class used for
      highlighting the code blocks. Default: `html`.

    * linenums: An alias to Pygments `linenos` formatter option. Default: `None`.

    * css_class: An alias to Pygments `cssclass` formatter option. Default: 'codehilite'.

    * lang_prefix: Prefix prepended to the language. Default: "language-".

    Other Options:
    Any other options are accepted and passed on to the lexer and formatter. Therefore,
    valid options include any options which are accepted by the `html` formatter or
    whichever lexer the code's language uses. Note that most lexers do not have any
    options. However, a few have very useful options, such as PHP's `startinline` option.
    Any invalid options are ignored without error.

    Formatter options: https://pygments.org/docs/formatters/#HtmlFormatter
    Lexer Options: https://pygments.org/docs/lexers/

    Additionally, when Pygments is enabled and `pygments_formatter` is a formatter
    class (not a name), the code's language is passed to the formatter as an extra
    option `lang_str`, whose value is `{lang_prefix}{lang}`.
    This option has no effect on Pygments's builtin formatters.

    Advanced Usage:
        code = CodeHilite(
            src = some_code,
            lang = 'php',
            startinline = True,      # Lexer option. Snippet does not start with `<?php`.
            linenostart = 42,        # Formatter option. Snippet starts on line 42.
            hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50.
            linenos = 'inline'       # Formatter option. Avoid alignment problems.
            )
        html = code.hilite()

    """

    def __init__(self, src, **options):
        self.src = src
        # Options consumed by this class are popped so that only lexer and
        # formatter options remain in `options`.
        self.lang = options.pop('lang', None)
        self.guess_lang = options.pop('guess_lang', True)
        self.use_pygments = options.pop('use_pygments', True)
        self.lang_prefix = options.pop('lang_prefix', 'language-')
        self.pygments_formatter = options.pop('pygments_formatter', 'html')

        # Map our aliases onto the Pygments option names; an explicitly
        # given Pygments name takes precedence over the alias.
        if 'linenos' not in options:
            options['linenos'] = options.pop('linenums', None)
        if 'cssclass' not in options:
            options['cssclass'] = options.pop('css_class', 'codehilite')
        if 'wrapcode' not in options:
            # Override pygments default
            options['wrapcode'] = True
        # Disallow use of `full` option
        options['full'] = False

        self.options = options

    def hilite(self, shebang=True):
        """
        Pass code to the [Pygments](http://pygments.pocoo.org/) highlighter with
        optional line numbers. The output should then be styled with css to
        your liking. No styles are applied by default - only styling hooks
        (i.e.: <span class="k">).

        If `shebang` is true and no language was given, the first line of the
        source is inspected for a (mock) shebang/colon header first.

        returns : A string of html.

        """

        self.src = self.src.strip('\n')

        if self.lang is None and shebang:
            self._parseHeader()

        if pygments and self.use_pygments:
            try:
                lexer = get_lexer_by_name(self.lang, **self.options)
            except ValueError:
                # Unknown (or missing) language: guess if allowed, otherwise
                # fall back to the plain text lexer.
                try:
                    if self.guess_lang:
                        lexer = guess_lexer(self.src, **self.options)
                    else:
                        lexer = get_lexer_by_name('text', **self.options)
                except ValueError:  # pragma: no cover
                    lexer = get_lexer_by_name('text', **self.options)
            if not self.lang:
                # Use the guessed lexer's language instead
                self.lang = lexer.aliases[0]
            lang_str = f'{self.lang_prefix}{self.lang}'
            if isinstance(self.pygments_formatter, str):
                try:
                    formatter = get_formatter_by_name(self.pygments_formatter, **self.options)
                except ClassNotFound:
                    formatter = get_formatter_by_name('html', **self.options)
            else:
                # A formatter class was supplied; it also receives `lang_str`.
                formatter = self.pygments_formatter(lang_str=lang_str, **self.options)
            return highlight(self.src, lexer, formatter)
        else:
            # just escape and build markup usable by JS highlighting libs
            # '&' must be escaped first so that the ampersands introduced by
            # the following replacements are not escaped a second time.
            txt = self.src.replace('&', '&amp;')
            txt = txt.replace('<', '&lt;')
            txt = txt.replace('>', '&gt;')
            txt = txt.replace('"', '&quot;')
            classes = []
            if self.lang:
                classes.append(f'{self.lang_prefix}{self.lang}')
            if self.options['linenos']:
                classes.append('linenums')
            class_str = ''
            if classes:
                class_str = ' class="{}"'.format(' '.join(classes))
            return '<pre class="{}"><code{}>{}\n</code></pre>\n'.format(
                self.options['cssclass'],
                class_str,
                txt
            )

    def _parseHeader(self):
        """
        Determines language of a code block from shebang line and whether the
        said line should be removed or left in place. If the shebang line
        contains a path (even a single /) then it is assumed to be a real
        shebang line and left alone. However, if no path is given
        (i.e.: #!python or :::python) then it is assumed to be a mock shebang
        for language identification of a code fragment and removed from the
        code block prior to processing for code highlighting. When a mock
        shebang (i.e.: #!python) is found, line numbering is turned on. When
        colons are found in place of a shebang (i.e.: :::python), line
        numbering is left in the current state - off by default.

        Also parses optional list of highlight lines, like:

            :::python hl_lines="1 3"
        """

        import re

        # split text into lines
        lines = self.src.split("\n")
        # pull first line to examine
        fl = lines.pop(0)

        c = re.compile(r'''
            (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons
            (?P<path>(?:/\w+)*[/ ])?        # Zero or 1 path
            (?P<lang>[\w#.+-]*)             # The language
            \s*                             # Arbitrary whitespace
            # Optional highlight lines, single- or double-quote-delimited
            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?
            ''', re.VERBOSE)
        # search first line for shebang
        m = c.search(fl)
        if m:
            # we have a match
            try:
                self.lang = m.group('lang').lower()
            except IndexError:  # pragma: no cover
                self.lang = None
            if m.group('path'):
                # path exists - restore first line
                lines.insert(0, fl)
            if self.options['linenos'] is None and m.group('shebang'):
                # Overridable and Shebang exists - use line numbers
                self.options['linenos'] = True

            self.options['hl_lines'] = parse_hl_lines(m.group('hl_lines'))
        else:
            # No match
            lines.insert(0, fl)

        self.src = "\n".join(lines).strip("\n")


# ------------------ The Markdown Extension -------------------------------


class HiliteTreeprocessor(Treeprocessor):
    """ Highlight source code in code blocks. """

    def code_unescape(self, text):
        """Unescape code.

        Reverses the `&lt;`, `&gt;` and `&amp;` entity escaping of the code
        text so that the highlighter receives the raw source.
        """
        text = text.replace("&lt;", "<")
        text = text.replace("&gt;", ">")
        # Escaped '&' should be replaced at the end to avoid
        # conflicting with &lt; and &gt;.
        text = text.replace("&amp;", "&")
        return text

    def run(self, root):
        """ Find code blocks and store in htmlStash. """
        blocks = root.iter('pre')
        for block in blocks:
            # Only handle `<pre>` elements whose single child is `<code>`.
            if len(block) == 1 and block[0].tag == 'code':
                # Copy so that popping `pygments_style` does not alter the
                # configuration shared by subsequent blocks.
                local_config = self.config.copy()
                code = CodeHilite(
                    self.code_unescape(block[0].text),
                    tab_length=self.md.tab_length,
                    style=local_config.pop('pygments_style', 'default'),
                    **local_config
                )
                placeholder = self.md.htmlStash.store(code.hilite())
                # Clear codeblock in etree instance
                block.clear()
                # Change to p element which will later
                # be removed when inserting raw html
                block.tag = 'p'
                block.text = placeholder


class CodeHiliteExtension(Extension):
    """ Add source code highlighting to markdown codeblocks. """

    def __init__(self, **kwargs):
        # define default configs
        self.config = {
            'linenums': [None,
                         "Use lines numbers. True|table|inline=yes, False=no, None=auto"],
            'guess_lang': [True,
                           "Automatic language detection - Default: True"],
            'css_class': ["codehilite",
                          "Set class name for wrapper <div> - "
                          "Default: codehilite"],
            'pygments_style': ['default',
                               'Pygments HTML Formatter Style '
                               '(Colorscheme) - Default: default'],
            'noclasses': [False,
                          'Use inline styles instead of CSS classes - '
                          'Default false'],
            'use_pygments': [True,
                             'Use Pygments to Highlight code blocks. '
                             'Disable if using a JavaScript library. '
                             'Default: True'],
            'lang_prefix': [
                'language-',
                'Prefix prepended to the language when use_pygments is false. Default: "language-"'
            ],
            'pygments_formatter': ['html',
                                   'Use a specific formatter for Pygments highlighting. '
                                   'Default: "html"',
                                   ],
        }

        for key, value in kwargs.items():
            if key in self.config:
                self.setConfig(key, value)
            else:
                # manually set unknown keywords.
                # They are kept in the config so that they end up being
                # passed on to `CodeHilite` as extra lexer/formatter options.
                if isinstance(value, str):
                    try:
                        # Attempt to parse str as a bool value
                        value = parseBoolValue(value, preserve_none=True)
                    except ValueError:
                        pass  # Assume it's not a bool value. Use as-is.
                self.config[key] = [value, '']

    def extendMarkdown(self, md):
        """ Add HiliteTreeprocessor to Markdown instance. """
        hiliter = HiliteTreeprocessor(md)
        hiliter.config = self.getConfigs()
        md.treeprocessors.register(hiliter, 'hilite', 30)

        md.registerExtension(self)


def makeExtension(**kwargs):  # pragma: no cover
    """Return a `CodeHiliteExtension` configured with the given keyword arguments."""
    return CodeHiliteExtension(**kwargs)