"""
yaml.py

Lexer for YAML, a human-friendly data serialization language
(http://yaml.org/).

Written by Kirill Simonov <xi@resolvent.net>.

License: Whatever suitable for inclusion into the Pygments package.
"""

from pygments.lexer import \
    ExtendedRegexLexer, LexerContext, include, bygroups
from pygments.token import \
    Text, Comment, Punctuation, Name, Literal

__all__ = ['YAMLLexer']


class YAMLLexerContext(LexerContext):
    """Indentation context for the YAML lexer.

    Extends the plain lexer context with the indentation bookkeeping that
    the rule callbacks in this module read and update.
    """

    def __init__(self, *args, **kwds):
        super(YAMLLexerContext, self).__init__(*args, **kwds)
        # Enclosing indentation levels; ``set_indent`` pushes the current
        # level here before entering a deeper one.
        self.indent_stack = []
        # Current indentation level; -1 means no level has been set yet.
        self.indent = -1
        # Candidate indentation level recorded by ``save_indent``.
        self.next_indent = 0
        # Indentation of the block scalar being scanned; None until it is
        # given explicitly or detected from the first content line.
        self.block_scalar_indent = None


def something(TokenClass):
    """Do not produce empty tokens.

    Return a rule callback that emits the matched text as ``TokenClass``
    and advances ``context.pos``, unless the match is empty, in which case
    nothing is emitted.
    """
    def callback(lexer, match, context):
        text = match.group()
        if not text:
            # Empty match: match.end() equals the current position, so
            # there is neither a token to emit nor a position to advance.
            return
        yield match.start(), TokenClass, text
        context.pos = match.end()
    return callback


def reset_indent(TokenClass):
    """Reset the indentation levels.

    Return a rule callback that restores every indentation field of the
    context to its initial value, then emits the matched text as
    ``TokenClass``.
    """
    def callback(lexer, match, context):
        text = match.group()
        context.indent_stack = []
        context.indent = -1
        context.next_indent = 0
        context.block_scalar_indent = None
        yield match.start(), TokenClass, text
        context.pos = match.end()
    return callback


def save_indent(TokenClass, start=False):
    """Save a possible indentation level.

    Return a rule callback that records the matched spaces in
    ``context.next_indent``.

    With ``start=True`` the match is the indentation at the beginning of a
    line: ``next_indent`` is set to its length and, if the line is indented
    less than the current level, levels are popped from ``indent_stack``
    until the current level no longer exceeds it.  If the line then sits
    strictly between two known levels, the spaces beyond the surviving
    level are emitted as ``TokenClass.Error``.

    With ``start=False`` the length of the match is added to
    ``next_indent``.
    """
    def callback(lexer, match, context):
        text = match.group()
        extra = ''
        if start:
            context.next_indent = len(text)
            if context.next_indent < context.indent:
                while context.next_indent < context.indent:
                    context.indent = context.indent_stack.pop()
                if context.next_indent > context.indent:
                    # ``indent`` may have fallen back to the -1 sentinel.
                    # Used directly as a slice index, -1 would accept all
                    # but the last space and flag only that one; clamp to 0
                    # so the whole unmatched indentation is the error.
                    valid_width = max(context.indent, 0)
                    extra = text[valid_width:]
                    text = text[:valid_width]
        else:
            context.next_indent += len(text)
        if text:
            yield match.start(), TokenClass, text
        if extra:
            yield match.start()+len(text), TokenClass.Error, extra
        context.pos = match.end()
    return callback

def set_indent(TokenClass, implicit=False):
    """Set the previously saved indentation level.

    Return a rule callback that makes ``context.next_indent`` the current
    indentation level when it is deeper than the current one, pushing the
    old level onto ``context.indent_stack``.  Unless ``implicit`` is true,
    the length of the matched text is then added to ``next_indent``.  The
    matched text is emitted as ``TokenClass``.
    """
    def callback(lexer, match, context):
        text = match.group()
        if context.indent < context.next_indent:
            context.indent_stack.append(context.indent)
            context.indent = context.next_indent
        if not implicit:
            context.next_indent += len(text)
        yield match.start(), TokenClass, text
        context.pos = match.end()
    return callback


def set_block_scalar_indent(TokenClass):
    """Set an explicit indentation level for a block scalar.

    Return a rule callback for the block scalar header.  It clears
    ``context.block_scalar_indent``; if group 1 of the match (the
    indentation indicator digit) is present, the block scalar indentation
    is set to the current indentation level (at least 0) plus that digit.
    A non-empty match is emitted as ``TokenClass``.
    """
    def callback(lexer, match, context):
        text = match.group()
        context.block_scalar_indent = None
        if not text:
            # Empty header: nothing to emit and no position to advance.
            return
        increment = match.group(1)
        if increment:
            current_indent = max(context.indent, 0)
            context.block_scalar_indent = current_indent + int(increment)
        yield match.start(), TokenClass, text
        context.pos = match.end()
    return callback


def parse_block_scalar_empty_line(IndentTokenClass, ContentTokenClass):
    """Process an empty line in a block scalar.

    Return a rule callback for a line consisting only of spaces.  Spaces up
    to ``context.block_scalar_indent`` are emitted as ``IndentTokenClass``;
    any spaces beyond it are emitted as ``ContentTokenClass``.  When the
    block scalar indentation is not known yet, the whole line is
    indentation.
    """
    def callback(lexer, match, context):
        text = match.group()
        if (context.block_scalar_indent is None or
                len(text) <= context.block_scalar_indent):
            if text:
                yield match.start(), IndentTokenClass, text
        else:
            indentation = text[:context.block_scalar_indent]
            content = text[context.block_scalar_indent:]
            yield match.start(), IndentTokenClass, indentation
            yield (match.start()+context.block_scalar_indent,
                   ContentTokenClass, content)
        context.pos = match.end()
    return callback


def parse_block_scalar_indent(TokenClass):
    """Process indentation spaces in a block scalar.

    Return a rule callback for the leading spaces of a block scalar line.
    If the indentation is not yet known, a line indented deeper than the
    current level fixes it; a line that is not deeper ends the scalar.
    Once known, a line indented less than it ends the scalar.  Ending the
    scalar pops two states and consumes nothing.
    """
    def callback(lexer, match, context):
        text = match.group()
        if context.block_scalar_indent is None:
            if len(text) <= max(context.indent, 0):
                # Not part of the scalar: pop two states without moving
                # context.pos, so the spaces are lexed again afterwards.
                context.stack.pop()
                context.stack.pop()
                return
            context.block_scalar_indent = len(text)
        else:
            if len(text) < context.block_scalar_indent:
                context.stack.pop()
                context.stack.pop()
                return
        if text:
            yield match.start(), TokenClass, text
            context.pos = match.end()
    return callback


def parse_plain_scalar_indent(TokenClass):
    """Process indentation spaces in a plain scalar.

    Return a rule callback for the leading spaces of a continuation line
    of a plain scalar.  A line that is not indented deeper than the current
    indentation level ends the scalar: two states are popped and nothing is
    consumed.  Otherwise the spaces are emitted as ``TokenClass``.
    """
    def callback(lexer, match, context):
        text = match.group()
        if len(text) <= context.indent:
            context.stack.pop()
            context.stack.pop()
            return
        if text:
            yield match.start(), TokenClass, text
            context.pos = match.end()
    return callback


class YAMLLexer(ExtendedRegexLexer):
    """Lexer for the YAML language.

    Indentation is tracked in a ``YAMLLexerContext`` that the rule
    callbacks defined in this module read and update.
    """

    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']

    tokens = {

        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text.Blank),
            # line breaks
            (r'\n+', Text.Break),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Directive),
             'yaml-directive'),
            # the '%TAG' directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Directive),
             'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)',
             reset_indent(Punctuation.Document), 'block-line'),
            # indentation spaces
            (r'[ ]*(?![ \t\n\r\f\v]|$)',
             save_indent(Text.Indent, start=True),
             ('block-line', 'indentation')),
        ],

        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text.Blank),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Text.Break, '#pop:2'),
        ],

        # the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Text.Blank, Literal.Version), 'ignored-line'),
        ],

        # the %TAG directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![0-9A-Za-z_-]*!)'
             r'([ ]+)(!|!?[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)',
             bygroups(Text.Blank, Name.Type, Text.Blank, Name.Type),
             'ignored-line'),
        ],

        # block collection indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Text.Blank), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text.Indent)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning of a block line
            (r'[ ]*', save_indent(Text.Indent), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Text.Blank), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Text.Blank),
            # tags, anchors and aliases
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`-]|[?:-][^ \t\n\r\f\v])',
             something(Literal.Scalar.Plain),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag
            (r'!<[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+>', Name.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[0-9A-Za-z_-]+)?'
             r'(?:![0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)?', Name.Type),
            # an anchor
            (r'&[0-9A-Za-z_-]+', Name.Anchor),
            # an alias
            (r'\*[0-9A-Za-z_-]+', Name.Alias),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', Literal.Scalar.Flow.Quote, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', Literal.Scalar.Flow.Quote, 'double-quoted-scalar'),
        ],

        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Text.Blank),
            # line breaks
            (r'\n+', Text.Break),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`])',
             something(Literal.Scalar.Plain),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence indicated by '[' and ']'
        'flow-sequence': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping indicated by '{' and '}'
        'flow-mapping': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # the content lines of a literal or folded scalar
        'block-scalar-content': [
            # line break
            (r'\n', Text.Break),
            # empty line
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Text.Indent,
                                           Literal.Scalar.Block)),
            # indentation spaces (we may leave the state here)
            (r'^[ ]*', parse_block_scalar_indent(Text.Indent)),
            # line content
            (r'[^\n\r\f\v]+', Literal.Scalar.Block),
        ],

        # the header of a literal or folded scalar
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # ignored and regular whitespaces in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespaces are ignored
            (r'^[ ]+|[ ]+$', Text.Blank),
            # line breaks are ignored
            (r'\n+', Text.Break),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Flow),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of the quote character
            (r'\'\'', Literal.Scalar.Flow.Escape),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v\']+', Literal.Scalar.Flow),
            # the closing quote
            (r'\'', Literal.Scalar.Flow.Quote, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', Literal.Scalar.Flow.Escape),
            # escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             Literal.Scalar.Flow.Escape),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v\"\\]+', Literal.Scalar.Flow),
            # the closing quote
            (r'"', Literal.Scalar.Flow.Quote, '#pop'),
        ],

        # the beginning of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # empty lines
            (r'^[ ]+$', Text.Blank),
            # line breaks
            (r'\n+', Text.Break),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Punctuation.Document), '#pop:3'),
            # indentation spaces (we may leave the block line state here)
            (r'^[ ]*', parse_plain_scalar_indent(Text.Indent), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends with the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Text.Blank), '#pop'),
            # the scalar ends with whitespaces followed by a comment
            (r'[ ]+(?=#)', Text.Blank, '#pop'),
            # trailing whitespaces are ignored
            (r'[ ]+$', Text.Blank),
            # line breaks are ignored
            (r'\n+', Text.Break, 'plain-scalar-in-block-context-new-line'),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?![ \t\n\r\f\v])|[^ \t\n\r\f\v:])+',
             Literal.Scalar.Plain),
        ],

        # a plain scalar in the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends with an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Text.Blank), '#pop'),
            # the scalar ends with a comment
            (r'[ ]+(?=#)', Text.Blank, '#pop'),
            # leading and trailing whitespaces are ignored
            (r'^[ ]+|[ ]+$', Text.Blank),
            # line breaks are ignored
            (r'\n+', Text.Break),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v,:?\[\]{}]+', Literal.Scalar.Plain),
        ],

    }

    def get_tokens_unprocessed(self, text=None, context=None):
        """Tokenize ``text``, creating a YAML-specific context if needed.

        When no ``context`` is supplied, a fresh ``YAMLLexerContext``
        starting at position 0 of ``text`` is used, so the rule callbacks
        always find the indentation fields they rely on.
        """
        if context is None:
            context = YAMLLexerContext(text, 0)
        return super(YAMLLexer, self).get_tokens_unprocessed(text, context)
