# Emitter expects events obeying the following grammar:
# stream ::= STREAM-START document* STREAM-END
# document ::= DOCUMENT-START node DOCUMENT-END
# node ::= SCALAR | sequence | mapping
# sequence ::= SEQUENCE-START node* SEQUENCE-END
# mapping ::= MAPPING-START (node node)* MAPPING-END

__all__ = ['Emitter', 'EmitterError']

import sys

from error import YAMLError
from events import *

has_ucs4 = sys.maxunicode > 0xffff

class EmitterError(YAMLError):
    """Raised when the emitter receives an event or value it cannot emit."""
    pass

class ScalarAnalysis(object):
    """Plain record of the facts `Emitter.analyze_scalar` found about a scalar.

    Stores the scalar text, whether it is empty or multiline, and which
    output styles (flow plain, block plain, single quoted, double quoted,
    block) are allowed for it.
    """

    def __init__(self, scalar, empty, multiline,
            allow_flow_plain, allow_block_plain,
            allow_single_quoted, allow_double_quoted,
            allow_block):
        self.scalar = scalar
        self.empty = empty
        self.multiline = multiline
        self.allow_flow_plain = allow_flow_plain
        self.allow_block_plain = allow_block_plain
        self.allow_single_quoted = allow_single_quoted
        self.allow_double_quoted = allow_double_quoted
        self.allow_block = allow_block

class Emitter(object):
    """State machine that writes a stream of events as YAML text.

    Events are fed one at a time through `emit()`.  The current handler is
    kept in `self.state`; handlers for enclosing structures wait on the
    `self.states` stack.  Text is written to `self.stream`, encoded with
    `self.encoding` when one is set.
    """

    DEFAULT_TAG_PREFIXES = {
        u'!' : u'!',
        u'tag:yaml.org,2002:' : u'!!',
    }

    def __init__(self, stream, canonical=None, indent=None, width=None,
            allow_unicode=None, line_break=None):
        """Set up the emitter state and the formatting preferences.

        `indent` is honoured only when 1 < indent < 10 (default 2), `width`
        only when it exceeds twice the chosen indent (default 80), and
        `line_break` only when it is one of CR, LF or CRLF (default LF).
        """

        # The stream should have the methods `write` and possibly `flush`.
        self.stream = stream

        # Encoding can be overridden by STREAM-START.
        self.encoding = None

        # Emitter is a state machine with a stack of states to handle nested
        # structures.
        self.states = []
        self.state = self.expect_stream_start

        # Current event and the event queue.
        self.events = []
        self.event = None

        # The current indentation level and the stack of previous indents.
        self.indents = []
        self.indent = None

        # Flow level.
        self.flow_level = 0

        # Contexts.
        self.root_context = False
        self.sequence_context = False
        self.mapping_context = False
        self.simple_key_context = False

        # Characteristics of the last emitted character:
        #  - current position.
        #  - is it a whitespace?
        #  - is it an indention character
        #    (indentation space, '-', '?', or ':')?
        self.line = 0
        self.column = 0
        self.whitespace = True
        self.indention = True

        # Whether the document requires an explicit document indicator
        self.open_ended = False

        # Formatting details.
        self.canonical = canonical
        self.allow_unicode = allow_unicode
        self.best_indent = 2
        if indent and 1 < indent < 10:
            self.best_indent = indent
        self.best_width = 80
        if width and width > self.best_indent*2:
            self.best_width = width
        self.best_line_break = u'\n'
        if line_break in [u'\r', u'\n', u'\r\n']:
            self.best_line_break = line_break

        # Tag prefixes.
        self.tag_prefixes = None

        # Prepared anchor and tag.
        self.prepared_anchor = None
        self.prepared_tag = None

        # Scalar analysis and style.
        self.analysis = None
        self.style = None

    def dispose(self):
        """Drop the state handlers so the emitter holds no self-references."""
        # Reset the state attributes (to clear self-references)
        self.states = []
        self.state = None

    def emit(self, event):
        """Queue `event` and process queued events while enough are available."""
        self.events.append(event)
        while not self.need_more_events():
            self.event = self.events.pop(0)
            self.state()
            self.event = None

    # In some cases, we wait for a few next events before emitting.

    def need_more_events(self):
        """Return True when the head of the queue needs more look-ahead.

        Document starts wait for 1 following event, sequence starts for 2
        and mapping starts for 3; any other event is processed at once.
        """
        if not self.events:
            return True
        event = self.events[0]
        if isinstance(event, DocumentStartEvent):
            return self.need_events(1)
        elif isinstance(event, SequenceStartEvent):
            return self.need_events(2)
        elif isinstance(event, MappingStartEvent):
            return self.need_events(3)
        else:
            return False

    def need_events(self, count):
        """Return True if fewer than `count` events follow the queue head.

        Returns False early once the structure opened by the head event is
        already closed in the queue, or a stream end has been queued.
        """
        level = 0
        for event in self.events[1:]:
            if isinstance(event, (DocumentStartEvent, CollectionStartEvent)):
                level += 1
            elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)):
                level -= 1
            elif isinstance(event, StreamEndEvent):
                level = -1
            if level < 0:
                return False
        return (len(self.events) < count+1)

    def increase_indent(self, flow=False, indentless=False):
        """Push the current indent and compute the indent of a nested node."""
        self.indents.append(self.indent)
        if self.indent is None:
            if flow:
                self.indent = self.best_indent
            else:
                self.indent = 0
        elif not indentless:
            self.indent += self.best_indent

    # States.

    # Stream handlers.

    def expect_stream_start(self):
        """Handle STREAM-START; any other event raises EmitterError."""
        if isinstance(self.event, StreamStartEvent):
            # The event's encoding is used only when the stream does not
            # declare an encoding of its own.
            if self.event.encoding and not getattr(self.stream, 'encoding', None):
                self.encoding = self.event.encoding
            self.write_stream_start()
            self.state = self.expect_first_document_start
        else:
            raise EmitterError("expected StreamStartEvent, but got %s"
                    % self.event)

    def expect_nothing(self):
        """Terminal state: any event after the stream end is an error."""
        raise EmitterError("expected nothing, but got %s" % self.event)

    # Document handlers.

    def expect_first_document_start(self):
        """Handle the first DOCUMENT-START (or STREAM-END) of the stream."""
        return self.expect_document_start(first=True)

    def expect_document_start(self, first=False):
        """Handle DOCUMENT-START or STREAM-END.

        Writes the %YAML and %TAG directives requested by the event and the
        '---' marker unless the document may start implicitly.
        """
        if isinstance(self.event, DocumentStartEvent):
            if (self.event.version or self.event.tags) and self.open_ended:
                self.write_indicator(u'...', True)
                self.write_indent()
            if self.event.version:
                version_text = self.prepare_version(self.event.version)
                self.write_version_directive(version_text)
            self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
            if self.event.tags:
                # Emit the tag directives in sorted handle order.
                for handle in sorted(self.event.tags.keys()):
                    prefix = self.event.tags[handle]
                    self.tag_prefixes[prefix] = handle
                    handle_text = self.prepare_tag_handle(handle)
                    prefix_text = self.prepare_tag_prefix(prefix)
                    self.write_tag_directive(handle_text, prefix_text)
            implicit = (first and not self.event.explicit and not self.canonical
                    and not self.event.version and not self.event.tags
                    and not self.check_empty_document())
            if not implicit:
                self.write_indent()
                self.write_indicator(u'---', True)
                if self.canonical:
                    self.write_indent()
            self.state = self.expect_document_root
        elif isinstance(self.event, StreamEndEvent):
            if self.open_ended:
                self.write_indicator(u'...', True)
                self.write_indent()
            self.write_stream_end()
            self.state = self.expect_nothing
        else:
            raise EmitterError("expected DocumentStartEvent, but got %s"
                    % self.event)

    def expect_document_end(self):
        """Handle DOCUMENT-END, writing '...' when the event is explicit."""
        if isinstance(self.event, DocumentEndEvent):
            self.write_indent()
            if self.event.explicit:
                self.write_indicator(u'...', True)
                self.write_indent()
            self.flush_stream()
            self.state = self.expect_document_start
        else:
            raise EmitterError("expected DocumentEndEvent, but got %s"
                    % self.event)

    def expect_document_root(self):
        """Emit the root node; the document end handler runs after it."""
        self.states.append(self.expect_document_end)
        self.expect_node(root=True)

    # Node handlers.

    def expect_node(self, root=False, sequence=False, mapping=False,
            simple_key=False):
        """Record the node context and dispatch on the current event type.

        Raises EmitterError when the event is not an alias, a scalar or a
        collection start.
        """
        self.root_context = root
        self.sequence_context = sequence
        self.mapping_context = mapping
        self.simple_key_context = simple_key
        if isinstance(self.event, AliasEvent):
            self.expect_alias()
        elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
            self.process_anchor(u'&')
            self.process_tag()
            if isinstance(self.event, ScalarEvent):
                self.expect_scalar()
            elif isinstance(self.event, SequenceStartEvent):
                if self.flow_level or self.canonical or self.event.flow_style   \
                        or self.check_empty_sequence():
                    self.expect_flow_sequence()
                else:
                    self.expect_block_sequence()
            elif isinstance(self.event, MappingStartEvent):
                if self.flow_level or self.canonical or self.event.flow_style   \
                        or self.check_empty_mapping():
                    self.expect_flow_mapping()
                else:
                    self.expect_block_mapping()
        else:
            raise EmitterError("expected NodeEvent, but got %s" % self.event)

    def expect_alias(self):
        """Write an alias ('*anchor') and return to the enclosing state."""
        if self.event.anchor is None:
            raise EmitterError("anchor is not specified for alias")
        self.process_anchor(u'*')
        self.state = self.states.pop()

    def expect_scalar(self):
        """Write a scalar node and return to the enclosing state."""
        self.increase_indent(flow=True)
        self.process_scalar()
        self.indent = self.indents.pop()
        self.state = self.states.pop()

    # Flow sequence handlers.

    def expect_flow_sequence(self):
        """Open a flow sequence ('[')."""
        self.write_indicator(u'[', True, whitespace=True)
        self.flow_level += 1
        self.increase_indent(flow=True)
        self.state = self.expect_first_flow_sequence_item

    def expect_first_flow_sequence_item(self):
        """Handle the first item of a flow sequence, or its immediate end."""
        if isinstance(self.event, SequenceEndEvent):
            self.indent = self.indents.pop()
            self.flow_level -= 1
            self.write_indicator(u']', False)
            self.state = self.states.pop()
        else:
            if self.canonical or self.column > self.best_width:
                self.write_indent()
            self.states.append(self.expect_flow_sequence_item)
            self.expect_node(sequence=True)

    def expect_flow_sequence_item(self):
        """Handle a subsequent flow sequence item, or the sequence end."""
        if isinstance(self.event, SequenceEndEvent):
            self.indent = self.indents.pop()
            self.flow_level -= 1
            if self.canonical:
                self.write_indicator(u',', False)
                self.write_indent()
            self.write_indicator(u']', False)
            self.state = self.states.pop()
        else:
            self.write_indicator(u',', False)
            if self.canonical or self.column > self.best_width:
                self.write_indent()
            self.states.append(self.expect_flow_sequence_item)
            self.expect_node(sequence=True)

    # Flow mapping handlers.

    def expect_flow_mapping(self):
        """Open a flow mapping ('{')."""
        self.write_indicator(u'{', True, whitespace=True)
        self.flow_level += 1
        self.increase_indent(flow=True)
        self.state = self.expect_first_flow_mapping_key

    def expect_first_flow_mapping_key(self):
        """Handle the first key of a flow mapping, or its immediate end."""
        if isinstance(self.event, MappingEndEvent):
            self.indent = self.indents.pop()
            self.flow_level -= 1
            self.write_indicator(u'}', False)
            self.state = self.states.pop()
        else:
            if self.canonical or self.column > self.best_width:
                self.write_indent()
            if not self.canonical and self.check_simple_key():
                self.states.append(self.expect_flow_mapping_simple_value)
                self.expect_node(mapping=True, simple_key=True)
            else:
                self.write_indicator(u'?', True)
                self.states.append(self.expect_flow_mapping_value)
                self.expect_node(mapping=True)

    def expect_flow_mapping_key(self):
        """Handle a subsequent flow mapping key, or the mapping end."""
        if isinstance(self.event, MappingEndEvent):
            self.indent = self.indents.pop()
            self.flow_level -= 1
            if self.canonical:
                self.write_indicator(u',', False)
                self.write_indent()
            self.write_indicator(u'}', False)
            self.state = self.states.pop()
        else:
            self.write_indicator(u',', False)
            if self.canonical or self.column > self.best_width:
                self.write_indent()
            if not self.canonical and self.check_simple_key():
                self.states.append(self.expect_flow_mapping_simple_value)
                self.expect_node(mapping=True, simple_key=True)
            else:
                self.write_indicator(u'?', True)
                self.states.append(self.expect_flow_mapping_value)
                self.expect_node(mapping=True)

    def expect_flow_mapping_simple_value(self):
        """Write ':' and the value that follows a simple flow mapping key."""
        self.write_indicator(u':', False)
        self.states.append(self.expect_flow_mapping_key)
        self.expect_node(mapping=True)

    def expect_flow_mapping_value(self):
        """Write ':' and the value that follows an explicit ('?') flow key."""
        if self.canonical or self.column > self.best_width:
            self.write_indent()
        self.write_indicator(u':', True)
        self.states.append(self.expect_flow_mapping_key)
        self.expect_node(mapping=True)

    # Block sequence handlers.

    def expect_block_sequence(self):
        """Open a block sequence.

        The indent is left unchanged (indentless) when the sequence is
        emitted in a mapping context and `self.indention` is not set.
        """
        indentless = (self.mapping_context and not self.indention)
        self.increase_indent(flow=False, indentless=indentless)
        self.state = self.expect_first_block_sequence_item

    def expect_first_block_sequence_item(self):
        """Handle the first item of a block sequence."""
        return self.expect_block_sequence_item(first=True)

    def expect_block_sequence_item(self, first=False):
        """Write a '- ' item, or close the sequence on SEQUENCE-END."""
        if not first and isinstance(self.event, SequenceEndEvent):
            self.indent = self.indents.pop()
            self.state = self.states.pop()
        else:
            self.write_indent()
            self.write_indicator(u'-', True, indention=True)
            self.states.append(self.expect_block_sequence_item)
            self.expect_node(sequence=True)

    # Block mapping handlers.

    def expect_block_mapping(self):
        """Open a block mapping."""
        self.increase_indent(flow=False)
        self.state = self.expect_first_block_mapping_key

    def expect_first_block_mapping_key(self):
        """Handle the first key of a block mapping."""
        return self.expect_block_mapping_key(first=True)

    def expect_block_mapping_key(self, first=False):
        """Write a mapping key, or close the mapping on MAPPING-END."""
        if not first and isinstance(self.event, MappingEndEvent):
            self.indent = self.indents.pop()
            self.state = self.states.pop()
        else:
            self.write_indent()
            if self.check_simple_key():
                self.states.append(self.expect_block_mapping_simple_value)
                self.expect_node(mapping=True, simple_key=True)
            else:
                self.write_indicator(u'?', True, indention=True)
                self.states.append(self.expect_block_mapping_value)
                self.expect_node(mapping=True)

    def expect_block_mapping_simple_value(self):
        """Write ':' and the value that follows a simple block mapping key."""
        self.write_indicator(u':', False)
        self.states.append(self.expect_block_mapping_key)
        self.expect_node(mapping=True)

    def expect_block_mapping_value(self):
        """Write ':' on its own line and the value of an explicit ('?') key."""
        self.write_indent()
        self.write_indicator(u':', True, indention=True)
        self.states.append(self.expect_block_mapping_key)
        self.expect_node(mapping=True)

    # Checkers.

    def check_empty_sequence(self):
        """Truthy if the current event opens a sequence that closes at once."""
        return (isinstance(self.event, SequenceStartEvent) and self.events
                and isinstance(self.events[0], SequenceEndEvent))

    def check_empty_mapping(self):
        """Truthy if the current event opens a mapping that closes at once."""
        return (isinstance(self.event, MappingStartEvent) and self.events
                and isinstance(self.events[0], MappingEndEvent))

    def check_empty_document(self):
        """Truthy if the document root is an untagged, unanchored empty scalar."""
        if not isinstance(self.event, DocumentStartEvent) or not self.events:
            return False
        event = self.events[0]
        return (isinstance(event, ScalarEvent) and event.anchor is None
                and event.tag is None and event.implicit and event.value == u'')

    def check_simple_key(self):
        """Decide whether the current node can be written as a simple key.

        The combined length of anchor, tag and scalar text must stay below
        128, and the node must be an alias, a non-empty single-line scalar,
        or an empty collection.  Prepared anchor, tag and analysis are cached
        on the instance for the processors that run afterwards.
        """
        length = 0
        if isinstance(self.event, NodeEvent) and self.event.anchor is not None:
            if self.prepared_anchor is None:
                self.prepared_anchor = self.prepare_anchor(self.event.anchor)
            length += len(self.prepared_anchor)
        if isinstance(self.event, (ScalarEvent, CollectionStartEvent))  \
                and self.event.tag is not None:
            if self.prepared_tag is None:
                self.prepared_tag = self.prepare_tag(self.event.tag)
            length += len(self.prepared_tag)
        if isinstance(self.event, ScalarEvent):
            if self.analysis is None:
                self.analysis = self.analyze_scalar(self.event.value)
            length += len(self.analysis.scalar)
        return (length < 128 and (isinstance(self.event, AliasEvent)
            or (isinstance(self.event, ScalarEvent)
                    and not self.analysis.empty and not self.analysis.multiline)
            or self.check_empty_sequence() or self.check_empty_mapping()))

    # Anchor, Tag, and Scalar processors.

    def process_anchor(self, indicator):
        """Write the event's anchor prefixed with `indicator` ('&' or '*')."""
        if self.event.anchor is None:
            self.prepared_anchor = None
            return
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(self.event.anchor)
        if self.prepared_anchor:
            self.write_indicator(indicator+self.prepared_anchor, True)
        self.prepared_anchor = None

    def process_tag(self):
        """Write the event's tag unless it may be left implicit.

        For scalars the style is chosen first, because whether the tag can
        be omitted depends on it.  Raises EmitterError when a tag is required
        but the event has none.
        """
        tag = self.event.tag
        if isinstance(self.event, ScalarEvent):
            if self.style is None:
                self.style = self.choose_scalar_style()
            if ((not self.canonical or tag is None) and
                ((self.style == '' and self.event.implicit[0])
                        or (self.style != '' and self.event.implicit[1]))):
                self.prepared_tag = None
                return
            if self.event.implicit[0] and tag is None:
                tag = u'!'
                self.prepared_tag = None
        else:
            if (not self.canonical or tag is None) and self.event.implicit:
                self.prepared_tag = None
                return
        if tag is None:
            raise EmitterError("tag is not specified")
        if self.prepared_tag is None:
            self.prepared_tag = self.prepare_tag(tag)
        if self.prepared_tag:
            self.write_indicator(self.prepared_tag, True)
        self.prepared_tag = None

    def choose_scalar_style(self):
        """Return the style for the current scalar event.

        The result is '' (plain), a single quote, a double quote, '|' or
        '>'.  The requested style is used when the scalar analysis and the
        current context allow it; double quoted is the fallback.
        """
        if self.analysis is None:
            self.analysis = self.analyze_scalar(self.event.value)
        if self.event.style == '"' or self.canonical:
            return '"'
        if not self.event.style and self.event.implicit[0]:
            if (not (self.simple_key_context and
                    (self.analysis.empty or self.analysis.multiline))
                and (self.flow_level and self.analysis.allow_flow_plain
                    or (not self.flow_level and self.analysis.allow_block_plain))):
                return ''
        if self.event.style and self.event.style in '|>':
            if (not self.flow_level and not self.simple_key_context
                    and self.analysis.allow_block):
                return self.event.style
        if not self.event.style or self.event.style == '\'':
            if (self.analysis.allow_single_quoted and
                    not (self.simple_key_context and self.analysis.multiline)):
                return '\''
        return '"'

    def process_scalar(self):
        """Write the current scalar in its chosen style and reset the caches."""
        if self.analysis is None:
            self.analysis = self.analyze_scalar(self.event.value)
        if self.style is None:
            self.style = self.choose_scalar_style()
        # Simple keys are never split across lines.
        split = (not self.simple_key_context)
        #if self.analysis.multiline and split    \
        #        and (not self.style or self.style in '\'\"'):
        #    self.write_indent()
        if self.style == '"':
            self.write_double_quoted(self.analysis.scalar, split)
        elif self.style == '\'':
            self.write_single_quoted(self.analysis.scalar, split)
        elif self.style == '>':
            self.write_folded(self.analysis.scalar)
        elif self.style == '|':
            self.write_literal(self.analysis.scalar)
        else:
            self.write_plain(self.analysis.scalar, split)
        self.analysis = None
        self.style = None

    # Analyzers.

    def prepare_version(self, version):
        """Return 'major.minor' text; raise EmitterError unless major is 1."""
        major, minor = version
        if major != 1:
            raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
        return u'%d.%d' % (major, minor)

    def prepare_tag_handle(self, handle):
        """Validate a tag handle ('!', '!!' or '!word!') and return it."""
        if not handle:
            raise EmitterError("tag handle must not be empty")
        if handle[0] != u'!' or handle[-1] != u'!':
            raise EmitterError("tag handle must start and end with '!': %r"
                    % (handle.encode('utf-8')))
        for ch in handle[1:-1]:
            if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z'  \
                    or ch in u'-_'):
                raise EmitterError("invalid character %r in the tag handle: %r"
                        % (ch.encode('utf-8'), handle.encode('utf-8')))
        return handle

    def prepare_tag_prefix(self, prefix):
        """Return `prefix` with every disallowed character percent-escaped."""
        if not prefix:
            raise EmitterError("tag prefix must not be empty")
        chunks = []
        start = end = 0
        if prefix[0] == u'!':
            end = 1
        while end < len(prefix):
            ch = prefix[end]
            if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z'   \
                    or ch in u'-;/?!:@&=+$,_.~*\'()[]':
                end += 1
            else:
                if start < end:
                    chunks.append(prefix[start:end])
                start = end = end+1
                # Escape each UTF-8 byte of the character as %XX.
                for byte in bytearray(ch.encode('utf-8')):
                    chunks.append(u'%%%02X' % byte)
        if start < end:
            chunks.append(prefix[start:end])
        return u''.join(chunks)

    def prepare_tag(self, tag):
        """Return the text to write for `tag`.

        A known prefix from `self.tag_prefixes` is replaced by its handle;
        when none applies the verbatim '!<...>' form is used.  Disallowed
        characters in the suffix are percent-escaped.
        """
        if not tag:
            raise EmitterError("tag must not be empty")
        if tag == u'!':
            return tag
        handle = None
        suffix = tag
        # The last matching prefix in sorted order wins.
        for prefix in sorted(self.tag_prefixes.keys()):
            if tag.startswith(prefix)   \
                    and (prefix == u'!' or len(prefix) < len(tag)):
                handle = self.tag_prefixes[prefix]
                suffix = tag[len(prefix):]
        chunks = []
        start = end = 0
        while end < len(suffix):
            ch = suffix[end]
            if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z'   \
                    or ch in u'-;/?:@&=+$,_.~*\'()[]'   \
                    or (ch == u'!' and handle != u'!'):
                end += 1
            else:
                if start < end:
                    chunks.append(suffix[start:end])
                start = end = end+1
                # Escape each UTF-8 byte of the character as %XX.
                for byte in bytearray(ch.encode('utf-8')):
                    chunks.append(u'%%%02X' % byte)
        if start < end:
            chunks.append(suffix[start:end])
        suffix_text = u''.join(chunks)
        if handle:
            return u'%s%s' % (handle, suffix_text)
        else:
            return u'!<%s>' % suffix_text

    def prepare_anchor(self, anchor):
        """Validate an anchor (ASCII alphanumerics, '-' and '_') and return it."""
        if not anchor:
            raise EmitterError("anchor must not be empty")
        for ch in anchor:
            if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z'  \
                    or ch in u'-_'):
                raise EmitterError("invalid character %r in the anchor: %r"
                        % (ch.encode('utf-8'), anchor.encode('utf-8')))
        return anchor

    def analyze_scalar(self, scalar):
        """Inspect `scalar` and return a ScalarAnalysis of the allowed styles."""

        # Empty scalar is a special case.
        if not scalar:
            return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
                    allow_flow_plain=False, allow_block_plain=True,
                    allow_single_quoted=True, allow_double_quoted=True,
                    allow_block=False)

        # Indicators and special characters.
        block_indicators = False
        flow_indicators = False
        line_breaks = False
        special_characters = False

        # Important whitespace combinations.
        leading_space = False
        leading_break = False
        trailing_space = False
        trailing_break = False
        break_space = False
        space_break = False

        # Check document indicators.
        if scalar.startswith(u'---') or scalar.startswith(u'...'):
            block_indicators = True
            flow_indicators = True

        # First character or preceded by a whitespace.
        preceded_by_whitespace = True

        # Last character or followed by a whitespace.
        followed_by_whitespace = (len(scalar) == 1 or
                scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')

        # The previous character is a space.
        previous_space = False

        # The previous character is a break.
        previous_break = False

        index = 0
        while index < len(scalar):
            ch = scalar[index]

            # Check for indicators.
            if index == 0:
                # Leading indicators are special characters.
                if ch in u'#,[]{}&*!|>\'\"%@`':
                    flow_indicators = True
                    block_indicators = True
                if ch in u'?:':
                    flow_indicators = True
                    if followed_by_whitespace:
                        block_indicators = True
                if ch == u'-' and followed_by_whitespace:
                    flow_indicators = True
                    block_indicators = True
            else:
                # Some indicators cannot appear within a scalar as well.
                if ch in u',?[]{}':
                    flow_indicators = True
                if ch == u':':
                    flow_indicators = True
                    if followed_by_whitespace:
                        block_indicators = True
                if ch == u'#' and preceded_by_whitespace:
                    flow_indicators = True
                    block_indicators = True

            # Check for line breaks, special, and unicode characters.
            if ch in u'\n\x85\u2028\u2029':
                line_breaks = True
            if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
                if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
                        or u'\uE000' <= ch <= u'\uFFFD'
                        or (u'\U00010000' <= ch < u'\U0010ffff')) and ch != u'\uFEFF':
                    # Printable non-ASCII: special only if unicode output
                    # is not allowed.
                    if not self.allow_unicode:
                        special_characters = True
                else:
                    special_characters = True

            # Detect important whitespace combinations.
            if ch == u' ':
                if index == 0:
                    leading_space = True
                if index == len(scalar)-1:
                    trailing_space = True
                if previous_break:
                    break_space = True
                previous_space = True
                previous_break = False
            elif ch in u'\n\x85\u2028\u2029':
                if index == 0:
                    leading_break = True
                if index == len(scalar)-1:
                    trailing_break = True
                if previous_space:
                    space_break = True
                previous_space = False
                previous_break = True
            else:
                previous_space = False
                previous_break = False

            # Prepare for the next character.
            index += 1
            preceded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029')
            followed_by_whitespace = (index+1 >= len(scalar) or
                    scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')

        # Let's decide what styles are allowed.
        allow_flow_plain = True
        allow_block_plain = True
        allow_single_quoted = True
        allow_double_quoted = True
        allow_block = True

        # Leading and trailing whitespaces are bad for plain scalars.
        if (leading_space or leading_break
                or trailing_space or trailing_break):
            allow_flow_plain = allow_block_plain = False

        # We do not permit trailing spaces for block scalars.
        if trailing_space:
            allow_block = False

        # Spaces at the beginning of a new line are only acceptable for block
        # scalars.
        if break_space:
            allow_flow_plain = allow_block_plain = allow_single_quoted = False

        # Spaces followed by breaks, as well as special character are only
        # allowed for double quoted scalars.
        if space_break or special_characters:
            allow_flow_plain = allow_block_plain =  \
            allow_single_quoted = allow_block = False

        # Although the plain scalar writer supports breaks, we never emit
        # multiline plain scalars.
        if line_breaks:
            allow_flow_plain = allow_block_plain = False

        # Flow indicators are forbidden for flow plain scalars.
        if flow_indicators:
            allow_flow_plain = False

        # Block indicators are forbidden for block plain scalars.
        if block_indicators:
            allow_block_plain = False

        return ScalarAnalysis(scalar=scalar,
                empty=False, multiline=line_breaks,
                allow_flow_plain=allow_flow_plain,
                allow_block_plain=allow_block_plain,
                allow_single_quoted=allow_single_quoted,
                allow_double_quoted=allow_double_quoted,
                allow_block=allow_block)

    # Writers.

    def _write_data(self, data):
        """Encode `data` when an output encoding is set and write it out.

        Position bookkeeping (`self.column`, `self.line`) is left to callers.
        """
        if self.encoding:
            data = data.encode(self.encoding)
        self.stream.write(data)

    def flush_stream(self):
        """Flush the output stream if it has a `flush` method."""
        if hasattr(self.stream, 'flush'):
            self.stream.flush()

    def write_stream_start(self):
        """Write a byte order mark when the encoding is a UTF-16 variant."""
        # Write BOM if needed.
        if self.encoding and self.encoding.startswith('utf-16'):
            self.stream.write(u'\uFEFF'.encode(self.encoding))

    def write_stream_end(self):
        """Finish the stream by flushing it."""
        self.flush_stream()

    def write_indicator(self, indicator, need_whitespace,
            whitespace=False, indention=False):
        """Write `indicator`, preceded by a space when one is required.

        `whitespace` and `indention` describe the indicator just written and
        update the emitter's view of the last emitted character.
        """
        if self.whitespace or not need_whitespace:
            data = indicator
        else:
            data = u' '+indicator
        self.whitespace = whitespace
        self.indention = self.indention and indention
        self.column += len(data)
        self.open_ended = False
        self._write_data(data)

    def write_indent(self):
        """Move to the current indent, starting a new line when needed."""
        indent = self.indent or 0
        if not self.indention or self.column > indent   \
                or (self.column == indent and not self.whitespace):
            self.write_line_break()
        if self.column < indent:
            self.whitespace = True
            data = u' '*(indent-self.column)
            self.column = indent
            self._write_data(data)

    def write_line_break(self, data=None):
        """Write a line break (`best_line_break` by default) and reset the column."""
        if data is None:
            data = self.best_line_break
        self.whitespace = True
        self.indention = True
        self.line += 1
        self.column = 0
        self._write_data(data)

    def write_version_directive(self, version_text):
        """Write a '%YAML <version>' directive line."""
        data = u'%%YAML %s' % version_text
        self._write_data(data)
        self.write_line_break()

    def write_tag_directive(self, handle_text, prefix_text):
        """Write a '%TAG <handle> <prefix>' directive line."""
        data = u'%%TAG %s %s' % (handle_text, prefix_text)
        self._write_data(data)
        self.write_line_break()

    # Scalar streams.

    def write_single_quoted(self, text, split=True):
        """Write `text` as a single-quoted scalar, doubling embedded quotes.

        With `split` true, a lone interior space may be replaced by a line
        fold once the column exceeds `best_width`.
        """
        self.write_indicator(u'\'', True)
        spaces = False
        breaks = False
        start = end = 0
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            if spaces:
                if ch is None or ch != u' ':
                    if start+1 == end and self.column > self.best_width and split   \
                            and start != 0 and end != len(text):
                        self.write_indent()
                    else:
                        data = text[start:end]
                        self.column += len(data)
                        self._write_data(data)
                    start = end
            elif breaks:
                if ch is None or ch not in u'\n\x85\u2028\u2029':
                    if text[start] == u'\n':
                        self.write_line_break()
                    for br in text[start:end]:
                        if br == u'\n':
                            self.write_line_break()
                        else:
                            self.write_line_break(br)
                    self.write_indent()
                    start = end
            else:
                if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'':
                    if start < end:
                        data = text[start:end]
                        self.column += len(data)
                        self._write_data(data)
                        start = end
            if ch == u'\'':
                data = u'\'\''
                self.column += 2
                self._write_data(data)
                start = end + 1
            if ch is not None:
                spaces = (ch == u' ')
                breaks = (ch in u'\n\x85\u2028\u2029')
            end += 1
        self.write_indicator(u'\'', False)

    ESCAPE_REPLACEMENTS = {
        u'\0':      u'0',
        u'\x07':    u'a',
        u'\x08':    u'b',
        u'\x09':    u't',
        u'\x0A':    u'n',
        u'\x0B':    u'v',
        u'\x0C':    u'f',
        u'\x0D':    u'r',
        u'\x1B':    u'e',
        u'\"':      u'\"',
        u'\\':      u'\\',
        u'\x85':    u'N',
        u'\xA0':    u'_',
        u'\u2028':  u'L',
        u'\u2029':  u'P',
    }

    def write_double_quoted(self, text, split=True):
        """Write `text` as a double-quoted scalar with backslash escapes.

        Characters outside the printable set are escaped, using
        ESCAPE_REPLACEMENTS when an entry exists and hex escapes otherwise.
        With `split` true, long lines are broken with a trailing backslash.
        """
        self.write_indicator(u'"', True)
        start = end = 0
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \
                    or not (u'\x20' <= ch <= u'\x7E'
                        or (self.allow_unicode
                            and (u'\xA0' <= ch <= u'\uD7FF'
                                or u'\uE000' <= ch <= u'\uFFFD'))):
                if start < end:
                    data = text[start:end]
                    self.column += len(data)
                    self._write_data(data)
                    start = end
                if ch is not None:
                    if ch in self.ESCAPE_REPLACEMENTS:
                        data = u'\\'+self.ESCAPE_REPLACEMENTS[ch]
                    elif ch <= u'\xFF':
                        data = u'\\x%02X' % ord(ch)
                    elif ch <= u'\uFFFF':
                        data = u'\\u%04X' % ord(ch)
                    else:
                        data = u'\\U%08X' % ord(ch)
                    self.column += len(data)
                    self._write_data(data)
                    start = end+1
            if 0 < end < len(text)-1 and (ch == u' ' or start >= end)   \
                    and self.column+(end-start) > self.best_width and split:
                data = text[start:end]+u'\\'
                if start < end:
                    start = end
                self.column += len(data)
                self._write_data(data)
                self.write_indent()
                self.whitespace = False
                self.indention = False
                # A backslash is written before a space that would otherwise
                # start the continuation line.
                if text[start] == u' ':
                    data = u'\\'
                    self.column += len(data)
                    self._write_data(data)
            end += 1
        self.write_indicator(u'"', False)

    def determine_block_hints(self, text):
        """Return the block scalar header hints for `text`.

        The indent is added when the text starts with a space or a break,
        '-' when it does not end with a break, and '+' when it ends with
        more than one break or consists of a single break.
        """
        hints = u''
        if text:
            if text[0] in u' \n\x85\u2028\u2029':
                hints += unicode(self.best_indent)
            if text[-1] not in u'\n\x85\u2028\u2029':
                hints += u'-'
            elif len(text) == 1 or text[-2] in u'\n\x85\u2028\u2029':
                hints += u'+'
        return hints

    def write_folded(self, text):
        """Write `text` as a folded ('>') block scalar."""
        hints = self.determine_block_hints(text)
        self.write_indicator(u'>'+hints, True)
        if hints[-1:] == u'+':
            self.open_ended = True
        self.write_line_break()
        leading_space = True
        spaces = False
        breaks = True
        start = end = 0
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            if breaks:
                if ch is None or ch not in u'\n\x85\u2028\u2029':
                    if not leading_space and ch is not None and ch != u' '   \
                            and text[start] == u'\n':
                        self.write_line_break()
                    leading_space = (ch == u' ')
                    for br in text[start:end]:
                        if br == u'\n':
                            self.write_line_break()
                        else:
                            self.write_line_break(br)
                    if ch is not None:
                        self.write_indent()
                    start = end
            elif spaces:
                if ch != u' ':
                    if start+1 == end and self.column > self.best_width:
                        self.write_indent()
                    else:
                        data = text[start:end]
                        self.column += len(data)
                        self._write_data(data)
                    start = end
            else:
                if ch is None or ch in u' \n\x85\u2028\u2029':
                    data = text[start:end]
                    self.column += len(data)
                    self._write_data(data)
                    if ch is None:
                        self.write_line_break()
                    start = end
            if ch is not None:
                breaks = (ch in u'\n\x85\u2028\u2029')
                spaces = (ch == u' ')
            end += 1

    def write_literal(self, text):
        """Write `text` as a literal ('|') block scalar."""
        hints = self.determine_block_hints(text)
        self.write_indicator(u'|'+hints, True)
        if hints[-1:] == u'+':
            self.open_ended = True
        self.write_line_break()
        breaks = True
        start = end = 0
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            if breaks:
                if ch is None or ch not in u'\n\x85\u2028\u2029':
                    for br in text[start:end]:
                        if br == u'\n':
                            self.write_line_break()
                        else:
                            self.write_line_break(br)
                    if ch is not None:
                        self.write_indent()
                    start = end
            else:
                if ch is None or ch in u'\n\x85\u2028\u2029':
                    data = text[start:end]
                    self._write_data(data)
                    if ch is None:
                        self.write_line_break()
                    start = end
            if ch is not None:
                breaks = (ch in u'\n\x85\u2028\u2029')
            end += 1

    def write_plain(self, text, split=True):
        """Write `text` as a plain (unquoted) scalar.

        With `split` true, a lone space may be replaced by a line fold once
        the column exceeds `best_width`.
        """
        if self.root_context:
            self.open_ended = True
        if not text:
            return
        if not self.whitespace:
            data = u' '
            self.column += len(data)
            self._write_data(data)
        self.whitespace = False
        self.indention = False
        spaces = False
        breaks = False
        start = end = 0
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            if spaces:
                if ch != u' ':
                    if start+1 == end and self.column > self.best_width and split:
                        self.write_indent()
                        self.whitespace = False
                        self.indention = False
                    else:
                        data = text[start:end]
                        self.column += len(data)
                        self._write_data(data)
                    start = end
            elif breaks:
                # `ch` is None at the end of the text; test for that first,
                # because `None not in <string>` raises TypeError.
                if ch is None or ch not in u'\n\x85\u2028\u2029':
                    if text[start] == u'\n':
                        self.write_line_break()
                    for br in text[start:end]:
                        if br == u'\n':
                            self.write_line_break()
                        else:
                            self.write_line_break(br)
                    self.write_indent()
                    self.whitespace = False
                    self.indention = False
                    start = end
            else:
                if ch is None or ch in u' \n\x85\u2028\u2029':
                    data = text[start:end]
                    self.column += len(data)
                    self._write_data(data)
                    start = end
            if ch is not None:
                spaces = (ch == u' ')
                breaks = (ch in u'\n\x85\u2028\u2029')
            end += 1