# Emitter expects events obeying the following grammar:
# stream ::= STREAM-START document* STREAM-END
# document ::= DOCUMENT-START node DOCUMENT-END
# node ::= SCALAR | sequence | mapping
# sequence ::= SEQUENCE-START node* SEQUENCE-END
# mapping ::= MAPPING-START (node node)* MAPPING-END

__all__ = ['Emitter', 'EmitterError']

from .error import YAMLError
from .events import *


class EmitterError(YAMLError):
    """Error raised by the emitter for unexpected events or invalid data."""


class ScalarAnalysis:
    """Plain record of what `Emitter.analyze_scalar` learned about a scalar."""

    def __init__(self, scalar, empty, multiline,
            allow_flow_plain, allow_block_plain,
            allow_single_quoted, allow_double_quoted,
            allow_block):
        # The analyzed text and its basic shape.
        self.scalar = scalar
        self.empty = empty
        self.multiline = multiline
        # Which output styles are permitted for this text.
        self.allow_flow_plain = allow_flow_plain
        self.allow_block_plain = allow_block_plain
        self.allow_single_quoted = allow_single_quoted
        self.allow_double_quoted = allow_double_quoted
        self.allow_block = allow_block


class Emitter:
    """Event-driven state machine that writes a YAML stream to `stream`."""

    DEFAULT_TAG_PREFIXES = {
        '!' : '!',
        'tag:yaml.org,2002:' : '!!',
    }

    def __init__(self, stream, canonical=None, indent=None, width=None,
            allow_unicode=None, line_break=None):
        """Set up the emitter state.

        `stream` should have the method `write` and possibly `flush`.
        `indent` is honoured only when 1 < indent < 10 (default 2), `width`
        only when it exceeds twice the chosen indent (default 80), and
        `line_break` only when it is '\\r', '\\n' or '\\r\\n' (default '\\n').
        """
        self.stream = stream

        # Encoding can be overridden by STREAM-START.
        self.encoding = None

        # Formatting details.
        self.canonical = canonical
        self.allow_unicode = allow_unicode
        self.best_indent = indent if (indent and 1 < indent < 10) else 2
        self.best_width = (width
                if (width and width > self.best_indent*2) else 80)
        self.best_line_break = (line_break
                if line_break in ('\r', '\n', '\r\n') else '\n')

        # The emitter is a state machine; nested structures are handled
        # with a stack of pending states.
        self.states = []
        self.state = self.expect_stream_start

        # Event queue and the event currently being processed.
        self.events = []
        self.event = None

        # Current indentation level and the stack of enclosing indents.
        self.indents = []
        self.indent = None

        # Flow level.
        self.flow_level = 0

        # Contexts of the node being emitted.
        self.root_context = False
        self.sequence_context = False
        self.mapping_context = False
        self.simple_key_context = False

        # Characteristics of the last emitted character:
        #  - current position.
        #  - is it a whitespace?
        #  - is it an indention character
        #    (indentation space, '-', '?', or ':')?
        self.line = 0
        self.column = 0
        self.whitespace = True
        self.indention = True

        # Whether the document requires an explicit document indicator.
        self.open_ended = False

        # Tag prefixes (filled in at DOCUMENT-START).
        self.tag_prefixes = None

        # Prepared anchor and tag.
        self.prepared_anchor = None
        self.prepared_tag = None

        # Scalar analysis and style.
        self.analysis = None
        self.style = None

    def dispose(self):
        """Reset the state attributes (to clear self-references)."""
        self.states = []
        self.state = None

    def emit(self, event):
        """Queue `event` and process queued events while enough are available."""
        self.events.append(event)
        while True:
            if self.need_more_events():
                return
            self.event = self.events.pop(0)
            self.state()
            self.event = None

    # In some cases, we wait for a few next events before emitting.
119 120 def need_more_events(self): 121 if not self.events: 122 return True 123 event = self.events[0] 124 if isinstance(event, DocumentStartEvent): 125 return self.need_events(1) 126 elif isinstance(event, SequenceStartEvent): 127 return self.need_events(2) 128 elif isinstance(event, MappingStartEvent): 129 return self.need_events(3) 130 else: 131 return False 132 133 def need_events(self, count): 134 level = 0 135 for event in self.events[1:]: 136 if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): 137 level += 1 138 elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): 139 level -= 1 140 elif isinstance(event, StreamEndEvent): 141 level = -1 142 if level < 0: 143 return False 144 return (len(self.events) < count+1) 145 146 def increase_indent(self, flow=False, indentless=False): 147 self.indents.append(self.indent) 148 if self.indent is None: 149 if flow: 150 self.indent = self.best_indent 151 else: 152 self.indent = 0 153 elif not indentless: 154 self.indent += self.best_indent 155 156 # States. 157 158 # Stream handlers. 159 160 def expect_stream_start(self): 161 if isinstance(self.event, StreamStartEvent): 162 if self.event.encoding and not hasattr(self.stream, 'encoding'): 163 self.encoding = self.event.encoding 164 self.write_stream_start() 165 self.state = self.expect_first_document_start 166 else: 167 raise EmitterError("expected StreamStartEvent, but got %s" 168 % self.event) 169 170 def expect_nothing(self): 171 raise EmitterError("expected nothing, but got %s" % self.event) 172 173 # Document handlers. 
174 175 def expect_first_document_start(self): 176 return self.expect_document_start(first=True) 177 178 def expect_document_start(self, first=False): 179 if isinstance(self.event, DocumentStartEvent): 180 if (self.event.version or self.event.tags) and self.open_ended: 181 self.write_indicator('...', True) 182 self.write_indent() 183 if self.event.version: 184 version_text = self.prepare_version(self.event.version) 185 self.write_version_directive(version_text) 186 self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() 187 if self.event.tags: 188 handles = sorted(self.event.tags.keys()) 189 for handle in handles: 190 prefix = self.event.tags[handle] 191 self.tag_prefixes[prefix] = handle 192 handle_text = self.prepare_tag_handle(handle) 193 prefix_text = self.prepare_tag_prefix(prefix) 194 self.write_tag_directive(handle_text, prefix_text) 195 implicit = (first and not self.event.explicit and not self.canonical 196 and not self.event.version and not self.event.tags 197 and not self.check_empty_document()) 198 if not implicit: 199 self.write_indent() 200 self.write_indicator('---', True) 201 if self.canonical: 202 self.write_indent() 203 self.state = self.expect_document_root 204 elif isinstance(self.event, StreamEndEvent): 205 if self.open_ended: 206 self.write_indicator('...', True) 207 self.write_indent() 208 self.write_stream_end() 209 self.state = self.expect_nothing 210 else: 211 raise EmitterError("expected DocumentStartEvent, but got %s" 212 % self.event) 213 214 def expect_document_end(self): 215 if isinstance(self.event, DocumentEndEvent): 216 self.write_indent() 217 if self.event.explicit: 218 self.write_indicator('...', True) 219 self.write_indent() 220 self.flush_stream() 221 self.state = self.expect_document_start 222 else: 223 raise EmitterError("expected DocumentEndEvent, but got %s" 224 % self.event) 225 226 def expect_document_root(self): 227 self.states.append(self.expect_document_end) 228 self.expect_node(root=True) 229 230 # Node handlers. 
231 232 def expect_node(self, root=False, sequence=False, mapping=False, 233 simple_key=False): 234 self.root_context = root 235 self.sequence_context = sequence 236 self.mapping_context = mapping 237 self.simple_key_context = simple_key 238 if isinstance(self.event, AliasEvent): 239 self.expect_alias() 240 elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): 241 self.process_anchor('&') 242 self.process_tag() 243 if isinstance(self.event, ScalarEvent): 244 self.expect_scalar() 245 elif isinstance(self.event, SequenceStartEvent): 246 if self.flow_level or self.canonical or self.event.flow_style \ 247 or self.check_empty_sequence(): 248 self.expect_flow_sequence() 249 else: 250 self.expect_block_sequence() 251 elif isinstance(self.event, MappingStartEvent): 252 if self.flow_level or self.canonical or self.event.flow_style \ 253 or self.check_empty_mapping(): 254 self.expect_flow_mapping() 255 else: 256 self.expect_block_mapping() 257 else: 258 raise EmitterError("expected NodeEvent, but got %s" % self.event) 259 260 def expect_alias(self): 261 if self.event.anchor is None: 262 raise EmitterError("anchor is not specified for alias") 263 self.process_anchor('*') 264 self.state = self.states.pop() 265 266 def expect_scalar(self): 267 self.increase_indent(flow=True) 268 self.process_scalar() 269 self.indent = self.indents.pop() 270 self.state = self.states.pop() 271 272 # Flow sequence handlers. 
273 274 def expect_flow_sequence(self): 275 self.write_indicator('[', True, whitespace=True) 276 self.flow_level += 1 277 self.increase_indent(flow=True) 278 self.state = self.expect_first_flow_sequence_item 279 280 def expect_first_flow_sequence_item(self): 281 if isinstance(self.event, SequenceEndEvent): 282 self.indent = self.indents.pop() 283 self.flow_level -= 1 284 self.write_indicator(']', False) 285 self.state = self.states.pop() 286 else: 287 if self.canonical or self.column > self.best_width: 288 self.write_indent() 289 self.states.append(self.expect_flow_sequence_item) 290 self.expect_node(sequence=True) 291 292 def expect_flow_sequence_item(self): 293 if isinstance(self.event, SequenceEndEvent): 294 self.indent = self.indents.pop() 295 self.flow_level -= 1 296 if self.canonical: 297 self.write_indicator(',', False) 298 self.write_indent() 299 self.write_indicator(']', False) 300 self.state = self.states.pop() 301 else: 302 self.write_indicator(',', False) 303 if self.canonical or self.column > self.best_width: 304 self.write_indent() 305 self.states.append(self.expect_flow_sequence_item) 306 self.expect_node(sequence=True) 307 308 # Flow mapping handlers. 
309 310 def expect_flow_mapping(self): 311 self.write_indicator('{', True, whitespace=True) 312 self.flow_level += 1 313 self.increase_indent(flow=True) 314 self.state = self.expect_first_flow_mapping_key 315 316 def expect_first_flow_mapping_key(self): 317 if isinstance(self.event, MappingEndEvent): 318 self.indent = self.indents.pop() 319 self.flow_level -= 1 320 self.write_indicator('}', False) 321 self.state = self.states.pop() 322 else: 323 if self.canonical or self.column > self.best_width: 324 self.write_indent() 325 if not self.canonical and self.check_simple_key(): 326 self.states.append(self.expect_flow_mapping_simple_value) 327 self.expect_node(mapping=True, simple_key=True) 328 else: 329 self.write_indicator('?', True) 330 self.states.append(self.expect_flow_mapping_value) 331 self.expect_node(mapping=True) 332 333 def expect_flow_mapping_key(self): 334 if isinstance(self.event, MappingEndEvent): 335 self.indent = self.indents.pop() 336 self.flow_level -= 1 337 if self.canonical: 338 self.write_indicator(',', False) 339 self.write_indent() 340 self.write_indicator('}', False) 341 self.state = self.states.pop() 342 else: 343 self.write_indicator(',', False) 344 if self.canonical or self.column > self.best_width: 345 self.write_indent() 346 if not self.canonical and self.check_simple_key(): 347 self.states.append(self.expect_flow_mapping_simple_value) 348 self.expect_node(mapping=True, simple_key=True) 349 else: 350 self.write_indicator('?', True) 351 self.states.append(self.expect_flow_mapping_value) 352 self.expect_node(mapping=True) 353 354 def expect_flow_mapping_simple_value(self): 355 self.write_indicator(':', False) 356 self.states.append(self.expect_flow_mapping_key) 357 self.expect_node(mapping=True) 358 359 def expect_flow_mapping_value(self): 360 if self.canonical or self.column > self.best_width: 361 self.write_indent() 362 self.write_indicator(':', True) 363 self.states.append(self.expect_flow_mapping_key) 364 self.expect_node(mapping=True) 
365 366 # Block sequence handlers. 367 368 def expect_block_sequence(self): 369 indentless = (self.mapping_context and not self.indention) 370 self.increase_indent(flow=False, indentless=indentless) 371 self.state = self.expect_first_block_sequence_item 372 373 def expect_first_block_sequence_item(self): 374 return self.expect_block_sequence_item(first=True) 375 376 def expect_block_sequence_item(self, first=False): 377 if not first and isinstance(self.event, SequenceEndEvent): 378 self.indent = self.indents.pop() 379 self.state = self.states.pop() 380 else: 381 self.write_indent() 382 self.write_indicator('-', True, indention=True) 383 self.states.append(self.expect_block_sequence_item) 384 self.expect_node(sequence=True) 385 386 # Block mapping handlers. 387 388 def expect_block_mapping(self): 389 self.increase_indent(flow=False) 390 self.state = self.expect_first_block_mapping_key 391 392 def expect_first_block_mapping_key(self): 393 return self.expect_block_mapping_key(first=True) 394 395 def expect_block_mapping_key(self, first=False): 396 if not first and isinstance(self.event, MappingEndEvent): 397 self.indent = self.indents.pop() 398 self.state = self.states.pop() 399 else: 400 self.write_indent() 401 if self.check_simple_key(): 402 self.states.append(self.expect_block_mapping_simple_value) 403 self.expect_node(mapping=True, simple_key=True) 404 else: 405 self.write_indicator('?', True, indention=True) 406 self.states.append(self.expect_block_mapping_value) 407 self.expect_node(mapping=True) 408 409 def expect_block_mapping_simple_value(self): 410 self.write_indicator(':', False) 411 self.states.append(self.expect_block_mapping_key) 412 self.expect_node(mapping=True) 413 414 def expect_block_mapping_value(self): 415 self.write_indent() 416 self.write_indicator(':', True, indention=True) 417 self.states.append(self.expect_block_mapping_key) 418 self.expect_node(mapping=True) 419 420 # Checkers. 
421 422 def check_empty_sequence(self): 423 return (isinstance(self.event, SequenceStartEvent) and self.events 424 and isinstance(self.events[0], SequenceEndEvent)) 425 426 def check_empty_mapping(self): 427 return (isinstance(self.event, MappingStartEvent) and self.events 428 and isinstance(self.events[0], MappingEndEvent)) 429 430 def check_empty_document(self): 431 if not isinstance(self.event, DocumentStartEvent) or not self.events: 432 return False 433 event = self.events[0] 434 return (isinstance(event, ScalarEvent) and event.anchor is None 435 and event.tag is None and event.implicit and event.value == '') 436 437 def check_simple_key(self): 438 length = 0 439 if isinstance(self.event, NodeEvent) and self.event.anchor is not None: 440 if self.prepared_anchor is None: 441 self.prepared_anchor = self.prepare_anchor(self.event.anchor) 442 length += len(self.prepared_anchor) 443 if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ 444 and self.event.tag is not None: 445 if self.prepared_tag is None: 446 self.prepared_tag = self.prepare_tag(self.event.tag) 447 length += len(self.prepared_tag) 448 if isinstance(self.event, ScalarEvent): 449 if self.analysis is None: 450 self.analysis = self.analyze_scalar(self.event.value) 451 length += len(self.analysis.scalar) 452 return (length < 128 and (isinstance(self.event, AliasEvent) 453 or (isinstance(self.event, ScalarEvent) 454 and not self.analysis.empty and not self.analysis.multiline) 455 or self.check_empty_sequence() or self.check_empty_mapping())) 456 457 # Anchor, Tag, and Scalar processors. 
458 459 def process_anchor(self, indicator): 460 if self.event.anchor is None: 461 self.prepared_anchor = None 462 return 463 if self.prepared_anchor is None: 464 self.prepared_anchor = self.prepare_anchor(self.event.anchor) 465 if self.prepared_anchor: 466 self.write_indicator(indicator+self.prepared_anchor, True) 467 self.prepared_anchor = None 468 469 def process_tag(self): 470 tag = self.event.tag 471 if isinstance(self.event, ScalarEvent): 472 if self.style is None: 473 self.style = self.choose_scalar_style() 474 if ((not self.canonical or tag is None) and 475 ((self.style == '' and self.event.implicit[0]) 476 or (self.style != '' and self.event.implicit[1]))): 477 self.prepared_tag = None 478 return 479 if self.event.implicit[0] and tag is None: 480 tag = '!' 481 self.prepared_tag = None 482 else: 483 if (not self.canonical or tag is None) and self.event.implicit: 484 self.prepared_tag = None 485 return 486 if tag is None: 487 raise EmitterError("tag is not specified") 488 if self.prepared_tag is None: 489 self.prepared_tag = self.prepare_tag(tag) 490 if self.prepared_tag: 491 self.write_indicator(self.prepared_tag, True) 492 self.prepared_tag = None 493 494 def choose_scalar_style(self): 495 if self.analysis is None: 496 self.analysis = self.analyze_scalar(self.event.value) 497 if self.event.style == '"' or self.canonical: 498 return '"' 499 if not self.event.style and self.event.implicit[0]: 500 if (not (self.simple_key_context and 501 (self.analysis.empty or self.analysis.multiline)) 502 and (self.flow_level and self.analysis.allow_flow_plain 503 or (not self.flow_level and self.analysis.allow_block_plain))): 504 return '' 505 if self.event.style and self.event.style in '|>': 506 if (not self.flow_level and not self.simple_key_context 507 and self.analysis.allow_block): 508 return self.event.style 509 if not self.event.style or self.event.style == '\'': 510 if (self.analysis.allow_single_quoted and 511 not (self.simple_key_context and 
self.analysis.multiline)): 512 return '\'' 513 return '"' 514 515 def process_scalar(self): 516 if self.analysis is None: 517 self.analysis = self.analyze_scalar(self.event.value) 518 if self.style is None: 519 self.style = self.choose_scalar_style() 520 split = (not self.simple_key_context) 521 #if self.analysis.multiline and split \ 522 # and (not self.style or self.style in '\'\"'): 523 # self.write_indent() 524 if self.style == '"': 525 self.write_double_quoted(self.analysis.scalar, split) 526 elif self.style == '\'': 527 self.write_single_quoted(self.analysis.scalar, split) 528 elif self.style == '>': 529 self.write_folded(self.analysis.scalar) 530 elif self.style == '|': 531 self.write_literal(self.analysis.scalar) 532 else: 533 self.write_plain(self.analysis.scalar, split) 534 self.analysis = None 535 self.style = None 536 537 # Analyzers. 538 539 def prepare_version(self, version): 540 major, minor = version 541 if major != 1: 542 raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) 543 return '%d.%d' % (major, minor) 544 545 def prepare_tag_handle(self, handle): 546 if not handle: 547 raise EmitterError("tag handle must not be empty") 548 if handle[0] != '!' 
or handle[-1] != '!': 549 raise EmitterError("tag handle must start and end with '!': %r" % handle) 550 for ch in handle[1:-1]: 551 if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ 552 or ch in '-_'): 553 raise EmitterError("invalid character %r in the tag handle: %r" 554 % (ch, handle)) 555 return handle 556 557 def prepare_tag_prefix(self, prefix): 558 if not prefix: 559 raise EmitterError("tag prefix must not be empty") 560 chunks = [] 561 start = end = 0 562 if prefix[0] == '!': 563 end = 1 564 while end < len(prefix): 565 ch = prefix[end] 566 if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ 567 or ch in '-;/?!:@&=+$,_.~*\'()[]': 568 end += 1 569 else: 570 if start < end: 571 chunks.append(prefix[start:end]) 572 start = end = end+1 573 data = ch.encode('utf-8') 574 for ch in data: 575 chunks.append('%%%02X' % ord(ch)) 576 if start < end: 577 chunks.append(prefix[start:end]) 578 return ''.join(chunks) 579 580 def prepare_tag(self, tag): 581 if not tag: 582 raise EmitterError("tag must not be empty") 583 if tag == '!': 584 return tag 585 handle = None 586 suffix = tag 587 prefixes = sorted(self.tag_prefixes.keys()) 588 for prefix in prefixes: 589 if tag.startswith(prefix) \ 590 and (prefix == '!' or len(prefix) < len(tag)): 591 handle = self.tag_prefixes[prefix] 592 suffix = tag[len(prefix):] 593 chunks = [] 594 start = end = 0 595 while end < len(suffix): 596 ch = suffix[end] 597 if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ 598 or ch in '-;/?:@&=+$,_.~*\'()[]' \ 599 or (ch == '!' 
and handle != '!'): 600 end += 1 601 else: 602 if start < end: 603 chunks.append(suffix[start:end]) 604 start = end = end+1 605 data = ch.encode('utf-8') 606 for ch in data: 607 chunks.append('%%%02X' % ch) 608 if start < end: 609 chunks.append(suffix[start:end]) 610 suffix_text = ''.join(chunks) 611 if handle: 612 return '%s%s' % (handle, suffix_text) 613 else: 614 return '!<%s>' % suffix_text 615 616 def prepare_anchor(self, anchor): 617 if not anchor: 618 raise EmitterError("anchor must not be empty") 619 for ch in anchor: 620 if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ 621 or ch in '-_'): 622 raise EmitterError("invalid character %r in the anchor: %r" 623 % (ch, anchor)) 624 return anchor 625 626 def analyze_scalar(self, scalar): 627 628 # Empty scalar is a special case. 629 if not scalar: 630 return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, 631 allow_flow_plain=False, allow_block_plain=True, 632 allow_single_quoted=True, allow_double_quoted=True, 633 allow_block=False) 634 635 # Indicators and special characters. 636 block_indicators = False 637 flow_indicators = False 638 line_breaks = False 639 special_characters = False 640 641 # Important whitespace combinations. 642 leading_space = False 643 leading_break = False 644 trailing_space = False 645 trailing_break = False 646 break_space = False 647 space_break = False 648 649 # Check document indicators. 650 if scalar.startswith('---') or scalar.startswith('...'): 651 block_indicators = True 652 flow_indicators = True 653 654 # First character or preceded by a whitespace. 655 preceded_by_whitespace = True 656 657 # Last character or followed by a whitespace. 658 followed_by_whitespace = (len(scalar) == 1 or 659 scalar[1] in '\0 \t\r\n\x85\u2028\u2029') 660 661 # The previous character is a space. 662 previous_space = False 663 664 # The previous character is a break. 
665 previous_break = False 666 667 index = 0 668 while index < len(scalar): 669 ch = scalar[index] 670 671 # Check for indicators. 672 if index == 0: 673 # Leading indicators are special characters. 674 if ch in '#,[]{}&*!|>\'\"%@`': 675 flow_indicators = True 676 block_indicators = True 677 if ch in '?:': 678 flow_indicators = True 679 if followed_by_whitespace: 680 block_indicators = True 681 if ch == '-' and followed_by_whitespace: 682 flow_indicators = True 683 block_indicators = True 684 else: 685 # Some indicators cannot appear within a scalar as well. 686 if ch in ',?[]{}': 687 flow_indicators = True 688 if ch == ':': 689 flow_indicators = True 690 if followed_by_whitespace: 691 block_indicators = True 692 if ch == '#' and preceded_by_whitespace: 693 flow_indicators = True 694 block_indicators = True 695 696 # Check for line breaks, special, and unicode characters. 697 if ch in '\n\x85\u2028\u2029': 698 line_breaks = True 699 if not (ch == '\n' or '\x20' <= ch <= '\x7E'): 700 if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF' 701 or '\uE000' <= ch <= '\uFFFD' 702 or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF': 703 unicode_characters = True 704 if not self.allow_unicode: 705 special_characters = True 706 else: 707 special_characters = True 708 709 # Detect important whitespace combinations. 710 if ch == ' ': 711 if index == 0: 712 leading_space = True 713 if index == len(scalar)-1: 714 trailing_space = True 715 if previous_break: 716 break_space = True 717 previous_space = True 718 previous_break = False 719 elif ch in '\n\x85\u2028\u2029': 720 if index == 0: 721 leading_break = True 722 if index == len(scalar)-1: 723 trailing_break = True 724 if previous_space: 725 space_break = True 726 previous_space = False 727 previous_break = True 728 else: 729 previous_space = False 730 previous_break = False 731 732 # Prepare for the next character. 
733 index += 1 734 preceded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029') 735 followed_by_whitespace = (index+1 >= len(scalar) or 736 scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029') 737 738 # Let's decide what styles are allowed. 739 allow_flow_plain = True 740 allow_block_plain = True 741 allow_single_quoted = True 742 allow_double_quoted = True 743 allow_block = True 744 745 # Leading and trailing whitespaces are bad for plain scalars. 746 if (leading_space or leading_break 747 or trailing_space or trailing_break): 748 allow_flow_plain = allow_block_plain = False 749 750 # We do not permit trailing spaces for block scalars. 751 if trailing_space: 752 allow_block = False 753 754 # Spaces at the beginning of a new line are only acceptable for block 755 # scalars. 756 if break_space: 757 allow_flow_plain = allow_block_plain = allow_single_quoted = False 758 759 # Spaces followed by breaks, as well as special character are only 760 # allowed for double quoted scalars. 761 if space_break or special_characters: 762 allow_flow_plain = allow_block_plain = \ 763 allow_single_quoted = allow_block = False 764 765 # Although the plain scalar writer supports breaks, we never emit 766 # multiline plain scalars. 767 if line_breaks: 768 allow_flow_plain = allow_block_plain = False 769 770 # Flow indicators are forbidden for flow plain scalars. 771 if flow_indicators: 772 allow_flow_plain = False 773 774 # Block indicators are forbidden for block plain scalars. 775 if block_indicators: 776 allow_block_plain = False 777 778 return ScalarAnalysis(scalar=scalar, 779 empty=False, multiline=line_breaks, 780 allow_flow_plain=allow_flow_plain, 781 allow_block_plain=allow_block_plain, 782 allow_single_quoted=allow_single_quoted, 783 allow_double_quoted=allow_double_quoted, 784 allow_block=allow_block) 785 786 # Writers. 787 788 def flush_stream(self): 789 if hasattr(self.stream, 'flush'): 790 self.stream.flush() 791 792 def write_stream_start(self): 793 # Write BOM if needed. 
794 if self.encoding and self.encoding.startswith('utf-16'): 795 self.stream.write('\uFEFF'.encode(self.encoding)) 796 797 def write_stream_end(self): 798 self.flush_stream() 799 800 def write_indicator(self, indicator, need_whitespace, 801 whitespace=False, indention=False): 802 if self.whitespace or not need_whitespace: 803 data = indicator 804 else: 805 data = ' '+indicator 806 self.whitespace = whitespace 807 self.indention = self.indention and indention 808 self.column += len(data) 809 self.open_ended = False 810 if self.encoding: 811 data = data.encode(self.encoding) 812 self.stream.write(data) 813 814 def write_indent(self): 815 indent = self.indent or 0 816 if not self.indention or self.column > indent \ 817 or (self.column == indent and not self.whitespace): 818 self.write_line_break() 819 if self.column < indent: 820 self.whitespace = True 821 data = ' '*(indent-self.column) 822 self.column = indent 823 if self.encoding: 824 data = data.encode(self.encoding) 825 self.stream.write(data) 826 827 def write_line_break(self, data=None): 828 if data is None: 829 data = self.best_line_break 830 self.whitespace = True 831 self.indention = True 832 self.line += 1 833 self.column = 0 834 if self.encoding: 835 data = data.encode(self.encoding) 836 self.stream.write(data) 837 838 def write_version_directive(self, version_text): 839 data = '%%YAML %s' % version_text 840 if self.encoding: 841 data = data.encode(self.encoding) 842 self.stream.write(data) 843 self.write_line_break() 844 845 def write_tag_directive(self, handle_text, prefix_text): 846 data = '%%TAG %s %s' % (handle_text, prefix_text) 847 if self.encoding: 848 data = data.encode(self.encoding) 849 self.stream.write(data) 850 self.write_line_break() 851 852 # Scalar streams. 
# Emitter methods (class body): writers for quoted and block scalars.

    def write_single_quoted(self, text, split=True):
        """Write `text` as a single-quoted scalar.

        The text is scanned as runs of spaces, runs of line breaks and runs
        of other characters; `start` marks the beginning of the pending run
        and `end` the character being examined (one past the text on the
        final iteration, where `ch` is None).
        """
        self.write_indicator('\'', True)
        spaces = False
        breaks = False
        start = end = 0
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            if spaces:
                if ch is None or ch != ' ':
                    # A single interior space past the preferred width is
                    # replaced by a line break when splitting is allowed.
                    if start+1 == end and self.column > self.best_width and split   \
                            and start != 0 and end != len(text):
                        self.write_indent()
                    else:
                        data = text[start:end]
                        self.column += len(data)
                        if self.encoding:
                            data = data.encode(self.encoding)
                        self.stream.write(data)
                    start = end
            elif breaks:
                if ch is None or ch not in '\n\x85\u2028\u2029':
                    # A run starting with '\n' gets one extra line break.
                    if text[start] == '\n':
                        self.write_line_break()
                    for br in text[start:end]:
                        if br == '\n':
                            self.write_line_break()
                        else:
                            self.write_line_break(br)
                    self.write_indent()
                    start = end
            else:
                if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'':
                    if start < end:
                        data = text[start:end]
                        self.column += len(data)
                        if self.encoding:
                            data = data.encode(self.encoding)
                        self.stream.write(data)
                        start = end
            # A quote character is written doubled.
            if ch == '\'':
                data = '\'\''
                self.column += 2
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end + 1
            if ch is not None:
                spaces = (ch == ' ')
                breaks = (ch in '\n\x85\u2028\u2029')
            end += 1
        self.write_indicator('\'', False)

    # Characters written as a backslash followed by the mapped character
    # inside double-quoted scalars.
    ESCAPE_REPLACEMENTS = {
        '\0':       '0',
        '\x07':     'a',
        '\x08':     'b',
        '\x09':     't',
        '\x0A':     'n',
        '\x0B':     'v',
        '\x0C':     'f',
        '\x0D':     'r',
        '\x1B':     'e',
        '\"':       '\"',
        '\\':       '\\',
        '\x85':     'N',
        '\xA0':     '_',
        '\u2028':   'L',
        '\u2029':   'P',
    }

    def write_double_quoted(self, text, split=True):
        """Write `text` as a double-quoted scalar, escaping where required.

        Characters outside printable ASCII are escaped unless
        `allow_unicode` is set and they fall in the permitted ranges.  When
        `split` is true, long text is continued on the next line with a
        trailing backslash.
        """
        self.write_indicator('"', True)
        start = end = 0
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \
                    or not ('\x20' <= ch <= '\x7E'
                        or (self.allow_unicode
                            and ('\xA0' <= ch <= '\uD7FF'
                                or '\uE000' <= ch <= '\uFFFD'))):
                # Flush the pending unescaped run first.
                if start < end:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end
                if ch is not None:
                    # Named escape if one exists, otherwise the shortest
                    # numeric escape (\x, \u or \U) for the code point.
                    if ch in self.ESCAPE_REPLACEMENTS:
                        data = '\\'+self.ESCAPE_REPLACEMENTS[ch]
                    elif ch <= '\xFF':
                        data = '\\x%02X' % ord(ch)
                    elif ch <= '\uFFFF':
                        data = '\\u%04X' % ord(ch)
                    else:
                        data = '\\U%08X' % ord(ch)
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end+1
            # Split the line at a space (or right after an escape) once the
            # preferred width would be exceeded.
            if 0 < end < len(text)-1 and (ch == ' ' or start >= end)    \
                    and self.column+(end-start) > self.best_width and split:
                data = text[start:end]+'\\'
                if start < end:
                    start = end
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                self.write_indent()
                self.whitespace = False
                self.indention = False
                # A continuation line that begins with a space gets a
                # backslash written in front of it.
                if text[start] == ' ':
                    data = '\\'
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
            end += 1
        self.write_indicator('"', False)

    def determine_block_hints(self, text):
        """Return the header hints for a block scalar holding `text`.

        The result is the indentation indicator (when the text starts with a
        space or line break) followed by the chomping indicator: '-' when
        the text has no final line break, '+' when it is a lone break or
        ends with more than one, nothing otherwise.
        """
        hints = ''
        if text:
            if text[0] in ' \n\x85\u2028\u2029':
                hints += str(self.best_indent)
            if text[-1] not in '\n\x85\u2028\u2029':
                hints += '-'
            elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029':
                hints += '+'
        return hints

    def write_folded(self, text):
        """Write `text` as a folded ('>') block scalar."""
        hints = self.determine_block_hints(text)
        self.write_indicator('>'+hints, True)
        # Kept trailing breaks ('+') leave the document open-ended.
        if hints[-1:] == '+':
            self.open_ended = True
        self.write_line_break()
        leading_space = True
        spaces = False
        breaks = True
        start = end = 0
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            if breaks:
                if ch is None or ch not in '\n\x85\u2028\u2029':
                    # Between two lines that do not start with a space, a
                    # run starting with '\n' gets one extra line break.
                    if not leading_space and ch is not None and ch != ' '   \
                            and text[start] == '\n':
                        self.write_line_break()
                    leading_space = (ch == ' ')
                    for br in text[start:end]:
                        if br == '\n':
                            self.write_line_break()
                        else:
                            self.write_line_break(br)
                    if ch is not None:
                        self.write_indent()
                    start = end
            elif spaces:
                if ch != ' ':
                    # Fold at a single space once past the preferred width.
                    if start+1 == end and self.column > self.best_width:
                        self.write_indent()
                    else:
                        data = text[start:end]
                        self.column += len(data)
                        if self.encoding:
                            data = data.encode(self.encoding)
                        self.stream.write(data)
                    start = end
            else:
                if ch is None or ch in ' \n\x85\u2028\u2029':
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    if ch is None:
                        self.write_line_break()
                    start = end
            if ch is not None:
                breaks = (ch in '\n\x85\u2028\u2029')
                spaces = (ch == ' ')
            end += 1

    def write_literal(self, text):
        """Write `text` as a literal ('|') block scalar, line by line."""
        hints = self.determine_block_hints(text)
        self.write_indicator('|'+hints, True)
        # Kept trailing breaks ('+') leave the document open-ended.
        if hints[-1:] == '+':
            self.open_ended = True
        self.write_line_break()
        breaks = True
        start = end = 0
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            if breaks:
                if ch is None or ch not in '\n\x85\u2028\u2029':
                    for br in text[start:end]:
                        if br == '\n':
                            self.write_line_break()
                        else:
                            self.write_line_break(br)
                    if ch is not None:
                        self.write_indent()
                    start = end
            else:
                if ch is None or ch in '\n\x85\u2028\u2029':
                    # NOTE: self.column is not advanced for this data; the
                    # next write here is always a line break, which resets it.
                    data = text[start:end]
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    if ch is None:
                        self.write_line_break()
                    start = end
            if ch is not None:
                breaks = (ch in '\n\x85\u2028\u2029')
            end += 1

1080 def write_plain(self, text, split=True): 1081 if self.root_context: 1082 self.open_ended = True 1083 if not text: 1084 return 1085 if not self.whitespace: 1086 data = ' ' 1087 self.column += len(data) 1088 if self.encoding: 1089 data = data.encode(self.encoding) 1090 self.stream.write(data) 1091 self.whitespace = False 1092 self.indention = False 1093 spaces = False 1094 breaks = False 1095 start = end = 0 1096 while end <= len(text): 1097 ch = None 1098 if end < len(text): 1099 ch = text[end] 1100 if spaces: 1101 if ch != ' ': 1102 if start+1 == end and self.column > self.best_width and split: 1103 self.write_indent() 1104 self.whitespace = False 1105 self.indention = False 1106 else: 1107 data = text[start:end] 1108 self.column += len(data) 1109 if self.encoding: 1110 data = data.encode(self.encoding) 1111 self.stream.write(data) 1112 start = end 1113 elif breaks: 1114 if ch not in '\n\x85\u2028\u2029': 1115 if text[start] == '\n': 1116 self.write_line_break() 1117 for br in text[start:end]: 1118 if br == '\n': 1119 self.write_line_break() 1120 else: 1121 self.write_line_break(br) 1122 self.write_indent() 1123 self.whitespace = False 1124 self.indention = False 1125 start = end 1126 else: 1127 if ch is None or ch in ' \n\x85\u2028\u2029': 1128 data = text[start:end] 1129 self.column += len(data) 1130 if self.encoding: 1131 data = data.encode(self.encoding) 1132 self.stream.write(data) 1133 start = end 1134 if ch is not None: 1135 spaces = (ch == ' ') 1136 breaks = (ch in '\n\x85\u2028\u2029') 1137 end += 1 1138