"""Tests for heterogeneous AST node types (the ``TOKEN<NodeClass>`` syntax).

Each test compiles a small inline grammar, runs it on a short input and
compares the string form of the resulting tree with the expected text.
"""

import unittest
import textwrap
import antlr3
import antlr3.tree
import testbase
import sys


class T(testbase.ANTLRTest):
    """Parser and tree-parser tests that build ASTs from custom node classes."""

    def parserClass(self, base):
        """Return a parser subclass of *base* instrumented for these tests."""

        class TParser(base):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)

                self._output = ""
                # traceIn()/traceOut() append here; without this attribute
                # the first trace call would raise AttributeError.
                self.traces = []

            def capture(self, t):
                """Append *t* to the captured output."""
                self._output += t

            def traceIn(self, ruleName, ruleIndex):
                """Record entry into rule *ruleName*."""
                self.traces.append('>'+ruleName)

            def traceOut(self, ruleName, ruleIndex):
                """Record exit from rule *ruleName*."""
                self.traces.append('<'+ruleName)

            def recover(self, input, re):
                """Re-raise the active exception instead of recovering.

                The bare ``raise`` only works while an exception is being
                handled -- presumably the runtime calls this from inside
                its ``except`` clause; verify against the antlr3 runtime.
                """
                # no error recovery yet, just crash!
                raise

        return TParser

    def lexerClass(self, base):
        """Return a lexer subclass of *base* instrumented for these tests."""

        class TLexer(base):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)

                self._output = ""
                # traceIn()/traceOut() append here; without this attribute
                # the first trace call would raise AttributeError.
                self.traces = []

            def capture(self, t):
                """Append *t* to the captured output."""
                self._output += t

            def traceIn(self, ruleName, ruleIndex):
                """Record entry into rule *ruleName*."""
                self.traces.append('>'+ruleName)

            def traceOut(self, ruleName, ruleIndex):
                """Record exit from rule *ruleName*."""
                self.traces.append('<'+ruleName)

            def recover(self, input, re):
                """Re-raise the active exception instead of recovering.

                The bare ``raise`` only works while an exception is being
                handled -- presumably the runtime calls this from inside
                its ``except`` clause; verify against the antlr3 runtime.
                """
                # no error recovery yet, just crash!
                raise

        return TLexer

    def execParser(self, grammar, grammarEntry, input):
        """Compile *grammar*, parse *input* and return the tree as a string.

        *grammarEntry* is the name of the parser rule to invoke.  Returns
        ``""`` when the rule yields a falsy result.
        """
        lexerCls, parserCls = self.compileInlineGrammar(grammar)

        cStream = antlr3.StringStream(input)
        lexer = lexerCls(cStream)
        tStream = antlr3.CommonTokenStream(lexer)
        parser = parserCls(tStream)
        r = getattr(parser, grammarEntry)()

        if r:
            return r.tree.toStringTree()

        return ""

    def execTreeParser(self, grammar, grammarEntry, treeGrammar, treeEntry, input):
        """Parse *input*, walk the tree with *treeGrammar*, return the result.

        The parser rule *grammarEntry* builds the AST; the tree-parser rule
        *treeEntry* is then run over a node stream of that AST.  Returns
        the walker's tree as a string, or ``""`` for a falsy result.
        """
        lexerCls, parserCls = self.compileInlineGrammar(grammar)
        walkerCls = self.compileInlineGrammar(treeGrammar)

        cStream = antlr3.StringStream(input)
        lexer = lexerCls(cStream)
        tStream = antlr3.CommonTokenStream(lexer)
        parser = parserCls(tStream)
        r = getattr(parser, grammarEntry)()
        nodes = antlr3.tree.CommonTreeNodeStream(r.tree)
        nodes.setTokenStream(tStream)
        walker = walkerCls(nodes)
        r = getattr(walker, treeEntry)()

        if r:
            return r.tree.toStringTree()

        return ""

    # PARSERS -- AUTO AST

    def testToken(self):
        grammar = textwrap.dedent(
            r'''
            grammar T1;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            }
            a : ID<V> ;
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("a<V>", found)

    def testTokenCommonTree(self):
        grammar = textwrap.dedent(
            r'''
            grammar T;
            options {
                language=Python3;
                output=AST;
            }
            a : ID<CommonTree> ;
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="a")

        self.assertEqual("a", found)

    def testTokenWithQualifiedType(self):
        grammar = textwrap.dedent(
            r'''
            grammar T;
            options {
                language=Python3;
                output=AST;
            }
            @members {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString
            }
            a : ID<TParser.V> ; // TParser.V is qualified name
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("a<V>", found)

    def testNamedType(self):
        # NOTE(review): the grammar name "$T" and the doubled backslash in
        # '\\n' (inside a raw string) differ from the sibling tests --
        # confirm both are intentional.
        grammar = textwrap.dedent(
            r"""
            grammar $T;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString
            }
            a : ID<node=V> ;
            ID : 'a'..'z'+ ;
            WS : (' '|'\\n') {$channel=HIDDEN;} ;
            """)

        found = self.execParser(grammar, 'a', input="a")
        self.assertEqual("a<V>", found)

    def testTokenWithLabel(self):
        grammar = textwrap.dedent(
            r'''
            grammar T2;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            }
            a : x=ID<V> ;
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("a<V>", found)

    def testTokenWithListLabel(self):
        grammar = textwrap.dedent(
            r'''
            grammar T3;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            }
            a : x+=ID<V> ;
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("a<V>", found)

    def testTokenRoot(self):
        grammar = textwrap.dedent(
            r'''
            grammar T4;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            }
            a : ID<V>^ ;
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("a<V>", found)

    def testTokenRootWithListLabel(self):
        grammar = textwrap.dedent(
            r'''
            grammar T5;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            }
            a : x+=ID<V>^ ;
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("a<V>", found)

    def testString(self):
        grammar = textwrap.dedent(
            r'''
            grammar T6;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            }
            a : 'begin'<V> ;
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="begin"
            )

        self.assertEqual("begin<V>", found)

    def testStringRoot(self):
        grammar = textwrap.dedent(
            r'''
            grammar T7;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            }
            a : 'begin'<V>^ ;
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="begin"
            )

        self.assertEqual("begin<V>", found)

    # PARSERS -- REWRITE AST

    def testRewriteToken(self):
        grammar = textwrap.dedent(
            r'''
            grammar T8;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            }
            a : ID -> ID<V> ;
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("a<V>", found)

    def testRewriteTokenWithArgs(self):
        grammar = textwrap.dedent(
            r'''
            grammar T9;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def __init__(self, *args):
                    if len(args) == 4:
                        ttype = args[0]
                        x = args[1]
                        y = args[2]
                        z = args[3]
                        token = CommonToken(type=ttype, text="")

                    elif len(args) == 3:
                        ttype = args[0]
                        token = args[1]
                        x = args[2]
                        y, z = 0, 0

                    else:
                        raise TypeError("Invalid args {!r}".format(args))

                    super().__init__(token)
                    self.x = x
                    self.y = y
                    self.z = z

                def toString(self):
                    txt = ""
                    if self.token:
                        txt += self.token.text
                    txt +="<V>;{0.x}{0.y}{0.z}".format(self)
                    return txt
                __str__ = toString

            }
            a : ID -> ID<V>[42,19,30] ID<V>[$ID,99];
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("<V>;421930 a<V>;9900", found)

    def testRewriteTokenRoot(self):
        grammar = textwrap.dedent(
            r'''
            grammar T10;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            }
            a : ID INT -> ^(ID<V> INT) ;
            ID : 'a'..'z'+ ;
            INT : '0'..'9'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="a 2"
            )

        self.assertEqual("(a<V> 2)", found)

    def testRewriteString(self):
        grammar = textwrap.dedent(
            r'''
            grammar T11;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            }
            a : 'begin' -> 'begin'<V> ;
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="begin"
            )

        self.assertEqual("begin<V>", found)

    def testRewriteStringRoot(self):
        grammar = textwrap.dedent(
            r'''
            grammar T12;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            }
            a : 'begin' INT -> ^('begin'<V> INT) ;
            ID : 'a'..'z'+ ;
            INT : '0'..'9'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="begin 2"
            )

        self.assertEqual("(begin<V> 2)", found)

    def testRewriteRuleResults(self):
        grammar = textwrap.dedent(
            r'''
            grammar T;
            options {
                language=Python3;
                output=AST;
            }
            tokens {LIST;}
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            class W(CommonTree):
                def __init__(self, tokenType, txt):
                    super().__init__(
                        CommonToken(type=tokenType, text=txt))

                def toString(self):
                    return self.token.text + "<W>"
                __str__ = toString

            }
            a : id (',' id)* -> ^(LIST<W>["LIST"] id+);
            id : ID -> ID<V>;
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="a,b,c")

        self.assertEqual("(LIST<W> a<V> b<V> c<V>)", found)

    def testCopySemanticsWithHetero(self):
        # NOTE(review): WS uses '\\n' inside a raw string, unlike most
        # sibling grammars which use '\n' -- confirm this is intentional.
        grammar = textwrap.dedent(
            r'''
            grammar T;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def dupNode(self):
                    return V(self)

                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            }
            a : type ID (',' ID)* ';' -> ^(type ID)+;
            type : 'int'<V> ;
            ID : 'a'..'z'+ ;
            INT : '0'..'9'+;
            WS : (' '|'\\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="int a, b, c;")
        self.assertEqual("(int<V> a) (int<V> b) (int<V> c)", found)

    # TREE PARSERS -- REWRITE AST

    def testTreeParserRewriteFlatList(self):
        grammar = textwrap.dedent(
            r'''
            grammar T13;
            options {
                language=Python3;
                output=AST;
            }
            a : ID INT;
            ID : 'a'..'z'+ ;
            INT : '0'..'9'+;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        treeGrammar = textwrap.dedent(
            r'''
            tree grammar TP13;
            options {
                language=Python3;
                output=AST;
                ASTLabelType=CommonTree;
                tokenVocab=T13;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            class W(CommonTree):
                def toString(self):
                    return self.token.text + "<W>"
                __str__ = toString

            }
            a : ID INT -> INT<V> ID<W>
              ;
            ''')

        found = self.execTreeParser(
            grammar, 'a',
            treeGrammar, 'a',
            input="abc 34"
            )

        self.assertEqual("34<V> abc<W>", found)

    def testTreeParserRewriteTree(self):
        grammar = textwrap.dedent(
            r'''
            grammar T14;
            options {
                language=Python3;
                output=AST;
            }
            a : ID INT;
            ID : 'a'..'z'+ ;
            INT : '0'..'9'+;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        treeGrammar = textwrap.dedent(
            r'''
            tree grammar TP14;
            options {
                language=Python3;
                output=AST;
                ASTLabelType=CommonTree;
                tokenVocab=T14;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            class W(CommonTree):
                def toString(self):
                    return self.token.text + "<W>"
                __str__ = toString

            }
            a : ID INT -> ^(INT<V> ID<W>)
              ;
            ''')

        found = self.execTreeParser(
            grammar, 'a',
            treeGrammar, 'a',
            input="abc 34"
            )

        self.assertEqual("(34<V> abc<W>)", found)

    def testTreeParserRewriteImaginary(self):
        grammar = textwrap.dedent(
            r'''
            grammar T15;
            options {
                language=Python3;
                output=AST;
            }
            a : ID ;
            ID : 'a'..'z'+ ;
            INT : '0'..'9'+;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        treeGrammar = textwrap.dedent(
            r'''
            tree grammar TP15;
            options {
                language=Python3;
                output=AST;
                ASTLabelType=CommonTree;
                tokenVocab=T15;
            }
            tokens { ROOT; }
            @header {
            class V(CommonTree):
                def __init__(self, tokenType):
                    super().__init__(CommonToken(tokenType))

                def toString(self):
                    return tokenNames[self.token.type] + "<V>"
                __str__ = toString


            }
            a : ID -> ROOT<V> ID
              ;
            ''')

        found = self.execTreeParser(
            grammar, 'a',
            treeGrammar, 'a',
            input="abc"
            )

        self.assertEqual("ROOT<V> abc", found)

    def testTreeParserRewriteImaginaryWithArgs(self):
        grammar = textwrap.dedent(
            r'''
            grammar T16;
            options {
                language=Python3;
                output=AST;
            }
            a : ID ;
            ID : 'a'..'z'+ ;
            INT : '0'..'9'+;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        treeGrammar = textwrap.dedent(
            r'''
            tree grammar TP16;
            options {
                language=Python3;
                output=AST;
                ASTLabelType=CommonTree;
                tokenVocab=T16;
            }
            tokens { ROOT; }
            @header {
            class V(CommonTree):
                def __init__(self, tokenType, x):
                    super().__init__(CommonToken(tokenType))
                    self.x = x

                def toString(self):
                    return tokenNames[self.token.type] + "<V>;" + str(self.x)
                __str__ = toString

            }
            a : ID -> ROOT<V>[42] ID
              ;
            ''')

        found = self.execTreeParser(
            grammar, 'a',
            treeGrammar, 'a',
            input="abc"
            )

        self.assertEqual("ROOT<V>;42 abc", found)

    def testTreeParserRewriteImaginaryRoot(self):
        grammar = textwrap.dedent(
            r'''
            grammar T17;
            options {
                language=Python3;
                output=AST;
            }
            a : ID ;
            ID : 'a'..'z'+ ;
            INT : '0'..'9'+;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        treeGrammar = textwrap.dedent(
            r'''
            tree grammar TP17;
            options {
                language=Python3;
                output=AST;
                ASTLabelType=CommonTree;
                tokenVocab=T17;
            }
            tokens { ROOT; }
            @header {
            class V(CommonTree):
                def __init__(self, tokenType):
                    super().__init__(CommonToken(tokenType))

                def toString(self):
                    return tokenNames[self.token.type] + "<V>"
                __str__ = toString

            }
            a : ID -> ^(ROOT<V> ID)
              ;
            ''')

        found = self.execTreeParser(
            grammar, 'a',
            treeGrammar, 'a',
            input="abc"
            )

        self.assertEqual("(ROOT<V> abc)", found)

    def testTreeParserRewriteImaginaryFromReal(self):
        grammar = textwrap.dedent(
            r'''
            grammar T18;
            options {
                language=Python3;
                output=AST;
            }
            a : ID ;
            ID : 'a'..'z'+ ;
            INT : '0'..'9'+;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        treeGrammar = textwrap.dedent(
            r'''
            tree grammar TP18;
            options {
                language=Python3;
                output=AST;
                ASTLabelType=CommonTree;
                tokenVocab=T18;
            }
            tokens { ROOT; }
            @header {
            class V(CommonTree):
                def __init__(self, tokenType, tree=None):
                    if tree is None:
                        super().__init__(CommonToken(tokenType))
                    else:
                        super().__init__(tree)
                        self.token.type = tokenType

                def toString(self):
                    return tokenNames[self.token.type]+"<V>@"+str(self.token.line)
                __str__ = toString

            }
            a : ID -> ROOT<V>[$ID]
              ;
            ''')

        found = self.execTreeParser(
            grammar, 'a',
            treeGrammar, 'a',
            input="abc"
            )

        self.assertEqual("ROOT<V>@1", found)

    def testTreeParserAutoHeteroAST(self):
        grammar = textwrap.dedent(
            r'''
            grammar T;
            options {
                language=Python3;
                output=AST;
            }
            a : ID ';' ;
            ID : 'a'..'z'+ ;
            INT : '0'..'9'+;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        treeGrammar = textwrap.dedent(
            r'''
            tree grammar TP;
            options {
                language=Python3;
                output=AST;
                ASTLabelType=CommonTree;
                tokenVocab=T;
            }
            tokens { ROOT; }
            @header {
            class V(CommonTree):
                def toString(self):
                    return CommonTree.toString(self) + "<V>"
                __str__ = toString

            }

            a : ID<V> ';'<V>;
            ''')

        found = self.execTreeParser(
            grammar, 'a',
            treeGrammar, 'a',
            input="abc;"
            )

        self.assertEqual("abc<V> ;<V>", found)


if __name__ == '__main__':
    unittest.main()