# begin[licence]
#
# [The "BSD licence"]
# Copyright (c) 2005-2012 Terence Parr
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# end[licence]

import socket
import sys

from .constants import INVALID_TOKEN_TYPE
from .exceptions import RecognitionException
from .recognizers import Parser
from .streams import TokenStream
from .tokens import Token
from .tree import CommonTreeAdaptor, TreeAdaptor, Tree


class DebugParser(Parser):
    """A Parser that forwards resync, backtrack and error events to a
    debug event listener.
    """

    def __init__(self, stream, state=None, dbg=None, *args, **kwargs):
        """Wrap *stream* for debugging and register *dbg* as the listener.

        Extra positional/keyword arguments are passed through to Parser.
        """
        # wrap token stream in DebugTokenStream (unless user already did so).
        if not isinstance(stream, DebugTokenStream):
            stream = DebugTokenStream(stream, dbg)

        super().__init__(stream, state, *args, **kwargs)

        # Who to notify when events in the parser occur.
        self._dbg = None

        self.setDebugListener(dbg)

    def setDebugListener(self, dbg):
        """Provide a new debug event listener for this parser. Notify the
        input stream too that it should send events to this listener.
        """
        # Only streams exposing a 'dbg' attribute are updated.
        if hasattr(self.input, 'dbg'):
            self.input.dbg = dbg

        self._dbg = dbg

    def getDebugListener(self):
        """Return the listener currently receiving this parser's events."""
        return self._dbg

    dbg = property(getDebugListener, setDebugListener)

    def beginResync(self):
        """Forward the beginResync event to the listener."""
        self._dbg.beginResync()

    def endResync(self):
        """Forward the endResync event to the listener."""
        self._dbg.endResync()

    def beginBacktrack(self, level):
        """Forward the beginBacktrack event to the listener."""
        self._dbg.beginBacktrack(level)

    def endBacktrack(self, level, successful):
        """Forward the endBacktrack event to the listener."""
        self._dbg.endBacktrack(level, successful)

    def reportError(self, exc):
        """Report *exc* through Parser.reportError, then notify the listener
        if it is a RecognitionException.
        """
        Parser.reportError(self, exc)

        if isinstance(exc, RecognitionException):
            self._dbg.recognitionException(exc)


class DebugTokenStream(TokenStream):
    """A TokenStream proxy that delegates to a wrapped stream and fires
    lookahead, consume, mark and rewind events to a debug listener.
    """

    def __init__(self, input, dbg=None):
        """Wrap the token stream *input*; events go to *dbg*."""
        super().__init__()
        self.input = input
        # True until the first consume/LT/LA call has reported the hidden
        # tokens that precede the first on-channel token.
        self.initialStreamState = True
        # Track the last mark() call result value for use in rewind().
        self.lastMarker = None

        self._dbg = None
        self.setDebugListener(dbg)

        # force TokenStream to get at least first valid token
        # so we know if there are any hidden tokens first in the stream
        self.input.LT(1)

    def getDebugListener(self):
        """Return the listener currently receiving this stream's events."""
        return self._dbg

    def setDebugListener(self, dbg):
        """Replace the listener receiving this stream's events."""
        self._dbg = dbg

    dbg = property(getDebugListener, setDebugListener)

    def consume(self):
        """Consume one token from the wrapped stream and report it.

        Fires consumeToken for the token that was at LT(1), then
        consumeHiddenToken for every token the wrapped stream skipped over
        while advancing.
        """
        if self.initialStreamState:
            self.consumeInitialHiddenTokens()

        a = self.input.index()
        t = self.input.LT(1)
        self.input.consume()
        b = self.input.index()
        self._dbg.consumeToken(t)

        if b > a + 1:
            # then we consumed more than one token; must be off channel tokens
            for idx in range(a + 1, b):
                self._dbg.consumeHiddenToken(self.input.get(idx))

    def consumeInitialHiddenTokens(self):
        """consume all initial off-channel tokens"""
        firstOnChannelTokenIndex = self.input.index()
        for idx in range(firstOnChannelTokenIndex):
            self._dbg.consumeHiddenToken(self.input.get(idx))

        self.initialStreamState = False

    def LT(self, i):
        """Return the wrapped stream's LT(i) after firing an LT event."""
        if self.initialStreamState:
            self.consumeInitialHiddenTokens()

        t = self.input.LT(i)
        self._dbg.LT(i, t)
        return t

    def LA(self, i):
        """Return the type of the wrapped stream's LT(i) token.

        Fires the same LT event as LT(); listeners do not see a separate
        LA event.
        """
        if self.initialStreamState:
            self.consumeInitialHiddenTokens()

        t = self.input.LT(i)
        self._dbg.LT(i, t)
        return t.type

    def get(self, i):
        """Delegate to the wrapped stream; no event is fired."""
        return self.input.get(i)

    def index(self):
        """Delegate to the wrapped stream; no event is fired."""
        return self.input.index()

    def mark(self):
        """Mark the wrapped stream, remember the marker, fire a mark event
        and return the marker.
        """
        self.lastMarker = self.input.mark()
        self._dbg.mark(self.lastMarker)
        return self.lastMarker

    def rewind(self, marker=None):
        """Fire a rewind event, then rewind the wrapped stream to *marker*."""
        self._dbg.rewind(marker)
        self.input.rewind(marker)

    def release(self, marker):
        """Do nothing; the marker is not forwarded to the wrapped stream."""
        pass

    def seek(self, index):
        """Seek the wrapped stream to *index*; no event is fired."""
        # TODO: implement seek in dbg interface
        # self._dbg.seek(index);
        self.input.seek(index)

    def size(self):
        """Delegate to the wrapped stream."""
        return self.input.size()

    def getTokenSource(self):
        """Delegate to the wrapped stream."""
        return self.input.getTokenSource()

    def getSourceName(self):
        """Return the source name reported by the token source."""
        return self.getTokenSource().getSourceName()

    def toString(self, start=None, stop=None):
        """Delegate to the wrapped stream."""
        return self.input.toString(start, stop)


class DebugTreeAdaptor(TreeAdaptor):
    """A TreeAdaptor proxy that fires debugging events to a DebugEventListener
    delegate and uses the TreeAdaptor delegate to do the actual work. All
    AST events are triggered by this adaptor; no code gen changes are needed
    in generated rules. Debugging events are triggered *after* invoking
    tree adaptor routines.

    Trees created with actions in rewrite actions like "-> ^(ADD {foo} {bar})"
    cannot be tracked as they might not use the adaptor to create foo, bar.
    The debug listener has to deal with tree node IDs for which it did
    not see a createNode event. A single <unknown> node is sufficient even
    if it represents a whole tree.
    """

    def __init__(self, dbg, adaptor):
        """Fire events to *dbg*; delegate the real tree work to *adaptor*."""
        super().__init__()
        self.dbg = dbg
        self.adaptor = adaptor

    def createWithPayload(self, payload):
        """Create a node for the token *payload* and fire createNode."""
        if payload.index < 0:
            # could be token conjured up during error recovery
            return self.createFromType(payload.type, payload.text)

        node = self.adaptor.createWithPayload(payload)
        self.dbg.createNode(node, payload)
        return node

    def createFromToken(self, tokenType, fromToken, text=None):
        """Delegate node creation, then fire createNode without a token."""
        node = self.adaptor.createFromToken(tokenType, fromToken, text)
        self.dbg.createNode(node)
        return node

    def createFromType(self, tokenType, text):
        """Delegate node creation, then fire createNode without a token."""
        node = self.adaptor.createFromType(tokenType, text)
        self.dbg.createNode(node)
        return node

    def errorNode(self, input, start, stop, exc):
        """Delegate error-node creation; fire errorNode if one was built."""
        node = self.adaptor.errorNode(input, start, stop, exc)
        if node is not None:
            self.dbg.errorNode(node)

        return node

    def dupTree(self, tree):
        """Duplicate *tree* and replay its construction as debug events."""
        t = self.adaptor.dupTree(tree)
        # walk the tree and emit create and add child events
        # to simulate what dupTree has done. dupTree does not call this debug
        # adapter so I must simulate.
        self.simulateTreeConstruction(t)
        return t

    def simulateTreeConstruction(self, t):
        """^(A B C): emit create A, create B, add child, ..."""
        self.dbg.createNode(t)
        for i in range(self.adaptor.getChildCount(t)):
            child = self.adaptor.getChild(t, i)
            self.simulateTreeConstruction(child)
            self.dbg.addChild(t, child)

    def dupNode(self, treeNode):
        """Duplicate a single node and fire createNode for the copy."""
        d = self.adaptor.dupNode(treeNode)
        self.dbg.createNode(d)
        return d

    def nil(self):
        """Create a nil node and fire nilNode for it."""
        node = self.adaptor.nil()
        self.dbg.nilNode(node)
        return node

    def isNil(self, tree):
        return self.adaptor.isNil(tree)

    def addChild(self, t, child):
        """Add *child* (a node or a Token) to *t* and fire addChild.

        A Token child is first turned into a node via createWithPayload.
        Nothing happens when either *t* or the node child is None.
        """
        if isinstance(child, Token):
            n = self.createWithPayload(child)
            self.addChild(t, n)

        else:
            if t is None or child is None:
                return

            self.adaptor.addChild(t, child)
            self.dbg.addChild(t, child)

    def becomeRoot(self, newRoot, oldRoot):
        """Make *newRoot* (a node or a Token) the root of *oldRoot*.

        Fires becomeRoot and returns the resulting root node.
        """
        if isinstance(newRoot, Token):
            n = self.createWithPayload(newRoot)
            self.adaptor.becomeRoot(n, oldRoot)
            # Report the node created for the token, not the token itself:
            # listeners identify nodes via adaptor.getUniqueID() and only
            # ever saw a createNode event for n.
            self.dbg.becomeRoot(n, oldRoot)
        else:
            n = self.adaptor.becomeRoot(newRoot, oldRoot)
            self.dbg.becomeRoot(newRoot, oldRoot)

        return n

    # The operations below are pure delegation to the wrapped adaptor; they
    # fire no debug events (except setTokenBoundaries).

    def rulePostProcessing(self, root):
        return self.adaptor.rulePostProcessing(root)

    def getType(self, t):
        return self.adaptor.getType(t)

    def setType(self, t, type):
        self.adaptor.setType(t, type)

    def getText(self, t):
        return self.adaptor.getText(t)

    def setText(self, t, text):
        self.adaptor.setText(t, text)

    def getToken(self, t):
        return self.adaptor.getToken(t)

    def setTokenBoundaries(self, t, startToken, stopToken):
        """Delegate, then fire setTokenBoundaries with the token indexes.

        The event is only fired when the node and both tokens are present.
        """
        self.adaptor.setTokenBoundaries(t, startToken, stopToken)
        # Explicit None checks: the truthiness of a tree or token object
        # must not decide whether the event is sent.
        if (t is not None and startToken is not None
                and stopToken is not None):
            self.dbg.setTokenBoundaries(
                t, startToken.index, stopToken.index)

    def getTokenStartIndex(self, t):
        return self.adaptor.getTokenStartIndex(t)

    def getTokenStopIndex(self, t):
        return self.adaptor.getTokenStopIndex(t)

    def getChild(self, t, i):
        return self.adaptor.getChild(t, i)

    def setChild(self, t, i, child):
        self.adaptor.setChild(t, i, child)

    def deleteChild(self, t, i):
        return self.adaptor.deleteChild(t, i)

    def getChildCount(self, t):
        return self.adaptor.getChildCount(t)

    def getUniqueID(self, node):
        return self.adaptor.getUniqueID(node)

    def getParent(self, t):
        return self.adaptor.getParent(t)

    def getChildIndex(self, t):
        return self.adaptor.getChildIndex(t)

    def setParent(self, t, parent):
        self.adaptor.setParent(t, parent)

    def setChildIndex(self, t, index):
        self.adaptor.setChildIndex(t, index)

    def replaceChildren(self, parent, startChildIndex, stopChildIndex, t):
        self.adaptor.replaceChildren(parent, startChildIndex, stopChildIndex, t)

    ## support

    def getDebugListener(self):
        """Return the listener receiving this adaptor's events."""
        return self.dbg

    def setDebugListener(self, dbg):
        """Replace the listener receiving this adaptor's events."""
        self.dbg = dbg

    def getTreeAdaptor(self):
        """Return the wrapped adaptor that does the actual tree work."""
        return self.adaptor


class DebugEventListener(object):
    """All debugging events that a recognizer can trigger.

    I did not create a separate AST debugging interface as it would create
    lots of extra classes and DebugParser has a dbg var defined, which makes
    it hard to change to ASTDebugEventListener. I looked hard at this issue
    and it is easier to understand as one monolithic event interface for all
    possible events. Hopefully, adding ST debugging stuff won't be bad. Leave
    for future. 4/26/2006.
    """

    # Moved to version 2 for v3.1: added grammar name to enter/exit Rule
    PROTOCOL_VERSION = "2"

    def enterRule(self, grammarFileName, ruleName):
        """The parser has just entered a rule. No decision has been made about
        which alt is predicted. This is fired AFTER init actions have been
        executed. Attributes are defined and available etc...
        The grammarFileName allows composite grammars to jump around among
        multiple grammar files.
        """
        pass

    def enterAlt(self, alt):
        """Because rules can have lots of alternatives, it is very useful to
        know which alt you are entering. This is 1..n for n alts.
        """
        pass

    def exitRule(self, grammarFileName, ruleName):
        """This is the last thing executed before leaving a rule. It is
        executed even if an exception is thrown. This is triggered after
        error reporting and recovery have occurred (unless the exception is
        not caught in this rule). This implies an "exitAlt" event.
        The grammarFileName allows composite grammars to jump around among
        multiple grammar files.
        """
        pass

    def enterSubRule(self, decisionNumber):
        """Track entry into any (...) subrule or other EBNF construct"""
        pass

    def exitSubRule(self, decisionNumber):
        pass

    def enterDecision(self, decisionNumber, couldBacktrack):
        """Every decision, fixed k or arbitrary, has an enter/exit event
        so that a GUI can easily track what LT/consume events are
        associated with prediction. You will see a single enter/exit
        subrule but multiple enter/exit decision events, one for each
        loop iteration.
        """
        pass

    def exitDecision(self, decisionNumber):
        pass

    def consumeToken(self, t):
        """An input token was consumed; matched by any kind of element.
        Trigger after the token was matched by things like match(), matchAny().
        """
        pass

    def consumeHiddenToken(self, t):
        """An off-channel input token was consumed.
        Trigger after the token was matched by things like match(), matchAny().
        (unless of course the hidden token is first stuff in the input stream).
        """
        pass

    def LT(self, i, t):
        """Somebody (anybody) looked ahead. Note that this actually gets
        triggered by both LA and LT calls. The debugger will want to know
        which Token object was examined. Like consumeToken, this indicates
        what token was seen at that depth. A remote debugger cannot look
        ahead into a file it doesn't have so LT events must pass the token
        even if the info is redundant.
        For tree parsers, if the type is UP or DOWN,
        then the ID is not really meaningful as it's fixed--there is
        just one UP node and one DOWN navigation node.
        """
        pass

    def mark(self, marker):
        """The parser is going to look arbitrarily ahead; mark this location,
        the token stream's marker is sent in case you need it.
        """
        pass

    def rewind(self, marker=None):
        """After an arbitrarily long lookahead as with a cyclic DFA (or with
        any backtrack), this informs the debugger that stream should be
        rewound to the position associated with marker.
        """
        pass

    def beginBacktrack(self, level):
        pass

    def endBacktrack(self, level, successful):
        pass

    def location(self, line, pos):
        """To watch a parser move through the grammar, the parser needs to
        inform the debugger what line/charPos it is passing in the grammar.
        For now, this does not know how to switch from one grammar to the
        other and back for island grammars etc...

        This should also allow breakpoints because the debugger can stop
        the parser whenever it hits this line/pos.
        """
        pass

    def recognitionException(self, e):
        """A recognition exception occurred such as NoViableAltException. I made
        this a generic event so that I can alter the exception hierarchy later
        without having to alter all the debug objects.

        Upon error, the stack of enter rule/subrule must be properly unwound.
        If no viable alt occurs it is within an enter/exit decision, which
        also must be rewound. Even the rewind for each mark must be unwound.
        In the Java target this is pretty easy using try/finally, if a bit
        ugly in the generated code. The rewind is generated in DFA.predict()
        actually so no code needs to be generated for that. For languages
        w/o this "finally" feature (C++?), the target implementor will have
        to build an event stack or something.

        Across a socket for remote debugging, only the RecognitionException
        data fields are transmitted. The token object or whatever that
        caused the problem was the last object referenced by LT. The
        immediately preceding LT event should hold the unexpected Token or
        char.

        Here is a sample event trace for grammar:

          b : C ({;}A|B) // {;} is there to prevent A|B becoming a set
            | D
            ;

        The sequence for this rule (with no viable alt in the subrule) for
        input 'c c' (there are 3 tokens) is:

          commence
          LT(1)
          enterRule b
          location 7 1
          enter decision 3
          LT(1)
          exit decision 3
          enterAlt1
          location 7 5
          LT(1)
          consumeToken [c/<4>,1:0]
          location 7 7
          enterSubRule 2
          enter decision 2
          LT(1)
          LT(1)
          recognitionException NoViableAltException 2 1 2
          exit decision 2
          exitSubRule 2
          beginResync
          LT(1)
          consumeToken [c/<4>,1:1]
          LT(1)
          endResync
          LT(-1)
          exitRule b
          terminate
        """
        pass

    def beginResync(self):
        """Indicates the recognizer is about to consume tokens to resynchronize
        the parser. Any consume events from here until the recovered event
        are not part of the parse--they are dead tokens.
        """
        pass

    def endResync(self):
        """Indicates that the recognizer has finished consuming tokens in order
        to resynchronize. There may be multiple beginResync/endResync pairs
        before the recognizer comes out of errorRecovery mode (in which
        multiple errors are suppressed). This will be useful
        in a gui where you want to probably grey out tokens that are consumed
        but not matched to anything in grammar. Anything between
        a beginResync/endResync pair was tossed out by the parser.
        """
        pass

    def semanticPredicate(self, result, predicate):
        """A semantic predicate was evaluated with this result and action text"""
        pass

    def commence(self):
        """Announce that parsing has begun. Not technically useful except for
        sending events over a socket. A GUI for example will launch a thread
        to connect and communicate with a remote parser. The thread will want
        to notify the GUI when a connection is made. ANTLR parsers
        trigger this upon entry to the first rule (the ruleLevel is used to
        figure this out).
        """
        pass

    def terminate(self):
        """Parsing is over; successfully or not. Mostly useful for telling
        remote debugging listeners that it's time to quit. When the rule
        invocation level goes to zero at the end of a rule, we are done
        parsing.
        """
        pass

    ## T r e e  P a r s i n g

    def consumeNode(self, t):
        """Input for a tree parser is an AST, but we know nothing for sure
        about a node except its type and text (obtained from the adaptor).
        This is the analog of the consumeToken method. Again, the ID is
        the hashCode usually of the node so it only works if hashCode is
        not implemented. If the type is UP or DOWN, then
        the ID is not really meaningful as it's fixed--there is
        just one UP node and one DOWN navigation node.
        """
        pass

    ## A S T  E v e n t s

    def nilNode(self, t):
        """A nil was created (even nil nodes have a unique ID...
        they are not "null" per se). As of 4/28/2006, this
        seems to be uniquely triggered when starting a new subtree
        such as when entering a subrule in automatic mode and when
        building a tree in rewrite mode.

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only t.ID is set.
        """
        pass

    def errorNode(self, t):
        """Upon syntax error, recognizers bracket the error with an error node
        if they are building ASTs.
        """
        pass

    def createNode(self, node, token=None):
        """Announce a new node built from token elements such as type etc...

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only t.ID, type, text are
        set.
        """
        pass

    def becomeRoot(self, newRoot, oldRoot):
        """Make a node the new root of an existing root.

        Note: the newRootID parameter is possibly different
        than the TreeAdaptor.becomeRoot() newRoot parameter.
        In our case, it will always be the result of calling
        TreeAdaptor.becomeRoot() and not root_n or whatever.

        The listener should assume that this event occurs
        only when the current subrule (or rule) subtree is
        being reset to newRootID.

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only IDs are set.

        @see antlr3.tree.TreeAdaptor.becomeRoot()
        """
        pass

    def addChild(self, root, child):
        """Make childID a child of rootID.

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only IDs are set.

        @see antlr3.tree.TreeAdaptor.addChild()
        """
        pass

    def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex):
        """Set the token start/stop token index for a subtree root or node.

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only t.ID is set.
        """
        pass


class BlankDebugEventListener(DebugEventListener):
    """A blank listener that does nothing; useful for real classes so
    they don't have to have lots of blank methods and are less
    sensitive to updates to debug interface.

    Note: this class is identical to DebugEventListener and exists purely
    for compatibility with Java.
    """
    pass


class TraceDebugEventListener(DebugEventListener):
    """A listener that simply records text representations of the events.

    Useful for debugging the debugging facility ;)

    Subclasses can override the record() method (which defaults to printing to
    stdout) to record the events in a different way.
    """

    def __init__(self, adaptor=None):
        """Use *adaptor* to inspect tree nodes (default: CommonTreeAdaptor)."""
        super().__init__()

        if adaptor is None:
            adaptor = CommonTreeAdaptor()
        self.adaptor = adaptor

    def record(self, event):
        """Write the event text to stdout, one event per line."""
        sys.stdout.write(event + '\n')

    def enterRule(self, grammarFileName, ruleName):
        self.record("enterRule " + ruleName)

    def exitRule(self, grammarFileName, ruleName):
        self.record("exitRule " + ruleName)

    def enterSubRule(self, decisionNumber):
        self.record("enterSubRule")

    def exitSubRule(self, decisionNumber):
        self.record("exitSubRule")

    def location(self, line, pos):
        self.record("location {}:{}".format(line, pos))

    ## Tree parsing stuff

    def consumeNode(self, t):
        self.record("consumeNode {} {} {}".format(
            self.adaptor.getUniqueID(t),
            self.adaptor.getText(t),
            self.adaptor.getType(t)))

    def LT(self, i, t):
        self.record("LT {} {} {} {}".format(
            i,
            self.adaptor.getUniqueID(t),
            self.adaptor.getText(t),
            self.adaptor.getType(t)))

    ## AST stuff

    def nilNode(self, t):
        self.record("nilNode {}".format(self.adaptor.getUniqueID(t)))

    def createNode(self, t, token=None):
        """Record the node's text and type, or the token index when a token
        is supplied.
        """
        if token is None:
            self.record("create {}: {}, {}".format(
                self.adaptor.getUniqueID(t),
                self.adaptor.getText(t),
                self.adaptor.getType(t)))

        else:
            self.record("create {}: {}".format(
                self.adaptor.getUniqueID(t),
                token.index))

    def becomeRoot(self, newRoot, oldRoot):
        self.record("becomeRoot {}, {}".format(
            self.adaptor.getUniqueID(newRoot),
            self.adaptor.getUniqueID(oldRoot)))

    def addChild(self, root, child):
        self.record("addChild {}, {}".format(
            self.adaptor.getUniqueID(root),
            self.adaptor.getUniqueID(child)))

    def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex):
        self.record("setTokenBoundaries {}, {}, {}".format(
            self.adaptor.getUniqueID(t),
            tokenStartIndex, tokenStopIndex))


class RecordDebugEventListener(TraceDebugEventListener):
    """A listener that records events as strings in an array."""

    def __init__(self, adaptor=None):
        super().__init__(adaptor)

        # Recorded event strings, in the order they were received.
        self.events = []

    def record(self, event):
        """Append the event text to self.events instead of printing it."""
        self.events.append(event)


class DebugEventSocketProxy(DebugEventListener):
    """A proxy debug event listener that forwards events over a socket to
    a debugger (or any other listener) using a simple text-based protocol;
    one event per line. ANTLRWorks listens on server socket with a
    RemoteDebugEventSocketListener instance. These two objects must therefore
    be kept in sync. New events must be handled on both sides of socket.
    """

    DEFAULT_DEBUGGER_PORT = 49100

    def __init__(self, recognizer, adaptor=None, port=None, debug=None):
        """Prepare a proxy for *recognizer*; no socket is opened yet.

        *adaptor* is used to inspect tree nodes, *port* defaults to
        DEFAULT_DEBUGGER_PORT, and *debug* is an optional object with a
        write() method that receives a log of the traffic.
        """
        super().__init__()

        self.grammarFileName = recognizer.getGrammarFileName()

        # Almost certainly the recognizer will have adaptor set, but
        # we don't know how to cast it (Parser or TreeParser) to get
        # the adaptor field. Must be set with a constructor. :(
        self.adaptor = adaptor

        self.port = port or self.DEFAULT_DEBUGGER_PORT

        self.debug = debug

        self.socket = None
        self.connection = None
        self.input = None
        self.output = None

    def log(self, msg):
        """Write *msg* plus a newline to the debug sink, if one was given."""
        if self.debug:
            self.debug.write(msg + '\n')

    def handshake(self):
        """Listen on the port, wait for a connection and greet the peer.

        Does nothing if the listening socket has already been created.
        """
        if self.socket is None:
            # create listening socket
            self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            self.socket.bind(('', self.port))
            self.socket.listen(1)
            self.log("Waiting for incoming connection on port {}".format(self.port))

            # wait for an incoming connection
            self.connection, addr = self.socket.accept()
            self.log("Accepted connection from {}:{}".format(addr[0], addr[1]))

            self.connection.setblocking(1)
            self.connection.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1)

            self.output = self.connection.makefile('w', 1)
            self.input = self.connection.makefile('r', 1)

            self.write("ANTLR {}".format(self.PROTOCOL_VERSION))
            self.write('grammar "{}"'.format(self.grammarFileName))
            self.ack()

    def write(self, msg):
        """Log *msg*, then send it as one line and flush."""
        self.log("> {}".format(msg))
        self.output.write("{}\n".format(msg))
        self.output.flush()

    def ack(self):
        """Read one line from the peer and log it."""
        t = self.input.readline()
        self.log("< {}".format(t.rstrip()))

    def transmit(self, event):
        """Send one event line and wait for the peer's reply line."""
        self.write(event)
        self.ack()

    def commence(self):
        # don't bother sending event; listener will trigger upon connection
        pass

    def terminate(self):
        """Send the terminate event and close the connection.

        The file objects and sockets are closed even if sending fails.
        """
        try:
            self.transmit("terminate")
        finally:
            # Anything still None was never opened (handshake not run).
            for resource in (self.output, self.input,
                             self.connection, self.socket):
                if resource is not None:
                    resource.close()

    def enterRule(self, grammarFileName, ruleName):
        self.transmit("enterRule\t{}\t{}".format(grammarFileName, ruleName))

    def enterAlt(self, alt):
        self.transmit("enterAlt\t{}".format(alt))

    def exitRule(self, grammarFileName, ruleName):
        self.transmit("exitRule\t{}\t{}".format(grammarFileName, ruleName))

    def enterSubRule(self, decisionNumber):
        self.transmit("enterSubRule\t{}".format(decisionNumber))

    def exitSubRule(self, decisionNumber):
        self.transmit("exitSubRule\t{}".format(decisionNumber))

    def enterDecision(self, decisionNumber, couldBacktrack):
        self.transmit(
            "enterDecision\t{}\t{:d}".format(decisionNumber, couldBacktrack))

    def exitDecision(self, decisionNumber):
        self.transmit("exitDecision\t{}".format(decisionNumber))

    def consumeToken(self, t):
        self.transmit("consumeToken\t{}".format(self.serializeToken(t)))

    def consumeHiddenToken(self, t):
        self.transmit("consumeHiddenToken\t{}".format(self.serializeToken(t)))

    def LT(self, i, o):
        """Dispatch a lookahead event: Tree nodes go to LT_tree, everything
        else to LT_token.
        """
        if isinstance(o, Tree):
            return self.LT_tree(i, o)
        return self.LT_token(i, o)

    def LT_token(self, i, t):
        """Send an LT event for token *t*; nothing is sent when it is None."""
        if t is not None:
            self.transmit("LT\t{}\t{}".format(i, self.serializeToken(t)))

    def mark(self, i):
        self.transmit("mark\t{}".format(i))

    def rewind(self, i=None):
        """Send a rewind event, with the marker when one is given."""
        if i is not None:
            self.transmit("rewind\t{}".format(i))
        else:
            self.transmit("rewind")

    def beginBacktrack(self, level):
        self.transmit("beginBacktrack\t{}".format(level))

    def endBacktrack(self, level, successful):
        self.transmit("endBacktrack\t{}\t{}".format(
            level, '1' if successful else '0'))

    def location(self, line, pos):
        self.transmit("location\t{}\t{}".format(line, pos))

    def recognitionException(self, exc):
        """Send the exception's class name, index, line and column."""
        self.transmit('\t'.join([
            "exception",
            exc.__class__.__name__,
            str(int(exc.index)),
            str(int(exc.line)),
            str(int(exc.charPositionInLine))]))

    def beginResync(self):
        self.transmit("beginResync")

    def endResync(self):
        self.transmit("endResync")

    def semanticPredicate(self, result, predicate):
        self.transmit('\t'.join([
            "semanticPredicate",
            str(int(result)),
            self.escapeNewlines(predicate)]))

    ## A S T  P a r s i n g  E v e n t s

    def consumeNode(self, t):
        """Send a consumeNode event carrying the serialized node *t*."""
        buf = ["consumeNode"]
        self.serializeNode(buf, t)
        self.transmit(''.join(buf))

    def LT_tree(self, i, t):
        """Send a lookahead-node event for tree node *t* at depth *i*."""
        # "LN" = lookahead node; distinguishes it from LT in the protocol
        buf = ["LN\t", str(i)]
        self.serializeNode(buf, t)
        self.transmit(''.join(buf))

    def serializeNode(self, buf, t):
        """Append the wire form of node *t* to *buf*, a list of fragments.

        Appends, each preceded by a tab: unique ID, type, line, column,
        token start index, and the text (prefixed with a double quote and
        newline-escaped). Line and column are -1 when the adaptor reports
        no token for the node or the token does not carry them.
        """
        line = -1
        pos = -1
        token = self.adaptor.getToken(t)
        if token is not None:
            if token.line is not None:
                line = token.line
            if token.charPositionInLine is not None:
                pos = token.charPositionInLine

        fields = (
            self.adaptor.getUniqueID(t),
            self.adaptor.getType(t),
            line,
            pos,
            self.adaptor.getTokenStartIndex(t),
        )
        for field in fields:
            buf.append("\t")
            buf.append(str(field))

        buf.append('\t"')
        buf.append(self.escapeNewlines(self.adaptor.getText(t)))

    ## A S T  E v e n t s

    def nilNode(self, t):
        self.transmit("nilNode\t{}".format(self.adaptor.getUniqueID(t)))

    def errorNode(self, t):
        self.transmit('errorNode\t{}\t{}\t"{}'.format(
            self.adaptor.getUniqueID(t),
            INVALID_TOKEN_TYPE,
            self.escapeNewlines(t.toString())))

    def createNode(self, node, token=None):
        """Send createNode (node ID + token index) when a token is given,
        otherwise createNodeFromTokenElements (node ID, type, text).
        """
        if token is not None:
            self.transmit("createNode\t{}\t{}".format(
                self.adaptor.getUniqueID(node),
                token.index))

        else:
            # The text must be escaped: a raw newline would split the event
            # across two protocol lines.
            self.transmit('createNodeFromTokenElements\t{}\t{}\t"{}'.format(
                self.adaptor.getUniqueID(node),
                self.adaptor.getType(node),
                self.escapeNewlines(self.adaptor.getText(node))))

    def becomeRoot(self, newRoot, oldRoot):
        self.transmit("becomeRoot\t{}\t{}".format(
            self.adaptor.getUniqueID(newRoot),
            self.adaptor.getUniqueID(oldRoot)))

    def addChild(self, root, child):
        self.transmit("addChild\t{}\t{}".format(
            self.adaptor.getUniqueID(root),
            self.adaptor.getUniqueID(child)))

    def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex):
        self.transmit("setTokenBoundaries\t{}\t{}\t{}".format(
            self.adaptor.getUniqueID(t),
            tokenStartIndex, tokenStopIndex))

    ## support

    def setTreeAdaptor(self, adaptor):
        """Replace the adaptor used to inspect tree nodes."""
        self.adaptor = adaptor

    def getTreeAdaptor(self):
        """Return the adaptor used to inspect tree nodes."""
        return self.adaptor

    def serializeToken(self, t):
        """Return the tab-separated wire form of token *t*.

        Fields: index, type, channel, line, column (missing line/column are
        sent as 0), then the newline-escaped text prefixed with a double
        quote.
        """
        buf = [str(int(t.index)),
               str(int(t.type)),
               str(int(t.channel)),
               str(int(t.line or 0)),
               str(int(t.charPositionInLine or 0)),
               '"' + self.escapeNewlines(t.text)]
        return '\t'.join(buf)

    def escapeNewlines(self, txt):
        """Return *txt* with '%', newline and carriage return percent-encoded
        so it fits on one protocol line; None becomes the empty string.
        """
        if txt is None:
            return ''

        txt = txt.replace("%", "%25")   # escape all escape char ;)
        txt = txt.replace("\n", "%0A")  # escape \n
        txt = txt.replace("\r", "%0D")  # escape \r
        return txt