# begin[licence]
#
# [The "BSD licence"]
# Copyright (c) 2005-2009 Terence Parr
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# end[licence]

import socket
import sys

from antlr3 import Parser, TokenStream, RecognitionException, Token
from antlr3.tree import CommonTreeAdaptor, TreeAdaptor, Tree


class DebugParser(Parser):
    """A Parser that forwards resync, backtrack and error events to a
    debug event listener.
    """

    def __init__(self, stream, state=None, dbg=None, *args, **kwargs):
        """Create the parser.

        `stream` is wrapped in a DebugTokenStream unless it already is one.
        `dbg` is the listener to notify; remaining arguments are passed
        through to Parser.__init__ unchanged.
        """

        # wrap token stream in DebugTokenStream (unless user already did so).
        if not isinstance(stream, DebugTokenStream):
            stream = DebugTokenStream(stream, dbg)

        super(DebugParser, self).__init__(stream, state, *args, **kwargs)

        # Who to notify when events in the parser occur.
        self._dbg = None

        self.setDebugListener(dbg)

    def setDebugListener(self, dbg):
        """Provide a new debug event listener for this parser. Notify the
        input stream too that it should send events to this listener.
        """

        # Only streams that expose a `dbg` attribute (such as
        # DebugTokenStream) are told about the new listener.
        if hasattr(self.input, 'dbg'):
            self.input.dbg = dbg

        self._dbg = dbg

    def getDebugListener(self):
        """Return the listener currently receiving this parser's events."""
        return self._dbg

    dbg = property(getDebugListener, setDebugListener)

    def beginResync(self):
        """Forward the beginResync event to the listener."""
        self._dbg.beginResync()

    def endResync(self):
        """Forward the endResync event to the listener."""
        self._dbg.endResync()

    def beginBacktrack(self, level):
        """Forward the beginBacktrack event to the listener."""
        self._dbg.beginBacktrack(level)

    def endBacktrack(self, level, successful):
        """Forward the endBacktrack event to the listener."""
        self._dbg.endBacktrack(level, successful)

    def reportError(self, exc):
        """Report `exc` through Parser.reportError, then notify the listener
        if it is a RecognitionException.
        """
        Parser.reportError(self, exc)

        if isinstance(exc, RecognitionException):
            self._dbg.recognitionException(exc)


class DebugTokenStream(TokenStream):
    """A TokenStream proxy that delegates to a wrapped stream and reports
    lookahead, consume, mark and rewind operations to a debug listener.
    """

    def __init__(self, input, dbg=None):
        """Wrap the token stream `input` and report its use to `dbg`."""
        self.input = input
        self.initialStreamState = True
        # Track the last mark() call result value for use in rewind().
        self.lastMarker = None

        self._dbg = None
        self.setDebugListener(dbg)

        # force TokenStream to get at least first valid token
        # so we know if there are any hidden tokens first in the stream
        self.input.LT(1)

    def getDebugListener(self):
        """Return the listener currently receiving this stream's events."""
        return self._dbg

    def setDebugListener(self, dbg):
        """Replace the listener that receives this stream's events."""
        self._dbg = dbg

    dbg = property(getDebugListener, setDebugListener)

    def consume(self):
        """Consume one token from the wrapped stream.

        Fires consumeToken for the token that was at LT(1), followed by a
        consumeHiddenToken for every token index the wrapped stream skipped
        over while advancing.
        """
        if self.initialStreamState:
            self.consumeInitialHiddenTokens()

        a = self.input.index()
        t = self.input.LT(1)
        self.input.consume()
        b = self.input.index()
        self._dbg.consumeToken(t)

        # If the index moved by more than one, the tokens in between must be
        # off channel tokens. The range is empty when b <= a+1.
        for idx in range(a + 1, b):
            self._dbg.consumeHiddenToken(self.input.get(idx))

    def consumeInitialHiddenTokens(self):
        """consume all initial off-channel tokens"""

        firstOnChannelTokenIndex = self.input.index()
        for idx in range(firstOnChannelTokenIndex):
            self._dbg.consumeHiddenToken(self.input.get(idx))

        self.initialStreamState = False

    def LT(self, i):
        """Return the token at lookahead depth `i`, firing an LT event."""
        if self.initialStreamState:
            self.consumeInitialHiddenTokens()

        t = self.input.LT(i)
        self._dbg.LT(i, t)
        return t

    def LA(self, i):
        """Return the type of the token at lookahead depth `i`.

        This fires the same LT event as LT() does.
        """
        if self.initialStreamState:
            self.consumeInitialHiddenTokens()

        t = self.input.LT(i)
        self._dbg.LT(i, t)
        return t.type

    def get(self, i):
        """Delegate to the wrapped stream; no event is fired."""
        return self.input.get(i)

    def index(self):
        """Delegate to the wrapped stream; no event is fired."""
        return self.input.index()

    def mark(self):
        """Mark the wrapped stream, remember the marker and fire a mark
        event with it.
        """
        self.lastMarker = self.input.mark()
        self._dbg.mark(self.lastMarker)
        return self.lastMarker

    def rewind(self, marker=None):
        """Fire a rewind event, then rewind the wrapped stream."""
        self._dbg.rewind(marker)
        self.input.rewind(marker)

    def release(self, marker):
        """Do nothing; the marker is not forwarded to the wrapped stream."""
        pass

    def seek(self, index):
        """Seek the wrapped stream; no event is fired."""
        # TODO: implement seek in dbg interface
        # self._dbg.seek(index);
        self.input.seek(index)

    def size(self):
        """Delegate to the wrapped stream."""
        return self.input.size()

    def getTokenSource(self):
        """Delegate to the wrapped stream."""
        return self.input.getTokenSource()

    def getSourceName(self):
        """Return the source name reported by the token source."""
        return self.getTokenSource().getSourceName()

    def toString(self, start=None, stop=None):
        """Delegate to the wrapped stream."""
        return self.input.toString(start, stop)


class DebugTreeAdaptor(TreeAdaptor):
    """A TreeAdaptor proxy that fires debugging events to a DebugEventListener
    delegate and uses the TreeAdaptor delegate to do the actual work. All
    AST events are triggered by this adaptor; no code gen changes are needed
    in generated rules. Debugging events are triggered *after* invoking
    tree adaptor routines.

    Trees created with actions in rewrite actions like "-> ^(ADD {foo} {bar})"
    cannot be tracked as they might not use the adaptor to create foo, bar.
    The debug listener has to deal with tree node IDs for which it did
    not see a createNode event. A single <unknown> node is sufficient even
    if it represents a whole tree.
    """

    def __init__(self, dbg, adaptor):
        """Store the listener `dbg` and the delegate `adaptor`."""
        self.dbg = dbg
        self.adaptor = adaptor

    def createWithPayload(self, payload):
        """Create a node for the token `payload` and fire createNode."""
        if payload.getTokenIndex() < 0:
            # could be token conjured up during error recovery
            return self.createFromType(payload.getType(), payload.getText())

        node = self.adaptor.createWithPayload(payload)
        self.dbg.createNode(node, payload)
        return node

    def createFromToken(self, tokenType, fromToken, text=None):
        """Delegate node creation and fire createNode for the result."""
        node = self.adaptor.createFromToken(tokenType, fromToken, text)
        self.dbg.createNode(node)
        return node

    def createFromType(self, tokenType, text):
        """Delegate node creation and fire createNode for the result."""
        node = self.adaptor.createFromType(tokenType, text)
        self.dbg.createNode(node)
        return node

    def errorNode(self, input, start, stop, exc):
        """Delegate error node creation and fire errorNode for the result.

        No event is fired when the delegate returns None.
        """
        node = self.adaptor.errorNode(input, start, stop, exc)
        if node is not None:
            self.dbg.errorNode(node)

        return node

    def dupTree(self, tree):
        """Duplicate `tree` through the delegate and replay its construction
        to the listener.
        """
        t = self.adaptor.dupTree(tree)
        # walk the tree and emit create and add child events
        # to simulate what dupTree has done. dupTree does not call this debug
        # adapter so I must simulate.
        self.simulateTreeConstruction(t)
        return t

    def simulateTreeConstruction(self, t):
        """^(A B C): emit create A, create B, add child, ..."""
        self.dbg.createNode(t)
        for i in range(self.adaptor.getChildCount(t)):
            child = self.adaptor.getChild(t, i)
            self.simulateTreeConstruction(child)
            self.dbg.addChild(t, child)

    def dupNode(self, treeNode):
        """Duplicate a single node and fire createNode for the copy."""
        d = self.adaptor.dupNode(treeNode)
        self.dbg.createNode(d)
        return d

    def nil(self):
        """Create a nil node through the delegate and fire nilNode."""
        node = self.adaptor.nil()
        self.dbg.nilNode(node)
        return node

    def isNil(self, tree):
        """Delegate to the wrapped adaptor; no event is fired."""
        return self.adaptor.isNil(tree)

    def addChild(self, t, child):
        """Add `child` to `t` and fire addChild.

        A Token child is first turned into a node with createWithPayload.
        Nothing happens when either argument is None.
        """
        if isinstance(child, Token):
            n = self.createWithPayload(child)
            self.addChild(t, n)
            return

        if t is None or child is None:
            return

        self.adaptor.addChild(t, child)
        self.dbg.addChild(t, child)

    def becomeRoot(self, newRoot, oldRoot):
        """Make `newRoot` the root of `oldRoot` and fire becomeRoot.

        A Token `newRoot` is first turned into a node with
        createWithPayload; that node is what gets returned.
        """
        if isinstance(newRoot, Token):
            n = self.createWithPayload(newRoot)
            self.adaptor.becomeRoot(n, oldRoot)
        else:
            n = self.adaptor.becomeRoot(newRoot, oldRoot)

        # NOTE(review): the listener receives the original newRoot argument
        # (possibly a Token), not the node n -- confirm this is intended.
        self.dbg.becomeRoot(newRoot, oldRoot)
        return n

    def rulePostProcessing(self, root):
        """Delegate to the wrapped adaptor; no event is fired."""
        return self.adaptor.rulePostProcessing(root)

    def getType(self, t):
        """Delegate to the wrapped adaptor; no event is fired."""
        return self.adaptor.getType(t)

    def setType(self, t, type):
        """Delegate to the wrapped adaptor; no event is fired."""
        self.adaptor.setType(t, type)

    def getText(self, t):
        """Delegate to the wrapped adaptor; no event is fired."""
        return self.adaptor.getText(t)

    def setText(self, t, text):
        """Delegate to the wrapped adaptor; no event is fired."""
        self.adaptor.setText(t, text)

    def getToken(self, t):
        """Delegate to the wrapped adaptor; no event is fired."""
        return self.adaptor.getToken(t)

    def setTokenBoundaries(self, t, startToken, stopToken):
        """Set the token boundaries of `t` through the delegate.

        The setTokenBoundaries event is fired only when the node and both
        tokens are present, since it reports the tokens' indexes.
        """
        self.adaptor.setTokenBoundaries(t, startToken, stopToken)
        if t is not None and startToken is not None and stopToken is not None:
            self.dbg.setTokenBoundaries(
                t, startToken.getTokenIndex(),
                stopToken.getTokenIndex())

    def getTokenStartIndex(self, t):
        """Delegate to the wrapped adaptor; no event is fired."""
        return self.adaptor.getTokenStartIndex(t)

    def getTokenStopIndex(self, t):
        """Delegate to the wrapped adaptor; no event is fired."""
        return self.adaptor.getTokenStopIndex(t)

    def getChild(self, t, i):
        """Delegate to the wrapped adaptor; no event is fired."""
        return self.adaptor.getChild(t, i)

    def setChild(self, t, i, child):
        """Delegate to the wrapped adaptor; no event is fired."""
        self.adaptor.setChild(t, i, child)

    def deleteChild(self, t, i):
        """Delegate to the wrapped adaptor; no event is fired."""
        return self.adaptor.deleteChild(t, i)

    def getChildCount(self, t):
        """Delegate to the wrapped adaptor; no event is fired."""
        return self.adaptor.getChildCount(t)

    def getUniqueID(self, node):
        """Delegate to the wrapped adaptor; no event is fired."""
        return self.adaptor.getUniqueID(node)

    def getParent(self, t):
        """Delegate to the wrapped adaptor; no event is fired."""
        return self.adaptor.getParent(t)

    def getChildIndex(self, t):
        """Delegate to the wrapped adaptor; no event is fired."""
        return self.adaptor.getChildIndex(t)

    def setParent(self, t, parent):
        """Delegate to the wrapped adaptor; no event is fired."""
        self.adaptor.setParent(t, parent)

    def setChildIndex(self, t, index):
        """Delegate to the wrapped adaptor; no event is fired."""
        self.adaptor.setChildIndex(t, index)

    def replaceChildren(self, parent, startChildIndex, stopChildIndex, t):
        """Delegate to the wrapped adaptor; no event is fired."""
        self.adaptor.replaceChildren(parent, startChildIndex, stopChildIndex, t)

    ## support

    def getDebugListener(self):
        """Return the listener currently receiving this adaptor's events."""
        return self.dbg

    def setDebugListener(self, dbg):
        """Replace the listener that receives this adaptor's events."""
        self.dbg = dbg

    def getTreeAdaptor(self):
        """Return the wrapped adaptor that does the actual tree work."""
        return self.adaptor


class DebugEventListener(object):
    """All debugging events that a recognizer can trigger.

    I did not create a separate AST debugging interface as it would create
    lots of extra classes and DebugParser has a dbg var defined, which makes
    it hard to change to ASTDebugEventListener.  I looked hard at this issue
    and it is easier to understand as one monolithic event interface for all
    possible events.  Hopefully, adding ST debugging stuff won't be bad.  Leave
    for future. 4/26/2006.
    """

    # Moved to version 2 for v3.1: added grammar name to enter/exit Rule
    PROTOCOL_VERSION = "2"

    def enterRule(self, grammarFileName, ruleName):
        """The parser has just entered a rule. No decision has been made about
        which alt is predicted.  This is fired AFTER init actions have been
        executed.  Attributes are defined and available etc...
        The grammarFileName allows composite grammars to jump around among
        multiple grammar files.
        """
        pass

    def enterAlt(self, alt):
        """Because rules can have lots of alternatives, it is very useful to
        know which alt you are entering.  This is 1..n for n alts.
        """
        pass

    def exitRule(self, grammarFileName, ruleName):
        """This is the last thing executed before leaving a rule.  It is
        executed even if an exception is thrown.  This is triggered after
        error reporting and recovery have occurred (unless the exception is
        not caught in this rule).  This implies an "exitAlt" event.
        The grammarFileName allows composite grammars to jump around among
        multiple grammar files.
        """
        pass

    def enterSubRule(self, decisionNumber):
        """Track entry into any (...) subrule or other EBNF construct"""
        pass

    def exitSubRule(self, decisionNumber):
        pass

    def enterDecision(self, decisionNumber, couldBacktrack):
        """Every decision, fixed k or arbitrary, has an enter/exit event
        so that a GUI can easily track what LT/consume events are
        associated with prediction.  You will see a single enter/exit
        subrule but multiple enter/exit decision events, one for each
        loop iteration.
        """
        pass

    def exitDecision(self, decisionNumber):
        pass

    def consumeToken(self, t):
        """An input token was consumed; matched by any kind of element.
        Trigger after the token was matched by things like match(), matchAny().
        """
        pass

    def consumeHiddenToken(self, t):
        """An off-channel input token was consumed.
        Trigger after the token was matched by things like match(), matchAny().
        (unless of course the hidden token is first stuff in the input stream).
        """
        pass

    def LT(self, i, t):
        """Somebody (anybody) looked ahead.  Note that this actually gets
        triggered by both LA and LT calls.  The debugger will want to know
        which Token object was examined.  Like consumeToken, this indicates
        what token was seen at that depth.  A remote debugger cannot look
        ahead into a file it doesn't have so LT events must pass the token
        even if the info is redundant.

        The same event is used when a tree parser looked ahead; `t` is then
        a tree node.  If the type is UP or DOWN, then the ID is not really
        meaningful as it's fixed--there is just one UP node and one DOWN
        navigation node.
        """
        pass

    def mark(self, marker):
        """The parser is going to look arbitrarily ahead; mark this location,
        the token stream's marker is sent in case you need it.
        """
        pass

    def rewind(self, marker=None):
        """After an arbitrarily long lookahead as with a cyclic DFA (or with
        any backtrack), this informs the debugger that stream should be
        rewound to the position associated with marker.
        """
        pass

    def beginBacktrack(self, level):
        pass

    def endBacktrack(self, level, successful):
        pass

    def location(self, line, pos):
        """To watch a parser move through the grammar, the parser needs to
        inform the debugger what line/charPos it is passing in the grammar.
        For now, this does not know how to switch from one grammar to the
        other and back for island grammars etc...

        This should also allow breakpoints because the debugger can stop
        the parser whenever it hits this line/pos.
        """
        pass

    def recognitionException(self, e):
        """A recognition exception occurred such as NoViableAltException.  I made
        this a generic event so that I can alter the exception hierarchy later
        without having to alter all the debug objects.

        Upon error, the stack of enter rule/subrule must be properly unwound.
        If no viable alt occurs it is within an enter/exit decision, which
        also must be rewound.  Even the rewind for each mark must be unwound.
        In the Java target this is pretty easy using try/finally, if a bit
        ugly in the generated code.  The rewind is generated in DFA.predict()
        actually so no code needs to be generated for that.  For languages
        w/o this "finally" feature (C++?), the target implementor will have
        to build an event stack or something.

        Across a socket for remote debugging, only the RecognitionException
        data fields are transmitted.  The token object or whatever that
        caused the problem was the last object referenced by LT.  The
        immediately preceding LT event should hold the unexpected Token or
        char.

        Here is a sample event trace for grammar:

          b : C ({;}A|B) // {;} is there to prevent A|B becoming a set
            | D
            ;

        The sequence for this rule (with no viable alt in the subrule) for
        input 'c c' (there are 3 tokens) is:

          commence
          LT(1)
          enterRule b
          location 7 1
          enter decision 3
          LT(1)
          exit decision 3
          enterAlt1
          location 7 5
          LT(1)
          consumeToken [c/<4>,1:0]
          location 7 7
          enterSubRule 2
          enter decision 2
          LT(1)
          LT(1)
          recognitionException NoViableAltException 2 1 2
          exit decision 2
          exitSubRule 2
          beginResync
          LT(1)
          consumeToken [c/<4>,1:1]
          LT(1)
          endResync
          LT(-1)
          exitRule b
          terminate
        """
        pass

    def beginResync(self):
        """Indicates the recognizer is about to consume tokens to resynchronize
        the parser.  Any consume events from here until the recovered event
        are not part of the parse--they are dead tokens.
        """
        pass

    def endResync(self):
        """Indicates that the recognizer has finished consuming tokens in order
        to resynchronize.  There may be multiple beginResync/endResync pairs
        before the recognizer comes out of errorRecovery mode (in which
        multiple errors are suppressed).  This will be useful
        in a gui where you want to probably grey out tokens that are consumed
        but not matched to anything in grammar.  Anything between
        a beginResync/endResync pair was tossed out by the parser.
        """
        pass

    def semanticPredicate(self, result, predicate):
        """A semantic predicate was evaluated with this result and action text"""
        pass

    def commence(self):
        """Announce that parsing has begun.  Not technically useful except for
        sending events over a socket.  A GUI for example will launch a thread
        to connect and communicate with a remote parser.  The thread will want
        to notify the GUI when a connection is made.  ANTLR parsers
        trigger this upon entry to the first rule (the ruleLevel is used to
        figure this out).
        """
        pass

    def terminate(self):
        """Parsing is over; successfully or not.  Mostly useful for telling
        remote debugging listeners that it's time to quit.  When the rule
        invocation level goes to zero at the end of a rule, we are done
        parsing.
        """
        pass

    ## T r e e  P a r s i n g

    def consumeNode(self, t):
        """Input for a tree parser is an AST, but we know nothing for sure
        about a node except its type and text (obtained from the adaptor).
        This is the analog of the consumeToken method.  Again, the ID is
        the hashCode usually of the node so it only works if hashCode is
        not implemented.  If the type is UP or DOWN, then
        the ID is not really meaningful as it's fixed--there is
        just one UP node and one DOWN navigation node.
        """
        pass

    ## A S T  E v e n t s

    def nilNode(self, t):
        """A nil was created (even nil nodes have a unique ID...
        they are not "null" per se).  As of 4/28/2006, this
        seems to be uniquely triggered when starting a new subtree
        such as when entering a subrule in automatic mode and when
        building a tree in rewrite mode.

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only t.ID is set.
        """
        pass

    def errorNode(self, t):
        """Upon syntax error, recognizers bracket the error with an error node
        if they are building ASTs.
        """
        pass

    def createNode(self, node, token=None):
        """Announce a new node built from token elements such as type etc...

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only t.ID, type, text are
        set.
        """
        pass

    def becomeRoot(self, newRoot, oldRoot):
        """Make a node the new root of an existing root.

        Note: the newRootID parameter is possibly different
        than the TreeAdaptor.becomeRoot() newRoot parameter.
        In our case, it will always be the result of calling
        TreeAdaptor.becomeRoot() and not root_n or whatever.

        The listener should assume that this event occurs
        only when the current subrule (or rule) subtree is
        being reset to newRootID.

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only IDs are set.

        @see antlr3.tree.TreeAdaptor.becomeRoot()
        """
        pass

    def addChild(self, root, child):
        """Make childID a child of rootID.

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only IDs are set.

        @see antlr3.tree.TreeAdaptor.addChild()
        """
        pass

    def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex):
        """Set the token start/stop token index for a subtree root or node.

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only t.ID is set.
        """
        pass


class BlankDebugEventListener(DebugEventListener):
    """A blank listener that does nothing; useful for real classes so
    they don't have to have lots of blank methods and are less
    sensitive to updates to debug interface.

    Note: this class is identical to DebugEventListener and exists purely
    for compatibility with Java.
    """
    pass


class TraceDebugEventListener(DebugEventListener):
    """A listener that simply records text representations of the events.

    Useful for debugging the debugging facility ;)

    Subclasses can override the record() method (which defaults to printing to
    stdout) to record the events in a different way.
    """

    def __init__(self, adaptor=None):
        """Use `adaptor` to describe tree nodes; a CommonTreeAdaptor is
        created when none is given.
        """
        super(TraceDebugEventListener, self).__init__()

        if adaptor is None:
            adaptor = CommonTreeAdaptor()
        self.adaptor = adaptor

    def record(self, event):
        """Write the event text to stdout, one event per line."""
        sys.stdout.write(event + '\n')

    def enterRule(self, grammarFileName, ruleName):
        self.record("enterRule " + ruleName)

    def exitRule(self, grammarFileName, ruleName):
        self.record("exitRule " + ruleName)

    def enterSubRule(self, decisionNumber):
        self.record("enterSubRule")

    def exitSubRule(self, decisionNumber):
        self.record("exitSubRule")

    def location(self, line, pos):
        self.record("location %s:%s" % (line, pos))

    ## Tree parsing stuff

    def consumeNode(self, t):
        self.record("consumeNode %s %s %s" % (
            self.adaptor.getUniqueID(t),
            self.adaptor.getText(t),
            self.adaptor.getType(t)))

    def LT(self, i, t):
        self.record("LT %s %s %s %s" % (
            i,
            self.adaptor.getUniqueID(t),
            self.adaptor.getText(t),
            self.adaptor.getType(t)))

    ## AST stuff

    def nilNode(self, t):
        self.record("nilNode %s" % self.adaptor.getUniqueID(t))

    def createNode(self, t, token=None):
        if token is None:
            self.record("create %s: %s, %s" % (
                self.adaptor.getUniqueID(t),
                self.adaptor.getText(t),
                self.adaptor.getType(t)))

        else:
            self.record("create %s: %s" % (
                self.adaptor.getUniqueID(t),
                token.getTokenIndex()))

    def becomeRoot(self, newRoot, oldRoot):
        self.record("becomeRoot %s, %s" % (
            self.adaptor.getUniqueID(newRoot),
            self.adaptor.getUniqueID(oldRoot)))

    def addChild(self, root, child):
        self.record("addChild %s, %s" % (
            self.adaptor.getUniqueID(root),
            self.adaptor.getUniqueID(child)))

    def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex):
        self.record("setTokenBoundaries %s, %s, %s" % (
            self.adaptor.getUniqueID(t),
            tokenStartIndex, tokenStopIndex))


class RecordDebugEventListener(TraceDebugEventListener):
    """A listener that records events as strings in an array."""

    def __init__(self, adaptor=None):
        super(RecordDebugEventListener, self).__init__(adaptor)

        # Event strings in the order they were recorded.
        self.events = []

    def record(self, event):
        """Append the event text to self.events instead of printing it."""
        self.events.append(event)


class DebugEventSocketProxy(DebugEventListener):
    """A proxy debug event listener that forwards events over a socket to
    a debugger (or any other listener) using a simple text-based protocol;
    one event per line.  ANTLRWorks listens on server socket with a
    RemoteDebugEventSocketListener instance.  These two objects must therefore
    be kept in sync.  New events must be handled on both sides of socket.
    """

    DEFAULT_DEBUGGER_PORT = 49100

    def __init__(self, recognizer, adaptor=None, port=None,
                 debug=None):
        """Prepare the proxy; no socket is opened until handshake().

        `recognizer` supplies the grammar file name.  `adaptor` is needed
        for the AST and tree parsing events.  `port` defaults to
        DEFAULT_DEBUGGER_PORT.  `debug`, when given, is a file-like object
        that receives a log of the traffic.
        """
        super(DebugEventSocketProxy, self).__init__()

        self.grammarFileName = recognizer.getGrammarFileName()

        # Almost certainly the recognizer will have adaptor set, but
        # we don't know how to cast it (Parser or TreeParser) to get
        # the adaptor field.  Must be set with a constructor. :(
        self.adaptor = adaptor

        self.port = port or self.DEFAULT_DEBUGGER_PORT

        self.debug = debug

        self.socket = None
        self.connection = None
        self.input = None
        self.output = None

    def log(self, msg):
        """Write `msg` to the debug log, if one was configured."""
        if self.debug is not None:
            self.debug.write(msg + '\n')

    def handshake(self):
        """Wait for a debugger to connect, then send the protocol version
        and grammar name and wait for the acknowledgement.

        Does nothing if the listening socket already exists.
        """
        if self.socket is None:
            # create listening socket
            self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            self.socket.bind(('', self.port))
            self.socket.listen(1)
            self.log("Waiting for incoming connection on port %d" % self.port)

            # wait for an incoming connection
            self.connection, addr = self.socket.accept()
            self.log("Accepted connection from %s:%d" % addr)

            self.connection.setblocking(1)
            self.connection.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1)

            # FIXME(pink): wrap into utf8 encoding stream
            # NOTE: unbuffered (0) text-mode makefile() is Python 2 usage;
            # Python 3 only allows buffering=0 for binary modes.
            self.output = self.connection.makefile('w', 0)
            self.input = self.connection.makefile('r', 0)

            self.write("ANTLR %s" % self.PROTOCOL_VERSION)
            self.write("grammar \"%s" % self.grammarFileName)
            self.ack()

    def write(self, msg):
        """Log and send one protocol line, flushing immediately."""
        self.log("> %s" % msg)
        self.output.write("%s\n" % msg)
        self.output.flush()

    def ack(self):
        """Block until the remote side sends a line, and log it."""
        t = self.input.readline()
        self.log("< %s" % t.rstrip())

    def transmit(self, event):
        """Send one event line and wait for its acknowledgement."""
        self.write(event)
        self.ack()

    def commence(self):
        # don't bother sending event; listener will trigger upon connection
        pass

    def terminate(self):
        """Send the terminate event and close the streams and sockets."""
        self.transmit("terminate")
        self.output.close()
        self.input.close()
        self.connection.close()
        self.socket.close()

    def enterRule(self, grammarFileName, ruleName):
        self.transmit("enterRule\t%s\t%s" % (grammarFileName, ruleName))

    def enterAlt(self, alt):
        self.transmit("enterAlt\t%d" % alt)

    def exitRule(self, grammarFileName, ruleName):
        self.transmit("exitRule\t%s\t%s" % (grammarFileName, ruleName))

    def enterSubRule(self, decisionNumber):
        self.transmit("enterSubRule\t%d" % decisionNumber)

    def exitSubRule(self, decisionNumber):
        self.transmit("exitSubRule\t%d" % decisionNumber)

    def enterDecision(self, decisionNumber, couldBacktrack):
        self.transmit(
            "enterDecision\t%d\t%d" % (decisionNumber, couldBacktrack))

    def exitDecision(self, decisionNumber):
        self.transmit("exitDecision\t%d" % decisionNumber)

    def consumeToken(self, t):
        self.transmit("consumeToken\t%s" % self.serializeToken(t))

    def consumeHiddenToken(self, t):
        self.transmit("consumeHiddenToken\t%s" % self.serializeToken(t))

    def LT(self, i, o):
        """Dispatch a lookahead event: Tree instances go to LT_tree,
        everything else is treated as a token.
        """
        if isinstance(o, Tree):
            return self.LT_tree(i, o)
        return self.LT_token(i, o)

    def LT_token(self, i, t):
        """Send a token lookahead event; nothing is sent when `t` is None."""
        if t is not None:
            self.transmit("LT\t%d\t%s" % (i, self.serializeToken(t)))

    def mark(self, i):
        self.transmit("mark\t%d" % i)

    def rewind(self, i=None):
        if i is not None:
            self.transmit("rewind\t%d" % i)
        else:
            self.transmit("rewind")

    def beginBacktrack(self, level):
        self.transmit("beginBacktrack\t%d" % level)

    def endBacktrack(self, level, successful):
        # The success flag goes over the wire as '1' or '0'.
        flag = '1' if successful else '0'
        self.transmit("endBacktrack\t%d\t%s" % (level, flag))

    def location(self, line, pos):
        self.transmit("location\t%d\t%d" % (line, pos))

    def recognitionException(self, exc):
        """Send the exception class name with its index, line and column."""
        self.transmit('\t'.join([
            "exception",
            exc.__class__.__name__,
            str(int(exc.index)),
            str(int(exc.line)),
            str(int(exc.charPositionInLine))]))

    def beginResync(self):
        self.transmit("beginResync")

    def endResync(self):
        self.transmit("endResync")

    def semanticPredicate(self, result, predicate):
        self.transmit('\t'.join([
            "semanticPredicate",
            str(int(result)),
            self.escapeNewlines(predicate)]))

    ## A S T  P a r s i n g  E v e n t s

    def consumeNode(self, t):
        """Send a consumeNode event describing the tree node `t`."""
        buf = ["consumeNode"]
        self.serializeNode(buf, t)
        self.transmit(''.join(buf))

    def LT_tree(self, i, t):
        """Send a tree lookahead event for node `t` at depth `i`."""
        # lookahead node; distinguish from LT in protocol
        buf = ["LN\t", str(i)]
        self.serializeNode(buf, t)
        self.transmit(''.join(buf))

    def serializeNode(self, buf, t):
        """Append the wire form of tree node `t` to the list `buf`.

        The fields, each preceded by a tab, are: unique ID, type, line,
        column, token start index and the quoted, escaped text.  Line and
        column are -1 when the node has no token or the token does not
        know them.
        """
        line = -1
        pos = -1
        token = self.adaptor.getToken(t)
        if token is not None:
            tokenLine = token.getLine()
            tokenPos = token.getCharPositionInLine()
            if tokenLine is not None:
                line = tokenLine
            if tokenPos is not None:
                pos = tokenPos

        buf.extend([
            "\t", str(self.adaptor.getUniqueID(t)),
            "\t", str(self.adaptor.getType(t)),
            "\t", str(line),
            "\t", str(pos),
            "\t", str(self.adaptor.getTokenStartIndex(t)),
            "\t\"", self.escapeNewlines(self.adaptor.getText(t))])

    ## A S T  E v e n t s

    def nilNode(self, t):
        self.transmit("nilNode\t%d" % self.adaptor.getUniqueID(t))

    def errorNode(self, t):
        self.transmit("errorNode\t%d\t%d\t\"%s" % (
            self.adaptor.getUniqueID(t),
            Token.INVALID_TOKEN_TYPE,
            self.escapeNewlines(t.toString())))

    def createNode(self, node, token=None):
        """Send a createNode event.

        With a token only the node ID and token index are sent; without
        one the node's type and text are sent instead.
        """
        if token is not None:
            self.transmit("createNode\t%d\t%d" % (
                self.adaptor.getUniqueID(node),
                token.getTokenIndex()))

        else:
            # The text must be escaped: a raw newline would split the event
            # across two protocol lines.
            self.transmit("createNodeFromTokenElements\t%d\t%d\t\"%s" % (
                self.adaptor.getUniqueID(node),
                self.adaptor.getType(node),
                self.escapeNewlines(self.adaptor.getText(node))))

    def becomeRoot(self, newRoot, oldRoot):
        self.transmit("becomeRoot\t%d\t%d" % (
            self.adaptor.getUniqueID(newRoot),
            self.adaptor.getUniqueID(oldRoot)))

    def addChild(self, root, child):
        self.transmit("addChild\t%d\t%d" % (
            self.adaptor.getUniqueID(root),
            self.adaptor.getUniqueID(child)))

    def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex):
        self.transmit("setTokenBoundaries\t%d\t%d\t%d" % (
            self.adaptor.getUniqueID(t),
            tokenStartIndex, tokenStopIndex))

    ## support

    def setTreeAdaptor(self, adaptor):
        """Replace the adaptor used to describe tree nodes."""
        self.adaptor = adaptor

    def getTreeAdaptor(self):
        """Return the adaptor used to describe tree nodes."""
        return self.adaptor

    def serializeToken(self, t):
        """Return the tab-separated wire form of token `t`: index, type,
        channel, line, column and the quoted, escaped text.

        A missing line or column is sent as 0.
        """
        buf = [str(int(t.getTokenIndex())),
               str(int(t.getType())),
               str(int(t.getChannel())),
               str(int(t.getLine() or 0)),
               str(int(t.getCharPositionInLine() or 0)),
               '\"' + self.escapeNewlines(t.getText())]
        return '\t'.join(buf)

    def escapeNewlines(self, txt):
        """Return `txt` with '%', newline and carriage return percent-escaped
        so it fits on one protocol line; None becomes the empty string.
        """
        if txt is None:
            return ''

        txt = txt.replace("%", "%25")   # escape all escape char ;)
        txt = txt.replace("\n", "%0A")  # escape \n
        txt = txt.replace("\r", "%0D")  # escape \r
        return txt