/* Useful for dumping out the input stream after doing some
 * augmentation or other manipulations.
 *
 * You can insert stuff, replace, and delete chunks.  Note that the
 * operations are done lazily--only if you convert the buffer to a
 * String.  This is very efficient because you are not moving data around
 * all the time.  As the buffer of tokens is converted to strings, the
 * toString() method(s) check to see if there is an operation at the
 * current index.  If so, the operation is done and then normal String
 * rendering continues on the buffer.  This is like having multiple Turing
 * machine instruction streams (programs) operating on a single input tape. :)
 *
 * Since the operations are done lazily at toString-time, operations do not
 * screw up the token index values.  That is, an insert operation at token
 * index i does not change the index values for tokens i+1..n-1.
 *
 * Because operations never actually alter the buffer, you may always get
 * the original token stream back without undoing anything.  Since
 * the instructions are queued up, you can easily simulate transactions and
 * roll back any changes if there is an error just by removing instructions.
 * For example,
 *
 *   CharStream input = new ANTLRFileStream("input");
 *   TLexer lex = new TLexer(input);
 *   TokenRewriteStream tokens = new TokenRewriteStream(lex);
 *   T parser = new T(tokens);
 *   parser.startRule();
 *
 * Then in the rules, you can execute
 *      Token t,u;
 *      ...
 *      input.insertAfter(t, "text to put after t");}
 *      input.insertAfter(u, "text after u");}
 *      System.out.println(tokens.toString());
 *
 * Actually, you have to cast the 'input' to a TokenRewriteStream. :(
 *
 * You can also have multiple "instruction streams" and get multiple
 * rewrites from a single pass over the input.  Just name the instruction
 * streams and use that name again when printing the buffer.  This could be
 * useful for generating a C file and also its header file--all from the
 * same buffer:
 *
 *      tokens.insertAfter("pass1", t, "text to put after t");}
 *      tokens.insertAfter("pass2", u, "text after u");}
 *      System.out.println(tokens.toString("pass1"));
 *      System.out.println(tokens.toString("pass2"));
 *
 * If you don't use named rewrite streams, a "default" stream is used as
 * the first example shows.
 */

org.antlr.runtime.TokenRewriteStream = function() {
    var sup = org.antlr.runtime.TokenRewriteStream.superclass;

    /** You may have multiple, named streams of rewrite operations.
     *  I'm calling these things "programs."
     *  Maps String (name) -> rewrite (List)
     */
    this.programs = null;

    /** Map String (program name) -> Integer index */
    this.lastRewriteTokenIndexes = null;

    // The superclass constructor is only invoked when arguments were
    // supplied; init() always runs so the program tables exist.
    if (arguments.length===0) {
        this.init();
    } else {
        sup.constructor.apply(this, arguments);
        this.init();
    }
};

(function(){
var trs = org.antlr.runtime.TokenRewriteStream;

org.antlr.lang.augmentObject(trs, {
    DEFAULT_PROGRAM_NAME: "default",
    PROGRAM_INIT_SIZE: 100,
    MIN_TOKEN_INDEX: 0
});

//
// Define the rewrite operation hierarchy
//

/** Base class of all rewrite instructions.
 *  @param index token index the instruction is attached to.
 *  @param text  text associated with the instruction (may be undefined).
 */
trs.RewriteOperation = function(index, text) {
    this.index = index;
    this.text = text;
};

/** Execute the rewrite operation by possibly adding to the buffer.
 *  Return the index of the next token to operate on.
 */
trs.RewriteOperation.prototype = {
    execute: function(buf) {
        return this.index;
    },
    /** Returns only the text of the operation. */
    toString: function() {
        return this.text;
    }
};

/** Emits its text and leaves the token cursor on the same token, so the
 *  token at `index` is still rendered after the inserted text.
 */
trs.InsertBeforeOp = function(index, text) {
    trs.InsertBeforeOp.superclass.constructor.call(this, index, text);
};
org.antlr.lang.extend(trs.InsertBeforeOp, trs.RewriteOperation, {
    execute: function(buf) {
        buf.push(this.text);
        return this.index;
    }
});

/** I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
 *  instructions.
 *
 *  execute() emits the replacement text (if any) and moves the token
 *  cursor past `to`, which skips the tokens in from..to.
 */
trs.ReplaceOp = function(from, to, text) {
    trs.ReplaceOp.superclass.constructor.call(this, from, text);
    this.lastIndex = to;
};
org.antlr.lang.extend(trs.ReplaceOp, trs.RewriteOperation, {
    execute: function(buf) {
        if (org.antlr.lang.isValue(this.text)) {
            buf.push(this.text);
        }
        return this.lastIndex+1;
    }
});

/** A ReplaceOp constructed without replacement text. */
trs.DeleteOp = function(from, to) {
    trs.DeleteOp.superclass.constructor.call(this, from, to);
};
org.antlr.lang.extend(trs.DeleteOp, trs.ReplaceOp);

org.antlr.lang.extend(trs, org.antlr.runtime.CommonTokenStream, {
    /** Create the program table (with an empty default program) and the
     *  table of last-rewrite indexes.
     */
    init: function() {
        this.programs = {};
        this.programs[trs.DEFAULT_PROGRAM_NAME] = [];
        this.lastRewriteTokenIndexes = {};
    },

    /** Rollback the instruction stream for a program so that
     *  the indicated instruction (via instructionIndex) is no
     *  longer in the stream.  UNTESTED!
     *
     *  Signatures:
     *    rollback(instructionIndex)               -- default program
     *    rollback(programName, instructionIndex)
     *
     *  Does nothing when the named program does not exist.
     */
    rollback: function() {
        var programName,
            instructionIndex;

        if (arguments.length===1) {
            programName = trs.DEFAULT_PROGRAM_NAME;
            instructionIndex = arguments[0];
        } else if (arguments.length===2) {
            programName = arguments[0];
            instructionIndex = arguments[1];
        }
        var is = this.programs[programName];
        if (is) {
            // keep only the instructions before instructionIndex
            this.programs[programName] =
                is.slice(trs.MIN_TOKEN_INDEX, instructionIndex);
        }
    },

    /** Reset the program so that no instructions exist */
    deleteProgram: function(programName) {
        programName = programName || trs.DEFAULT_PROGRAM_NAME;
        this.rollback(programName, trs.MIN_TOKEN_INDEX);
    },

    /** Add an instruction to the rewrite instruction list, keeping the
     *  list ordered by token index (found with a linear scan).
     *  The list is ordered so that toString() can be done efficiently.
     *
     *  Signatures:
     *    addToSortedRewriteList(op)               -- default program
     *    addToSortedRewriteList(programName, op)
     *
     *  When there are multiple instructions at the same index, the
     *  instructions must be ordered to ensure proper behavior:
     *   - Insert-before operations come before any replace / delete
     *     instruction at that index.
     *   - A new insert is placed in front of the existing instructions
     *     at that index, so it is emitted before earlier inserts.
     *   - A ReplaceOp overwrites any previous ReplaceOp at that index.
     *     Since DeleteOp extends ReplaceOp, checking for ReplaceOp
     *     covers DeleteOp at the same time. :)
     */
    addToSortedRewriteList: function() {
        var programName,
            op;
        if (arguments.length===1) {
            programName = trs.DEFAULT_PROGRAM_NAME;
            op = arguments[0];
        } else if (arguments.length===2) {
            programName = arguments[0];
            op = arguments[1];
        }

        var rewrites = this.getProgram(programName);
        var len, pos, searchOp, replaced, prevOp, i;
        for (pos=0, len=rewrites.length; pos<len; pos++) {
            searchOp = rewrites[pos];
            if (searchOp.index===op.index) {
                // now pos is the index in rewrites of first op with op.index

                // an instruction operating already on that index was found;
                // make this one happen after all the others
                if (op instanceof trs.ReplaceOp) {
                    replaced = false;
                    // look for an existing replace among the ops that
                    // share op.index (scan with i, not pos)
                    for (i=pos; i<rewrites.length; i++) {
                        prevOp = rewrites[i];
                        if (prevOp.index!==op.index) {
                            break;
                        }
                        if (prevOp instanceof trs.ReplaceOp) {
                            rewrites[i] = op; // replace old with new
                            replaced = true;
                            break;
                        }
                        // keep going; must be an insert
                    }
                    if ( !replaced ) {
                        // add replace op to the end of all the inserts;
                        // i is now just past the last op at op.index
                        rewrites.splice(i, 0, op);
                    }
                } else {
                    // inserts are added in front of existing inserts
                    rewrites.splice(pos, 0, op);
                }
                break;
            } else if (searchOp.index > op.index) {
                rewrites.splice(pos, 0, op);
                break;
            }
        }
        // the scan ran off the end: op belongs after everything else
        if (pos===len) {
            rewrites.push(op);
        }
    },

    /** Queue text to be emitted after a token.
     *
     *  Signatures:
     *    insertAfter(index, text)                 -- default program
     *    insertAfter(programName, index, text)
     *
     *  `index` may be a token index or an org.antlr.runtime.Token.
     */
    insertAfter: function() {
        var index, programName, text;
        if (arguments.length===2) {
            programName = trs.DEFAULT_PROGRAM_NAME;
            index = arguments[0];
            text = arguments[1];
        } else if (arguments.length===3) {
            programName = arguments[0];
            index = arguments[1];
            text = arguments[2];
        }

        if (index instanceof org.antlr.runtime.Token) {
            // index is a Token, grab its stream index
            index = index.index; // that's ugly
        }

        // insert after is the same as insert before the next index
        this.insertBefore(programName, index+1, text);
    },

    /** Queue text to be emitted before a token.
     *
     *  Signatures:
     *    insertBefore(index, text)                -- default program
     *    insertBefore(programName, index, text)
     *
     *  `index` may be a token index or an org.antlr.runtime.Token.
     */
    insertBefore: function() {
        var index, programName, text;
        if (arguments.length===2) {
            programName = trs.DEFAULT_PROGRAM_NAME;
            index = arguments[0];
            text = arguments[1];
        } else if (arguments.length===3) {
            programName = arguments[0];
            index = arguments[1];
            text = arguments[2];
        }

        if (index instanceof org.antlr.runtime.Token) {
            // index is a Token, grab its stream index
            index = index.index; // that's ugly
        }

        this.addToSortedRewriteList(
                programName,
                new trs.InsertBeforeOp(index,text)
                );
    },

    /** Queue the replacement of a token range with text.
     *
     *  Signatures:
     *    replace(index, text)                     -- single token
     *    replace(first, last, text)
     *    replace(programName, first, last, text)
     *
     *  `first` / `last` may be token indexes or org.antlr.runtime.Token
     *  objects.  The request is silently ignored when first > last or
     *  either index is negative.
     */
    replace: function() {
        var programName, first, last, text;
        if (arguments.length===2) {
            programName = trs.DEFAULT_PROGRAM_NAME;
            first = arguments[0];
            last = arguments[0];
            text = arguments[1];
        } else if (arguments.length===3) {
            programName = trs.DEFAULT_PROGRAM_NAME;
            first = arguments[0];
            last = arguments[1];
            text = arguments[2];
        } else if (arguments.length===4) {
            programName = arguments[0];
            first = arguments[1];
            last = arguments[2];
            text = arguments[3];
        }

        if (first instanceof org.antlr.runtime.Token) {
            first = first.index;
        }

        if (last instanceof org.antlr.runtime.Token) {
            last = last.index; // that's ugly
        }

        if ( first > last || last<0 || first<0 ) {
            return;
        }
        this.addToSortedRewriteList(
                programName,
                new trs.ReplaceOp(first, last, text));
    },

    // !!! API Break: delete is a JS keyword, so using remove instead.
    /** Queue the deletion of a token or token range.  Accepts the same
     *  leading arguments as replace() (without the text) and forwards
     *  them to replace() with "" appended as the replacement text.
     */
    remove: function() {
        // convert arguments to a real array
        var args = Array.prototype.slice.call(arguments);

        args.push("");
        this.replace.apply(this, args);
    },

    /** Return the index recorded by setLastRewriteTokenIndex() for the
     *  program, or -1 when none has been recorded.
     */
    getLastRewriteTokenIndex: function(programName) {
        programName = programName || trs.DEFAULT_PROGRAM_NAME;
        var last = this.lastRewriteTokenIndexes[programName];
        // do not use `|| -1` here: a stored index of 0 is valid
        return org.antlr.lang.isNumber(last) ? last : -1;
    },

    /** Record i as the last rewrite token index of the program. */
    setLastRewriteTokenIndex: function(programName, i) {
        this.lastRewriteTokenIndexes[programName] = i;
    },

    /** Return the instruction list of the named program, creating an
     *  empty one on first use.
     */
    getProgram: function(name) {
        var is = this.programs[name];
        if ( !is ) {
            is = this.initializeProgram(name);
        }
        return is;
    },

    /** Install and return a fresh, empty instruction list for `name`. */
    initializeProgram: function(name) {
        var is = [];
        this.programs[name] = is;
        return is;
    },

    /** Concatenate the text of tokens start..end, ignoring every queued
     *  rewrite.  `start` defaults to MIN_TOKEN_INDEX and `end` to the
     *  last token when they are not numbers.
     */
    toOriginalString: function(start, end) {
        if (!org.antlr.lang.isNumber(start)) {
            start = trs.MIN_TOKEN_INDEX;
        }
        if (!org.antlr.lang.isNumber(end)) {
            end = this.size()-1;
        }

        var buf = [], i;
        for (i=start; i>=trs.MIN_TOKEN_INDEX && i<=end && i<this.tokens.length; i++) {
            buf.push(this.get(i).getText());
        }
        return buf.join("");
    },

    /** Render the token buffer with the rewrites of one program applied.
     *
     *  Signatures:
     *    toString()                        -- default program, all tokens
     *    toString(programName)             -- all tokens
     *    toString(start, end)              -- default program
     *    toString(programName, start, end)
     *
     *  Falls back to toOriginalString() when the program does not exist
     *  or has no instructions.
     */
    toString: function() {
        var programName, start, end;
        if (arguments.length===0) {
            programName = trs.DEFAULT_PROGRAM_NAME;
            start = trs.MIN_TOKEN_INDEX;
            end = this.size() - 1;
        } else if (arguments.length===1) {
            programName = arguments[0];
            start = trs.MIN_TOKEN_INDEX;
            end = this.size() - 1;
        } else if (arguments.length===2) {
            programName = trs.DEFAULT_PROGRAM_NAME;
            start = arguments[0];
            end = arguments[1];
        } else if (arguments.length===3) {
            programName = arguments[0];
            start = arguments[1];
            end = arguments[2];
        }

        var rewrites = this.programs[programName];
        if ( !rewrites || rewrites.length===0 ) {
            return this.toOriginalString(start,end);
        }

        /// Index of first rewrite we have not done
        var rewriteOpIndex = 0,
            tokenCursor=start,
            buf = [],
            op;
        while ( tokenCursor>=trs.MIN_TOKEN_INDEX &&
                tokenCursor<=end &&
                tokenCursor<this.tokens.length )
        {
            // execute instructions associated with this token index
            if ( rewriteOpIndex<rewrites.length ) {
                op = rewrites[rewriteOpIndex];

                // skip all ops at lower index
                while (op.index<tokenCursor && rewriteOpIndex<rewrites.length) {
                    rewriteOpIndex++;
                    if ( rewriteOpIndex<rewrites.length ) {
                        op = rewrites[rewriteOpIndex];
                    }
                }

                // while we have ops for this token index, exec them
                while (tokenCursor===op.index && rewriteOpIndex<rewrites.length) {
                    tokenCursor = op.execute(buf);
                    rewriteOpIndex++;
                    if ( rewriteOpIndex<rewrites.length ) {
                        op = rewrites[rewriteOpIndex];
                    }
                }
            }
            // dump the token at this index; a replace may have moved the
            // cursor past the end of the buffer, so re-check both bounds
            if ( tokenCursor<=end && tokenCursor<this.tokens.length ) {
                buf.push(this.get(tokenCursor).getText());
                tokenCursor++;
            }
        }
        // now see if there are operations (append) beyond last token index
        var opi;
        for (opi=rewriteOpIndex; opi<rewrites.length; opi++) {
            op = rewrites[opi];
            if ( op.index>=this.size() ) {
                op.execute(buf); // must be insertions if after last token
            }
        }

        return buf.join("");
    },

    /** Concatenate the string form of the token objects start..end
     *  (the tokens themselves, not their text).  `start` defaults to
     *  MIN_TOKEN_INDEX and `end` to the last token when they are not
     *  numbers.
     */
    toDebugString: function(start, end) {
        if (!org.antlr.lang.isNumber(start)) {
            start = trs.MIN_TOKEN_INDEX;
        }
        if (!org.antlr.lang.isNumber(end)) {
            end = this.size()-1;
        }

        var buf = [],
            i;
        for (i=start; i>=trs.MIN_TOKEN_INDEX && i<=end && i<this.tokens.length; i++) {
            buf.push(this.get(i));
        }
        return buf.join("");
    }
});

})();