1 /* 2 * Copyright (C) 2019 The JavaParser Team. 3 * 4 * This file is part of JavaParser. 5 * 6 * JavaParser can be used either under the terms of 7 * a) the GNU Lesser General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * b) the terms of the Apache License 11 * 12 * You should have received a copy of both licenses in LICENCE.LGPL and 13 * LICENCE.APACHE. Please refer to those files for details. 14 * 15 * JavaParser is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License for more details. 19 */ 20 package com.github.javaparser; 21 22 import java.io.IOException; 23 import java.util.ArrayList; 24 import java.util.Collections; 25 import java.util.List; 26 27 /** 28 * {@link Provider} un-escaping unicode escape sequences in the input sequence. 29 */ 30 public class UnicodeEscapeProcessingProvider implements Provider { 31 32 private static final char LF = '\n'; 33 34 private static final char CR = '\r'; 35 36 private static final char BACKSLASH = '\\'; 37 38 private static final int EOF = -1; 39 40 private char[] _data; 41 42 /** 43 * The number of characters in {@link #_data}. 44 */ 45 private int _len = 0; 46 47 /** 48 * The position in {@link #_data} where to read the next source character from. 49 */ 50 private int _pos = 0; 51 52 private boolean _backslashSeen; 53 54 private final LineCounter _inputLine = new LineCounter(); 55 56 private final LineCounter _outputLine = new LineCounter(); 57 58 private final PositionMappingBuilder _mappingBuilder = new PositionMappingBuilder(_outputLine, _inputLine); 59 60 private Provider _input; 61 62 /** 63 * Creates a {@link UnicodeEscapeProcessingProvider}. 64 */ UnicodeEscapeProcessingProvider(Provider input)65 public UnicodeEscapeProcessingProvider(Provider input) { 66 this(2048, input); 67 } 68 69 /** 70 * Creates a {@link UnicodeEscapeProcessingProvider}. 71 */ UnicodeEscapeProcessingProvider(int bufferSize, Provider input)72 public UnicodeEscapeProcessingProvider(int bufferSize, Provider input) { 73 _input = input; 74 _data = new char[bufferSize]; 75 } 76 77 /** 78 * The {@link LineCounter} of the input file. 79 */ getInputCounter()80 public LineCounter getInputCounter() { 81 return _inputLine; 82 } 83 84 /** 85 * The {@link LineCounter} of the output file. 86 */ getOutputCounter()87 public LineCounter getOutputCounter() { 88 return _outputLine; 89 } 90 91 @Override read(char[] buffer, final int offset, int len)92 public int read(char[] buffer, final int offset, int len) throws IOException { 93 int pos = offset; 94 int stop = offset + len; 95 while (pos < stop) { 96 int ch = _outputLine.process(nextOutputChar()); 97 if (ch < 0) { 98 if (pos == offset) { 99 // Nothing read yet, this is the end of the stream. 100 return EOF; 101 } else { 102 break; 103 } 104 } else { 105 _mappingBuilder.update(); 106 buffer[pos++] = (char) ch; 107 } 108 } 109 return pos - offset; 110 } 111 112 @Override close()113 public void close() throws IOException { 114 _input.close(); 115 } 116 117 /** 118 * Produces the next un-escaped character to be written to the output. 119 * 120 * @return The next character or <code>-1</code> if no more characters are available. 121 */ nextOutputChar()122 private int nextOutputChar() throws IOException { 123 int next = nextInputChar(); 124 switch (next) { 125 case EOF: 126 return EOF; 127 case BACKSLASH: { 128 if (_backslashSeen) { 129 return clearBackSlashSeen(next); 130 } else { 131 return backSlashSeen(); 132 } 133 } 134 default: { 135 // An arbitrary character. 136 return clearBackSlashSeen(next); 137 } 138 } 139 } 140 clearBackSlashSeen(int next)141 private int clearBackSlashSeen(int next) { 142 _backslashSeen = false; 143 return next; 144 } 145 backSlashSeen()146 private int backSlashSeen() throws IOException { 147 _backslashSeen = true; 148 149 int next = nextInputChar(); 150 switch (next) { 151 case EOF: 152 // End of file after backslash produces the backslash itself. 153 return BACKSLASH; 154 case 'u': { 155 return unicodeStartSeen(); 156 } 157 default: { 158 pushBack(next); 159 return BACKSLASH; 160 } 161 } 162 } 163 unicodeStartSeen()164 private int unicodeStartSeen() throws IOException { 165 int uCnt = 1; 166 while (true) { 167 int next = nextInputChar(); 168 switch (next) { 169 case EOF: { 170 pushBackUs(uCnt); 171 return BACKSLASH; 172 } 173 case 'u': { 174 uCnt++; 175 continue; 176 } 177 default: { 178 return readDigits(uCnt, next); 179 } 180 } 181 } 182 } 183 readDigits(int uCnt, int next3)184 private int readDigits(int uCnt, int next3) throws IOException { 185 int digit3 = digit(next3); 186 if (digit3 < 0) { 187 pushBack(next3); 188 pushBackUs(uCnt); 189 return BACKSLASH; 190 } 191 192 int next2 = nextInputChar(); 193 int digit2 = digit(next2); 194 if (digit2 < 0) { 195 pushBack(next2); 196 pushBack(next3); 197 pushBackUs(uCnt); 198 return BACKSLASH; 199 } 200 201 int next1 = nextInputChar(); 202 int digit1 = digit(next1); 203 if (digit1 < 0) { 204 pushBack(next1); 205 pushBack(next2); 206 pushBack(next3); 207 pushBackUs(uCnt); 208 return BACKSLASH; 209 } 210 211 int next0 = nextInputChar(); 212 int digit0 = digit(next0); 213 if (digit0 < 0) { 214 pushBack(next0); 215 pushBack(next1); 216 pushBack(next2); 217 pushBack(next3); 218 pushBackUs(uCnt); 219 return BACKSLASH; 220 } 221 222 int ch = digit3 << 12 | digit2 << 8 | digit1 << 4 | digit0; 223 return clearBackSlashSeen(ch); 224 } 225 pushBackUs(int cnt)226 private void pushBackUs(int cnt) { 227 for (int n = 0; n < cnt; n++) { 228 pushBack('u'); 229 } 230 } 231 digit(int ch)232 private static int digit(int ch) { 233 if (ch >= '0' && ch <= '9') { 234 return ch - '0'; 235 } 236 if (ch >= 'A' && ch <= 'F') { 237 return 10 + ch - 'A'; 238 } 239 if (ch >= 'a' && ch <= 'f') { 240 return 10 + ch - 'a'; 241 } 242 return -1; 243 } 244 245 /** 246 * Processes column/line information from the input file. 247 * 248 * @return The next character or <code>-1</code> if no more input is available. 249 */ nextInputChar()250 private int nextInputChar() throws IOException { 251 int result = nextBufferedChar(); 252 return _inputLine.process(result); 253 } 254 255 /** 256 * Retrieves the next un-escaped character from the buffered {@link #_input}. 257 * 258 * @return The next character or <code>-1</code> if no more input is available. 259 */ nextBufferedChar()260 private int nextBufferedChar() throws IOException { 261 while (isBufferEmpty()) { 262 int direct = fillBuffer(); 263 if (direct < 0) { 264 return EOF; 265 } 266 } 267 return _data[_pos++]; 268 } 269 isBufferEmpty()270 private boolean isBufferEmpty() { 271 return _pos >= _len; 272 } 273 fillBuffer()274 private int fillBuffer() throws IOException { 275 _pos = 0; 276 int direct = _input.read(_data, 0, _data.length); 277 if (direct != 0) { 278 _len = direct; 279 } 280 return direct; 281 } 282 pushBack(int ch)283 private void pushBack(int ch) { 284 if (ch < 0) { 285 return; 286 } 287 288 if (isBufferEmpty()) { 289 _pos = _data.length; 290 _len = _data.length; 291 } else if (_pos == 0) { 292 if (_len == _data.length) { 293 // Buffer is completely full, no push possible, enlarge buffer. 294 char[] newData = new char[_data.length + 1024]; 295 _len = newData.length; 296 _pos = newData.length - _data.length; 297 System.arraycopy(_data, 0, newData, _pos, _data.length); 298 _data = newData; 299 } else { 300 // Move contents to the right. 301 int cnt = _len - _pos; 302 _pos = _data.length - _len; 303 _len = _data.length; 304 System.arraycopy(_data, 0, _data, _pos, cnt); 305 } 306 } 307 _data[--_pos] = (char) ch; 308 } 309 310 /** 311 * The {@link PositionMapping} being built during processing the file. 312 */ getPositionMapping()313 public PositionMapping getPositionMapping() { 314 return _mappingBuilder.getMapping(); 315 } 316 317 /** 318 * An algorithm mapping {@link Position} form two corresponding files. 319 */ 320 public static final class PositionMapping { 321 322 private final List<DeltaInfo> _deltas = new ArrayList<>(); 323 324 /** 325 * Creates a {@link UnicodeEscapeProcessingProvider.PositionMapping}. 326 */ PositionMapping()327 public PositionMapping() { 328 super(); 329 } 330 331 /** 332 * Whether this is the identity transformation. 333 */ isEmpty()334 public boolean isEmpty() { 335 return _deltas.isEmpty(); 336 } 337 add(int line, int column, int lineDelta, int columnDelta)338 void add(int line, int column, int lineDelta, int columnDelta) { 339 _deltas.add(new DeltaInfo(line, column, lineDelta, columnDelta)); 340 } 341 342 /** 343 * Looks up the {@link PositionUpdate} for the given Position. 344 */ lookup(Position position)345 public PositionUpdate lookup(Position position) { 346 int result = Collections.binarySearch(_deltas, position); 347 if (result >= 0) { 348 return _deltas.get(result); 349 } else { 350 int insertIndex = -result - 1; 351 if (insertIndex == 0) { 352 // Before the first delta info, identity mapping. 353 return PositionUpdate.NONE; 354 } else { 355 // The relevant update is the one with the position smaller 356 // than the requested position. 357 return _deltas.get(insertIndex - 1); 358 } 359 } 360 } 361 362 /** 363 * Algorithm updating a {@link Position} from one file to a 364 * {@link Position} in a corresponding file. 365 */ 366 public static interface PositionUpdate { 367 368 /** 369 * The identity position mapping. 370 */ 371 PositionUpdate NONE = new PositionUpdate() { 372 @Override 373 public int transformLine(int line) { 374 return line; 375 } 376 377 @Override 378 public int transformColumn(int column) { 379 return column; 380 } 381 382 @Override 383 public Position transform(Position pos) { 384 return pos; 385 } 386 }; 387 388 /** 389 * Maps the given line to an original line. 390 */ transformLine(int line)391 int transformLine(int line); 392 393 /** 394 * Maps the given column to an original column. 395 */ transformColumn(int column)396 int transformColumn(int column); 397 398 /** 399 * The transformed position. 400 */ transform(Position pos)401 default Position transform(Position pos) { 402 int line = pos.line; 403 int column = pos.column; 404 int transformedLine = transformLine(line); 405 int transformedColumn = transformColumn(column); 406 return new Position(transformedLine, transformedColumn); 407 } 408 409 } 410 411 private static final class DeltaInfo extends Position implements PositionUpdate { 412 413 /** 414 * The offset to add to the {@link #line} and all following source 415 * positions up to the next {@link PositionUpdate}. 416 */ 417 private final int _lineDelta; 418 419 /** 420 * The offset to add to the {@link #column} and all following 421 * source positions up to the next {@link PositionUpdate}. 422 */ 423 private final int _columnDelta; 424 425 /** 426 * Creates a {@link PositionUpdate}. 427 */ DeltaInfo(int line, int column, int lineDelta, int columnDelta)428 public DeltaInfo(int line, int column, int lineDelta, 429 int columnDelta) { 430 super(line, column); 431 _lineDelta = lineDelta; 432 _columnDelta = columnDelta; 433 } 434 435 @Override transformLine(int sourceLine)436 public int transformLine(int sourceLine) { 437 return sourceLine + _lineDelta; 438 } 439 440 @Override transformColumn(int sourceColumn)441 public int transformColumn(int sourceColumn) { 442 return sourceColumn + _columnDelta; 443 } 444 445 @Override toString()446 public String toString() { 447 return "(" + line + ", " + column + ": " + _lineDelta + ", " + _columnDelta + ")"; 448 } 449 450 } 451 452 /** 453 * Transforms the given {@link Position}. 454 */ transform(Position pos)455 public Position transform(Position pos) { 456 return lookup(pos).transform(pos); 457 } 458 459 /** 460 * Transforms the given {@link Range}. 461 */ transform(Range range)462 public Range transform(Range range) { 463 Position begin = transform(range.begin); 464 Position end = transform(range.end); 465 if (begin == range.begin && end == range.end) { 466 // No change. 467 return range; 468 } 469 return new Range(begin, end); 470 } 471 } 472 473 private static final class PositionMappingBuilder { 474 475 private LineCounter _left; 476 477 private LineCounter _right; 478 479 private final PositionMapping _mapping = new PositionMapping(); 480 481 private int _lineDelta = 0; 482 private int _columnDelta = 0; 483 484 /** 485 * Creates a {@link PositionMappingBuilder}. 486 * 487 * @param left The source {@link LineCounter}. 488 * @param right The target {@link LineCounter}. 489 */ PositionMappingBuilder(LineCounter left, LineCounter right)490 public PositionMappingBuilder(LineCounter left, LineCounter right) { 491 _left = left; 492 _right = right; 493 update(); 494 } 495 496 /** 497 * The built {@link PositionMapping}. 498 */ getMapping()499 public PositionMapping getMapping() { 500 return _mapping; 501 } 502 update()503 public void update() { 504 int lineDelta = _right.getLine() - _left.getLine(); 505 int columnDelta = _right.getColumn() - _left.getColumn(); 506 507 if (lineDelta != _lineDelta || columnDelta != _columnDelta) { 508 _mapping.add(_left.getLine(), _left.getColumn(), lineDelta, columnDelta); 509 510 _lineDelta = lineDelta; 511 _columnDelta = columnDelta; 512 } 513 } 514 515 } 516 517 /** 518 * Processor keeping track of the current line and column in a stream of 519 * incoming characters. 520 * 521 * @see #process(int) 522 */ 523 public static final class LineCounter { 524 525 /** 526 * Whether {@link #CR} has been seen on the input as last character. 527 */ 528 private boolean _crSeen; 529 530 private int _line = 1; 531 532 private int _column = 1; 533 534 /** 535 * Creates a {@link UnicodeEscapeProcessingProvider.LineCounter}. 536 */ LineCounter()537 public LineCounter() { 538 super(); 539 } 540 541 /** 542 * The line of the currently processed input character. 543 */ getLine()544 public int getLine() { 545 return _line; 546 } 547 548 /** 549 * The column of the currently processed input character. 550 */ getColumn()551 public int getColumn() { 552 return _column; 553 } 554 555 /** 556 * The current position. 557 */ getPosition()558 public Position getPosition() { 559 return new Position(getLine(), getColumn()); 560 } 561 562 /** 563 * Analyzes the given character for line feed. 564 */ process(int ch)565 public int process(int ch) { 566 switch (ch) { 567 case EOF: { 568 break; 569 } 570 case CR: { 571 incLine(); 572 _crSeen = true; 573 break; 574 } 575 case LF: { 576 // CR LF does only count as a single line terminator. 577 if (_crSeen) { 578 _crSeen = false; 579 } else { 580 incLine(); 581 } 582 break; 583 } 584 default: { 585 _crSeen = false; 586 _column++; 587 } 588 } 589 return ch; 590 } 591 incLine()592 private void incLine() { 593 _line++; 594 _column = 1; 595 } 596 597 } 598 599 } 600