• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 The JavaParser Team.
3  *
4  * This file is part of JavaParser.
5  *
6  * JavaParser can be used either under the terms of
7  * a) the GNU Lesser General Public License as published by
8  *     the Free Software Foundation, either version 3 of the License, or
9  *     (at your option) any later version.
10  * b) the terms of the Apache License
11  *
12  * You should have received a copy of both licenses in LICENCE.LGPL and
13  * LICENCE.APACHE. Please refer to those files for details.
14  *
15  * JavaParser is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License for more details.
19  */
20 package com.github.javaparser;
21 
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Collections;
25 import java.util.List;
26 
27 /**
28  * {@link Provider} un-escaping unicode escape sequences in the input sequence.
29  */
30 public class UnicodeEscapeProcessingProvider implements Provider {
31 
32 	private static final char LF = '\n';
33 
34 	private static final char CR = '\r';
35 
36 	private static final char BACKSLASH = '\\';
37 
38 	private static final int EOF = -1;
39 
40 	private char[] _data;
41 
42 	/**
43 	 * The number of characters in {@link #_data}.
44 	 */
45 	private int _len = 0;
46 
47 	/**
48 	 * The position in {@link #_data} where to read the next source character from.
49 	 */
50 	private int _pos = 0;
51 
52 	private boolean _backslashSeen;
53 
54 	private final LineCounter _inputLine = new LineCounter();
55 
56 	private final LineCounter _outputLine = new LineCounter();
57 
58 	private final PositionMappingBuilder _mappingBuilder = new PositionMappingBuilder(_outputLine, _inputLine);
59 
60 	private Provider _input;
61 
62 	/**
63 	 * Creates a {@link UnicodeEscapeProcessingProvider}.
64 	 */
UnicodeEscapeProcessingProvider(Provider input)65 	public UnicodeEscapeProcessingProvider(Provider input) {
66 		this(2048, input);
67 	}
68 
69 	/**
70 	 * Creates a {@link UnicodeEscapeProcessingProvider}.
71 	 */
UnicodeEscapeProcessingProvider(int bufferSize, Provider input)72 	public UnicodeEscapeProcessingProvider(int bufferSize, Provider input) {
73 		_input = input;
74 		_data = new char[bufferSize];
75 	}
76 
77 	/**
78 	 * The {@link LineCounter} of the input file.
79 	 */
getInputCounter()80 	public LineCounter getInputCounter() {
81 		return _inputLine;
82 	}
83 
84 	/**
85 	 * The {@link LineCounter} of the output file.
86 	 */
getOutputCounter()87 	public LineCounter getOutputCounter() {
88 		return _outputLine;
89 	}
90 
91 	@Override
read(char[] buffer, final int offset, int len)92 	public int read(char[] buffer, final int offset, int len) throws IOException {
93 		int pos = offset;
94 		int stop = offset + len;
95 		while (pos < stop) {
96 			int ch = _outputLine.process(nextOutputChar());
97 			if (ch < 0) {
98 				if (pos == offset) {
99 					// Nothing read yet, this is the end of the stream.
100 					return EOF;
101 				} else {
102 					break;
103 				}
104 			} else {
105 				_mappingBuilder.update();
106 				buffer[pos++] = (char) ch;
107 			}
108 		}
109 		return pos - offset;
110 	}
111 
112 	@Override
close()113 	public void close() throws IOException {
114 		_input.close();
115 	}
116 
117 	/**
118 	 * Produces the next un-escaped character to be written to the output.
119 	 *
120 	 * @return The next character or <code>-1</code> if no more characters are available.
121 	 */
nextOutputChar()122 	private int nextOutputChar() throws IOException {
123 		int next = nextInputChar();
124 		switch (next) {
125 			case EOF:
126 				return EOF;
127 			case BACKSLASH: {
128 				if (_backslashSeen) {
129 					return clearBackSlashSeen(next);
130 				} else {
131 					return backSlashSeen();
132 				}
133 			}
134 			default: {
135 				// An arbitrary character.
136 				return clearBackSlashSeen(next);
137 			}
138 		}
139 	}
140 
clearBackSlashSeen(int next)141 	private int clearBackSlashSeen(int next) {
142 		_backslashSeen = false;
143 		return next;
144 	}
145 
backSlashSeen()146 	private int backSlashSeen() throws IOException {
147 		_backslashSeen = true;
148 
149 		int next = nextInputChar();
150 		switch (next) {
151 			case EOF:
152 				// End of file after backslash produces the backslash itself.
153 				return BACKSLASH;
154 			case 'u': {
155 				return unicodeStartSeen();
156 			}
157 			default: {
158 				pushBack(next);
159 				return BACKSLASH;
160 			}
161 		}
162 	}
163 
unicodeStartSeen()164 	private int unicodeStartSeen() throws IOException {
165 		int uCnt = 1;
166 		while (true) {
167 			int next = nextInputChar();
168 			switch (next) {
169 				case EOF: {
170 					pushBackUs(uCnt);
171 					return BACKSLASH;
172 				}
173 				case 'u': {
174 					uCnt++;
175 					continue;
176 				}
177 				default: {
178 					return readDigits(uCnt, next);
179 				}
180 			}
181 		}
182 	}
183 
readDigits(int uCnt, int next3)184 	private int readDigits(int uCnt, int next3) throws IOException {
185 		int digit3 = digit(next3);
186 		if (digit3 < 0) {
187 			pushBack(next3);
188 			pushBackUs(uCnt);
189 			return BACKSLASH;
190 		}
191 
192 		int next2 = nextInputChar();
193 		int digit2 = digit(next2);
194 		if (digit2 < 0) {
195 			pushBack(next2);
196 			pushBack(next3);
197 			pushBackUs(uCnt);
198 			return BACKSLASH;
199 		}
200 
201 		int next1 = nextInputChar();
202 		int digit1 = digit(next1);
203 		if (digit1 < 0) {
204 			pushBack(next1);
205 			pushBack(next2);
206 			pushBack(next3);
207 			pushBackUs(uCnt);
208 			return BACKSLASH;
209 		}
210 
211 		int next0 = nextInputChar();
212 		int digit0 = digit(next0);
213 		if (digit0 < 0) {
214 			pushBack(next0);
215 			pushBack(next1);
216 			pushBack(next2);
217 			pushBack(next3);
218 			pushBackUs(uCnt);
219 			return BACKSLASH;
220 		}
221 
222 		int ch = digit3 << 12 | digit2 << 8 | digit1 << 4 | digit0;
223 		return clearBackSlashSeen(ch);
224 	}
225 
pushBackUs(int cnt)226 	private void pushBackUs(int cnt) {
227 		for (int n = 0; n < cnt; n++) {
228 			pushBack('u');
229 		}
230 	}
231 
digit(int ch)232 	private static int digit(int ch) {
233 		if (ch >= '0' && ch <= '9') {
234 			return ch - '0';
235 		}
236 		if (ch >= 'A' && ch <= 'F') {
237 			return 10 + ch - 'A';
238 		}
239 		if (ch >= 'a' && ch <= 'f') {
240 			return 10 + ch - 'a';
241 		}
242 		return -1;
243 	}
244 
245 	/**
246 	 * Processes column/line information from the input file.
247 	 *
248 	 * @return The next character or <code>-1</code> if no more input is available.
249 	 */
nextInputChar()250 	private int nextInputChar() throws IOException {
251 		int result = nextBufferedChar();
252 		return _inputLine.process(result);
253 	}
254 
255 	/**
256 	 * Retrieves the next un-escaped character from the buffered {@link #_input}.
257 	 *
258 	 * @return The next character or <code>-1</code> if no more input is available.
259 	 */
nextBufferedChar()260 	private int nextBufferedChar() throws IOException {
261 		while (isBufferEmpty()) {
262 			int direct = fillBuffer();
263 			if (direct < 0) {
264 				return EOF;
265 			}
266 		}
267 		return _data[_pos++];
268 	}
269 
isBufferEmpty()270 	private boolean isBufferEmpty() {
271 		return _pos >= _len;
272 	}
273 
fillBuffer()274 	private int fillBuffer() throws IOException {
275 		_pos = 0;
276 		int direct = _input.read(_data, 0, _data.length);
277 		if (direct != 0) {
278 			_len = direct;
279 		}
280 		return direct;
281 	}
282 
pushBack(int ch)283 	private void pushBack(int ch) {
284 		if (ch < 0) {
285 			return;
286 		}
287 
288 		if (isBufferEmpty()) {
289 			_pos = _data.length;
290 			_len = _data.length;
291 		} else if (_pos == 0) {
292 			if (_len == _data.length) {
293 				// Buffer is completely full, no push possible, enlarge buffer.
294 				char[] newData = new char[_data.length + 1024];
295 				_len = newData.length;
296 				_pos = newData.length - _data.length;
297 				System.arraycopy(_data, 0, newData, _pos, _data.length);
298 				_data = newData;
299 			} else {
300 				// Move contents to the right.
301 				int cnt = _len - _pos;
302 				_pos = _data.length - _len;
303 				_len = _data.length;
304 				System.arraycopy(_data, 0, _data, _pos, cnt);
305 			}
306 		}
307 		_data[--_pos] = (char) ch;
308 	}
309 
310 	/**
311 	 * The {@link PositionMapping} being built during processing the file.
312 	 */
getPositionMapping()313 	public PositionMapping getPositionMapping() {
314 		return _mappingBuilder.getMapping();
315 	}
316 
317 	/**
318 	 * An algorithm mapping {@link Position} form two corresponding files.
319 	 */
320 	public static final class PositionMapping {
321 
322 		private final List<DeltaInfo> _deltas = new ArrayList<>();
323 
324 		/**
325 		 * Creates a {@link UnicodeEscapeProcessingProvider.PositionMapping}.
326 		 */
PositionMapping()327 		public PositionMapping() {
328 			super();
329 		}
330 
331 		/**
332 		 * Whether this is the identity transformation.
333 		 */
isEmpty()334 		public boolean isEmpty() {
335 			return _deltas.isEmpty();
336 		}
337 
add(int line, int column, int lineDelta, int columnDelta)338 		void add(int line, int column, int lineDelta, int columnDelta) {
339 			_deltas.add(new DeltaInfo(line, column, lineDelta, columnDelta));
340 		}
341 
342 		/**
343 		 * Looks up the {@link PositionUpdate} for the given Position.
344 		 */
lookup(Position position)345 		public PositionUpdate lookup(Position position) {
346 			int result = Collections.binarySearch(_deltas, position);
347 			if (result >= 0) {
348 				return _deltas.get(result);
349 			} else {
350 				int insertIndex = -result - 1;
351 				if (insertIndex == 0) {
352 					// Before the first delta info, identity mapping.
353 					return PositionUpdate.NONE;
354 				} else {
355 					// The relevant update is the one with the position smaller
356 					// than the requested position.
357 					return _deltas.get(insertIndex - 1);
358 				}
359 			}
360 		}
361 
362 		/**
363 		 * Algorithm updating a {@link Position} from one file to a
364 		 * {@link Position} in a corresponding file.
365 		 */
366 		public static interface PositionUpdate {
367 
368 			/**
369 			 * The identity position mapping.
370 			 */
371 			PositionUpdate NONE = new PositionUpdate() {
372 				@Override
373 				public int transformLine(int line) {
374 					return line;
375 				}
376 
377 				@Override
378 				public int transformColumn(int column) {
379 					return column;
380 				}
381 
382 				@Override
383 				public Position transform(Position pos) {
384 					return pos;
385 				}
386 			};
387 
388 			/**
389 			 * Maps the given line to an original line.
390 			 */
transformLine(int line)391 			int transformLine(int line);
392 
393 			/**
394 			 * Maps the given column to an original column.
395 			 */
transformColumn(int column)396 			int transformColumn(int column);
397 
398 			/**
399 			 * The transformed position.
400 			 */
transform(Position pos)401 			default Position transform(Position pos) {
402 				int line = pos.line;
403 				int column = pos.column;
404 				int transformedLine = transformLine(line);
405 				int transformedColumn = transformColumn(column);
406 				return new Position(transformedLine, transformedColumn);
407 			}
408 
409 		}
410 
411 		private static final class DeltaInfo extends Position implements PositionUpdate {
412 
413 			/**
414 			 * The offset to add to the {@link #line} and all following source
415 			 * positions up to the next {@link PositionUpdate}.
416 			 */
417 			private final int _lineDelta;
418 
419 			/**
420 			 * The offset to add to the {@link #column} and all following
421 			 * source positions up to the next {@link PositionUpdate}.
422 			 */
423 			private final int _columnDelta;
424 
425 			/**
426 			 * Creates a {@link PositionUpdate}.
427 			 */
DeltaInfo(int line, int column, int lineDelta, int columnDelta)428 			public DeltaInfo(int line, int column, int lineDelta,
429 					int columnDelta) {
430 				super(line, column);
431 				_lineDelta = lineDelta;
432 				_columnDelta = columnDelta;
433 			}
434 
435 			@Override
transformLine(int sourceLine)436 			public int transformLine(int sourceLine) {
437 				return sourceLine + _lineDelta;
438 			}
439 
440 			@Override
transformColumn(int sourceColumn)441 			public int transformColumn(int sourceColumn) {
442 				return sourceColumn + _columnDelta;
443 			}
444 
445 			@Override
toString()446 			public String toString() {
447 				return "(" + line + ", " + column + ": " + _lineDelta + ", " + _columnDelta + ")";
448 			}
449 
450 		}
451 
452 		/**
453 		 * Transforms the given {@link Position}.
454 		 */
transform(Position pos)455 		public Position transform(Position pos) {
456 			return lookup(pos).transform(pos);
457 		}
458 
459 		/**
460 		 * Transforms the given {@link Range}.
461 		 */
transform(Range range)462 		public Range transform(Range range) {
463 			Position begin = transform(range.begin);
464 			Position end = transform(range.end);
465 			if (begin == range.begin && end == range.end) {
466 				// No change.
467 				return range;
468 			}
469 			return new Range(begin, end);
470 		}
471 	}
472 
473 	private static final class PositionMappingBuilder {
474 
475 		private LineCounter _left;
476 
477 		private LineCounter _right;
478 
479 		private final PositionMapping _mapping = new PositionMapping();
480 
481 		private int _lineDelta = 0;
482 		private int _columnDelta = 0;
483 
484 		/**
485 		 * Creates a {@link PositionMappingBuilder}.
486 		 *
487 		 * @param left The source {@link LineCounter}.
488 		 * @param right The target {@link LineCounter}.
489 		 */
PositionMappingBuilder(LineCounter left, LineCounter right)490 		public PositionMappingBuilder(LineCounter left, LineCounter right) {
491 			_left = left;
492 			_right = right;
493 			update();
494 		}
495 
496 		/**
497 		 * The built {@link PositionMapping}.
498 		 */
getMapping()499 		public PositionMapping getMapping() {
500 			return _mapping;
501 		}
502 
update()503 		public void update() {
504 			int lineDelta = _right.getLine() - _left.getLine();
505 			int columnDelta = _right.getColumn() - _left.getColumn();
506 
507 			if (lineDelta != _lineDelta || columnDelta != _columnDelta) {
508 				_mapping.add(_left.getLine(), _left.getColumn(), lineDelta, columnDelta);
509 
510 				_lineDelta = lineDelta;
511 				_columnDelta = columnDelta;
512 			}
513 		}
514 
515 	}
516 
517 	/**
518 	 * Processor keeping track of the current line and column in a stream of
519 	 * incoming characters.
520 	 *
521 	 * @see #process(int)
522 	 */
523 	public static final class LineCounter {
524 
525 		/**
526 		 * Whether {@link #CR} has been seen on the input as last character.
527 		 */
528 		private boolean _crSeen;
529 
530 		private int _line = 1;
531 
532 		private int _column = 1;
533 
534 		/**
535 		 * Creates a {@link UnicodeEscapeProcessingProvider.LineCounter}.
536 		 */
LineCounter()537 		public LineCounter() {
538 			super();
539 		}
540 
541 		/**
542 		 * The line of the currently processed input character.
543 		 */
getLine()544 		public int getLine() {
545 			return _line;
546 		}
547 
548 		/**
549 		 * The column of the currently processed input character.
550 		 */
getColumn()551 		public int getColumn() {
552 			return _column;
553 		}
554 
555 		/**
556 		 * The current position.
557 		 */
getPosition()558 		public Position getPosition() {
559 			return new Position(getLine(), getColumn());
560 		}
561 
562 		/**
563 		 * Analyzes the given character for line feed.
564 		 */
process(int ch)565 		public int process(int ch) {
566 			switch (ch) {
567 				case EOF: {
568 					break;
569 				}
570 				case CR: {
571 					incLine();
572 					_crSeen = true;
573 					break;
574 				}
575 				case LF: {
576 					// CR LF does only count as a single line terminator.
577 					if (_crSeen) {
578 						_crSeen = false;
579 					} else {
580 						incLine();
581 					}
582 					break;
583 				}
584 				default: {
585 					_crSeen = false;
586 					_column++;
587 				}
588 			}
589 			return ch;
590 		}
591 
incLine()592 		private void incLine() {
593 			_line++;
594 			_column = 1;
595 		}
596 
597 	}
598 
599 }
600