// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (c) 2004-2015, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: March 16 2004
* Since: ICU 3.0
**********************************************************************
*/
package com.ibm.icu.impl.data;

import java.io.IOException;

import com.ibm.icu.impl.PatternProps;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.UTF16;

/**
 * An iterator class that returns successive string tokens from some
 * source.  String tokens are, in general, separated by Pattern_White_Space
 * in the source text.  Furthermore, they may be delimited by
 * either single or double quotes (opening and closing quotes must
 * match).  Escapes are processed using standard ICU unescaping.
 *
 * <p>2015-sep-03 TODO: Only used in com.ibm.icu.dev.test.format, move there.
 */
public class TokenIterator {

    /** Source of the lines that are split into tokens. */
    private final ResourceReader reader;
    /** Line currently being scanned, or null if a new line must be read. */
    private String line;
    /**
     * Accumulates the text of the token being scanned.  Kept as a
     * StringBuffer because it is passed to UTF16.append().
     */
    private final StringBuffer buf;
    /** Set once the reader has reported the end of its input. */
    private boolean done;
    /** Zero-based offset into 'line' at which the next scan starts. */
    private int pos;
    /** Zero-based offset into 'line' at which the most recent scan started. */
    private int lastpos;

    /**
     * Construct an iterator over the tokens returned by the given
     * ResourceReader, ignoring blank lines and comment lines (first
     * non-blank character is '#').  Note that trailing comments on a
     * line, beginning with the first unquoted '#', are recognized.
     *
     * @param r the reader that supplies the lines to tokenize
     */
    public TokenIterator(ResourceReader r) {
        reader = r;
        line = null;
        done = false;
        buf = new StringBuffer();
        pos = lastpos = -1;
    }

    /**
     * Return the next token from this iterator, or null if the last
     * token has been returned.
     *
     * @return the next token with quotes removed and escapes resolved,
     * or null once the reader has no more lines
     * @throws IOException if the underlying reader fails
     * @throws RuntimeException if the current line contains an invalid
     * escape or an unterminated quote
     */
    public String next() throws IOException {
        if (done) {
            return null;
        }
        for (;;) {
            if (line == null) {
                line = reader.readLineSkippingComments();
                if (line == null) {
                    done = true;
                    return null;
                }
                pos = 0;
            }
            buf.setLength(0);
            lastpos = pos;
            pos = nextToken(pos);
            if (pos < 0) {
                // Nothing (more) on this line; fetch the next one.
                line = null;
                continue;
            }
            return buf.toString();
        }
    }

    /**
     * Return the one-based line number of the line of the last token returned by
     * next().  Should only be called
     * after a call to next(); otherwise the return
     * value is undefined.
     */
    public int getLineNumber() {
        return reader.getLineNumber();
    }

    /**
     * Return a string description of the position at which the most
     * recent token scan started: the reader's own position description
     * followed by ':' and the one-based offset within the line.
     */
    public String describePosition() {
        return reader.describePosition() + ':' + (lastpos+1);
    }

    /**
     * Read the next token from 'this.line' and append it to
     * 'this.buf'.  Tokens are separated by Pattern_White_Space.  Tokens
     * may also be delimited by double or single quotes.  The closing
     * quote must match the opening quote.  If a '#' is encountered,
     * the rest of the line is ignored, unless it is backslash-escaped
     * or within quotes.
     *
     * <p>Backslash escapes are resolved everywhere in a token,
     * including when the escape is the very first character of an
     * unquoted token.
     *
     * @param position the offset into the string
     * @return offset to the next character to read from line, or if
     * the end of the line is reached without scanning a valid token,
     * -1
     * @throws RuntimeException if an escape sequence is invalid or a
     * quoted token is not closed before the end of the line
     */
    private int nextToken(int position) {
        position = PatternProps.skipWhiteSpace(line, position);
        if (position == line.length()) {
            return -1;
        }
        int startpos = position;
        char c = line.charAt(position);
        char quote = 0;
        switch (c) {
        case '"':
        case '\'':
            // Opening quote: remember it and step past it.
            quote = c;
            ++position;
            break;
        case '#':
            // Comment runs to the end of the line; no token here.
            return -1;
        default:
            // Do not consume the character here.  The loop below handles
            // it, so that a leading backslash is unescaped like any other
            // instead of being copied literally into the token.
            break;
        }
        while (position < line.length()) {
            c = line.charAt(position); // 16-bit ok
            if (c == '\\') {
                int cpAndLength = Utility.unescapeAndLengthAt(line, position + 1);
                if (cpAndLength < 0) {
                    throw new RuntimeException("Invalid escape at " +
                                               reader.describePosition() + ':' +
                                               position);
                }
                UTF16.append(buf, Utility.cpFromCodePointAndLength(cpAndLength));
                // Skip the backslash plus the characters of the escape body.
                position += 1 + Utility.lengthFromCodePointAndLength(cpAndLength);
            } else if ((quote != 0 && c == quote) ||
                       (quote == 0 && PatternProps.isWhiteSpace(c))) {
                // Closing quote or separating white space ends the token;
                // the terminator itself is consumed.
                return ++position;
            } else if (quote == 0 && c == '#') {
                return position; // do NOT increment
            } else {
                buf.append(c);
                ++position;
            }
        }
        if (quote != 0) {
            throw new RuntimeException("Unterminated quote at " +
                                       reader.describePosition() + ':' +
                                       startpos);
        }
        return position;
    }
}