1 /** 2 * Copyright (c) 2008, SnakeYAML 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 * in compliance with the License. You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software distributed under the License 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 * or implied. See the License for the specific language governing permissions and limitations under 12 * the License. 13 */ 14 package org.pyyaml; 15 16 import java.util.ArrayList; 17 import java.util.List; 18 import java.util.Map; 19 import org.yaml.snakeyaml.error.Mark; 20 import org.yaml.snakeyaml.nodes.Tag; 21 import org.yaml.snakeyaml.scanner.Scanner; 22 import org.yaml.snakeyaml.scanner.ScannerImpl; 23 import org.yaml.snakeyaml.tokens.AliasToken; 24 import org.yaml.snakeyaml.tokens.AnchorToken; 25 import org.yaml.snakeyaml.tokens.DirectiveToken; 26 import org.yaml.snakeyaml.tokens.DocumentStartToken; 27 import org.yaml.snakeyaml.tokens.FlowEntryToken; 28 import org.yaml.snakeyaml.tokens.FlowMappingEndToken; 29 import org.yaml.snakeyaml.tokens.FlowMappingStartToken; 30 import org.yaml.snakeyaml.tokens.FlowSequenceEndToken; 31 import org.yaml.snakeyaml.tokens.FlowSequenceStartToken; 32 import org.yaml.snakeyaml.tokens.KeyToken; 33 import org.yaml.snakeyaml.tokens.ScalarToken; 34 import org.yaml.snakeyaml.tokens.StreamEndToken; 35 import org.yaml.snakeyaml.tokens.StreamStartToken; 36 import org.yaml.snakeyaml.tokens.TagToken; 37 import org.yaml.snakeyaml.tokens.TagTuple; 38 import org.yaml.snakeyaml.tokens.Token; 39 import org.yaml.snakeyaml.tokens.ValueToken; 40 41 public class CanonicalScanner implements Scanner { 42 43 private static final String DIRECTIVE = "%YAML 1.1"; 44 private final static Map<Character, Integer> QUOTE_CODES = ScannerImpl.ESCAPE_CODES; 45 46 private final static Map<Character, String> QUOTE_REPLACES = ScannerImpl.ESCAPE_REPLACEMENTS; 47 48 private final String data; 49 private int index; 50 public ArrayList<Token> tokens; 51 private boolean scanned; 52 private final Mark mark; 53 CanonicalScanner(String data)54 public CanonicalScanner(String data) { 55 this.data = data + "\0"; 56 this.index = 0; 57 this.tokens = new ArrayList<Token>(); 58 this.scanned = false; 59 this.mark = new Mark("test", 0, 0, 0, data.toCharArray(), 0); 60 } 61 checkToken(Token.ID... choices)62 public boolean checkToken(Token.ID... choices) { 63 if (!scanned) { 64 scan(); 65 } 66 if (!tokens.isEmpty()) { 67 if (choices.length == 0) { 68 return true; 69 } 70 Token first = this.tokens.get(0); 71 for (Token.ID choice : choices) { 72 if (first.getTokenId() == choice) { 73 return true; 74 } 75 } 76 } 77 return false; 78 } 79 peekToken()80 public Token peekToken() { 81 if (!scanned) { 82 scan(); 83 } 84 if (!tokens.isEmpty()) { 85 return this.tokens.get(0); 86 } 87 return null; 88 } 89 getToken()90 public Token getToken() { 91 if (!scanned) { 92 scan(); 93 } 94 return this.tokens.remove(0); 95 } 96 getToken(Token.ID choice)97 public Token getToken(Token.ID choice) { 98 Token token = getToken(); 99 if (choice != null && token.getTokenId() != choice) { 100 throw new CanonicalException("unexpected token " + token); 101 } 102 return token; 103 } 104 scan()105 private void scan() { 106 this.tokens.add(new StreamStartToken(mark, mark)); 107 boolean stop = false; 108 while (!stop) { 109 findToken(); 110 int c = data.codePointAt(index); 111 switch (c) { 112 case '\0': 113 tokens.add(new StreamEndToken(mark, mark)); 114 stop = true; 115 break; 116 117 case '%': 118 tokens.add(scanDirective()); 119 break; 120 121 case '-': 122 if ("---".equals(data.substring(index, index + 3))) { 123 index += 3; 124 tokens.add(new DocumentStartToken(mark, mark)); 125 } 126 break; 127 128 case '[': 129 index++; 130 tokens.add(new FlowSequenceStartToken(mark, mark)); 131 break; 132 133 case '{': 134 index++; 135 tokens.add(new FlowMappingStartToken(mark, mark)); 136 break; 137 138 case ']': 139 index++; 140 tokens.add(new FlowSequenceEndToken(mark, mark)); 141 break; 142 143 case '}': 144 index++; 145 tokens.add(new FlowMappingEndToken(mark, mark)); 146 break; 147 148 case '?': 149 index++; 150 tokens.add(new KeyToken(mark, mark)); 151 break; 152 153 case ':': 154 index++; 155 tokens.add(new ValueToken(mark, mark)); 156 break; 157 158 case ',': 159 index++; 160 tokens.add(new FlowEntryToken(mark, mark)); 161 break; 162 163 case '*': 164 tokens.add(scanAlias()); 165 break; 166 167 case '&': 168 tokens.add(scanAlias()); 169 break; 170 171 case '!': 172 tokens.add(scanTag()); 173 break; 174 175 case '"': 176 tokens.add(scanScalar()); 177 break; 178 179 default: 180 throw new CanonicalException("invalid token"); 181 } 182 } 183 scanned = true; 184 } 185 scanDirective()186 private Token scanDirective() { 187 String chunk1 = data.substring(index, index + DIRECTIVE.length()); 188 char chunk2 = data.charAt(index + DIRECTIVE.length()); 189 if (DIRECTIVE.equals(chunk1) && "\n\0".indexOf(chunk2) != -1) { 190 index += DIRECTIVE.length(); 191 List<Integer> implicit = new ArrayList<Integer>(2); 192 implicit.add(1); 193 implicit.add(1); 194 return new DirectiveToken<Integer>("YAML", implicit, mark, mark); 195 } else { 196 throw new CanonicalException("invalid directive"); 197 } 198 } 199 scanAlias()200 private Token scanAlias() { 201 boolean isTokenClassAlias; 202 final int c = data.codePointAt(index); 203 isTokenClassAlias = c == '*'; 204 index += Character.charCount(c); 205 int start = index; 206 while (", \n\0".indexOf(data.charAt(index)) == -1) { 207 index++; 208 } 209 String value = data.substring(start, index); 210 Token token; 211 if (isTokenClassAlias) { 212 token = new AliasToken(value, mark, mark); 213 } else { 214 token = new AnchorToken(value, mark, mark); 215 } 216 return token; 217 } 218 scanTag()219 private Token scanTag() { 220 index += Character.charCount(data.codePointAt(index)); 221 int start = index; 222 while (" \n\0".indexOf(data.charAt(index)) == -1) { 223 index++; 224 } 225 String value = data.substring(start, index); 226 if (value.length() == 0) { 227 value = "!"; 228 } else if (value.charAt(0) == '!') { 229 value = Tag.PREFIX + value.substring(1); 230 } else if (value.charAt(0) == '<' && value.charAt(value.length() - 1) == '>') { 231 value = value.substring(1, value.length() - 1); 232 } else { 233 value = "!" + value; 234 } 235 return new TagToken(new TagTuple("", value), mark, mark); 236 } 237 scanScalar()238 private Token scanScalar() { 239 index += Character.charCount(data.codePointAt(index)); 240 StringBuilder chunks = new StringBuilder(); 241 int start = index; 242 boolean ignoreSpaces = false; 243 while (data.charAt(index) != '"') { 244 if (data.charAt(index) == '\\') { 245 ignoreSpaces = false; 246 chunks.append(data, start, index); 247 index += Character.charCount(data.codePointAt(index)); 248 int c = data.codePointAt(index); 249 index += Character.charCount(data.codePointAt(index)); 250 if (c == '\n') { 251 ignoreSpaces = true; 252 } else if (!Character.isSupplementaryCodePoint(c) && QUOTE_CODES.containsKey((char) c)) { 253 int length = QUOTE_CODES.get((char) c); 254 int code = Integer.parseInt(data.substring(index, index + length), 16); 255 chunks.append((char) code); 256 index += length; 257 } else { 258 if (Character.isSupplementaryCodePoint(c) || !QUOTE_REPLACES.containsKey((char) c)) { 259 throw new CanonicalException("invalid escape code"); 260 } 261 chunks.append(QUOTE_REPLACES.get((char) c)); 262 } 263 start = index; 264 } else if (data.charAt(index) == '\n') { 265 chunks.append(data, start, index); 266 chunks.append(" "); 267 index += Character.charCount(data.codePointAt(index)); 268 start = index; 269 ignoreSpaces = true; 270 } else if (ignoreSpaces && data.charAt(index) == ' ') { 271 index += Character.charCount(data.codePointAt(index)); 272 start = index; 273 } else { 274 ignoreSpaces = false; 275 index += Character.charCount(data.codePointAt(index)); 276 } 277 } 278 chunks.append(data, start, index); 279 index += Character.charCount(data.codePointAt(index)); 280 return new ScalarToken(chunks.toString(), mark, mark, false); 281 } 282 findToken()283 private void findToken() { 284 boolean found = false; 285 while (!found) { 286 while (" \t".indexOf(data.charAt(index)) != -1) { 287 index++; 288 } 289 if (data.charAt(index) == '#') { 290 while (data.charAt(index) != '\n') { 291 index++; 292 } 293 } 294 if (data.charAt(index) == '\n') { 295 index++; 296 } else { 297 found = true; 298 } 299 } 300 } 301 } 302