1 /** 2 * Copyright (c) 2008, http://www.snakeyaml.org 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package org.pyyaml; 17 18 import java.util.ArrayList; 19 import java.util.List; 20 import java.util.Map; 21 22 import org.yaml.snakeyaml.error.Mark; 23 import org.yaml.snakeyaml.nodes.Tag; 24 import org.yaml.snakeyaml.scanner.Scanner; 25 import org.yaml.snakeyaml.scanner.ScannerImpl; 26 import org.yaml.snakeyaml.tokens.AliasToken; 27 import org.yaml.snakeyaml.tokens.AnchorToken; 28 import org.yaml.snakeyaml.tokens.DirectiveToken; 29 import org.yaml.snakeyaml.tokens.DocumentStartToken; 30 import org.yaml.snakeyaml.tokens.FlowEntryToken; 31 import org.yaml.snakeyaml.tokens.FlowMappingEndToken; 32 import org.yaml.snakeyaml.tokens.FlowMappingStartToken; 33 import org.yaml.snakeyaml.tokens.FlowSequenceEndToken; 34 import org.yaml.snakeyaml.tokens.FlowSequenceStartToken; 35 import org.yaml.snakeyaml.tokens.KeyToken; 36 import org.yaml.snakeyaml.tokens.ScalarToken; 37 import org.yaml.snakeyaml.tokens.StreamEndToken; 38 import org.yaml.snakeyaml.tokens.StreamStartToken; 39 import org.yaml.snakeyaml.tokens.TagToken; 40 import org.yaml.snakeyaml.tokens.TagTuple; 41 import org.yaml.snakeyaml.tokens.Token; 42 import org.yaml.snakeyaml.tokens.ValueToken; 43 44 public class CanonicalScanner implements Scanner { 45 private static final String DIRECTIVE = "%YAML 1.1"; 46 private final static Map<Character, Integer> QUOTE_CODES = ScannerImpl.ESCAPE_CODES; 47 48 private final static Map<Character, String> QUOTE_REPLACES = ScannerImpl.ESCAPE_REPLACEMENTS; 49 50 private String data; 51 private int index; 52 public ArrayList<Token> tokens; 53 private boolean scanned; 54 private Mark mark; 55 CanonicalScanner(String data)56 public CanonicalScanner(String data) { 57 this.data = data + "\0"; 58 this.index = 0; 59 this.tokens = new ArrayList<Token>(); 60 this.scanned = false; 61 this.mark = new Mark("test", 0, 0, 0, data, 0); 62 } 63 checkToken(Token.ID... choices)64 public boolean checkToken(Token.ID... choices) { 65 if (!scanned) { 66 scan(); 67 } 68 if (!tokens.isEmpty()) { 69 if (choices.length == 0) { 70 return true; 71 } 72 Token first = this.tokens.get(0); 73 for (Token.ID choice : choices) { 74 if (first.getTokenId() == choice) { 75 return true; 76 } 77 } 78 } 79 return false; 80 } 81 peekToken()82 public Token peekToken() { 83 if (!scanned) { 84 scan(); 85 } 86 if (!tokens.isEmpty()) { 87 return this.tokens.get(0); 88 } 89 return null; 90 } 91 getToken()92 public Token getToken() { 93 if (!scanned) { 94 scan(); 95 } 96 return this.tokens.remove(0); 97 } 98 getToken(Token.ID choice)99 public Token getToken(Token.ID choice) { 100 Token token = getToken(); 101 if (choice != null && token.getTokenId() != choice) { 102 throw new CanonicalException("unexpected token " + token); 103 } 104 return token; 105 } 106 scan()107 private void scan() { 108 this.tokens.add(new StreamStartToken(mark, mark)); 109 boolean stop = false; 110 while (!stop) { 111 findToken(); 112 char ch = data.charAt(index); 113 switch (ch) { 114 case '\0': 115 tokens.add(new StreamEndToken(mark, mark)); 116 stop = true; 117 break; 118 119 case '%': 120 tokens.add(scanDirective()); 121 break; 122 123 case '-': 124 if ("---".equals(data.substring(index, index + 3))) { 125 index += 3; 126 tokens.add(new DocumentStartToken(mark, mark)); 127 } 128 break; 129 130 case '[': 131 index++; 132 tokens.add(new FlowSequenceStartToken(mark, mark)); 133 break; 134 135 case '{': 136 index++; 137 tokens.add(new FlowMappingStartToken(mark, mark)); 138 break; 139 140 case ']': 141 index++; 142 tokens.add(new FlowSequenceEndToken(mark, mark)); 143 break; 144 145 case '}': 146 index++; 147 tokens.add(new FlowMappingEndToken(mark, mark)); 148 break; 149 150 case '?': 151 index++; 152 tokens.add(new KeyToken(mark, mark)); 153 break; 154 155 case ':': 156 index++; 157 tokens.add(new ValueToken(mark, mark)); 158 break; 159 160 case ',': 161 index++; 162 tokens.add(new FlowEntryToken(mark, mark)); 163 break; 164 165 case '*': 166 tokens.add(scanAlias()); 167 break; 168 169 case '&': 170 tokens.add(scanAlias()); 171 break; 172 173 case '!': 174 tokens.add(scanTag()); 175 break; 176 177 case '"': 178 tokens.add(scanScalar()); 179 break; 180 181 default: 182 throw new CanonicalException("invalid token"); 183 } 184 } 185 scanned = true; 186 } 187 scanDirective()188 private Token scanDirective() { 189 String chunk1 = data.substring(index, index + DIRECTIVE.length()); 190 char chunk2 = data.charAt(index + DIRECTIVE.length()); 191 if (DIRECTIVE.equals(chunk1) && "\n\0".indexOf(chunk2) != -1) { 192 index += DIRECTIVE.length(); 193 List<Integer> implicit = new ArrayList<Integer>(2); 194 implicit.add(new Integer(1)); 195 implicit.add(new Integer(1)); 196 return new DirectiveToken<Integer>("YAML", implicit, mark, mark); 197 } else { 198 throw new CanonicalException("invalid directive"); 199 } 200 } 201 scanAlias()202 private Token scanAlias() { 203 boolean isTokenClassAlias; 204 if (data.charAt(index) == '*') { 205 isTokenClassAlias = true; 206 } else { 207 isTokenClassAlias = false; 208 } 209 index++; 210 int start = index; 211 while (", \n\0".indexOf(data.charAt(index)) == -1) { 212 index++; 213 } 214 String value = data.substring(start, index); 215 Token token; 216 if (isTokenClassAlias) { 217 token = new AliasToken(value, mark, mark); 218 } else { 219 token = new AnchorToken(value, mark, mark); 220 } 221 return token; 222 } 223 scanTag()224 private Token scanTag() { 225 index++; 226 int start = index; 227 while (" \n\0".indexOf(data.charAt(index)) == -1) { 228 index++; 229 } 230 String value = data.substring(start, index); 231 if (value.length() == 0) { 232 value = "!"; 233 } else if (value.charAt(0) == '!') { 234 value = Tag.PREFIX + value.substring(1); 235 } else if (value.charAt(0) == '<' && value.charAt(value.length() - 1) == '>') { 236 value = value.substring(1, value.length() - 1); 237 } else { 238 value = "!" + value; 239 } 240 return new TagToken(new TagTuple("", value), mark, mark); 241 } 242 scanScalar()243 private Token scanScalar() { 244 index++; 245 StringBuilder chunks = new StringBuilder(); 246 int start = index; 247 boolean ignoreSpaces = false; 248 while (data.charAt(index) != '"') { 249 if (data.charAt(index) == '\\') { 250 ignoreSpaces = false; 251 chunks.append(data.substring(start, index)); 252 index++; 253 char ch = data.charAt(index); 254 index++; 255 if (ch == '\n') { 256 ignoreSpaces = true; 257 } else if (QUOTE_CODES.keySet().contains(ch)) { 258 int length = QUOTE_CODES.get(ch); 259 int code = Integer.parseInt(data.substring(index, index + length), 16); 260 chunks.append(String.valueOf((char) code)); 261 index += length; 262 } else { 263 if (!QUOTE_REPLACES.keySet().contains(ch)) { 264 throw new CanonicalException("invalid escape code"); 265 } 266 chunks.append(QUOTE_REPLACES.get(ch)); 267 } 268 start = index; 269 } else if (data.charAt(index) == '\n') { 270 chunks.append(data.substring(start, index)); 271 chunks.append(" "); 272 index++; 273 start = index; 274 ignoreSpaces = true; 275 } else if (ignoreSpaces && data.charAt(index) == ' ') { 276 index++; 277 start = index; 278 } else { 279 ignoreSpaces = false; 280 index++; 281 } 282 } 283 chunks.append(data.substring(start, index)); 284 index++; 285 return new ScalarToken(chunks.toString(), mark, mark, false); 286 } 287 findToken()288 private void findToken() { 289 boolean found = false; 290 while (!found) { 291 while (" \t".indexOf(data.charAt(index)) != -1) { 292 index++; 293 } 294 if (data.charAt(index) == '#') { 295 while (data.charAt(index) != '\n') { 296 index++; 297 } 298 } 299 if (data.charAt(index) == '\n') { 300 index++; 301 } else { 302 found = true; 303 } 304 } 305 } 306 } 307