• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**
 * Copyright (c) 2008, SnakeYAML
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
14 package org.pyyaml;
15 
16 import java.util.ArrayList;
17 import java.util.List;
18 import java.util.Map;
19 import org.yaml.snakeyaml.error.Mark;
20 import org.yaml.snakeyaml.nodes.Tag;
21 import org.yaml.snakeyaml.scanner.Scanner;
22 import org.yaml.snakeyaml.scanner.ScannerImpl;
23 import org.yaml.snakeyaml.tokens.AliasToken;
24 import org.yaml.snakeyaml.tokens.AnchorToken;
25 import org.yaml.snakeyaml.tokens.DirectiveToken;
26 import org.yaml.snakeyaml.tokens.DocumentStartToken;
27 import org.yaml.snakeyaml.tokens.FlowEntryToken;
28 import org.yaml.snakeyaml.tokens.FlowMappingEndToken;
29 import org.yaml.snakeyaml.tokens.FlowMappingStartToken;
30 import org.yaml.snakeyaml.tokens.FlowSequenceEndToken;
31 import org.yaml.snakeyaml.tokens.FlowSequenceStartToken;
32 import org.yaml.snakeyaml.tokens.KeyToken;
33 import org.yaml.snakeyaml.tokens.ScalarToken;
34 import org.yaml.snakeyaml.tokens.StreamEndToken;
35 import org.yaml.snakeyaml.tokens.StreamStartToken;
36 import org.yaml.snakeyaml.tokens.TagToken;
37 import org.yaml.snakeyaml.tokens.TagTuple;
38 import org.yaml.snakeyaml.tokens.Token;
39 import org.yaml.snakeyaml.tokens.ValueToken;
40 
41 public class CanonicalScanner implements Scanner {
42 
43   private static final String DIRECTIVE = "%YAML 1.1";
44   private final static Map<Character, Integer> QUOTE_CODES = ScannerImpl.ESCAPE_CODES;
45 
46   private final static Map<Character, String> QUOTE_REPLACES = ScannerImpl.ESCAPE_REPLACEMENTS;
47 
48   private final String data;
49   private int index;
50   public ArrayList<Token> tokens;
51   private boolean scanned;
52   private final Mark mark;
53 
CanonicalScanner(String data)54   public CanonicalScanner(String data) {
55     this.data = data + "\0";
56     this.index = 0;
57     this.tokens = new ArrayList<Token>();
58     this.scanned = false;
59     this.mark = new Mark("test", 0, 0, 0, data.toCharArray(), 0);
60   }
61 
checkToken(Token.ID... choices)62   public boolean checkToken(Token.ID... choices) {
63     if (!scanned) {
64       scan();
65     }
66     if (!tokens.isEmpty()) {
67       if (choices.length == 0) {
68         return true;
69       }
70       Token first = this.tokens.get(0);
71       for (Token.ID choice : choices) {
72         if (first.getTokenId() == choice) {
73           return true;
74         }
75       }
76     }
77     return false;
78   }
79 
peekToken()80   public Token peekToken() {
81     if (!scanned) {
82       scan();
83     }
84     if (!tokens.isEmpty()) {
85       return this.tokens.get(0);
86     }
87     return null;
88   }
89 
getToken()90   public Token getToken() {
91     if (!scanned) {
92       scan();
93     }
94     return this.tokens.remove(0);
95   }
96 
getToken(Token.ID choice)97   public Token getToken(Token.ID choice) {
98     Token token = getToken();
99     if (choice != null && token.getTokenId() != choice) {
100       throw new CanonicalException("unexpected token " + token);
101     }
102     return token;
103   }
104 
scan()105   private void scan() {
106     this.tokens.add(new StreamStartToken(mark, mark));
107     boolean stop = false;
108     while (!stop) {
109       findToken();
110       int c = data.codePointAt(index);
111       switch (c) {
112         case '\0':
113           tokens.add(new StreamEndToken(mark, mark));
114           stop = true;
115           break;
116 
117         case '%':
118           tokens.add(scanDirective());
119           break;
120 
121         case '-':
122           if ("---".equals(data.substring(index, index + 3))) {
123             index += 3;
124             tokens.add(new DocumentStartToken(mark, mark));
125           }
126           break;
127 
128         case '[':
129           index++;
130           tokens.add(new FlowSequenceStartToken(mark, mark));
131           break;
132 
133         case '{':
134           index++;
135           tokens.add(new FlowMappingStartToken(mark, mark));
136           break;
137 
138         case ']':
139           index++;
140           tokens.add(new FlowSequenceEndToken(mark, mark));
141           break;
142 
143         case '}':
144           index++;
145           tokens.add(new FlowMappingEndToken(mark, mark));
146           break;
147 
148         case '?':
149           index++;
150           tokens.add(new KeyToken(mark, mark));
151           break;
152 
153         case ':':
154           index++;
155           tokens.add(new ValueToken(mark, mark));
156           break;
157 
158         case ',':
159           index++;
160           tokens.add(new FlowEntryToken(mark, mark));
161           break;
162 
163         case '*':
164           tokens.add(scanAlias());
165           break;
166 
167         case '&':
168           tokens.add(scanAlias());
169           break;
170 
171         case '!':
172           tokens.add(scanTag());
173           break;
174 
175         case '"':
176           tokens.add(scanScalar());
177           break;
178 
179         default:
180           throw new CanonicalException("invalid token");
181       }
182     }
183     scanned = true;
184   }
185 
scanDirective()186   private Token scanDirective() {
187     String chunk1 = data.substring(index, index + DIRECTIVE.length());
188     char chunk2 = data.charAt(index + DIRECTIVE.length());
189     if (DIRECTIVE.equals(chunk1) && "\n\0".indexOf(chunk2) != -1) {
190       index += DIRECTIVE.length();
191       List<Integer> implicit = new ArrayList<Integer>(2);
192       implicit.add(1);
193       implicit.add(1);
194       return new DirectiveToken<Integer>("YAML", implicit, mark, mark);
195     } else {
196       throw new CanonicalException("invalid directive");
197     }
198   }
199 
scanAlias()200   private Token scanAlias() {
201     boolean isTokenClassAlias;
202     final int c = data.codePointAt(index);
203     isTokenClassAlias = c == '*';
204     index += Character.charCount(c);
205     int start = index;
206     while (", \n\0".indexOf(data.charAt(index)) == -1) {
207       index++;
208     }
209     String value = data.substring(start, index);
210     Token token;
211     if (isTokenClassAlias) {
212       token = new AliasToken(value, mark, mark);
213     } else {
214       token = new AnchorToken(value, mark, mark);
215     }
216     return token;
217   }
218 
scanTag()219   private Token scanTag() {
220     index += Character.charCount(data.codePointAt(index));
221     int start = index;
222     while (" \n\0".indexOf(data.charAt(index)) == -1) {
223       index++;
224     }
225     String value = data.substring(start, index);
226     if (value.length() == 0) {
227       value = "!";
228     } else if (value.charAt(0) == '!') {
229       value = Tag.PREFIX + value.substring(1);
230     } else if (value.charAt(0) == '<' && value.charAt(value.length() - 1) == '>') {
231       value = value.substring(1, value.length() - 1);
232     } else {
233       value = "!" + value;
234     }
235     return new TagToken(new TagTuple("", value), mark, mark);
236   }
237 
scanScalar()238   private Token scanScalar() {
239     index += Character.charCount(data.codePointAt(index));
240     StringBuilder chunks = new StringBuilder();
241     int start = index;
242     boolean ignoreSpaces = false;
243     while (data.charAt(index) != '"') {
244       if (data.charAt(index) == '\\') {
245         ignoreSpaces = false;
246         chunks.append(data, start, index);
247         index += Character.charCount(data.codePointAt(index));
248         int c = data.codePointAt(index);
249         index += Character.charCount(data.codePointAt(index));
250         if (c == '\n') {
251           ignoreSpaces = true;
252         } else if (!Character.isSupplementaryCodePoint(c) && QUOTE_CODES.containsKey((char) c)) {
253           int length = QUOTE_CODES.get((char) c);
254           int code = Integer.parseInt(data.substring(index, index + length), 16);
255           chunks.append((char) code);
256           index += length;
257         } else {
258           if (Character.isSupplementaryCodePoint(c) || !QUOTE_REPLACES.containsKey((char) c)) {
259             throw new CanonicalException("invalid escape code");
260           }
261           chunks.append(QUOTE_REPLACES.get((char) c));
262         }
263         start = index;
264       } else if (data.charAt(index) == '\n') {
265         chunks.append(data, start, index);
266         chunks.append(" ");
267         index += Character.charCount(data.codePointAt(index));
268         start = index;
269         ignoreSpaces = true;
270       } else if (ignoreSpaces && data.charAt(index) == ' ') {
271         index += Character.charCount(data.codePointAt(index));
272         start = index;
273       } else {
274         ignoreSpaces = false;
275         index += Character.charCount(data.codePointAt(index));
276       }
277     }
278     chunks.append(data, start, index);
279     index += Character.charCount(data.codePointAt(index));
280     return new ScalarToken(chunks.toString(), mark, mark, false);
281   }
282 
findToken()283   private void findToken() {
284     boolean found = false;
285     while (!found) {
286       while (" \t".indexOf(data.charAt(index)) != -1) {
287         index++;
288       }
289       if (data.charAt(index) == '#') {
290         while (data.charAt(index) != '\n') {
291           index++;
292         }
293       }
294       if (data.charAt(index) == '\n') {
295         index++;
296       } else {
297         found = true;
298       }
299     }
300   }
301 }
302