1 // Copyright (c) 2003-2004 Brian Wellington (bwelling@xbill.org)
2 //
3 // Copyright (C) 2003-2004 Nominum, Inc.
4 //
5 // Permission to use, copy, modify, and distribute this software for any
6 // purpose with or without fee is hereby granted, provided that the above
7 // copyright notice and this permission notice appear in all copies.
8 //
9 // THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
10 // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR ANY
12 // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
15 // OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 //
17
18 package org.xbill.DNS;
19
20 import java.io.*;
21 import java.net.*;
22
23 import org.xbill.DNS.utils.*;
24
25 /**
 * Tokenizer is used to parse DNS records and zones from text format.
27 *
28 * @author Brian Wellington
29 * @author Bob Halley
30 */
31
32 public class Tokenizer {
33
34 private static String delim = " \t\n;()\"";
35 private static String quotes = "\"";
36
37 /** End of file */
38 public static final int EOF = 0;
39
40 /** End of line */
41 public static final int EOL = 1;
42
43 /** Whitespace; only returned when wantWhitespace is set */
44 public static final int WHITESPACE = 2;
45
46 /** An identifier (unquoted string) */
47 public static final int IDENTIFIER = 3;
48
49 /** A quoted string */
50 public static final int QUOTED_STRING = 4;
51
52 /** A comment; only returned when wantComment is set */
53 public static final int COMMENT = 5;
54
55 private PushbackInputStream is;
56 private boolean ungottenToken;
57 private int multiline;
58 private boolean quoting;
59 private String delimiters;
60 private Token current;
61 private StringBuffer sb;
62 private boolean wantClose;
63
64 private String filename;
65 private int line;
66
67 public static class Token {
68 /** The type of token. */
69 public int type;
70
71 /** The value of the token, or null for tokens without values. */
72 public String value;
73
74 private
Token()75 Token() {
76 type = -1;
77 value = null;
78 }
79
80 private Token
set(int type, StringBuffer value)81 set(int type, StringBuffer value) {
82 if (type < 0)
83 throw new IllegalArgumentException();
84 this.type = type;
85 this.value = value == null ? null : value.toString();
86 return this;
87 }
88
89 /**
90 * Converts the token to a string containing a representation useful
91 * for debugging.
92 */
93 public String
toString()94 toString() {
95 switch (type) {
96 case EOF:
97 return "<eof>";
98 case EOL:
99 return "<eol>";
100 case WHITESPACE:
101 return "<whitespace>";
102 case IDENTIFIER:
103 return "<identifier: " + value + ">";
104 case QUOTED_STRING:
105 return "<quoted_string: " + value + ">";
106 case COMMENT:
107 return "<comment: " + value + ">";
108 default:
109 return "<unknown>";
110 }
111 }
112
113 /** Indicates whether this token contains a string. */
114 public boolean
isString()115 isString() {
116 return (type == IDENTIFIER || type == QUOTED_STRING);
117 }
118
119 /** Indicates whether this token contains an EOL or EOF. */
120 public boolean
isEOL()121 isEOL() {
122 return (type == EOL || type == EOF);
123 }
124 }
125
126 static class TokenizerException extends TextParseException {
127 String message;
128
129 public
TokenizerException(String filename, int line, String message)130 TokenizerException(String filename, int line, String message) {
131 super(filename + ":" + line + ": " + message);
132 this.message = message;
133 }
134
135 public String
getBaseMessage()136 getBaseMessage() {
137 return message;
138 }
139 }
140
/**
 * Creates a Tokenizer from an arbitrary input stream.  The stream is wrapped
 * in a BufferedInputStream unless it already is one; the reported file name
 * is "&lt;none&gt;" and line counting starts at 1.
 * @param is The InputStream to tokenize.
 */
public
Tokenizer(InputStream is) {
    InputStream buffered = (is instanceof BufferedInputStream)
        ? is
        : new BufferedInputStream(is);
    // Room to push back up to two bytes.
    this.is = new PushbackInputStream(buffered, 2);

    // Position reported in error messages.
    filename = "<none>";
    line = 1;

    // Scanner state.
    delimiters = delim;
    quoting = false;
    multiline = 0;
    ungottenToken = false;

    // Reusable token and its text buffer.
    sb = new StringBuffer();
    current = new Token();
}
159
/**
 * Creates a Tokenizer from a string.  The string is converted to bytes with
 * the platform's default charset.
 * @param s The String to tokenize.
 */
public
Tokenizer(String s) {
    this(byteStreamOf(s));
}

/** Exposes the bytes of a string (default charset) as an input stream. */
private static InputStream
byteStreamOf(String s) {
    byte [] raw = s.getBytes();
    return new ByteArrayInputStream(raw);
}
168
/**
 * Creates a Tokenizer from a file.  Error messages report the file's name,
 * and close() will close the stream opened here.
 * @param f The File to tokenize.
 * @throws FileNotFoundException The file could not be opened.
 */
public
Tokenizer(File f) throws FileNotFoundException {
    this(new FileInputStream(f));
    filename = f.getName();
    // This tokenizer opened the stream itself, so it must close it.
    wantClose = true;
}
179
180 private int
getChar()181 getChar() throws IOException {
182 int c = is.read();
183 if (c == '\r') {
184 int next = is.read();
185 if (next != '\n')
186 is.unread(next);
187 c = '\n';
188 }
189 if (c == '\n')
190 line++;
191 return c;
192 }
193
194 private void
ungetChar(int c)195 ungetChar(int c) throws IOException {
196 if (c == -1)
197 return;
198 is.unread(c);
199 if (c == '\n')
200 line--;
201 }
202
203 private int
skipWhitespace()204 skipWhitespace() throws IOException {
205 int skipped = 0;
206 while (true) {
207 int c = getChar();
208 if (c != ' ' && c != '\t') {
209 if (!(c == '\n' && multiline > 0)) {
210 ungetChar(c);
211 return skipped;
212 }
213 }
214 skipped++;
215 }
216 }
217
218 private void
checkUnbalancedParens()219 checkUnbalancedParens() throws TextParseException {
220 if (multiline > 0)
221 throw exception("unbalanced parentheses");
222 }
223
224 /**
225 * Gets the next token from a tokenizer.
226 * @param wantWhitespace If true, leading whitespace will be returned as a
227 * token.
228 * @param wantComment If true, comments are returned as tokens.
229 * @return The next token in the stream.
230 * @throws TextParseException The input was invalid.
231 * @throws IOException An I/O error occurred.
232 */
233 public Token
get(boolean wantWhitespace, boolean wantComment)234 get(boolean wantWhitespace, boolean wantComment) throws IOException {
235 int type;
236 int c;
237
238 if (ungottenToken) {
239 ungottenToken = false;
240 if (current.type == WHITESPACE) {
241 if (wantWhitespace)
242 return current;
243 } else if (current.type == COMMENT) {
244 if (wantComment)
245 return current;
246 } else {
247 if (current.type == EOL)
248 line++;
249 return current;
250 }
251 }
252 int skipped = skipWhitespace();
253 if (skipped > 0 && wantWhitespace)
254 return current.set(WHITESPACE, null);
255 type = IDENTIFIER;
256 sb.setLength(0);
257 while (true) {
258 c = getChar();
259 if (c == -1 || delimiters.indexOf(c) != -1) {
260 if (c == -1) {
261 if (quoting)
262 throw exception("EOF in " +
263 "quoted string");
264 else if (sb.length() == 0)
265 return current.set(EOF, null);
266 else
267 return current.set(type, sb);
268 }
269 if (sb.length() == 0 && type != QUOTED_STRING) {
270 if (c == '(') {
271 multiline++;
272 skipWhitespace();
273 continue;
274 } else if (c == ')') {
275 if (multiline <= 0)
276 throw exception("invalid " +
277 "close " +
278 "parenthesis");
279 multiline--;
280 skipWhitespace();
281 continue;
282 } else if (c == '"') {
283 if (!quoting) {
284 quoting = true;
285 delimiters = quotes;
286 type = QUOTED_STRING;
287 } else {
288 quoting = false;
289 delimiters = delim;
290 skipWhitespace();
291 }
292 continue;
293 } else if (c == '\n') {
294 return current.set(EOL, null);
295 } else if (c == ';') {
296 while (true) {
297 c = getChar();
298 if (c == '\n' || c == -1)
299 break;
300 sb.append((char)c);
301 }
302 if (wantComment) {
303 ungetChar(c);
304 return current.set(COMMENT, sb);
305 } else if (c == -1 &&
306 type != QUOTED_STRING)
307 {
308 checkUnbalancedParens();
309 return current.set(EOF, null);
310 } else if (multiline > 0) {
311 skipWhitespace();
312 sb.setLength(0);
313 continue;
314 } else
315 return current.set(EOL, null);
316 } else
317 throw new IllegalStateException();
318 } else
319 ungetChar(c);
320 break;
321 } else if (c == '\\') {
322 c = getChar();
323 if (c == -1)
324 throw exception("unterminated escape sequence");
325 sb.append('\\');
326 } else if (quoting && c == '\n') {
327 throw exception("newline in quoted string");
328 }
329 sb.append((char)c);
330 }
331 if (sb.length() == 0 && type != QUOTED_STRING) {
332 checkUnbalancedParens();
333 return current.set(EOF, null);
334 }
335 return current.set(type, sb);
336 }
337
338 /**
339 * Gets the next token from a tokenizer, ignoring whitespace and comments.
340 * @return The next token in the stream.
341 * @throws TextParseException The input was invalid.
342 * @throws IOException An I/O error occurred.
343 */
344 public Token
get()345 get() throws IOException {
346 return get(false, false);
347 }
348
349 /**
350 * Returns a token to the stream, so that it will be returned by the next call
351 * to get().
352 * @throws IllegalStateException There are already ungotten tokens.
353 */
354 public void
unget()355 unget() {
356 if (ungottenToken)
357 throw new IllegalStateException
358 ("Cannot unget multiple tokens");
359 if (current.type == EOL)
360 line--;
361 ungottenToken = true;
362 }
363
364 /**
365 * Gets the next token from a tokenizer and converts it to a string.
366 * @return The next token in the stream, as a string.
367 * @throws TextParseException The input was invalid or not a string.
368 * @throws IOException An I/O error occurred.
369 */
370 public String
getString()371 getString() throws IOException {
372 Token next = get();
373 if (!next.isString()) {
374 throw exception("expected a string");
375 }
376 return next.value;
377 }
378
379 private String
_getIdentifier(String expected)380 _getIdentifier(String expected) throws IOException {
381 Token next = get();
382 if (next.type != IDENTIFIER)
383 throw exception("expected " + expected);
384 return next.value;
385 }
386
387 /**
388 * Gets the next token from a tokenizer, ensures it is an unquoted string,
389 * and converts it to a string.
390 * @return The next token in the stream, as a string.
391 * @throws TextParseException The input was invalid or not an unquoted string.
392 * @throws IOException An I/O error occurred.
393 */
394 public String
getIdentifier()395 getIdentifier() throws IOException {
396 return _getIdentifier("an identifier");
397 }
398
399 /**
400 * Gets the next token from a tokenizer and converts it to a long.
401 * @return The next token in the stream, as a long.
402 * @throws TextParseException The input was invalid or not a long.
403 * @throws IOException An I/O error occurred.
404 */
405 public long
getLong()406 getLong() throws IOException {
407 String next = _getIdentifier("an integer");
408 if (!Character.isDigit(next.charAt(0)))
409 throw exception("expected an integer");
410 try {
411 return Long.parseLong(next);
412 } catch (NumberFormatException e) {
413 throw exception("expected an integer");
414 }
415 }
416
417 /**
418 * Gets the next token from a tokenizer and converts it to an unsigned 32 bit
419 * integer.
420 * @return The next token in the stream, as an unsigned 32 bit integer.
421 * @throws TextParseException The input was invalid or not an unsigned 32
422 * bit integer.
423 * @throws IOException An I/O error occurred.
424 */
425 public long
getUInt32()426 getUInt32() throws IOException {
427 long l = getLong();
428 if (l < 0 || l > 0xFFFFFFFFL)
429 throw exception("expected an 32 bit unsigned integer");
430 return l;
431 }
432
433 /**
434 * Gets the next token from a tokenizer and converts it to an unsigned 16 bit
435 * integer.
436 * @return The next token in the stream, as an unsigned 16 bit integer.
437 * @throws TextParseException The input was invalid or not an unsigned 16
438 * bit integer.
439 * @throws IOException An I/O error occurred.
440 */
441 public int
getUInt16()442 getUInt16() throws IOException {
443 long l = getLong();
444 if (l < 0 || l > 0xFFFFL)
445 throw exception("expected an 16 bit unsigned integer");
446 return (int) l;
447 }
448
449 /**
450 * Gets the next token from a tokenizer and converts it to an unsigned 8 bit
451 * integer.
452 * @return The next token in the stream, as an unsigned 8 bit integer.
453 * @throws TextParseException The input was invalid or not an unsigned 8
454 * bit integer.
455 * @throws IOException An I/O error occurred.
456 */
457 public int
getUInt8()458 getUInt8() throws IOException {
459 long l = getLong();
460 if (l < 0 || l > 0xFFL)
461 throw exception("expected an 8 bit unsigned integer");
462 return (int) l;
463 }
464
465 /**
466 * Gets the next token from a tokenizer and parses it as a TTL.
467 * @return The next token in the stream, as an unsigned 32 bit integer.
468 * @throws TextParseException The input was not valid.
469 * @throws IOException An I/O error occurred.
470 * @see TTL
471 */
472 public long
getTTL()473 getTTL() throws IOException {
474 String next = _getIdentifier("a TTL value");
475 try {
476 return TTL.parseTTL(next);
477 }
478 catch (NumberFormatException e) {
479 throw exception("expected a TTL value");
480 }
481 }
482
483 /**
484 * Gets the next token from a tokenizer and parses it as if it were a TTL.
485 * @return The next token in the stream, as an unsigned 32 bit integer.
486 * @throws TextParseException The input was not valid.
487 * @throws IOException An I/O error occurred.
488 * @see TTL
489 */
490 public long
getTTLLike()491 getTTLLike() throws IOException {
492 String next = _getIdentifier("a TTL-like value");
493 try {
494 return TTL.parse(next, false);
495 }
496 catch (NumberFormatException e) {
497 throw exception("expected a TTL-like value");
498 }
499 }
500
501 /**
502 * Gets the next token from a tokenizer and converts it to a name.
503 * @param origin The origin to append to relative names.
504 * @return The next token in the stream, as a name.
505 * @throws TextParseException The input was invalid or not a valid name.
506 * @throws IOException An I/O error occurred.
507 * @throws RelativeNameException The parsed name was relative, even with the
508 * origin.
509 * @see Name
510 */
511 public Name
getName(Name origin)512 getName(Name origin) throws IOException {
513 String next = _getIdentifier("a name");
514 try {
515 Name name = Name.fromString(next, origin);
516 if (!name.isAbsolute())
517 throw new RelativeNameException(name);
518 return name;
519 }
520 catch (TextParseException e) {
521 throw exception(e.getMessage());
522 }
523 }
524
525 /**
526 * Gets the next token from a tokenizer and converts it to an IP Address.
527 * @param family The address family.
528 * @return The next token in the stream, as an InetAddress
529 * @throws TextParseException The input was invalid or not a valid address.
530 * @throws IOException An I/O error occurred.
531 * @see Address
532 */
533 public InetAddress
getAddress(int family)534 getAddress(int family) throws IOException {
535 String next = _getIdentifier("an address");
536 try {
537 return Address.getByAddress(next, family);
538 }
539 catch (UnknownHostException e) {
540 throw exception(e.getMessage());
541 }
542 }
543
544 /**
545 * Gets the next token from a tokenizer, which must be an EOL or EOF.
546 * @throws TextParseException The input was invalid or not an EOL or EOF token.
547 * @throws IOException An I/O error occurred.
548 */
549 public void
getEOL()550 getEOL() throws IOException {
551 Token next = get();
552 if (next.type != EOL && next.type != EOF) {
553 throw exception("expected EOL or EOF");
554 }
555 }
556
557 /**
558 * Returns a concatenation of the remaining strings from a Tokenizer.
559 */
560 private String
remainingStrings()561 remainingStrings() throws IOException {
562 StringBuffer buffer = null;
563 while (true) {
564 Tokenizer.Token t = get();
565 if (!t.isString())
566 break;
567 if (buffer == null)
568 buffer = new StringBuffer();
569 buffer.append(t.value);
570 }
571 unget();
572 if (buffer == null)
573 return null;
574 return buffer.toString();
575 }
576
577 /**
578 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
579 * them together, and converts the base64 encoded data to a byte array.
580 * @param required If true, an exception will be thrown if no strings remain;
581 * otherwise null be be returned.
582 * @return The byte array containing the decoded strings, or null if there
583 * were no strings to decode.
584 * @throws TextParseException The input was invalid.
585 * @throws IOException An I/O error occurred.
586 */
587 public byte []
getBase64(boolean required)588 getBase64(boolean required) throws IOException {
589 String s = remainingStrings();
590 if (s == null) {
591 if (required)
592 throw exception("expected base64 encoded string");
593 else
594 return null;
595 }
596 byte [] array = base64.fromString(s);
597 if (array == null)
598 throw exception("invalid base64 encoding");
599 return array;
600 }
601
602 /**
603 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
604 * them together, and converts the base64 encoded data to a byte array.
605 * @return The byte array containing the decoded strings, or null if there
606 * were no strings to decode.
607 * @throws TextParseException The input was invalid.
608 * @throws IOException An I/O error occurred.
609 */
610 public byte []
getBase64()611 getBase64() throws IOException {
612 return getBase64(false);
613 }
614
615 /**
616 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
617 * them together, and converts the hex encoded data to a byte array.
618 * @param required If true, an exception will be thrown if no strings remain;
619 * otherwise null be be returned.
620 * @return The byte array containing the decoded strings, or null if there
621 * were no strings to decode.
622 * @throws TextParseException The input was invalid.
623 * @throws IOException An I/O error occurred.
624 */
625 public byte []
getHex(boolean required)626 getHex(boolean required) throws IOException {
627 String s = remainingStrings();
628 if (s == null) {
629 if (required)
630 throw exception("expected hex encoded string");
631 else
632 return null;
633 }
634 byte [] array = base16.fromString(s);
635 if (array == null)
636 throw exception("invalid hex encoding");
637 return array;
638 }
639
640 /**
641 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
642 * them together, and converts the hex encoded data to a byte array.
643 * @return The byte array containing the decoded strings, or null if there
644 * were no strings to decode.
645 * @throws TextParseException The input was invalid.
646 * @throws IOException An I/O error occurred.
647 */
648 public byte []
getHex()649 getHex() throws IOException {
650 return getHex(false);
651 }
652
653 /**
654 * Gets the next token from a tokenizer and decodes it as hex.
655 * @return The byte array containing the decoded string.
656 * @throws TextParseException The input was invalid.
657 * @throws IOException An I/O error occurred.
658 */
659 public byte []
getHexString()660 getHexString() throws IOException {
661 String next = _getIdentifier("a hex string");
662 byte [] array = base16.fromString(next);
663 if (array == null)
664 throw exception("invalid hex encoding");
665 return array;
666 }
667
668 /**
669 * Gets the next token from a tokenizer and decodes it as base32.
670 * @param b32 The base32 context to decode with.
671 * @return The byte array containing the decoded string.
672 * @throws TextParseException The input was invalid.
673 * @throws IOException An I/O error occurred.
674 */
675 public byte []
getBase32String(base32 b32)676 getBase32String(base32 b32) throws IOException {
677 String next = _getIdentifier("a base32 string");
678 byte [] array = b32.fromString(next);
679 if (array == null)
680 throw exception("invalid base32 encoding");
681 return array;
682 }
683
684 /**
685 * Creates an exception which includes the current state in the error message
686 * @param s The error message to include.
687 * @return The exception to be thrown
688 */
689 public TextParseException
exception(String s)690 exception(String s) {
691 return new TokenizerException(filename, line, s);
692 }
693
694 /**
695 * Closes any files opened by this tokenizer.
696 */
697 public void
close()698 close() {
699 if (wantClose) {
700 try {
701 is.close();
702 }
703 catch (IOException e) {
704 }
705 }
706 }
707
708 protected void
finalize()709 finalize() {
710 close();
711 }
712
713 }
714