// Copyright (c) 2003-2004 Brian Wellington (bwelling@xbill.org)
//
// Copyright (C) 2003-2004 Nominum, Inc.
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
// OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
17 
18 package org.xbill.DNS;
19 
20 import java.io.*;
21 import java.net.*;
22 
23 import org.xbill.DNS.utils.*;
24 
/**
 * Tokenizer is used to parse DNS records and zones from text format.
 *
 * @author Brian Wellington
 * @author Bob Halley
 */
31 
32 public class Tokenizer {
33 
34 private static String delim = " \t\n;()\"";
35 private static String quotes = "\"";
36 
37 /** End of file */
38 public static final int EOF		= 0;
39 
40 /** End of line */
41 public static final int EOL		= 1;
42 
43 /** Whitespace; only returned when wantWhitespace is set */
44 public static final int WHITESPACE	= 2;
45 
46 /** An identifier (unquoted string) */
47 public static final int IDENTIFIER	= 3;
48 
49 /** A quoted string */
50 public static final int QUOTED_STRING	= 4;
51 
52 /** A comment; only returned when wantComment is set */
53 public static final int COMMENT		= 5;
54 
55 private PushbackInputStream is;
56 private boolean ungottenToken;
57 private int multiline;
58 private boolean quoting;
59 private String delimiters;
60 private Token current;
61 private StringBuffer sb;
62 private boolean wantClose;
63 
64 private String filename;
65 private int line;
66 
67 public static class Token {
68 	/** The type of token. */
69 	public int type;
70 
71 	/** The value of the token, or null for tokens without values. */
72 	public String value;
73 
74 	private
Token()75 	Token() {
76 		type = -1;
77 		value = null;
78 	}
79 
80 	private Token
set(int type, StringBuffer value)81 	set(int type, StringBuffer value) {
82 		if (type < 0)
83 			throw new IllegalArgumentException();
84 		this.type = type;
85 		this.value = value == null ? null : value.toString();
86 		return this;
87 	}
88 
89 	/**
90 	 * Converts the token to a string containing a representation useful
91 	 * for debugging.
92 	 */
93 	public String
toString()94 	toString() {
95 		switch (type) {
96 		case EOF:
97 			return "<eof>";
98 		case EOL:
99 			return "<eol>";
100 		case WHITESPACE:
101 			return "<whitespace>";
102 		case IDENTIFIER:
103 			return "<identifier: " + value + ">";
104 		case QUOTED_STRING:
105 			return "<quoted_string: " + value + ">";
106 		case COMMENT:
107 			return "<comment: " + value + ">";
108 		default:
109 			return "<unknown>";
110 		}
111 	}
112 
113 	/** Indicates whether this token contains a string. */
114 	public boolean
isString()115 	isString() {
116 		return (type == IDENTIFIER || type == QUOTED_STRING);
117 	}
118 
119 	/** Indicates whether this token contains an EOL or EOF. */
120 	public boolean
isEOL()121 	isEOL() {
122 		return (type == EOL || type == EOF);
123 	}
124 }
125 
126 static class TokenizerException extends TextParseException {
127 	String message;
128 
129 	public
TokenizerException(String filename, int line, String message)130 	TokenizerException(String filename, int line, String message) {
131 		super(filename + ":" + line + ": " + message);
132 		this.message = message;
133 	}
134 
135 	public String
getBaseMessage()136 	getBaseMessage() {
137 		return message;
138 	}
139 }
140 
/**
 * Builds a Tokenizer that reads from the given stream.  The stream is
 * wrapped in a BufferedInputStream unless it already is one, and then in a
 * PushbackInputStream with a pushback capacity of two bytes.
 * @param is The InputStream to tokenize.
 */
public
Tokenizer(InputStream is) {
	InputStream buffered = (is instanceof BufferedInputStream)
				? is
				: new BufferedInputStream(is);
	this.is = new PushbackInputStream(buffered, 2);

	// Position information used in error messages.
	filename = "<none>";
	line = 1;

	// Scanner state: nothing pushed back, no open parentheses,
	// not inside a quoted string.
	current = new Token();
	sb = new StringBuffer();
	delimiters = delim;
	quoting = false;
	multiline = 0;
	ungottenToken = false;
}
159 
/**
 * Builds a Tokenizer that reads from the bytes of a string.  The string is
 * converted with String.getBytes(), i.e. using the platform's default
 * charset.
 * @param s The String to tokenize.
 */
public
Tokenizer(String s) {
	this(new ByteArrayInputStream(s.getBytes()));
}
168 
/**
 * Builds a Tokenizer that reads from a file.  The file's name is used in
 * error messages, and close() will close the opened stream.
 * @param f The File to tokenize.
 * @throws FileNotFoundException The file could not be opened for reading.
 */
public
Tokenizer(File f) throws FileNotFoundException {
	this(new FileInputStream(f));
	filename = f.getName();
	// This tokenizer opened the stream itself, so close() must release it.
	wantClose = true;
}
179 
180 private int
getChar()181 getChar() throws IOException {
182 	int c = is.read();
183 	if (c == '\r') {
184 		int next = is.read();
185 		if (next != '\n')
186 			is.unread(next);
187 		c = '\n';
188 	}
189 	if (c == '\n')
190 		line++;
191 	return c;
192 }
193 
194 private void
ungetChar(int c)195 ungetChar(int c) throws IOException {
196 	if (c == -1)
197 		return;
198 	is.unread(c);
199 	if (c == '\n')
200 		line--;
201 }
202 
203 private int
skipWhitespace()204 skipWhitespace() throws IOException {
205 	int skipped = 0;
206 	while (true) {
207 		int c = getChar();
208 		if (c != ' ' && c != '\t') {
209 	                if (!(c == '\n' && multiline > 0)) {
210 				ungetChar(c);
211 				return skipped;
212 			}
213 		}
214 		skipped++;
215 	}
216 }
217 
218 private void
checkUnbalancedParens()219 checkUnbalancedParens() throws TextParseException {
220 	if (multiline > 0)
221 		throw exception("unbalanced parentheses");
222 }
223 
224 /**
225  * Gets the next token from a tokenizer.
226  * @param wantWhitespace If true, leading whitespace will be returned as a
227  * token.
228  * @param wantComment If true, comments are returned as tokens.
229  * @return The next token in the stream.
230  * @throws TextParseException The input was invalid.
231  * @throws IOException An I/O error occurred.
232  */
233 public Token
get(boolean wantWhitespace, boolean wantComment)234 get(boolean wantWhitespace, boolean wantComment) throws IOException {
235 	int type;
236 	int c;
237 
238 	if (ungottenToken) {
239 		ungottenToken = false;
240 		if (current.type == WHITESPACE) {
241 			if (wantWhitespace)
242 				return current;
243 		} else if (current.type == COMMENT) {
244 			if (wantComment)
245 				return current;
246 		} else {
247 			if (current.type == EOL)
248 				line++;
249 			return current;
250 		}
251 	}
252 	int skipped = skipWhitespace();
253 	if (skipped > 0 && wantWhitespace)
254 		return current.set(WHITESPACE, null);
255 	type = IDENTIFIER;
256 	sb.setLength(0);
257 	while (true) {
258 		c = getChar();
259 		if (c == -1 || delimiters.indexOf(c) != -1) {
260 			if (c == -1) {
261 				if (quoting)
262 					throw exception("EOF in " +
263 							"quoted string");
264 				else if (sb.length() == 0)
265 					return current.set(EOF, null);
266 				else
267 					return current.set(type, sb);
268 			}
269 			if (sb.length() == 0 && type != QUOTED_STRING) {
270 				if (c == '(') {
271 					multiline++;
272 					skipWhitespace();
273 					continue;
274 				} else if (c == ')') {
275 					if (multiline <= 0)
276 						throw exception("invalid " +
277 								"close " +
278 								"parenthesis");
279 					multiline--;
280 					skipWhitespace();
281 					continue;
282 				} else if (c == '"') {
283 					if (!quoting) {
284 						quoting = true;
285 						delimiters = quotes;
286 						type = QUOTED_STRING;
287 					} else {
288 						quoting = false;
289 						delimiters = delim;
290 						skipWhitespace();
291 					}
292 					continue;
293 				} else if (c == '\n') {
294 					return current.set(EOL, null);
295 				} else if (c == ';') {
296 					while (true) {
297 						c = getChar();
298 						if (c == '\n' || c == -1)
299 							break;
300 						sb.append((char)c);
301 					}
302 					if (wantComment) {
303 						ungetChar(c);
304 						return current.set(COMMENT, sb);
305 					} else if (c == -1 &&
306 						   type != QUOTED_STRING)
307 					{
308 						checkUnbalancedParens();
309 						return current.set(EOF, null);
310 					} else if (multiline > 0) {
311 						skipWhitespace();
312 						sb.setLength(0);
313 						continue;
314 					} else
315 						return current.set(EOL, null);
316 				} else
317 					throw new IllegalStateException();
318 			} else
319 				ungetChar(c);
320 			break;
321 		} else if (c == '\\') {
322 			c = getChar();
323 			if (c == -1)
324 				throw exception("unterminated escape sequence");
325 			sb.append('\\');
326 		} else if (quoting && c == '\n') {
327 			throw exception("newline in quoted string");
328 		}
329 		sb.append((char)c);
330 	}
331 	if (sb.length() == 0 && type != QUOTED_STRING) {
332 		checkUnbalancedParens();
333 		return current.set(EOF, null);
334 	}
335 	return current.set(type, sb);
336 }
337 
338 /**
339  * Gets the next token from a tokenizer, ignoring whitespace and comments.
340  * @return The next token in the stream.
341  * @throws TextParseException The input was invalid.
342  * @throws IOException An I/O error occurred.
343  */
344 public Token
get()345 get() throws IOException {
346 	return get(false, false);
347 }
348 
349 /**
350  * Returns a token to the stream, so that it will be returned by the next call
351  * to get().
352  * @throws IllegalStateException There are already ungotten tokens.
353  */
354 public void
unget()355 unget() {
356 	if (ungottenToken)
357 		throw new IllegalStateException
358 				("Cannot unget multiple tokens");
359 	if (current.type == EOL)
360 		line--;
361 	ungottenToken = true;
362 }
363 
364 /**
365  * Gets the next token from a tokenizer and converts it to a string.
366  * @return The next token in the stream, as a string.
367  * @throws TextParseException The input was invalid or not a string.
368  * @throws IOException An I/O error occurred.
369  */
370 public String
getString()371 getString() throws IOException {
372 	Token next = get();
373 	if (!next.isString()) {
374 		throw exception("expected a string");
375 	}
376 	return next.value;
377 }
378 
379 private String
_getIdentifier(String expected)380 _getIdentifier(String expected) throws IOException {
381 	Token next = get();
382 	if (next.type != IDENTIFIER)
383 		throw exception("expected " + expected);
384 	return next.value;
385 }
386 
387 /**
388  * Gets the next token from a tokenizer, ensures it is an unquoted string,
389  * and converts it to a string.
390  * @return The next token in the stream, as a string.
391  * @throws TextParseException The input was invalid or not an unquoted string.
392  * @throws IOException An I/O error occurred.
393  */
394 public String
getIdentifier()395 getIdentifier() throws IOException {
396 	return _getIdentifier("an identifier");
397 }
398 
399 /**
400  * Gets the next token from a tokenizer and converts it to a long.
401  * @return The next token in the stream, as a long.
402  * @throws TextParseException The input was invalid or not a long.
403  * @throws IOException An I/O error occurred.
404  */
405 public long
getLong()406 getLong() throws IOException {
407 	String next = _getIdentifier("an integer");
408 	if (!Character.isDigit(next.charAt(0)))
409 		throw exception("expected an integer");
410 	try {
411 		return Long.parseLong(next);
412 	} catch (NumberFormatException e) {
413 		throw exception("expected an integer");
414 	}
415 }
416 
417 /**
418  * Gets the next token from a tokenizer and converts it to an unsigned 32 bit
419  * integer.
420  * @return The next token in the stream, as an unsigned 32 bit integer.
421  * @throws TextParseException The input was invalid or not an unsigned 32
422  * bit integer.
423  * @throws IOException An I/O error occurred.
424  */
425 public long
getUInt32()426 getUInt32() throws IOException {
427 	long l = getLong();
428 	if (l < 0 || l > 0xFFFFFFFFL)
429 		throw exception("expected an 32 bit unsigned integer");
430 	return l;
431 }
432 
433 /**
434  * Gets the next token from a tokenizer and converts it to an unsigned 16 bit
435  * integer.
436  * @return The next token in the stream, as an unsigned 16 bit integer.
437  * @throws TextParseException The input was invalid or not an unsigned 16
438  * bit integer.
439  * @throws IOException An I/O error occurred.
440  */
441 public int
getUInt16()442 getUInt16() throws IOException {
443 	long l = getLong();
444 	if (l < 0 || l > 0xFFFFL)
445 		throw exception("expected an 16 bit unsigned integer");
446 	return (int) l;
447 }
448 
449 /**
450  * Gets the next token from a tokenizer and converts it to an unsigned 8 bit
451  * integer.
452  * @return The next token in the stream, as an unsigned 8 bit integer.
453  * @throws TextParseException The input was invalid or not an unsigned 8
454  * bit integer.
455  * @throws IOException An I/O error occurred.
456  */
457 public int
getUInt8()458 getUInt8() throws IOException {
459 	long l = getLong();
460 	if (l < 0 || l > 0xFFL)
461 		throw exception("expected an 8 bit unsigned integer");
462 	return (int) l;
463 }
464 
465 /**
466  * Gets the next token from a tokenizer and parses it as a TTL.
467  * @return The next token in the stream, as an unsigned 32 bit integer.
468  * @throws TextParseException The input was not valid.
469  * @throws IOException An I/O error occurred.
470  * @see TTL
471  */
472 public long
getTTL()473 getTTL() throws IOException {
474 	String next = _getIdentifier("a TTL value");
475 	try {
476 		return TTL.parseTTL(next);
477 	}
478 	catch (NumberFormatException e) {
479 		throw exception("expected a TTL value");
480 	}
481 }
482 
483 /**
484  * Gets the next token from a tokenizer and parses it as if it were a TTL.
485  * @return The next token in the stream, as an unsigned 32 bit integer.
486  * @throws TextParseException The input was not valid.
487  * @throws IOException An I/O error occurred.
488  * @see TTL
489  */
490 public long
getTTLLike()491 getTTLLike() throws IOException {
492 	String next = _getIdentifier("a TTL-like value");
493 	try {
494 		return TTL.parse(next, false);
495 	}
496 	catch (NumberFormatException e) {
497 		throw exception("expected a TTL-like value");
498 	}
499 }
500 
501 /**
502  * Gets the next token from a tokenizer and converts it to a name.
503  * @param origin The origin to append to relative names.
504  * @return The next token in the stream, as a name.
505  * @throws TextParseException The input was invalid or not a valid name.
506  * @throws IOException An I/O error occurred.
507  * @throws RelativeNameException The parsed name was relative, even with the
508  * origin.
509  * @see Name
510  */
511 public Name
getName(Name origin)512 getName(Name origin) throws IOException {
513 	String next = _getIdentifier("a name");
514 	try {
515 		Name name = Name.fromString(next, origin);
516 		if (!name.isAbsolute())
517 			throw new RelativeNameException(name);
518 		return name;
519 	}
520 	catch (TextParseException e) {
521 		throw exception(e.getMessage());
522 	}
523 }
524 
525 /**
526  * Gets the next token from a tokenizer and converts it to an IP Address.
527  * @param family The address family.
528  * @return The next token in the stream, as an InetAddress
529  * @throws TextParseException The input was invalid or not a valid address.
530  * @throws IOException An I/O error occurred.
531  * @see Address
532  */
533 public InetAddress
getAddress(int family)534 getAddress(int family) throws IOException {
535 	String next = _getIdentifier("an address");
536 	try {
537 		return Address.getByAddress(next, family);
538 	}
539 	catch (UnknownHostException e) {
540 		throw exception(e.getMessage());
541 	}
542 }
543 
544 /**
545  * Gets the next token from a tokenizer, which must be an EOL or EOF.
546  * @throws TextParseException The input was invalid or not an EOL or EOF token.
547  * @throws IOException An I/O error occurred.
548  */
549 public void
getEOL()550 getEOL() throws IOException {
551 	Token next = get();
552 	if (next.type != EOL && next.type != EOF) {
553 		throw exception("expected EOL or EOF");
554 	}
555 }
556 
557 /**
558  * Returns a concatenation of the remaining strings from a Tokenizer.
559  */
560 private String
remainingStrings()561 remainingStrings() throws IOException {
562         StringBuffer buffer = null;
563         while (true) {
564                 Tokenizer.Token t = get();
565                 if (!t.isString())
566                         break;
567                 if (buffer == null)
568                         buffer = new StringBuffer();
569                 buffer.append(t.value);
570         }
571         unget();
572         if (buffer == null)
573                 return null;
574         return buffer.toString();
575 }
576 
577 /**
578  * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
579  * them together, and converts the base64 encoded data to a byte array.
580  * @param required If true, an exception will be thrown if no strings remain;
581  * otherwise null be be returned.
582  * @return The byte array containing the decoded strings, or null if there
583  * were no strings to decode.
584  * @throws TextParseException The input was invalid.
585  * @throws IOException An I/O error occurred.
586  */
587 public byte []
getBase64(boolean required)588 getBase64(boolean required) throws IOException {
589 	String s = remainingStrings();
590 	if (s == null) {
591 		if (required)
592 			throw exception("expected base64 encoded string");
593 		else
594 			return null;
595 	}
596 	byte [] array = base64.fromString(s);
597 	if (array == null)
598 		throw exception("invalid base64 encoding");
599 	return array;
600 }
601 
602 /**
603  * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
604  * them together, and converts the base64 encoded data to a byte array.
605  * @return The byte array containing the decoded strings, or null if there
606  * were no strings to decode.
607  * @throws TextParseException The input was invalid.
608  * @throws IOException An I/O error occurred.
609  */
610 public byte []
getBase64()611 getBase64() throws IOException {
612 	return getBase64(false);
613 }
614 
615 /**
616  * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
617  * them together, and converts the hex encoded data to a byte array.
618  * @param required If true, an exception will be thrown if no strings remain;
619  * otherwise null be be returned.
620  * @return The byte array containing the decoded strings, or null if there
621  * were no strings to decode.
622  * @throws TextParseException The input was invalid.
623  * @throws IOException An I/O error occurred.
624  */
625 public byte []
getHex(boolean required)626 getHex(boolean required) throws IOException {
627 	String s = remainingStrings();
628 	if (s == null) {
629 		if (required)
630 			throw exception("expected hex encoded string");
631 		else
632 			return null;
633 	}
634 	byte [] array = base16.fromString(s);
635 	if (array == null)
636 		throw exception("invalid hex encoding");
637 	return array;
638 }
639 
640 /**
641  * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
642  * them together, and converts the hex encoded data to a byte array.
643  * @return The byte array containing the decoded strings, or null if there
644  * were no strings to decode.
645  * @throws TextParseException The input was invalid.
646  * @throws IOException An I/O error occurred.
647  */
648 public byte []
getHex()649 getHex() throws IOException {
650 	return getHex(false);
651 }
652 
653 /**
654  * Gets the next token from a tokenizer and decodes it as hex.
655  * @return The byte array containing the decoded string.
656  * @throws TextParseException The input was invalid.
657  * @throws IOException An I/O error occurred.
658  */
659 public byte []
getHexString()660 getHexString() throws IOException {
661 	String next = _getIdentifier("a hex string");
662 	byte [] array = base16.fromString(next);
663 	if (array == null)
664 		throw exception("invalid hex encoding");
665 	return array;
666 }
667 
668 /**
669  * Gets the next token from a tokenizer and decodes it as base32.
670  * @param b32 The base32 context to decode with.
671  * @return The byte array containing the decoded string.
672  * @throws TextParseException The input was invalid.
673  * @throws IOException An I/O error occurred.
674  */
675 public byte []
getBase32String(base32 b32)676 getBase32String(base32 b32) throws IOException {
677 	String next = _getIdentifier("a base32 string");
678 	byte [] array = b32.fromString(next);
679 	if (array == null)
680 		throw exception("invalid base32 encoding");
681 	return array;
682 }
683 
684 /**
685  * Creates an exception which includes the current state in the error message
686  * @param s The error message to include.
687  * @return The exception to be thrown
688  */
689 public TextParseException
exception(String s)690 exception(String s) {
691 	return new TokenizerException(filename, line, s);
692 }
693 
694 /**
695  * Closes any files opened by this tokenizer.
696  */
697 public void
close()698 close() {
699 	if (wantClose) {
700 		try {
701 			is.close();
702 		}
703 		catch (IOException e) {
704 		}
705 	}
706 }
707 
708 protected void
finalize()709 finalize() {
710 	close();
711 }
712 
713 }
714