#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#endregion
using NUnit.Framework;
using System;
using System.IO;

namespace Google.Protobuf
{
    /// <summary>
    /// Tests for the low-level JSON tokenizer: token sequences, string/number
    /// parsing, structural validation and push-back behavior.
    /// </summary>
    public class JsonTokenizerTest
    {
        [Test]
        public void EmptyObjectValue() =>
            AssertTokens("{}", JsonToken.StartObject, JsonToken.EndObject);

        [Test]
        public void EmptyArrayValue() =>
            AssertTokens("[]", JsonToken.StartArray, JsonToken.EndArray);

        [Test]
        [TestCase("foo", "foo")]
        [TestCase("tab\\t", "tab\t")]
        [TestCase("line\\nfeed", "line\nfeed")]
        [TestCase("carriage\\rreturn", "carriage\rreturn")]
        [TestCase("back\\bspace", "back\bspace")]
        [TestCase("form\\ffeed", "form\ffeed")]
        [TestCase("escaped\\/slash", "escaped/slash")]
        [TestCase("escaped\\\\backslash", "escaped\\backslash")]
        [TestCase("escaped\\\"quote", "escaped\"quote")]
        [TestCase("foo {}[] bar", "foo {}[] bar")]
        [TestCase("foo\\u09aFbar", "foo\u09afbar")] // Digits, upper hex, lower hex
        [TestCase("ab\ud800\udc00cd", "ab\ud800\udc00cd")]
        [TestCase("ab\\ud800\\udc00cd", "ab\ud800\udc00cd")]
        public void StringValue(string json, string expectedValue) =>
            AssertTokensNoReplacement("\"" + json + "\"", JsonToken.Value(expectedValue));

        // Valid surrogate pairs, with mixed escaping. These test cases can't be expressed
        // using TestCase as they have no valid UTF-8 representation.
        // It's unclear exactly how we should handle a mixture of escaped or not: that can't
        // come from UTF-8 text, but could come from a .NET string. For the moment,
        // treat it as valid in the obvious way.
53 [Test] MixedSurrogatePairs()54 public void MixedSurrogatePairs() 55 { 56 string expected = "\ud800\udc00"; 57 AssertTokens("'\\ud800\udc00'", JsonToken.Value(expected)); 58 AssertTokens("'\ud800\\udc00'", JsonToken.Value(expected)); 59 } 60 61 [Test] ObjectDepth()62 public void ObjectDepth() 63 { 64 string json = "{ \"foo\": { \"x\": 1, \"y\": [ 0 ] } }"; 65 var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json)); 66 // If we had more tests like this, I'd introduce a helper method... but for one test, it's not worth it. 67 Assert.AreEqual(0, tokenizer.ObjectDepth); 68 Assert.AreEqual(JsonToken.StartObject, tokenizer.Next()); 69 Assert.AreEqual(1, tokenizer.ObjectDepth); 70 Assert.AreEqual(JsonToken.Name("foo"), tokenizer.Next()); 71 Assert.AreEqual(1, tokenizer.ObjectDepth); 72 Assert.AreEqual(JsonToken.StartObject, tokenizer.Next()); 73 Assert.AreEqual(2, tokenizer.ObjectDepth); 74 Assert.AreEqual(JsonToken.Name("x"), tokenizer.Next()); 75 Assert.AreEqual(2, tokenizer.ObjectDepth); 76 Assert.AreEqual(JsonToken.Value(1), tokenizer.Next()); 77 Assert.AreEqual(2, tokenizer.ObjectDepth); 78 Assert.AreEqual(JsonToken.Name("y"), tokenizer.Next()); 79 Assert.AreEqual(2, tokenizer.ObjectDepth); 80 Assert.AreEqual(JsonToken.StartArray, tokenizer.Next()); 81 Assert.AreEqual(2, tokenizer.ObjectDepth); // Depth hasn't changed in array 82 Assert.AreEqual(JsonToken.Value(0), tokenizer.Next()); 83 Assert.AreEqual(2, tokenizer.ObjectDepth); 84 Assert.AreEqual(JsonToken.EndArray, tokenizer.Next()); 85 Assert.AreEqual(2, tokenizer.ObjectDepth); 86 Assert.AreEqual(JsonToken.EndObject, tokenizer.Next()); 87 Assert.AreEqual(1, tokenizer.ObjectDepth); 88 Assert.AreEqual(JsonToken.EndObject, tokenizer.Next()); 89 Assert.AreEqual(0, tokenizer.ObjectDepth); 90 Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next()); 91 Assert.AreEqual(0, tokenizer.ObjectDepth); 92 } 93 94 [Test] ObjectDepth_WithPushBack()95 public void ObjectDepth_WithPushBack() 96 { 97 string json = 
"{}"; 98 var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json)); 99 Assert.AreEqual(0, tokenizer.ObjectDepth); 100 var token = tokenizer.Next(); 101 Assert.AreEqual(1, tokenizer.ObjectDepth); 102 // When we push back a "start object", we should effectively be back to the previous depth. 103 tokenizer.PushBack(token); 104 Assert.AreEqual(0, tokenizer.ObjectDepth); 105 // Read the same token again, and get back to depth 1 106 token = tokenizer.Next(); 107 Assert.AreEqual(1, tokenizer.ObjectDepth); 108 109 // Now the same in reverse, with EndObject 110 token = tokenizer.Next(); 111 Assert.AreEqual(0, tokenizer.ObjectDepth); 112 tokenizer.PushBack(token); 113 Assert.AreEqual(1, tokenizer.ObjectDepth); 114 tokenizer.Next(); 115 Assert.AreEqual(0, tokenizer.ObjectDepth); 116 } 117 118 [Test] 119 [TestCase("embedded tab\t")] 120 [TestCase("embedded CR\r")] 121 [TestCase("embedded LF\n")] 122 [TestCase("embedded bell\u0007")] 123 [TestCase("bad escape\\a")] 124 [TestCase("incomplete escape\\")] 125 [TestCase("incomplete Unicode escape\\u000")] 126 [TestCase("invalid Unicode escape\\u000H")] 127 // Surrogate pair handling, both in raw .NET strings and escaped. We only need 128 // to detect this in strings, as non-ASCII characters anywhere other than in strings 129 // will already lead to parsing errors. 130 [TestCase("\\ud800")] 131 [TestCase("\\udc00")] 132 [TestCase("\\ud800x")] 133 [TestCase("\\udc00x")] 134 [TestCase("\\udc00\\ud800y")] InvalidStringValue(string json)135 public void InvalidStringValue(string json) 136 { 137 AssertThrowsAfter("\"" + json + "\""); 138 } 139 140 // Tests for invalid strings that can't be expressed in attributes, 141 // as the constants can't be expressed as UTF-8 strings. 
142 [Test] InvalidSurrogatePairs()143 public void InvalidSurrogatePairs() 144 { 145 AssertThrowsAfter("\"\ud800x\""); 146 AssertThrowsAfter("\"\udc00y\""); 147 AssertThrowsAfter("\"\udc00\ud800y\""); 148 } 149 150 [Test] 151 [TestCase("0", 0)] 152 [TestCase("-0", 0)] // We don't distinguish between positive and negative 0 153 [TestCase("1", 1)] 154 [TestCase("-1", -1)] 155 // From here on, assume leading sign is okay... 156 [TestCase("1.125", 1.125)] 157 [TestCase("1.0", 1)] 158 [TestCase("1e5", 100000)] 159 [TestCase("1e000000", 1)] // Weird, but not prohibited by the spec 160 [TestCase("1E5", 100000)] 161 [TestCase("1e+5", 100000)] 162 [TestCase("1E-5", 0.00001)] 163 [TestCase("123E-2", 1.23)] 164 [TestCase("123.45E3", 123450)] 165 [TestCase(" 1 ", 1)] NumberValue(string json, double expectedValue)166 public void NumberValue(string json, double expectedValue) 167 { 168 AssertTokens(json, JsonToken.Value(expectedValue)); 169 } 170 171 [Test] 172 [TestCase("00")] 173 [TestCase(".5")] 174 [TestCase("1.")] 175 [TestCase("1e")] 176 [TestCase("1e-")] 177 [TestCase("--")] 178 [TestCase("--1")] 179 [TestCase("-1.7977e308")] 180 [TestCase("1.7977e308")] InvalidNumberValue(string json)181 public void InvalidNumberValue(string json) 182 { 183 AssertThrowsAfter(json); 184 } 185 186 [Test] 187 [TestCase("nul")] 188 [TestCase("nothing")] 189 [TestCase("truth")] 190 [TestCase("fALSEhood")] InvalidLiterals(string json)191 public void InvalidLiterals(string json) 192 { 193 AssertThrowsAfter(json); 194 } 195 196 [Test] NullValue()197 public void NullValue() 198 { 199 AssertTokens("null", JsonToken.Null); 200 } 201 202 [Test] TrueValue()203 public void TrueValue() 204 { 205 AssertTokens("true", JsonToken.True); 206 } 207 208 [Test] FalseValue()209 public void FalseValue() 210 { 211 AssertTokens("false", JsonToken.False); 212 } 213 214 [Test] SimpleObject()215 public void SimpleObject() 216 { 217 AssertTokens("{'x': 'y'}", 218 JsonToken.StartObject, JsonToken.Name("x"), 
JsonToken.Value("y"), JsonToken.EndObject); 219 } 220 221 [Test] 222 [TestCase("[10, 20", 3)] 223 [TestCase("[10,", 2)] 224 [TestCase("[10:20]", 2)] 225 [TestCase("[", 1)] 226 [TestCase("[,", 1)] 227 [TestCase("{", 1)] 228 [TestCase("{,", 1)] 229 [TestCase("{[", 1)] 230 [TestCase("{{", 1)] 231 [TestCase("{0", 1)] 232 [TestCase("{null", 1)] 233 [TestCase("{false", 1)] 234 [TestCase("{true", 1)] 235 [TestCase("}", 0)] 236 [TestCase("]", 0)] 237 [TestCase(",", 0)] 238 [TestCase("'foo' 'bar'", 1)] 239 [TestCase(":", 0)] 240 [TestCase("'foo", 0)] // Incomplete string 241 [TestCase("{ 'foo' }", 2)] 242 [TestCase("{ x:1", 1)] // Property names must be quoted 243 [TestCase("{]", 1)] 244 [TestCase("[}", 1)] 245 [TestCase("[1,", 2)] 246 [TestCase("{'x':0]", 3)] 247 [TestCase("{ 'foo': }", 2)] 248 [TestCase("{ 'foo':'bar', }", 3)] InvalidStructure(string json, int expectedValidTokens)249 public void InvalidStructure(string json, int expectedValidTokens) 250 { 251 // Note: we don't test that the earlier tokens are exactly as expected, 252 // partly because that's hard to parameterize. 
253 var reader = new StringReader(json.Replace('\'', '"')); 254 var tokenizer = JsonTokenizer.FromTextReader(reader); 255 for (int i = 0; i < expectedValidTokens; i++) 256 { 257 Assert.IsNotNull(tokenizer.Next()); 258 } 259 Assert.Throws<InvalidJsonException>(() => tokenizer.Next()); 260 } 261 262 [Test] ArrayMixedType()263 public void ArrayMixedType() 264 { 265 AssertTokens("[1, 'foo', null, false, true, [2], {'x':'y' }]", 266 JsonToken.StartArray, 267 JsonToken.Value(1), 268 JsonToken.Value("foo"), 269 JsonToken.Null, 270 JsonToken.False, 271 JsonToken.True, 272 JsonToken.StartArray, 273 JsonToken.Value(2), 274 JsonToken.EndArray, 275 JsonToken.StartObject, 276 JsonToken.Name("x"), 277 JsonToken.Value("y"), 278 JsonToken.EndObject, 279 JsonToken.EndArray); 280 } 281 282 [Test] ObjectMixedType()283 public void ObjectMixedType() 284 { 285 AssertTokens(@"{'a': 1, 'b': 'bar', 'c': null, 'd': false, 'e': true, 286 'f': [2], 'g': {'x':'y' }}", 287 JsonToken.StartObject, 288 JsonToken.Name("a"), 289 JsonToken.Value(1), 290 JsonToken.Name("b"), 291 JsonToken.Value("bar"), 292 JsonToken.Name("c"), 293 JsonToken.Null, 294 JsonToken.Name("d"), 295 JsonToken.False, 296 JsonToken.Name("e"), 297 JsonToken.True, 298 JsonToken.Name("f"), 299 JsonToken.StartArray, 300 JsonToken.Value(2), 301 JsonToken.EndArray, 302 JsonToken.Name("g"), 303 JsonToken.StartObject, 304 JsonToken.Name("x"), 305 JsonToken.Value("y"), 306 JsonToken.EndObject, 307 JsonToken.EndObject); 308 } 309 310 [Test] NextAfterEndDocumentThrows()311 public void NextAfterEndDocumentThrows() 312 { 313 var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null")); 314 Assert.AreEqual(JsonToken.Null, tokenizer.Next()); 315 Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next()); 316 Assert.Throws<InvalidOperationException>(() => tokenizer.Next()); 317 } 318 319 [Test] CanPushBackEndDocument()320 public void CanPushBackEndDocument() 321 { 322 var tokenizer = JsonTokenizer.FromTextReader(new 
StringReader("null")); 323 Assert.AreEqual(JsonToken.Null, tokenizer.Next()); 324 Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next()); 325 tokenizer.PushBack(JsonToken.EndDocument); 326 Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next()); 327 Assert.Throws<InvalidOperationException>(() => tokenizer.Next()); 328 } 329 330 /// <summary> 331 /// Asserts that the specified JSON is tokenized into the given sequence of tokens. 332 /// All apostrophes are first converted to double quotes, allowing any tests 333 /// that don't need to check actual apostrophe handling to use apostrophes in the JSON, avoiding 334 /// messy string literal escaping. The "end document" token is not specified in the list of 335 /// expected tokens, but is implicit. 336 /// </summary> AssertTokens(string json, params JsonToken[] expectedTokens)337 private static void AssertTokens(string json, params JsonToken[] expectedTokens) 338 { 339 AssertTokensNoReplacement(json.Replace('\'', '"'), expectedTokens); 340 } 341 342 /// <summary> 343 /// Asserts that the specified JSON is tokenized into the given sequence of tokens. 344 /// Unlike <see cref="AssertTokens(string, JsonToken[])"/>, this does not perform any character 345 /// replacement on the specified JSON, and should be used when the text contains apostrophes which 346 /// are expected to be used *as* apostrophes. The "end document" token is not specified in the list of 347 /// expected tokens, but is implicit. 
348 /// </summary> AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens)349 private static void AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens) 350 { 351 var reader = new StringReader(json); 352 var tokenizer = JsonTokenizer.FromTextReader(reader); 353 for (int i = 0; i < expectedTokens.Length; i++) 354 { 355 var actualToken = tokenizer.Next(); 356 if (actualToken == JsonToken.EndDocument) 357 { 358 Assert.Fail("Expected {0} but reached end of token stream", expectedTokens[i]); 359 } 360 Assert.AreEqual(expectedTokens[i], actualToken); 361 } 362 var finalToken = tokenizer.Next(); 363 if (finalToken != JsonToken.EndDocument) 364 { 365 Assert.Fail("Expected token stream to be exhausted; received {0}", finalToken); 366 } 367 } 368 AssertThrowsAfter(string json, params JsonToken[] expectedTokens)369 private static void AssertThrowsAfter(string json, params JsonToken[] expectedTokens) 370 { 371 var reader = new StringReader(json); 372 var tokenizer = JsonTokenizer.FromTextReader(reader); 373 for (int i = 0; i < expectedTokens.Length; i++) 374 { 375 var actualToken = tokenizer.Next(); 376 if (actualToken == JsonToken.EndDocument) 377 { 378 Assert.Fail("Expected {0} but reached end of document", expectedTokens[i]); 379 } 380 Assert.AreEqual(expectedTokens[i], actualToken); 381 } 382 Assert.Throws<InvalidJsonException>(() => tokenizer.Next()); 383 } 384 } 385 } 386