#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#endregion

using NUnit.Framework;
using System;
using System.IO;

namespace Google.Protobuf
{
    /// <summary>
    /// Tests for <c>JsonTokenizer</c>: each test feeds JSON text through
    /// <c>JsonTokenizer.FromTextReader</c> and checks the tokens (or the exception) produced
    /// by successive calls to <c>Next()</c>.
    /// </summary>
    public class JsonTokenizerTest
    {
        [Test]
        public void EmptyObjectValue() =>
            AssertTokens("{}", JsonToken.StartObject, JsonToken.EndObject);

        [Test]
        public void EmptyArrayValue() =>
            AssertTokens("[]", JsonToken.StartArray, JsonToken.EndArray);

        [Test]
        [TestCase("foo", "foo")]
        [TestCase("tab\\t", "tab\t")]
        [TestCase("line\\nfeed", "line\nfeed")]
        [TestCase("carriage\\rreturn", "carriage\rreturn")]
        [TestCase("back\\bspace", "back\bspace")]
        [TestCase("form\\ffeed", "form\ffeed")]
        [TestCase("escaped\\/slash", "escaped/slash")]
        [TestCase("escaped\\\\backslash", "escaped\\backslash")]
        [TestCase("escaped\\\"quote", "escaped\"quote")]
        [TestCase("foo {}[] bar", "foo {}[] bar")]
        [TestCase("foo\\u09aFbar", "foo\u09afbar")] // Digits, upper hex, lower hex
        [TestCase("ab\ud800\udc00cd", "ab\ud800\udc00cd")]
        [TestCase("ab\\ud800\\udc00cd", "ab\ud800\udc00cd")]
        public void StringValue(string json, string expectedValue)
        {
            // The test case supplies only the string body; wrap it in double quotes to form the JSON text.
            var quotedJson = "\"" + json + "\"";
            var expectedToken = JsonToken.Value(expectedValue);

            AssertTokensNoReplacement(quotedJson, expectedToken);
        }

        // Valid surrogate pairs where only one half is escaped. These can't be written as
        // TestCase arguments because they have no valid UTF-8 representation.
        // It's unclear exactly how a mixture of escaped and unescaped halves should be handled:
        // it can't come from UTF-8 text, but it could come from a .NET string. For the moment,
        // it is treated as valid in the obvious way.
        [Test]
        public void MixedSurrogatePairs()
        {
            var expectedToken = JsonToken.Value("\ud800\udc00");

            AssertTokens("'\\ud800\udc00'", expectedToken);
            AssertTokens("'\ud800\\udc00'", expectedToken);
        }

        [Test]
        public void ObjectDepth()
        {
            var tokenizer = JsonTokenizer.FromTextReader(
                new StringReader("{ \"foo\": { \"x\": 1, \"y\": [ 0 ] } }"));

            // Reads the next token, then checks the token and the depth reported after reading it.
            void ExpectNext(JsonToken expectedToken, int expectedDepth)
            {
                Assert.AreEqual(expectedToken, tokenizer.Next());
                Assert.AreEqual(expectedDepth, tokenizer.ObjectDepth);
            }

            Assert.AreEqual(0, tokenizer.ObjectDepth);
            ExpectNext(JsonToken.StartObject, 1);
            ExpectNext(JsonToken.Name("foo"), 1);
            ExpectNext(JsonToken.StartObject, 2);
            ExpectNext(JsonToken.Name("x"), 2);
            ExpectNext(JsonToken.Value(1), 2);
            ExpectNext(JsonToken.Name("y"), 2);
            ExpectNext(JsonToken.StartArray, 2); // Depth hasn't changed in array
            ExpectNext(JsonToken.Value(0), 2);
            ExpectNext(JsonToken.EndArray, 2);
            ExpectNext(JsonToken.EndObject, 1);
            ExpectNext(JsonToken.EndObject, 0);
            ExpectNext(JsonToken.EndDocument, 0);
        }

        [Test]
        public void ObjectDepth_WithPushBack()
        {
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader("{}"));
            Assert.AreEqual(0, tokenizer.ObjectDepth);

            var startObject = tokenizer.Next();
            Assert.AreEqual(1, tokenizer.ObjectDepth);

            // Pushing back a "start object" should effectively restore the previous depth.
            tokenizer.PushBack(startObject);
            Assert.AreEqual(0, tokenizer.ObjectDepth);

            // Reading the same token again takes us back to depth 1.
            _ = tokenizer.Next();
            Assert.AreEqual(1, tokenizer.ObjectDepth);

            // Now the same in reverse, with EndObject.
            var endObject = tokenizer.Next();
            Assert.AreEqual(0, tokenizer.ObjectDepth);

            tokenizer.PushBack(endObject);
            Assert.AreEqual(1, tokenizer.ObjectDepth);

            _ = tokenizer.Next();
            Assert.AreEqual(0, tokenizer.ObjectDepth);
        }

        [Test]
        [TestCase("embedded tab\t")]
        [TestCase("embedded CR\r")]
        [TestCase("embedded LF\n")]
        [TestCase("embedded bell\u0007")]
        [TestCase("bad escape\\a")]
        [TestCase("incomplete escape\\")]
        [TestCase("incomplete Unicode escape\\u000")]
        [TestCase("invalid Unicode escape\\u000H")]
        // Surrogate pair handling, both in raw .NET strings and escaped. We only need
        // to detect this in strings, as non-ASCII characters anywhere other than in strings
        // will already lead to parsing errors.
        [TestCase("\\ud800")]
        [TestCase("\\udc00")]
        [TestCase("\\ud800x")]
        [TestCase("\\udc00x")]
        [TestCase("\\udc00\\ud800y")]
        public void InvalidStringValue(string json)
        {
            // The test case supplies only the string body; wrap it in double quotes to form the JSON text.
            var quotedJson = "\"" + json + "\"";

            AssertThrowsAfter(quotedJson);
        }

        // Invalid strings that can't be expressed in attributes,
        // as the constants can't be expressed as UTF-8 strings.
        [Test]
        public void InvalidSurrogatePairs()
        {
            var invalidDocuments = new[]
            {
                "\"\ud800x\"",
                "\"\udc00y\"",
                "\"\udc00\ud800y\"",
            };

            foreach (var document in invalidDocuments)
            {
                AssertThrowsAfter(document);
            }
        }

        [Test]
        [TestCase("0", 0)]
        [TestCase("-0", 0)] // We don't distinguish between positive and negative 0
        [TestCase("1", 1)]
        [TestCase("-1", -1)]
        // From here on, assume leading sign is okay...
        [TestCase("1.125", 1.125)]
        [TestCase("1.0", 1)]
        [TestCase("1e5", 100000)]
        [TestCase("1e000000", 1)] // Weird, but not prohibited by the spec
        [TestCase("1E5", 100000)]
        [TestCase("1e+5", 100000)]
        [TestCase("1E-5", 0.00001)]
        [TestCase("123E-2", 1.23)]
        [TestCase("123.45E3", 123450)]
        [TestCase("   1   ", 1)]
        public void NumberValue(string json, double expectedValue)
        {
            var expectedToken = JsonToken.Value(expectedValue);

            AssertTokens(json, expectedToken);
        }

        [Test]
        [TestCase("00")]
        [TestCase(".5")]
        [TestCase("1.")]
        [TestCase("1e")]
        [TestCase("1e-")]
        [TestCase("--")]
        [TestCase("--1")]
        [TestCase("-1.7977e308")]
        [TestCase("1.7977e308")]
        public void InvalidNumberValue(string json) => AssertThrowsAfter(json);

        [Test]
        [TestCase("nul")]
        [TestCase("nothing")]
        [TestCase("truth")]
        [TestCase("fALSEhood")]
        public void InvalidLiterals(string json) => AssertThrowsAfter(json);

        [Test]
        public void NullValue() => AssertTokens("null", JsonToken.Null);

        [Test]
        public void TrueValue() => AssertTokens("true", JsonToken.True);

        [Test]
        public void FalseValue() => AssertTokens("false", JsonToken.False);

        [Test]
        public void SimpleObject() =>
            AssertTokens(
                "{'x': 'y'}",
                JsonToken.StartObject,
                JsonToken.Name("x"),
                JsonToken.Value("y"),
                JsonToken.EndObject);

        [Test]
        [TestCase("[10, 20", 3)]
        [TestCase("[10,", 2)]
        [TestCase("[10:20]", 2)]
        [TestCase("[", 1)]
        [TestCase("[,", 1)]
        [TestCase("{", 1)]
        [TestCase("{,", 1)]
        [TestCase("{[", 1)]
        [TestCase("{{", 1)]
        [TestCase("{0", 1)]
        [TestCase("{null", 1)]
        [TestCase("{false", 1)]
        [TestCase("{true", 1)]
        [TestCase("}", 0)]
        [TestCase("]", 0)]
        [TestCase(",", 0)]
        [TestCase("'foo' 'bar'", 1)]
        [TestCase(":", 0)]
        [TestCase("'foo", 0)] // Incomplete string
        [TestCase("{ 'foo' }", 2)]
        [TestCase("{ x:1", 1)] // Property names must be quoted
        [TestCase("{]", 1)]
        [TestCase("[}", 1)]
        [TestCase("[1,", 2)]
        [TestCase("{'x':0]", 3)]
        [TestCase("{ 'foo': }", 2)]
        [TestCase("{ 'foo':'bar', }", 3)]
        public void InvalidStructure(string json, int expectedValidTokens)
        {
            // Apostrophes in the test data stand in for double quotes.
            var document = json.Replace('\'', '"');
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader(document));

            // Note: the leading tokens are only checked for being non-null, not for their exact
            // values, partly because that's hard to parameterize.
            for (int remaining = expectedValidTokens; remaining > 0; remaining--)
            {
                Assert.IsNotNull(tokenizer.Next());
            }

            Assert.Throws<InvalidJsonException>(() => tokenizer.Next());
        }

        [Test]
        public void ArrayMixedType() =>
            AssertTokens(
                "[1, 'foo', null, false, true, [2], {'x':'y' }]",
                JsonToken.StartArray,
                JsonToken.Value(1),
                JsonToken.Value("foo"),
                JsonToken.Null,
                JsonToken.False,
                JsonToken.True,
                JsonToken.StartArray,
                JsonToken.Value(2),
                JsonToken.EndArray,
                JsonToken.StartObject,
                JsonToken.Name("x"),
                JsonToken.Value("y"),
                JsonToken.EndObject,
                JsonToken.EndArray);

        [Test]
        public void ObjectMixedType() =>
            AssertTokens(
                @"{'a': 1, 'b': 'bar', 'c': null, 'd': false, 'e': true,
                           'f': [2], 'g': {'x':'y' }}",
                JsonToken.StartObject,
                JsonToken.Name("a"),
                JsonToken.Value(1),
                JsonToken.Name("b"),
                JsonToken.Value("bar"),
                JsonToken.Name("c"),
                JsonToken.Null,
                JsonToken.Name("d"),
                JsonToken.False,
                JsonToken.Name("e"),
                JsonToken.True,
                JsonToken.Name("f"),
                JsonToken.StartArray,
                JsonToken.Value(2),
                JsonToken.EndArray,
                JsonToken.Name("g"),
                JsonToken.StartObject,
                JsonToken.Name("x"),
                JsonToken.Value("y"),
                JsonToken.EndObject,
                JsonToken.EndObject);

        [Test]
        public void NextAfterEndDocumentThrows()
        {
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));

            Assert.AreEqual(JsonToken.Null, tokenizer.Next());
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());

            // Once EndDocument has been returned, a further read is expected to throw.
            Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
        }

        [Test]
        public void CanPushBackEndDocument()
        {
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));

            Assert.AreEqual(JsonToken.Null, tokenizer.Next());
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());

            // After pushing EndDocument back, it is returned once more; reading again then throws.
            tokenizer.PushBack(JsonToken.EndDocument);
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
            Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
        }

        [Test]
        [TestCase("{ 'skip': 0, 'next': 1")]
        [TestCase("{ 'skip': [0, 1, 2], 'next': 1")]
        [TestCase("{ 'skip': 'x', 'next': 1")]
        [TestCase("{ 'skip': ['x', 'y'], 'next': 1")]
        [TestCase("{ 'skip': {'a': 0}, 'next': 1")]
        [TestCase("{ 'skip': {'a': [0, {'b':[]}]}, 'next': 1")]
        public void SkipValue(string json)
        {
            // Apostrophes in the test data stand in for double quotes.
            var document = json.Replace('\'', '"');
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader(document));

            Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());

            var skippedName = tokenizer.Next();
            Assert.AreEqual("skip", skippedName.StringValue);

            tokenizer.SkipValue();

            // Whatever shape the skipped value had, the next token should be the following name.
            var followingName = tokenizer.Next();
            Assert.AreEqual("next", followingName.StringValue);
        }

        /// <summary>
        /// Asserts that the specified JSON is tokenized into the given sequence of tokens.
        /// Every apostrophe is converted to a double quote first, so tests that don't care about
        /// real apostrophe handling can write their JSON with apostrophes and avoid messy string
        /// literal escaping. The "end document" token is implicit: it must not be listed in
        /// <paramref name="expectedTokens"/>.
        /// </summary>
        private static void AssertTokens(string json, params JsonToken[] expectedTokens) =>
            AssertTokensNoReplacement(json.Replace('\'', '"'), expectedTokens);

        /// <summary>
        /// Asserts that the specified JSON is tokenized into the given sequence of tokens.
        /// Unlike <see cref="AssertTokens(string, JsonToken[])"/>, the JSON is used exactly as given,
        /// with no character replacement, so this is the method to use when the text contains
        /// apostrophes that are meant to be used *as* apostrophes. The "end document" token is
        /// implicit: it must not be listed in <paramref name="expectedTokens"/>.
        /// </summary>
        private static void AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens)
        {
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));

            foreach (var expected in expectedTokens)
            {
                var actual = tokenizer.Next();
                if (actual == JsonToken.EndDocument)
                {
                    Assert.Fail("Expected {0} but reached end of token stream", expected);
                }
                Assert.AreEqual(expected, actual);
            }

            // Nothing but the implicit end-of-document token may remain.
            var trailing = tokenizer.Next();
            if (trailing != JsonToken.EndDocument)
            {
                Assert.Fail("Expected token stream to be exhausted; received {0}", trailing);
            }
        }

        /// <summary>
        /// Asserts that the specified JSON first yields the given tokens, and that the following
        /// call to <c>Next()</c> throws <c>InvalidJsonException</c>. The JSON is used exactly as
        /// given; no apostrophe replacement is performed.
        /// </summary>
        private static void AssertThrowsAfter(string json, params JsonToken[] expectedTokens)
        {
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));

            foreach (var expected in expectedTokens)
            {
                var actual = tokenizer.Next();
                if (actual == JsonToken.EndDocument)
                {
                    Assert.Fail("Expected {0} but reached end of document", expected);
                }
                Assert.AreEqual(expected, actual);
            }

            Assert.Throws<InvalidJsonException>(() => tokenizer.Next());
        }
    }
}