#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion

using NUnit.Framework;
using System;
using System.IO;

namespace Google.Protobuf
{
    public class JsonTokenizerTest
    {
        [Test]
        public void EmptyObjectValue()
        {
            AssertTokens("{}", JsonToken.StartObject, JsonToken.EndObject);
        }

        [Test]
        public void EmptyArrayValue()
        {
            AssertTokens("[]", JsonToken.StartArray, JsonToken.EndArray);
        }

        [Test]
        [TestCase("foo", "foo")]
        [TestCase("tab\\t", "tab\t")]
        [TestCase("line\\nfeed", "line\nfeed")]
        [TestCase("carriage\\rreturn", "carriage\rreturn")]
        [TestCase("back\\bspace", "back\bspace")]
        [TestCase("form\\ffeed", "form\ffeed")]
        [TestCase("escaped\\/slash", "escaped/slash")]
        [TestCase("escaped\\\\backslash", "escaped\\backslash")]
        [TestCase("escaped\\\"quote", "escaped\"quote")]
        [TestCase("foo {}[] bar", "foo {}[] bar")]
        [TestCase("foo\\u09aFbar", "foo\u09afbar")] // Digits, upper hex, lower hex
        [TestCase("ab\ud800\udc00cd", "ab\ud800\udc00cd")]
        [TestCase("ab\\ud800\\udc00cd", "ab\ud800\udc00cd")]
        public void StringValue(string json, string expectedValue)
        {
            AssertTokensNoReplacement("\"" + json + "\"", JsonToken.Value(expectedValue));
        }

        // Valid surrogate pairs, with mixed escaping. These test cases can't be expressed
        // using TestCase as they have no valid UTF-8 representation.
        // It's unclear exactly how we should handle a mixture of escaped or not: that can't
        // come from UTF-8 text, but could come from a .NET string. For the moment,
        // treat it as valid in the obvious way.
        [Test]
        public void MixedSurrogatePairs()
        {
            string expected = "\ud800\udc00";
            AssertTokens("'\\ud800\udc00'", JsonToken.Value(expected));
            AssertTokens("'\ud800\\udc00'", JsonToken.Value(expected));
        }

        [Test]
        public void ObjectDepth()
        {
            string json = "{ \"foo\": { \"x\": 1, \"y\": [ 0 ] } }";
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));
            // If we had more tests like this, I'd introduce a helper method... but for one test, it's not worth it.
            Assert.AreEqual(0, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
            Assert.AreEqual(1, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.Name("foo"), tokenizer.Next());
            Assert.AreEqual(1, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
            Assert.AreEqual(2, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.Name("x"), tokenizer.Next());
            Assert.AreEqual(2, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.Value(1), tokenizer.Next());
            Assert.AreEqual(2, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.Name("y"), tokenizer.Next());
            Assert.AreEqual(2, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.StartArray, tokenizer.Next());
            Assert.AreEqual(2, tokenizer.ObjectDepth); // Depth hasn't changed in array
            Assert.AreEqual(JsonToken.Value(0), tokenizer.Next());
            Assert.AreEqual(2, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.EndArray, tokenizer.Next());
            Assert.AreEqual(2, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.EndObject, tokenizer.Next());
            Assert.AreEqual(1, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.EndObject, tokenizer.Next());
            Assert.AreEqual(0, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
            Assert.AreEqual(0, tokenizer.ObjectDepth);
        }

        [Test]
        public void ObjectDepth_WithPushBack()
        {
            string json = "{}";
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));
            Assert.AreEqual(0, tokenizer.ObjectDepth);
            var token = tokenizer.Next();
            Assert.AreEqual(1, tokenizer.ObjectDepth);
            // When we push back a "start object", we should effectively be back to the previous depth.
            tokenizer.PushBack(token);
            Assert.AreEqual(0, tokenizer.ObjectDepth);
            // Read the same token again, and get back to depth 1.
            token = tokenizer.Next();
            Assert.AreEqual(1, tokenizer.ObjectDepth);

            // Now the same in reverse, with EndObject.
            token = tokenizer.Next();
            Assert.AreEqual(0, tokenizer.ObjectDepth);
            tokenizer.PushBack(token);
            Assert.AreEqual(1, tokenizer.ObjectDepth);
            tokenizer.Next();
            Assert.AreEqual(0, tokenizer.ObjectDepth);
        }

        [Test]
        [TestCase("embedded tab\t")]
        [TestCase("embedded CR\r")]
        [TestCase("embedded LF\n")]
        [TestCase("embedded bell\u0007")]
        [TestCase("bad escape\\a")]
        [TestCase("incomplete escape\\")]
        [TestCase("incomplete Unicode escape\\u000")]
        [TestCase("invalid Unicode escape\\u000H")]
        // Surrogate pair handling, both in raw .NET strings and escaped. We only need
        // to detect this in strings, as non-ASCII characters anywhere other than in strings
        // will already lead to parsing errors.
        [TestCase("\\ud800")]
        [TestCase("\\udc00")]
        [TestCase("\\ud800x")]
        [TestCase("\\udc00x")]
        [TestCase("\\udc00\\ud800y")]
        public void InvalidStringValue(string json)
        {
            AssertThrowsAfter("\"" + json + "\"");
        }

        // Tests for invalid strings that can't be expressed in attributes,
        // as the constants can't be expressed as UTF-8 strings.
        [Test]
        public void InvalidSurrogatePairs()
        {
            AssertThrowsAfter("\"\ud800x\"");
            AssertThrowsAfter("\"\udc00y\"");
            AssertThrowsAfter("\"\udc00\ud800y\"");
        }

        [Test]
        [TestCase("0", 0)]
        [TestCase("-0", 0)] // We don't distinguish between positive and negative 0
        [TestCase("1", 1)]
        [TestCase("-1", -1)]
        // From here on, assume leading sign is okay...
        [TestCase("1.125", 1.125)]
        [TestCase("1.0", 1)]
        [TestCase("1e5", 100000)]
        [TestCase("1e000000", 1)] // Weird, but not prohibited by the spec
        [TestCase("1E5", 100000)]
        [TestCase("1e+5", 100000)]
        [TestCase("1E-5", 0.00001)]
        [TestCase("123E-2", 1.23)]
        [TestCase("123.45E3", 123450)]
        [TestCase(" 1 ", 1)]
        public void NumberValue(string json, double expectedValue)
        {
            AssertTokens(json, JsonToken.Value(expectedValue));
        }

        [Test]
        [TestCase("00")]
        [TestCase(".5")]
        [TestCase("1.")]
        [TestCase("1e")]
        [TestCase("1e-")]
        [TestCase("--")]
        [TestCase("--1")]
        [TestCase("-1.7977e308")]
        [TestCase("1.7977e308")]
        public void InvalidNumberValue(string json)
        {
            AssertThrowsAfter(json);
        }

        [Test]
        [TestCase("nul")]
        [TestCase("nothing")]
        [TestCase("truth")]
        [TestCase("fALSEhood")]
        public void InvalidLiterals(string json)
        {
            AssertThrowsAfter(json);
        }

        [Test]
        public void NullValue()
        {
            AssertTokens("null", JsonToken.Null);
        }

        [Test]
        public void TrueValue()
        {
            AssertTokens("true", JsonToken.True);
        }

        [Test]
        public void FalseValue()
        {
            AssertTokens("false", JsonToken.False);
        }

        [Test]
        public void SimpleObject()
        {
            AssertTokens("{'x': 'y'}",
                JsonToken.StartObject, JsonToken.Name("x"), JsonToken.Value("y"), JsonToken.EndObject);
        }

        [Test]
        [TestCase("[10, 20", 3)]
        [TestCase("[10,", 2)]
        [TestCase("[10:20]", 2)]
        [TestCase("[", 1)]
        [TestCase("[,", 1)]
        [TestCase("{", 1)]
        [TestCase("{,", 1)]
        [TestCase("{[", 1)]
        [TestCase("{{", 1)]
        [TestCase("{0", 1)]
        [TestCase("{null", 1)]
        [TestCase("{false", 1)]
        [TestCase("{true", 1)]
        [TestCase("}", 0)]
        [TestCase("]", 0)]
        [TestCase(",", 0)]
        [TestCase("'foo' 'bar'", 1)]
        [TestCase(":", 0)]
        [TestCase("'foo", 0)] // Incomplete string
        [TestCase("{ 'foo' }", 2)]
        [TestCase("{ x:1", 1)] // Property names must be quoted
        [TestCase("{]", 1)]
        [TestCase("[}", 1)]
        [TestCase("[1,", 2)]
        [TestCase("{'x':0]", 3)]
        [TestCase("{ 'foo': }", 2)]
        [TestCase("{ 'foo':'bar', }", 3)]
        public void InvalidStructure(string json, int expectedValidTokens)
        {
            // Note: we don't test that the earlier tokens are exactly as expected,
            // partly because that's hard to parameterize.
            var reader = new StringReader(json.Replace('\'', '"'));
            var tokenizer = JsonTokenizer.FromTextReader(reader);
            for (int i = 0; i < expectedValidTokens; i++)
            {
                Assert.IsNotNull(tokenizer.Next());
            }
            Assert.Throws<InvalidJsonException>(() => tokenizer.Next());
        }

        [Test]
        public void ArrayMixedType()
        {
            AssertTokens("[1, 'foo', null, false, true, [2], {'x':'y' }]",
                JsonToken.StartArray,
                JsonToken.Value(1),
                JsonToken.Value("foo"),
                JsonToken.Null,
                JsonToken.False,
                JsonToken.True,
                JsonToken.StartArray,
                JsonToken.Value(2),
                JsonToken.EndArray,
                JsonToken.StartObject,
                JsonToken.Name("x"),
                JsonToken.Value("y"),
                JsonToken.EndObject,
                JsonToken.EndArray);
        }

        [Test]
        public void ObjectMixedType()
        {
            AssertTokens(@"{'a': 1, 'b': 'bar', 'c': null, 'd': false, 'e': true,
                           'f': [2], 'g': {'x':'y' }}",
                JsonToken.StartObject,
                JsonToken.Name("a"),
                JsonToken.Value(1),
                JsonToken.Name("b"),
                JsonToken.Value("bar"),
                JsonToken.Name("c"),
                JsonToken.Null,
                JsonToken.Name("d"),
                JsonToken.False,
                JsonToken.Name("e"),
                JsonToken.True,
                JsonToken.Name("f"),
                JsonToken.StartArray,
                JsonToken.Value(2),
                JsonToken.EndArray,
                JsonToken.Name("g"),
                JsonToken.StartObject,
                JsonToken.Name("x"),
                JsonToken.Value("y"),
                JsonToken.EndObject,
                JsonToken.EndObject);
        }

        [Test]
        public void NextAfterEndDocumentThrows()
        {
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));
            Assert.AreEqual(JsonToken.Null, tokenizer.Next());
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
            Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
        }

        [Test]
        public void CanPushBackEndDocument()
        {
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));
            Assert.AreEqual(JsonToken.Null, tokenizer.Next());
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
            tokenizer.PushBack(JsonToken.EndDocument);
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
            Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
        }
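
        // Illustrative sketch, not part of the original test suite: the push-back support
        // exercised in ObjectDepth_WithPushBack and CanPushBackEndDocument is what enables
        // single-token lookahead ("peek before you parse"). This hedged example uses only
        // members already exercised in this file (FromTextReader, Next, PushBack) and the
        // existing JsonToken factory members; the test name itself is an invention.
        [Test]
        public void PushBackAllowsSingleTokenLookahead_IllustrativeSketch()
        {
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader("{ \"name\": \"value\" }"));
            // Peek at the first token (e.g. to decide how to parse), then push it back...
            var peeked = tokenizer.Next();
            Assert.AreEqual(JsonToken.StartObject, peeked);
            tokenizer.PushBack(peeked);
            // ...and the full token stream is still available from the start.
            Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
            Assert.AreEqual(JsonToken.Name("name"), tokenizer.Next());
            Assert.AreEqual(JsonToken.Value("value"), tokenizer.Next());
            Assert.AreEqual(JsonToken.EndObject, tokenizer.Next());
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
        }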

        /// <summary>
        /// Asserts that the specified JSON is tokenized into the given sequence of tokens.
        /// All apostrophes are first converted to double quotes, allowing any tests
        /// that don't need to check actual apostrophe handling to use apostrophes in the JSON, avoiding
        /// messy string literal escaping. The "end document" token is not specified in the list of
        /// expected tokens, but is implicit.
        /// </summary>
        private static void AssertTokens(string json, params JsonToken[] expectedTokens)
        {
            AssertTokensNoReplacement(json.Replace('\'', '"'), expectedTokens);
        }

        /// <summary>
        /// Asserts that the specified JSON is tokenized into the given sequence of tokens.
        /// Unlike <see cref="AssertTokens(string, JsonToken[])"/>, this does not perform any character
        /// replacement on the specified JSON, and should be used when the text contains apostrophes which
        /// are expected to be used *as* apostrophes. The "end document" token is not specified in the list of
        /// expected tokens, but is implicit.
        /// </summary>
        private static void AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens)
        {
            var reader = new StringReader(json);
            var tokenizer = JsonTokenizer.FromTextReader(reader);
            for (int i = 0; i < expectedTokens.Length; i++)
            {
                var actualToken = tokenizer.Next();
                if (actualToken == JsonToken.EndDocument)
                {
                    Assert.Fail("Expected {0} but reached end of token stream", expectedTokens[i]);
                }
                Assert.AreEqual(expectedTokens[i], actualToken);
            }
            var finalToken = tokenizer.Next();
            if (finalToken != JsonToken.EndDocument)
            {
                Assert.Fail("Expected token stream to be exhausted; received {0}", finalToken);
            }
        }

        private static void AssertThrowsAfter(string json, params JsonToken[] expectedTokens)
        {
            var reader = new StringReader(json);
            var tokenizer = JsonTokenizer.FromTextReader(reader);
            for (int i = 0; i < expectedTokens.Length; i++)
            {
                var actualToken = tokenizer.Next();
                if (actualToken == JsonToken.EndDocument)
                {
                    Assert.Fail("Expected {0} but reached end of document", expectedTokens[i]);
                }
                Assert.AreEqual(expectedTokens[i], actualToken);
            }
            Assert.Throws<InvalidJsonException>(() => tokenizer.Next());
        }
    }
}