1 #region Copyright notice and license 2 // Protocol Buffers - Google's data interchange format 3 // Copyright 2015 Google Inc. All rights reserved. 4 // https://developers.google.com/protocol-buffers/ 5 // 6 // Redistribution and use in source and binary forms, with or without 7 // modification, are permitted provided that the following conditions are 8 // met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above 13 // copyright notice, this list of conditions and the following disclaimer 14 // in the documentation and/or other materials provided with the 15 // distribution. 16 // * Neither the name of Google Inc. nor the names of its 17 // contributors may be used to endorse or promote products derived from 18 // this software without specific prior written permission. 19 // 20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endregion

using Google.Protobuf.Reflection;
using Google.Protobuf.WellKnownTypes;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;

namespace Google.Protobuf
{
/// <summary>
/// Converts JSON into messages, using reflection over the message descriptors.
/// </summary>
/// <remarks>
/// <para>
/// Instances of this class are thread-safe, with no mutable state.
/// </para>
/// <para>
/// This is a simple start to get JSON parsing working. Being reflection-based, it is
/// slower than baking calls into generated messages would be, but the implementation is simpler.
/// (This code is generally not heavily optimized.)
/// </para>
/// </remarks>
public sealed class JsonParser
{
// Note: the character class 0-9 is used rather than \d so that non-ASCII digits never match.
// These patterns are not complete validators; they reject *most* invalid input and the
// subsequent parsing is relied upon to do the rest.
private static readonly Regex TimestampRegex = new Regex(
    @"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$",
    FrameworkPortability.CompiledRegexWhereAvailable);

private static readonly Regex DurationRegex = new Regex(
    @"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$",
    FrameworkPortability.CompiledRegexWhereAvailable);

// Multipliers converting a parsed fraction into nanoseconds. Timestamp parsing indexes this
// by the length of the "subseconds" capture, which includes the leading '.', so index 2
// corresponds to a single fractional digit and index 10 to nine digits.
private static readonly int[] SubsecondScalingFactors =
{
    0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1
};

private static readonly char[] FieldMaskPathSeparators = { ',' };

private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);

// TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers
// and the signatures of various methods.
// Maps the full name of each specially-handled well-known type to the routine that merges
// its JSON representation into a message of that type.
private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
    WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
{
    [Timestamp.Descriptor.FullName] = (self, target, tokens) => MergeTimestamp(target, tokens.Next()),
    [Duration.Descriptor.FullName] = (self, target, tokens) => MergeDuration(target, tokens.Next()),
    [Value.Descriptor.FullName] = (self, target, tokens) => self.MergeStructValue(target, tokens),
    [ListValue.Descriptor.FullName] = (self, target, tokens) =>
        self.MergeRepeatedField(target, target.Descriptor.Fields[ListValue.ValuesFieldNumber], tokens),
    [Struct.Descriptor.FullName] = (self, target, tokens) => self.MergeStruct(target, tokens),
    [Any.Descriptor.FullName] = (self, target, tokens) => self.MergeAny(target, tokens),
    [FieldMask.Descriptor.FullName] = (self, target, tokens) => MergeFieldMask(target, tokens.Next()),
    // Every wrapper type shares one handler.
    [Int32Value.Descriptor.FullName] = MergeWrapperField,
    [Int64Value.Descriptor.FullName] = MergeWrapperField,
    [UInt32Value.Descriptor.FullName] = MergeWrapperField,
    [UInt64Value.Descriptor.FullName] = MergeWrapperField,
    [FloatValue.Descriptor.FullName] = MergeWrapperField,
    [DoubleValue.Descriptor.FullName] = MergeWrapperField,
    [BytesValue.Descriptor.FullName] = MergeWrapperField,
    [StringValue.Descriptor.FullName] = MergeWrapperField,
    [BoolValue.Descriptor.FullName] = MergeWrapperField
};

// Convenience method to avoid having to repeat the same code multiple times in the above
// dictionary initialization.
private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
{
    // A wrapper message is represented in JSON by the bare value of its single "value" field.
    parser.MergeField(message, message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber], tokenizer);
}

/// <summary>
/// Returns a parser using the default settings.
/// </summary>
public static JsonParser Default { get { return defaultInstance; } }

private readonly Settings settings;

/// <summary>
/// Creates a new parser with the given settings.
/// </summary>
/// <param name="settings">The settings. Must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="settings"/> is null.</exception>
public JsonParser(Settings settings)
{
    // Fail at construction rather than with a NullReferenceException on first use.
    ProtoPreconditions.CheckNotNull(settings, nameof(settings));
    this.settings = settings;
}

/// <summary>
/// Parses <paramref name="json"/> and merges the information into the given message.
/// </summary>
/// <param name="message">The message to merge the JSON information into.</param>
/// <param name="json">The JSON to parse.</param>
internal void Merge(IMessage message, string json)
{
    Merge(message, new StringReader(json));
}

/// <summary>
/// Parses JSON read from <paramref name="jsonReader"/> and merges the information into the given message.
/// </summary>
/// <param name="message">The message to merge the JSON information into.</param>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
/// <exception cref="InvalidProtocolBufferException">Further tokens follow the top-level value.</exception>
internal void Merge(IMessage message, TextReader jsonReader)
{
    var tokenizer = JsonTokenizer.FromTextReader(jsonReader);
    Merge(message, tokenizer);
    // Nothing may follow the single top-level value.
    var lastToken = tokenizer.Next();
    if (lastToken != JsonToken.EndDocument)
    {
        throw new InvalidProtocolBufferException("Expected end of JSON after object");
    }
}

/// <summary>
/// Merges the given message using data from the given tokenizer.
/// In most cases, the next token should be a "start object" token, but wrapper types and
/// nullity can invalidate that assumption. This is implemented as an LL(1) recursive descent
/// parser over the stream of tokens provided by the tokenizer. This token stream is assumed to
/// be valid JSON, with the tokenizer performing that validation - but not every token stream
/// is valid "protobuf JSON".
/// </summary>
/// <param name="message">The message to merge the parsed data into.</param>
/// <param name="tokenizer">The tokenizer supplying the JSON tokens.</param>
/// <exception cref="InvalidProtocolBufferException">The recursion limit is exceeded, the value is not
/// an object, a oneof is given more than one value, or an unknown field is seen while unknown fields
/// are not being ignored.</exception>
private void Merge(IMessage message, JsonTokenizer tokenizer)
{
    if (tokenizer.ObjectDepth > settings.RecursionLimit)
    {
        throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
    }
    if (message.Descriptor.IsWellKnownType)
    {
        Action<JsonParser, IMessage, JsonTokenizer> handler;
        if (WellKnownTypeHandlers.TryGetValue(message.Descriptor.FullName, out handler))
        {
            handler(this, message, tokenizer);
            return;
        }
        // Well-known types with no special handling continue in the normal way.
    }
    var token = tokenizer.Next();
    if (token.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected an object");
    }
    var descriptor = message.Descriptor;
    var jsonFieldMap = descriptor.Fields.ByJsonName();
    // All the oneof fields we've already accounted for - we can only see each of them once.
    // The set is created lazily to avoid the overhead of creating a set for every message
    // we parsed, when oneofs are relatively rare.
    HashSet<OneofDescriptor> seenOneofs = null;
    while (true)
    {
        token = tokenizer.Next();
        if (token.Type == JsonToken.TokenType.EndObject)
        {
            return;
        }
        if (token.Type != JsonToken.TokenType.Name)
        {
            throw new InvalidOperationException("Unexpected token type " + token.Type);
        }
        string name = token.StringValue;
        FieldDescriptor field;
        if (jsonFieldMap.TryGetValue(name, out field))
        {
            if (field.ContainingOneof != null)
            {
                if (seenOneofs == null)
                {
                    seenOneofs = new HashSet<OneofDescriptor>();
                }
                if (!seenOneofs.Add(field.ContainingOneof))
                {
                    throw new InvalidProtocolBufferException($"Multiple values specified for oneof {field.ContainingOneof.Name}");
                }
            }
            MergeField(message, field, tokenizer);
        }
        else
        {
            if (settings.IgnoreUnknownFields)
            {
                tokenizer.SkipValue();
            }
            else
            {
                throw new InvalidProtocolBufferException("Unknown field: " + name);
            }
        }
    }
}

/// <summary>
/// Reads the value for a single field from the tokenizer and stores it in the message,
/// dispatching to map, repeated or singular handling as appropriate.
/// </summary>
/// <param name="message">The message containing the field.</param>
/// <param name="field">The field whose value is about to be read.</param>
/// <param name="tokenizer">The tokenizer positioned just before the field's value.</param>
private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    var token = tokenizer.Next();
    if (token.Type == JsonToken.TokenType.Null)
    {
        // Clear the field if we see a null token, unless it's for a singular field of type
        // google.protobuf.Value.
        // Note: different from Java API, which just ignores it.
        // TODO: Bring it more in line? Discuss...
        if (field.IsMap || field.IsRepeated || !IsGoogleProtobufValueField(field))
        {
            field.Accessor.Clear(message);
            return;
        }
    }
    // The token was only peeked at; the type-specific handlers read it again.
    tokenizer.PushBack(token);

    if (field.IsMap)
    {
        MergeMapField(message, field, tokenizer);
    }
    else if (field.IsRepeated)
    {
        MergeRepeatedField(message, field, tokenizer);
    }
    else
    {
        var value = ParseSingleValue(field, tokenizer);
        field.Accessor.SetValue(message, value);
    }
}

/// <summary>
/// Reads a JSON array from the tokenizer and appends each element to the repeated field.
/// </summary>
/// <param name="message">The message containing the field.</param>
/// <param name="field">The repeated field to append to.</param>
/// <param name="tokenizer">The tokenizer positioned just before the array.</param>
/// <exception cref="InvalidProtocolBufferException">The value is not an array, or an element is null.</exception>
private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    var token = tokenizer.Next();
    if (token.Type != JsonToken.TokenType.StartArray)
    {
        throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + token.Type);
    }

    IList list = (IList) field.Accessor.GetValue(message);
    while (true)
    {
        token = tokenizer.Next();
        if (token.Type == JsonToken.TokenType.EndArray)
        {
            return;
        }
        tokenizer.PushBack(token);
        object value = ParseSingleValue(field, tokenizer);
        if (value == null)
        {
            throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
        }
        list.Add(value);
    }
}

/// <summary>
/// Reads a JSON object from the tokenizer and stores each property as an entry of the map field.
/// A key that occurs more than once keeps the last value seen.
/// </summary>
/// <param name="message">The message containing the field.</param>
/// <param name="field">The map field to populate.</param>
/// <param name="tokenizer">The tokenizer positioned just before the object.</param>
/// <exception cref="InvalidProtocolBufferException">The value is not an object, the map entry type
/// lacks a key or value field, or a map value is null.</exception>
private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those.
    var token = tokenizer.Next();
    if (token.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected an object to populate a map");
    }

    // The map entry message carries the key as field 1 and the value as field 2.
    var type = field.MessageType;
    var keyField = type.FindFieldByNumber(1);
    var valueField = type.FindFieldByNumber(2);
    if (keyField == null || valueField == null)
    {
        throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
    }
    IDictionary dictionary = (IDictionary) field.Accessor.GetValue(message);

    while (true)
    {
        token = tokenizer.Next();
        if (token.Type == JsonToken.TokenType.EndObject)
        {
            return;
        }
        object key = ParseMapKey(keyField, token.StringValue);
        object value = ParseSingleValue(valueField, tokenizer);
        if (value == null)
        {
            throw new InvalidProtocolBufferException("Map values must not be null");
        }
        dictionary[key] = value;
    }
}

/// <summary>
/// Returns whether the field is a message field of type google.protobuf.Value.
/// </summary>
private static bool IsGoogleProtobufValueField(FieldDescriptor field)
{
    return field.FieldType == FieldType.Message &&
        field.MessageType.FullName == Value.Descriptor.FullName;
}

/// <summary>
/// Parses one value of the field's element type from the tokenizer.
/// </summary>
/// <param name="field">The field the value is destined for.</param>
/// <param name="tokenizer">The tokenizer positioned just before the value.</param>
/// <returns>The parsed value; for a JSON null, a null-valued <see cref="Value"/> when the field is of
/// type google.protobuf.Value, otherwise a null reference.</returns>
/// <exception cref="InvalidProtocolBufferException">The token type is not supported for the field type.</exception>
private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)
{
    var token = tokenizer.Next();
    if (token.Type == JsonToken.TokenType.Null)
    {
        // TODO: In order to support dynamic messages, we should really build this up
        // dynamically.
        if (IsGoogleProtobufValueField(field))
        {
            return Value.ForNull();
        }
        return null;
    }

    var fieldType = field.FieldType;
    if (fieldType == FieldType.Message)
    {
        // Parse wrapper types as their constituent types.
        // TODO: What does this mean for null?
        if (field.MessageType.IsWrapperType)
        {
            field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
            fieldType = field.FieldType;
        }
        else
        {
            // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
            tokenizer.PushBack(token);
            IMessage subMessage = NewMessageForField(field);
            Merge(subMessage, tokenizer);
            return subMessage;
        }
    }

    // A null token never reaches this switch: it is dealt with at the top of the method.
    switch (token.Type)
    {
        case JsonToken.TokenType.True:
        case JsonToken.TokenType.False:
            if (fieldType == FieldType.Bool)
            {
                return token.Type == JsonToken.TokenType.True;
            }
            // Fall through to "we don't support this type for this case"; could duplicate the behaviour of the default
            // case instead, but this way we'd only need to change one place.
            goto default;
        case JsonToken.TokenType.StringValue:
            return ParseSingleStringValue(field, token.StringValue);
        // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
        case JsonToken.TokenType.Number:
            return ParseSingleNumberValue(field, token);
        default:
            throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType);
    }
}

/// <summary>
/// Parses <paramref name="json"/> into a new message.
/// </summary>
/// <typeparam name="T">The message type to instantiate and populate.</typeparam>
/// <param name="json">The JSON text to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(string json) where T : IMessage, new()
{
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    var reader = new StringReader(json);
    return Parse<T>(reader);
}

/// <summary>
/// Reads JSON from <paramref name="jsonReader"/> and parses it into a new message.
/// </summary>
/// <typeparam name="T">The message type to instantiate and populate.</typeparam>
/// <param name="jsonReader">The reader supplying the JSON text.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
{
    ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
    T parsed = new T();
    Merge(parsed, jsonReader);
    return parsed;
}

/// <summary>
/// Parses <paramref name="json"/> into a new message.
/// </summary>
/// <param name="json">The JSON text to parse.</param>
/// <param name="descriptor">Descriptor of the message type to create.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(string json, MessageDescriptor descriptor)
{
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
    var reader = new StringReader(json);
    return Parse(reader, descriptor);
}

/// <summary>
/// Reads JSON from <paramref name="jsonReader"/> and parses it into a new message.
/// </summary>
/// <param name="jsonReader">The reader supplying the JSON text.</param>
/// <param name="descriptor">Descriptor of the message type to create.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
{
    ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
    ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
    IMessage parsed = descriptor.Parser.CreateTemplate();
    Merge(parsed, jsonReader);
    return parsed;
}

// Populates a google.protobuf.Value message from whatever JSON value comes next, choosing
// the member of the Value message that corresponds to the kind of token seen.
private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
{
    var head = tokenizer.Next();
    var valueFields = message.Descriptor.Fields;
    switch (head.Type)
    {
        case JsonToken.TokenType.Null:
            valueFields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0);
            return;
        case JsonToken.TokenType.StringValue:
            valueFields[Value.StringValueFieldNumber].Accessor.SetValue(message, head.StringValue);
            return;
        case JsonToken.TokenType.Number:
            valueFields[Value.NumberValueFieldNumber].Accessor.SetValue(message, head.NumberValue);
            return;
        case JsonToken.TokenType.False:
        case JsonToken.TokenType.True:
            valueFields[Value.BoolValueFieldNumber].Accessor.SetValue(message, head.Type == JsonToken.TokenType.True);
            return;
        case JsonToken.TokenType.StartObject:
        case JsonToken.TokenType.StartArray:
            {
                // Objects become a nested Struct and arrays a nested ListValue; either way the
                // opening token is handed back so the nested merge sees the complete value.
                int nestedNumber = head.Type == JsonToken.TokenType.StartObject
                    ? Value.StructValueFieldNumber
                    : Value.ListValueFieldNumber;
                var nestedField = valueFields[nestedNumber];
                var nested = NewMessageForField(nestedField);
                tokenizer.PushBack(head);
                Merge(nested, tokenizer);
                nestedField.Accessor.SetValue(message, nested);
                return;
            }
        default:
            throw new InvalidOperationException("Unexpected token type: " + head.Type);
    }
}

// Populates a google.protobuf.Struct message: the JSON object is merged as the message's
// "fields" map.
private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
{
    var first = tokenizer.Next();
    if (first.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected object value for Struct");
    }
    tokenizer.PushBack(first);

    MergeMapField(message, message.Descriptor.Fields[Struct.FieldsFieldNumber], tokenizer);
}

// Populates a google.protobuf.Any message from a JSON object carrying an @type property.
private void MergeAny(IMessage message, JsonTokenizer tokenizer)
{
    // Tokens are recorded until the @type property turns up. Its value identifies the message
    // type via the type registry; the recording is then replayed without the @type property.
    var recorded = new List<JsonToken>();

    var current = tokenizer.Next();
    if (current.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected object value for Any");
    }
    int anyDepth = tokenizer.ObjectDepth;

    while (true)
    {
        // Comparing depths guards against nested Any values which occur before the type URL
        // for *this* Any.
        bool isOwnTypeUrl = current.Type == JsonToken.TokenType.Name &&
            current.StringValue == JsonFormatter.AnyTypeUrlField &&
            tokenizer.ObjectDepth == anyDepth;
        if (isOwnTypeUrl)
        {
            break;
        }

        recorded.Add(current);
        current = tokenizer.Next();

        if (tokenizer.ObjectDepth < anyDepth)
        {
            throw new InvalidProtocolBufferException("Any message with no @type");
        }
    }

    // Neither the @type property nor its value goes into the recording.
    current = tokenizer.Next();
    if (current.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Any.@type");
    }
    string typeUrl = current.StringValue;
    string typeName = Any.GetTypeName(typeUrl);

    MessageDescriptor bodyDescriptor = settings.TypeRegistry.Find(typeName);
    if (bodyDescriptor == null)
    {
        throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
    }

    // Replay what was recorded, followed by whatever is left of the object, and parse it as
    // normal. The original tokenizer should end up at the end of the object.
    var replayTokenizer = JsonTokenizer.FromReplayedTokens(recorded, tokenizer);
    var body = bodyDescriptor.Parser.CreateTemplate();
    if (bodyDescriptor.IsWellKnownType)
    {
        MergeWellKnownTypeAnyBody(body, replayTokenizer);
    }
    else
    {
        Merge(body, replayTokenizer);
    }
    var packed = body.ToByteString();

    // With the serialized body in hand, fill in the Any that was passed as a parameter.
    var anyFields = message.Descriptor.Fields;
    anyFields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
    anyFields[Any.ValueFieldNumber].Accessor.SetValue(message, packed);
}

// Well-known types end up in a property called "value" in the JSON. As there's no longer a @type property
// in the given JSON token stream, we should *only* have tokens of start-object, name("value"), the value
// itself, and then end-object.
private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
{
    // Discard the start-object token; the caller has already checked it.
    tokenizer.Next();

    var nameToken = tokenizer.Next();
    // TODO: What about an absent Int32Value, for example?
    bool isValueProperty = nameToken.Type == JsonToken.TokenType.Name &&
        nameToken.StringValue == JsonFormatter.AnyWellKnownTypeValueField;
    if (!isValueProperty)
    {
        throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
    }

    Merge(body, tokenizer);

    var closing = tokenizer.Next();
    if (closing.Type != JsonToken.TokenType.EndObject)
    {
        throw new InvalidProtocolBufferException("Expected end-object token after @type/value for well-known type");
    }
}

#region Utility methods which don't depend on the state (or settings) of the parser.
/// <summary>
/// Converts the text of a JSON property name into a map key of the type required by the key field.
/// </summary>
/// <param name="field">The key field of the map entry type.</param>
/// <param name="keyText">The JSON property name.</param>
/// <returns>The key as a boxed bool, string or integer.</returns>
/// <exception cref="InvalidProtocolBufferException">The text is not valid for the key type, or the
/// field type is not a valid map key type.</exception>
private static object ParseMapKey(FieldDescriptor field, string keyText)
{
    switch (field.FieldType)
    {
        case FieldType.Bool:
            if (keyText == "true")
            {
                return true;
            }
            if (keyText == "false")
            {
                return false;
            }
            throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText);
        case FieldType.String:
            return keyText;
        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString(keyText, int.Parse);
        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString(keyText, uint.Parse);
        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString(keyText, long.Parse);
        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString(keyText, ulong.Parse);
        default:
            throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
    }
}

/// <summary>
/// Converts a JSON number token into a value of the type required by the field.
/// </summary>
/// <param name="field">The field the value is destined for.</param>
/// <param name="token">The number token.</param>
/// <returns>The boxed value; enum fields are returned as an <see cref="int"/>.</returns>
/// <exception cref="InvalidProtocolBufferException">The number is not an integer where one is required,
/// is out of range for the field type, or the field type cannot be populated from a number.</exception>
private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
{
    double value = token.NumberValue;
    // Checked conversions make an out-of-range cast throw OverflowException, which is
    // translated into a parse failure below.
    checked
    {
        try
        {
            switch (field.FieldType)
            {
                case FieldType.Int32:
                case FieldType.SInt32:
                case FieldType.SFixed32:
                    CheckInteger(value);
                    return (int) value;
                case FieldType.UInt32:
                case FieldType.Fixed32:
                    CheckInteger(value);
                    return (uint) value;
                case FieldType.Int64:
                case FieldType.SInt64:
                case FieldType.SFixed64:
                    CheckInteger(value);
                    return (long) value;
                case FieldType.UInt64:
                case FieldType.Fixed64:
                    CheckInteger(value);
                    return (ulong) value;
                case FieldType.Double:
                    return value;
                case FieldType.Float:
                    if (double.IsNaN(value))
                    {
                        return float.NaN;
                    }
                    if (value > float.MaxValue || value < float.MinValue)
                    {
                        if (double.IsPositiveInfinity(value))
                        {
                            return float.PositiveInfinity;
                        }
                        if (double.IsNegativeInfinity(value))
                        {
                            return float.NegativeInfinity;
                        }
                        throw new InvalidProtocolBufferException($"Value out of range: {value}");
                    }
                    return (float) value;
                case FieldType.Enum:
                    CheckInteger(value);
                    // Just return it as an int, and let the CLR convert it.
                    // Note that we deliberately don't check that it's a known value.
                    return (int) value;
                default:
                    throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
            }
        }
        catch (OverflowException)
        {
            throw new InvalidProtocolBufferException($"Value out of range: {value}");
        }
    }
}

/// <summary>
/// Throws unless the value is a finite number with no fractional part.
/// </summary>
/// <exception cref="InvalidProtocolBufferException">The value is infinite, NaN or not integral.</exception>
private static void CheckInteger(double value)
{
    if (double.IsInfinity(value) || double.IsNaN(value) || value != Math.Floor(value))
    {
        throw new InvalidProtocolBufferException($"Value not an integer: {value}");
    }
}

/// <summary>
/// Converts the text of a JSON string token into a value of the type required by the field.
/// </summary>
/// <param name="field">The field the value is destined for.</param>
/// <param name="text">The text of the string token.</param>
/// <returns>The boxed value; enum fields are returned as the <see cref="int"/> number of the named value.</returns>
/// <exception cref="InvalidProtocolBufferException">The text is not valid for the field type, or the
/// field type cannot be populated from a string.</exception>
private static object ParseSingleStringValue(FieldDescriptor field, string text)
{
    switch (field.FieldType)
    {
        case FieldType.String:
            return text;
        case FieldType.Bytes:
            try
            {
                return ByteString.FromBase64(text);
            }
            catch (FormatException e)
            {
                throw InvalidProtocolBufferException.InvalidBase64(e);
            }
        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString(text, int.Parse);
        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString(text, uint.Parse);
        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString(text, long.Parse);
        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString(text, ulong.Parse);
        case FieldType.Double:
            double d = ParseNumericString(text, double.Parse);
            ValidateInfinityAndNan(text, double.IsPositiveInfinity(d), double.IsNegativeInfinity(d), double.IsNaN(d));
            return d;
        case FieldType.Float:
            float f = ParseNumericString(text, float.Parse);
            ValidateInfinityAndNan(text, float.IsPositiveInfinity(f), float.IsNegativeInfinity(f), float.IsNaN(f));
            return f;
        case FieldType.Enum:
            var enumValue = field.EnumType.FindValueByName(text);
            if (enumValue == null)
            {
                throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
            }
            // Just return it as an int, and let the CLR convert it.
            return enumValue.Number;
        default:
            throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
    }
}

/// <summary>
/// Creates a new instance of the message type for the given field.
/// </summary>
private static IMessage NewMessageForField(FieldDescriptor field)
{
    return field.MessageType.Parser.CreateTemplate();
}

/// <summary>
/// Parses numeric text with the supplied parser, after rejecting a leading '+' and leading
/// insignificant zeroes.
/// </summary>
/// <typeparam name="T">The numeric type produced by <paramref name="parser"/>.</typeparam>
/// <param name="text">The text to parse.</param>
/// <param name="parser">The parse method, for example <c>int.Parse</c>; it is invoked with the
/// invariant culture.</param>
/// <returns>The parsed value.</returns>
/// <exception cref="InvalidProtocolBufferException">The text is rejected by the checks above, is not
/// in a valid format, or is out of range for <typeparamref name="T"/>.</exception>
private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
{
    // The prefix checks are ordinal: this is machine-readable text, so the current culture
    // must not influence what counts as a leading sign or zero.
    // Can't prohibit this with NumberStyles.
    if (text.StartsWith("+", StringComparison.Ordinal))
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }
    if (text.StartsWith("0", StringComparison.Ordinal) && text.Length > 1)
    {
        if (text[1] >= '0' && text[1] <= '9')
        {
            throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
        }
    }
    else if (text.StartsWith("-0", StringComparison.Ordinal) && text.Length > 2)
    {
        if (text[2] >= '0' && text[2] <= '9')
        {
            throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
        }
    }
    try
    {
        return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
    }
    catch (OverflowException)
    {
        throw new InvalidProtocolBufferException($"Value out of range: {text}");
    }
}

/// <summary>
/// Checks that any infinite/NaN values originated from the correct text.
/// This corrects the lenient whitespace handling of double.Parse/float.Parse, as well as the
/// way that Mono parses out-of-range values as infinity.
/// </summary>
/// <param name="text">The text that was parsed.</param>
/// <param name="isPositiveInfinity">Whether the parsed value is positive infinity.</param>
/// <param name="isNegativeInfinity">Whether the parsed value is negative infinity.</param>
/// <param name="isNaN">Whether the parsed value is NaN.</param>
/// <exception cref="InvalidProtocolBufferException">The parsed value is infinite or NaN but the text
/// is not exactly "Infinity", "-Infinity" or "NaN" respectively.</exception>
private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
{
    if ((isPositiveInfinity && text != "Infinity") ||
        (isNegativeInfinity && text != "-Infinity") ||
        (isNaN && text != "NaN"))
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }
}

/// <summary>
/// Populates a Timestamp message from a JSON string token holding a date/time with optional
/// fractional seconds and either "Z" or a numeric UTC offset.
/// </summary>
/// <param name="message">The Timestamp message to populate.</param>
/// <param name="token">The token holding the timestamp text.</param>
/// <exception cref="InvalidProtocolBufferException">The token is not a string, or its text is not a
/// valid timestamp.</exception>
private static void MergeTimestamp(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Timestamp");
    }
    var match = TimestampRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
    }
    var dateTime = match.Groups["datetime"].Value;
    var subseconds = match.Groups["subseconds"].Value;
    var offset = match.Groups["offset"].Value;

    try
    {
        DateTime parsed = DateTime.ParseExact(
            dateTime,
            "yyyy-MM-dd'T'HH:mm:ss",
            CultureInfo.InvariantCulture,
            DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
        // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
        Timestamp timestamp = Timestamp.FromDateTime(parsed);
        int nanosToAdd = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            // The capture includes the leading '.', which the scaling table's indexing allows for.
            nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
        }
        int secondsToAdd = 0;
        if (offset != "Z")
        {
            // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
            int sign = offset[0] == '-' ? 1 : -1;
            int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
            // Parsed with the invariant culture, like every other number in this method.
            int minutes = int.Parse(offset.Substring(4, 2), CultureInfo.InvariantCulture);
            int totalMinutes = hours * 60 + minutes;
            if (totalMinutes > 18 * 60)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            if (totalMinutes == 0 && sign == 1)
            {
                // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            // We need to *subtract* the offset from local time to get UTC.
            secondsToAdd = sign * totalMinutes * 60;
        }
        // Ensure we've got the right signs. Currently unnecessary, but easy to do.
        if (secondsToAdd < 0 && nanosToAdd > 0)
        {
            secondsToAdd++;
            nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond;
        }
        if (secondsToAdd != 0 || nanosToAdd != 0)
        {
            timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
            // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
            // anywhere, but we shouldn't parse it.
            if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
        }
        message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
        message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
    }
}

private static void MergeDuration(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Duration");
    }
    var match = DurationRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var sign = match.Groups["sign"].Value;
    var secondsText = match.Groups["int"].Value;
    // Prohibit leading insignficant zeroes
    if (secondsText[0] == '0' && secondsText.Length > 1)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var subseconds = match.Groups["subseconds"].Value;
    var multiplier = sign == "-" ? -1 : 1;

    try
    {
        long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
        int nanos = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
892 int parsedFraction = int.Parse(subseconds.Substring(1)); 893 nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier; 894 } 895 if (!Duration.IsNormalized(seconds, nanos)) 896 { 897 throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}"); 898 } 899 message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds); 900 message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos); 901 } 902 catch (FormatException) 903 { 904 throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}"); 905 } 906 } 907 MergeFieldMask(IMessage message, JsonToken token)908 private static void MergeFieldMask(IMessage message, JsonToken token) 909 { 910 if (token.Type != JsonToken.TokenType.StringValue) 911 { 912 throw new InvalidProtocolBufferException("Expected string value for FieldMask"); 913 } 914 // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"? 915 string[] jsonPaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries); 916 IList messagePaths = (IList) message.Descriptor.Fields[FieldMask.PathsFieldNumber].Accessor.GetValue(message); 917 foreach (var path in jsonPaths) 918 { 919 messagePaths.Add(ToSnakeCase(path)); 920 } 921 } 922 923 // Ported from src/google/protobuf/util/internal/utility.cc ToSnakeCase(string text)924 private static string ToSnakeCase(string text) 925 { 926 var builder = new StringBuilder(text.Length * 2); 927 // Note: this is probably unnecessary now, but currently retained to be as close as possible to the 928 // C++, whilst still throwing an exception on underscores. 
929 bool wasNotUnderscore = false; // Initialize to false for case 1 (below) 930 bool wasNotCap = false; 931 932 for (int i = 0; i < text.Length; i++) 933 { 934 char c = text[i]; 935 if (c >= 'A' && c <= 'Z') // ascii_isupper 936 { 937 // Consider when the current character B is capitalized: 938 // 1) At beginning of input: "B..." => "b..." 939 // (e.g. "Biscuit" => "biscuit") 940 // 2) Following a lowercase: "...aB..." => "...a_b..." 941 // (e.g. "gBike" => "g_bike") 942 // 3) At the end of input: "...AB" => "...ab" 943 // (e.g. "GoogleLAB" => "google_lab") 944 // 4) Followed by a lowercase: "...ABc..." => "...a_bc..." 945 // (e.g. "GBike" => "g_bike") 946 if (wasNotUnderscore && // case 1 out 947 (wasNotCap || // case 2 in, case 3 out 948 (i + 1 < text.Length && // case 3 out 949 (text[i + 1] >= 'a' && text[i + 1] <= 'z')))) // ascii_islower(text[i + 1]) 950 { // case 4 in 951 // We add an underscore for case 2 and case 4. 952 builder.Append('_'); 953 } 954 // ascii_tolower, but we already know that c *is* an upper case ASCII character... 955 builder.Append((char) (c + 'a' - 'A')); 956 wasNotUnderscore = true; 957 wasNotCap = false; 958 } 959 else 960 { 961 builder.Append(c); 962 if (c == '_') 963 { 964 throw new InvalidProtocolBufferException($"Invalid field mask: {text}"); 965 } 966 wasNotUnderscore = true; 967 wasNotCap = true; 968 } 969 } 970 return builder.ToString(); 971 } 972 #endregion 973 974 /// <summary> 975 /// Settings controlling JSON parsing. 976 /// </summary> 977 public sealed class Settings 978 { 979 /// <summary> 980 /// Default settings, as used by <see cref="JsonParser.Default"/>. This has the same default 981 /// recursion limit as <see cref="CodedInputStream"/>, and an empty type registry. 982 /// </summary> 983 public static Settings Default { get; } 984 985 // Workaround for the Mono compiler complaining about XML comments not being on 986 // valid language elements. 
Settings()987 static Settings() 988 { 989 Default = new Settings(CodedInputStream.DefaultRecursionLimit); 990 } 991 992 /// <summary> 993 /// The maximum depth of messages to parse. Note that this limit only applies to parsing 994 /// messages, not collections - so a message within a collection within a message only counts as 995 /// depth 2, not 3. 996 /// </summary> 997 public int RecursionLimit { get; } 998 999 /// <summary> 1000 /// The type registry used to parse <see cref="Any"/> messages. 1001 /// </summary> 1002 public TypeRegistry TypeRegistry { get; } 1003 1004 /// <summary> 1005 /// Whether the parser should ignore unknown fields (<c>true</c>) or throw an exception when 1006 /// they are encountered (<c>false</c>). 1007 /// </summary> 1008 public bool IgnoreUnknownFields { get; } 1009 Settings(int recursionLimit, TypeRegistry typeRegistry, bool ignoreUnknownFields)1010 private Settings(int recursionLimit, TypeRegistry typeRegistry, bool ignoreUnknownFields) 1011 { 1012 RecursionLimit = recursionLimit; 1013 TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry)); 1014 IgnoreUnknownFields = ignoreUnknownFields; 1015 } 1016 1017 /// <summary> 1018 /// Creates a new <see cref="Settings"/> object with the specified recursion limit. 1019 /// </summary> 1020 /// <param name="recursionLimit">The maximum depth of messages to parse</param> Settings(int recursionLimit)1021 public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty) 1022 { 1023 } 1024 1025 /// <summary> 1026 /// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry. 
1027 /// </summary> 1028 /// <param name="recursionLimit">The maximum depth of messages to parse</param> 1029 /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages</param> Settings(int recursionLimit, TypeRegistry typeRegistry)1030 public Settings(int recursionLimit, TypeRegistry typeRegistry) : this(recursionLimit, typeRegistry, false) 1031 { 1032 } 1033 1034 /// <summary> 1035 /// Creates a new <see cref="Settings"/> object set to either ignore unknown fields, or throw an exception 1036 /// when unknown fields are encountered. 1037 /// </summary> 1038 /// <param name="ignoreUnknownFields"><c>true</c> if unknown fields should be ignored when parsing; <c>false</c> to throw an exception.</param> 1039 public Settings WithIgnoreUnknownFields(bool ignoreUnknownFields) => 1040 new Settings(RecursionLimit, TypeRegistry, ignoreUnknownFields); 1041 1042 /// <summary> 1043 /// Creates a new <see cref="Settings"/> object based on this one, but with the specified recursion limit. 1044 /// </summary> 1045 /// <param name="recursionLimit">The new recursion limit.</param> WithRecursionLimit(int recursionLimit)1046 public Settings WithRecursionLimit(int recursionLimit) => 1047 new Settings(recursionLimit, TypeRegistry, IgnoreUnknownFields); 1048 1049 /// <summary> 1050 /// Creates a new <see cref="Settings"/> object based on this one, but with the specified type registry. 1051 /// </summary> 1052 /// <param name="typeRegistry">The new type registry. Must not be null.</param> 1053 public Settings WithTypeRegistry(TypeRegistry typeRegistry) => 1054 new Settings( 1055 RecursionLimit, 1056 ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry)), 1057 IgnoreUnknownFields); 1058 } 1059 } 1060 } 1061