1 #region Copyright notice and license 2 // Protocol Buffers - Google's data interchange format 3 // Copyright 2015 Google Inc. All rights reserved. 4 // 5 // Use of this source code is governed by a BSD-style 6 // license that can be found in the LICENSE file or at 7 // https://developers.google.com/open-source/licenses/bsd 8 #endregion 9 10 using Google.Protobuf.Reflection; 11 using Google.Protobuf.WellKnownTypes; 12 using System; 13 using System.Collections; 14 using System.Collections.Generic; 15 using System.Globalization; 16 using System.IO; 17 using System.Linq; 18 using System.Text; 19 using System.Text.RegularExpressions; 20 21 namespace Google.Protobuf 22 { 23 /// <summary> 24 /// Reflection-based converter from JSON to messages. 25 /// </summary> 26 /// <remarks> 27 /// <para> 28 /// Instances of this class are thread-safe, with no mutable state. 29 /// </para> 30 /// <para> 31 /// This is a simple start to get JSON parsing working. As it's reflection-based, 32 /// it's not as quick as baking calls into generated messages - but is a simpler implementation. 33 /// (This code is generally not heavily optimized.) 34 /// </para> 35 /// </remarks> 36 public sealed class JsonParser 37 { 38 // Note: using 0-9 instead of \d to ensure no non-ASCII digits. 39 // This regex isn't a complete validator, but will remove *most* invalid input. We rely on parsing to do the rest. 
// Splits a Timestamp string into three named groups: "datetime" (date and time to whole seconds),
// "subseconds" (optional: '.' followed by 1-9 digits) and "offset" (either "Z" or +/-hh:mm).
private static readonly Regex TimestampRegex = new(
    @"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$",
    FrameworkPortability.CompiledRegexWhereAvailable);

// Splits a Duration string into "sign" (optional '-'), "int" (1-12 digits) and "subseconds"
// (optional: '.' followed by 1-9 digits); the text must end with 's'.
private static readonly Regex DurationRegex = new(
    @"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$",
    FrameworkPortability.CompiledRegexWhereAvailable);

// Multipliers converting a parsed fraction into nanoseconds. MergeTimestamp indexes this by the
// length of the matched "subseconds" group, which includes the leading '.', so a non-empty match
// always has a length between 2 and 10.
private static readonly int[] SubsecondScalingFactors =
{
    0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1
};

// NOTE(review): presumably the separators used to split FieldMask paths; the consumer is outside this view.
private static readonly char[] FieldMaskPathSeparators = { ',' };

// Descriptor of the google.protobuf.NullValue enum, located by its CLR type.
private static readonly EnumDescriptor NullValueDescriptor =
    StructReflection.Descriptor.EnumTypes.Single(candidate => candidate.ClrType == typeof(NullValue));

// Backing instance for the Default property.
private static readonly JsonParser defaultInstance = new(Settings.Default);

// TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers
// and the signatures of various methods.
// Maps the full name of each well-known type that has a dedicated JSON representation to the
// handler which merges that representation into a message of that type. Merge consults this
// table before falling back to ordinary field-by-field object parsing.
private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>> WellKnownTypeHandlers = new()
{
    { Timestamp.Descriptor.FullName, (parser, message, tokenizer) => MergeTimestamp(message, tokenizer.Next()) },
    { Duration.Descriptor.FullName, (parser, message, tokenizer) => MergeDuration(message, tokenizer.Next()) },
    { Value.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStructValue(message, tokenizer) },
    { ListValue.Descriptor.FullName, (parser, message, tokenizer) =>
        parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) },
    { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) },
    { Any.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeAny(message, tokenizer) },
    { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) },
    { Int32Value.Descriptor.FullName, MergeWrapperField },
    { Int64Value.Descriptor.FullName, MergeWrapperField },
    { UInt32Value.Descriptor.FullName, MergeWrapperField },
    { UInt64Value.Descriptor.FullName, MergeWrapperField },
    { FloatValue.Descriptor.FullName, MergeWrapperField },
    { DoubleValue.Descriptor.FullName, MergeWrapperField },
    { BytesValue.Descriptor.FullName, MergeWrapperField },
    { StringValue.Descriptor.FullName, MergeWrapperField },
    { BoolValue.Descriptor.FullName, MergeWrapperField }
};

// Convenience method to avoid having to repeat the same code multiple times in the above
// dictionary initialization. Merges the wrapper's single "value" field from the next JSON value.
private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
{
    parser.MergeField(message, message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber], tokenizer);
}

/// <summary>
/// Returns a parser using the default settings.
/// </summary>
public static JsonParser Default { get { return defaultInstance; } }

private readonly Settings settings;

/// <summary>
/// Creates a new parser with the given settings.
/// </summary>
/// <param name="settings">The settings.</param>
public JsonParser(Settings settings)
{
    this.settings = ProtoPreconditions.CheckNotNull(settings, nameof(settings));
}

/// <summary>
/// Parses <paramref name="json"/> and merges the information into the given message.
/// </summary>
/// <param name="message">The message to merge the JSON information into.</param>
/// <param name="json">The JSON to parse.</param>
internal void Merge(IMessage message, string json)
{
    Merge(message, new StringReader(json));
}

/// <summary>
/// Parses JSON read from <paramref name="jsonReader"/> and merges the information into the given message.
/// </summary>
/// <param name="message">The message to merge the JSON information into.</param>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
/// <exception cref="InvalidProtocolBufferException">Further tokens follow the parsed value.</exception>
internal void Merge(IMessage message, TextReader jsonReader)
{
    var tokenizer = JsonTokenizer.FromTextReader(jsonReader);
    Merge(message, tokenizer);
    // The message must be the only value in the document.
    var lastToken = tokenizer.Next();
    if (lastToken != JsonToken.EndDocument)
    {
        throw new InvalidProtocolBufferException("Expected end of JSON after object");
    }
}

/// <summary>
/// Merges the given message using data from the given tokenizer. In most cases, the next
/// token should be a "start object" token, but wrapper types and nullity can invalidate
/// that assumption. This is implemented as an LL(1) recursive descent parser over the stream
/// of tokens provided by the tokenizer.
/// This token stream is assumed to be valid JSON, with the
/// tokenizer performing that validation - but not every token stream is valid "protobuf JSON".
/// </summary>
private void Merge(IMessage message, JsonTokenizer tokenizer)
{
    if (tokenizer.ObjectDepth > settings.RecursionLimit)
    {
        throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
    }

    var descriptor = message.Descriptor;
    // Well-known types with a registered handler are parsed entirely by that handler;
    // those without one continue in the normal way.
    if (descriptor.IsWellKnownType &&
        WellKnownTypeHandlers.TryGetValue(descriptor.FullName, out Action<JsonParser, IMessage, JsonTokenizer> handler))
    {
        handler(this, message, tokenizer);
        return;
    }

    var token = tokenizer.Next();
    if (token.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected an object");
    }

    var jsonFieldMap = descriptor.Fields.ByJsonName();
    // Oneofs already populated by this object - each may only be seen once. Allocated lazily,
    // as oneofs are relatively rare and most messages never need the set.
    HashSet<OneofDescriptor> seenOneofs = null;
    for (token = tokenizer.Next(); token.Type != JsonToken.TokenType.EndObject; token = tokenizer.Next())
    {
        if (token.Type != JsonToken.TokenType.Name)
        {
            throw new InvalidOperationException("Unexpected token type " + token.Type);
        }

        string name = token.StringValue;
        if (!jsonFieldMap.TryGetValue(name, out FieldDescriptor field))
        {
            if (!settings.IgnoreUnknownFields)
            {
                throw new InvalidProtocolBufferException("Unknown field: " + name);
            }
            tokenizer.SkipValue();
            continue;
        }

        var oneof = field.ContainingOneof;
        if (oneof != null)
        {
            seenOneofs ??= new HashSet<OneofDescriptor>();
            if (!seenOneofs.Add(oneof))
            {
                throw new InvalidProtocolBufferException($"Multiple values specified for oneof {oneof.Name}");
            }
        }
        MergeField(message, field, tokenizer);
    }
}

// Merges the next JSON value from the tokenizer into a single field of the message,
// dispatching on whether the field is a map, a repeated field or a singular field.
private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    var token = tokenizer.Next();
    // Clear the field if we see a null token, unless it's for a singular field of type
    // google.protobuf.Value or google.protobuf.NullValue.
    // Note: different from Java API, which just ignores it.
    // TODO: Bring it more in line? Discuss...
    if (token.Type == JsonToken.TokenType.Null &&
        (field.IsMap || field.IsRepeated ||
         !(IsGoogleProtobufValueField(field) || IsGoogleProtobufNullValueField(field))))
    {
        field.Accessor.Clear(message);
        return;
    }
    tokenizer.PushBack(token);

    if (field.IsMap)
    {
        MergeMapField(message, field, tokenizer);
        return;
    }
    if (field.IsRepeated)
    {
        MergeRepeatedField(message, field, tokenizer);
        return;
    }
    // An ignorable parse failure leaves the field untouched.
    if (TryParseSingleValue(field, tokenizer, out var value))
    {
        field.Accessor.SetValue(message, value);
    }
}

// Reads a JSON array and appends each successfully parsed element to the repeated field's list.
private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    var opening = tokenizer.Next();
    if (opening.Type != JsonToken.TokenType.StartArray)
    {
        throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + opening.Type);
    }

    IList list = (IList) field.Accessor.GetValue(message);
    for (var next = tokenizer.Next(); next.Type != JsonToken.TokenType.EndArray; next = tokenizer.Next())
    {
        // The token was only peeked to detect the end of the array; the value parser needs it back.
        tokenizer.PushBack(next);
        if (!TryParseSingleValue(field, tokenizer, out object element))
        {
            continue;
        }
        if (element == null)
        {
            throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
        }
        list.Add(element);
    }
}

// Reads a JSON object and stores each name/value pair in the map field's dictionary.
private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those.
    var opening = tokenizer.Next();
    if (opening.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected an object to populate a map");
    }

    // The map entry message type carries the key as field 1 and the value as field 2.
    var entryType = field.MessageType;
    var keyField = entryType.FindFieldByNumber(1);
    var valueField = entryType.FindFieldByNumber(2);
    if (keyField == null || valueField == null)
    {
        throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
    }
    IDictionary dictionary = (IDictionary) field.Accessor.GetValue(message);

    for (var next = tokenizer.Next(); next.Type != JsonToken.TokenType.EndObject; next = tokenizer.Next())
    {
        object key = ParseMapKey(keyField, next.StringValue);
        if (!TryParseSingleValue(valueField, tokenizer, out object value))
        {
            continue;
        }
        if (value == null)
        {
            throw new InvalidProtocolBufferException("Map values must not be null");
        }
        dictionary[key] = value;
    }
}

// True when the field is a message field of type google.protobuf.Value.
private static bool IsGoogleProtobufValueField(FieldDescriptor field) =>
    field.FieldType == FieldType.Message &&
    field.MessageType.FullName == Value.Descriptor.FullName;

// True when the field is an enum field of type google.protobuf.NullValue.
private static bool IsGoogleProtobufNullValueField(FieldDescriptor field) =>
    field.FieldType == FieldType.Enum &&
    field.EnumType.FullName == NullValueDescriptor.FullName;

/// <summary>
/// Attempts to parse a single value from the JSON. When the value is completely invalid,
/// this will still throw an exception; when it's "conditionally invalid" (currently meaning
/// "when there's an unknown enum string value") the method returns false instead.
/// </summary>
/// <returns>
/// true if the value was parsed successfully; false for an ignorable parse failure.
/// </returns>
private bool TryParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer, out object value)
{
    var token = tokenizer.Next();
    if (token.Type == JsonToken.TokenType.Null)
    {
        // TODO: In order to support dynamic messages, we should really build this up
        // dynamically.
        value = IsGoogleProtobufValueField(field) ? Value.ForNull()
            : IsGoogleProtobufNullValueField(field) ? (object) NullValue.NullValue
            : null;
        return true;
    }

    var fieldType = field.FieldType;
    if (fieldType == FieldType.Message || fieldType == FieldType.Group)
    {
        if (!field.MessageType.IsWrapperType)
        {
            // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
            tokenizer.PushBack(token);
            IMessage subMessage = NewMessageForField(field);
            Merge(subMessage, tokenizer);
            value = subMessage;
            return true;
        }

        // Parse wrapper types as their constituent types.
        // TODO: What does this mean for null?
        field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
        fieldType = field.FieldType;
    }

    switch (token.Type)
    {
        // Boolean tokens are only valid for bool fields; otherwise they fall to the default case.
        case JsonToken.TokenType.True when fieldType == FieldType.Bool:
            value = true;
            return true;
        case JsonToken.TokenType.False when fieldType == FieldType.Bool:
            value = false;
            return true;
        case JsonToken.TokenType.StringValue when fieldType == FieldType.Enum:
            return TryParseEnumStringValue(field, token.StringValue, out value);
        case JsonToken.TokenType.StringValue:
            value = ParseSingleStringValue(field, token.StringValue);
            return true;
        // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
        case JsonToken.TokenType.Number:
            value = ParseSingleNumberValue(field, token);
            return true;
        default:
            throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType);
    }
}

/// <summary>
/// Parses <paramref name="json"/> into a new message.
/// </summary>
/// <typeparam name="T">The type of message to create.</typeparam>
/// <param name="json">The JSON to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(string json) where T : IMessage, new()
{
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    var reader = new StringReader(json);
    return Parse<T>(reader);
}

/// <summary>
/// Parses JSON read from <paramref name="jsonReader"/> into a new message.
/// </summary>
/// <typeparam name="T">The type of message to create.</typeparam>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
{
    ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
    var result = new T();
    Merge(result, jsonReader);
    return result;
}

/// <summary>
/// Parses <paramref name="json"/> into a new message.
/// </summary>
/// <param name="json">The JSON to parse.</param>
/// <param name="descriptor">Descriptor of message type to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(string json, MessageDescriptor descriptor)
{
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
    var reader = new StringReader(json);
    return Parse(reader, descriptor);
}

/// <summary>
/// Parses JSON read from <paramref name="jsonReader"/> into a new message.
/// </summary>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
/// <param name="descriptor">Descriptor of message type to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
{
    ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
    ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
    var result = descriptor.Parser.CreateTemplate();
    Merge(result, jsonReader);
    return result;
}

// Merges a google.protobuf.Value: the type of the first token selects which field of the
// Value message is populated.
private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
{
    var firstToken = tokenizer.Next();
    var fields = message.Descriptor.Fields;

    // Objects and arrays are parsed into a fresh nested message, which needs to see the
    // opening token again.
    void MergeNested(FieldDescriptor field)
    {
        var nested = NewMessageForField(field);
        tokenizer.PushBack(firstToken);
        Merge(nested, tokenizer);
        field.Accessor.SetValue(message, nested);
    }

    switch (firstToken.Type)
    {
        case JsonToken.TokenType.Null:
            fields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0);
            return;
        case JsonToken.TokenType.StringValue:
            fields[Value.StringValueFieldNumber].Accessor.SetValue(message, firstToken.StringValue);
            return;
        case JsonToken.TokenType.Number:
            fields[Value.NumberValueFieldNumber].Accessor.SetValue(message, firstToken.NumberValue);
            return;
        case JsonToken.TokenType.False:
        case JsonToken.TokenType.True:
            fields[Value.BoolValueFieldNumber].Accessor.SetValue(message, firstToken.Type == JsonToken.TokenType.True);
            return;
        case JsonToken.TokenType.StartObject:
            MergeNested(fields[Value.StructValueFieldNumber]);
            return;
        case JsonToken.TokenType.StartArray:
            MergeNested(fields[Value.ListValueFieldNumber]);
            return;
        default:
            throw new InvalidOperationException("Unexpected token type: " + firstToken.Type);
    }
}

// Merges a google.protobuf.Struct: a JSON object parsed as the map held in its "fields" field.
private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
{
    var opening = tokenizer.Next();
    if (opening.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected object value for Struct");
    }
    // MergeMapField consumes the start-object token itself.
    tokenizer.PushBack(opening);

    var mapField = message.Descriptor.Fields[Struct.FieldsFieldNumber];
    MergeMapField(message, mapField, tokenizer);
}

// Merges a google.protobuf.Any: the embedded message is parsed according to its @type
// property and then stored in the Any in serialized form.
private void MergeAny(IMessage message, JsonTokenizer tokenizer)
{
    // Record the token stream until we see the @type property. At that point, we can take the value, consult
    // the type registry for the relevant message, and replay the stream, omitting the @type property.
    var recorded = new List<JsonToken>();

    var current = tokenizer.Next();
    if (current.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected object value for Any");
    }
    int typeUrlObjectDepth = tokenizer.ObjectDepth;

    while (true)
    {
        // The check for the property depth protects us from nested Any values which occur before the type URL
        // for *this* Any.
        bool atTypeUrl = current.Type == JsonToken.TokenType.Name &&
            current.StringValue == JsonFormatter.AnyTypeUrlField &&
            tokenizer.ObjectDepth == typeUrlObjectDepth;
        if (atTypeUrl)
        {
            break;
        }

        recorded.Add(current);
        current = tokenizer.Next();

        if (tokenizer.ObjectDepth < typeUrlObjectDepth)
        {
            throw new InvalidProtocolBufferException("Any message with no @type");
        }
    }

    // Don't add the @type property or its value to the recorded token list
    current = tokenizer.Next();
    if (current.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Any.@type");
    }
    string typeUrl = current.StringValue;
    string typeName = Any.GetTypeName(typeUrl);

    MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName);
    if (descriptor == null)
    {
        throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
    }

    // Now replay the token stream we've already read and anything that remains of the object, just parsing it
    // as normal. Our original tokenizer should end up at the end of the object.
    var replay = JsonTokenizer.FromReplayedTokens(recorded, tokenizer);
    var body = descriptor.Parser.CreateTemplate();
    if (descriptor.IsWellKnownType)
    {
        MergeWellKnownTypeAnyBody(body, replay);
    }
    else
    {
        Merge(body, replay);
    }
    var data = body.ToByteString();

    // Now that we have the message data, we can pack it into an Any (the message received as a parameter).
    var anyFields = message.Descriptor.Fields;
    anyFields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
    anyFields[Any.ValueFieldNumber].Accessor.SetValue(message, data);
}

// Well-known types end up in a property called "value" in the JSON. As there's no longer a @type property
// in the given JSON token stream, we should *only* have tokens of start-object, name("value"), the value
// itself, and then end-object.
private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
{
    tokenizer.Next(); // Definitely start-object; checked in previous method
    var nameToken = tokenizer.Next();
    // TODO: What about an absent Int32Value, for example?
    bool isValueProperty = nameToken.Type == JsonToken.TokenType.Name &&
        nameToken.StringValue == JsonFormatter.AnyWellKnownTypeValueField;
    if (!isValueProperty)
    {
        throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
    }

    Merge(body, tokenizer);

    var closing = tokenizer.Next();
    if (closing.Type != JsonToken.TokenType.EndObject)
    {
        throw new InvalidProtocolBufferException("Expected end-object token after @type/value for well-known type");
    }
}

#region Utility methods which don't depend on the state (or settings) of the parser.

// Converts the text of a JSON property name into a map key of the type required by the
// map's key field. Only bool, string and integer key types are accepted.
private static object ParseMapKey(FieldDescriptor field, string keyText)
{
    switch (field.FieldType)
    {
        case FieldType.Bool:
            // Only the exact lower-case literals are accepted.
            return keyText switch
            {
                "true" => true,
                "false" => false,
                _ => throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText)
            };
        case FieldType.String:
            return keyText;
        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString(keyText, int.Parse);
        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString(keyText, uint.Parse);
        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString(keyText, long.Parse);
        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString(keyText, ulong.Parse);
        default:
            throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
    }
}

// Converts a JSON number token into the CLR value required by the field's type.
// Integer targets require a finite, integral value; values which are out of range for the
// target type are reported as InvalidProtocolBufferException.
private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
{
    double value = token.NumberValue;
    // The checked context makes out-of-range double-to-integer casts throw OverflowException,
    // which is translated into a parse failure below.
    checked
    {
        try
        {
            switch (field.FieldType)
            {
                case FieldType.Int32:
                case FieldType.SInt32:
                case FieldType.SFixed32:
                    CheckInteger(value);
                    return (int) value;
                case FieldType.UInt32:
                case FieldType.Fixed32:
                    CheckInteger(value);
                    return (uint) value;
                case FieldType.Int64:
                case FieldType.SInt64:
                case FieldType.SFixed64:
                    CheckInteger(value);
                    return (long) value;
                case FieldType.UInt64:
                case FieldType.Fixed64:
                    CheckInteger(value);
                    return (ulong) value;
                case FieldType.Double:
                    return value;
                case FieldType.Float:
                    if (double.IsNaN(value))
                    {
                        return float.NaN;
                    }
                    float converted = (float) value;
                    // If the value is out of range of float, the cast representation will be infinite.
                    // If the original value was infinite as well, that's fine - we'll return the 32-bit
                    // version (with the correct sign).
                    if (float.IsInfinity(converted) && !double.IsInfinity(value))
                    {
                        throw new InvalidProtocolBufferException($"Value out of range: {value}");
                    }
                    return converted;
                case FieldType.Enum:
                    CheckInteger(value);
                    // Just return it as an int, and let the CLR convert it.
                    // Note that we deliberately don't check that it's a known value.
                    return (int) value;
                default:
                    throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
            }
        }
        catch (OverflowException)
        {
            throw new InvalidProtocolBufferException($"Value out of range: {value}");
        }
    }
}

// Throws InvalidProtocolBufferException unless the value is finite and has no fractional part.
private static void CheckInteger(double value)
{
    if (double.IsInfinity(value) || double.IsNaN(value))
    {
        throw new InvalidProtocolBufferException($"Value not an integer: {value}");
    }
    if (value != Math.Floor(value))
    {
        throw new InvalidProtocolBufferException($"Value not an integer: {value}");
    }
}

// Converts a JSON string token into the CLR value required by the field's type.
// Enum fields are not handled here: callers must use TryParseEnumStringValue for those.
private static object ParseSingleStringValue(FieldDescriptor field, string text)
{
    switch (field.FieldType)
    {
        case FieldType.String:
            return text;
        case FieldType.Bytes:
            try
            {
                return ByteString.FromBase64(text);
            }
            catch (FormatException e)
            {
                throw InvalidProtocolBufferException.InvalidBase64(e);
            }
        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString(text, int.Parse);
        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString(text, uint.Parse);
        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString(text, long.Parse);
        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString(text, ulong.Parse);
        case FieldType.Double:
            double d = ParseNumericString(text, double.Parse);
            ValidateInfinityAndNan(text, double.IsPositiveInfinity(d), double.IsNegativeInfinity(d), double.IsNaN(d));
            return d;
        case FieldType.Float:
            float f = ParseNumericString(text, float.Parse);
            ValidateInfinityAndNan(text, float.IsPositiveInfinity(f), float.IsNegativeInfinity(f), float.IsNaN(f));
            return f;
        case FieldType.Enum:
            throw new InvalidOperationException("Use TryParseEnumStringValue for enums");
        default:
            throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
    }
}

// Looks up an enum value by name. An unknown name is an ignorable failure (returns false)
// when the settings ignore unknown fields, and an InvalidProtocolBufferException otherwise.
private bool TryParseEnumStringValue(FieldDescriptor field, string text, out object value)
{
    var enumValue = field.EnumType.FindValueByName(text);
    if (enumValue == null)
    {
        if (settings.IgnoreUnknownFields)
        {
            value = null;
            return false;
        }
        else
        {
            throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
        }
    }
    // Just return it as an int, and let the CLR convert it.
    value = enumValue.Number;
    return true;
}

/// <summary>
/// Creates a new instance of the message type for the given field.
/// </summary>
private static IMessage NewMessageForField(FieldDescriptor field)
{
    return field.MessageType.Parser.CreateTemplate();
}

// Parses numeric text with the supplied BCL parser using the invariant culture, first rejecting
// forms which the BCL accepts but this parser does not: a leading '+', and a leading zero
// (optionally after '-') followed by another digit. Format and overflow failures from the BCL
// parser are reported as InvalidProtocolBufferException.
private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
{
    // The prefix checks below are protocol-level, not linguistic, so they use ordinal comparison:
    // the single-argument StartsWith overload compares using the current culture.
    // Can't prohibit this with NumberStyles.
    if (text.StartsWith("+", StringComparison.Ordinal))
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }
    if (text.StartsWith("0", StringComparison.Ordinal) && text.Length > 1)
    {
        if (text[1] >= '0' && text[1] <= '9')
        {
            throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
        }
    }
    else if (text.StartsWith("-0", StringComparison.Ordinal) && text.Length > 2)
    {
        if (text[2] >= '0' && text[2] <= '9')
        {
            throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
        }
    }
    try
    {
        return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
    }
    catch (OverflowException)
    {
        throw new InvalidProtocolBufferException($"Value out of range: {text}");
    }
}

/// <summary>
/// Checks that any infinite/NaN values originated from the correct text.
/// This corrects the lenient whitespace handling of double.Parse/float.Parse, as well as the
/// way that Mono parses out-of-range values as infinity.
/// </summary>
private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
{
    // Each special value is only accepted when spelled exactly as shown.
    if ((isPositiveInfinity && text != "Infinity") ||
        (isNegativeInfinity && text != "-Infinity") ||
        (isNaN && text != "NaN"))
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }
}

// Merges a google.protobuf.Timestamp from its JSON string form: the text is validated against
// TimestampRegex, the date/time part is parsed as UTC, and then the fractional seconds and the
// UTC offset are applied before the seconds and nanos fields are set.
private static void MergeTimestamp(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Timestamp");
    }
    var match = TimestampRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
    }
    var dateTime = match.Groups["datetime"].Value;
    var subseconds = match.Groups["subseconds"].Value;
    var offset = match.Groups["offset"].Value;

    try
    {
        DateTime parsed = DateTime.ParseExact(
            dateTime,
            "yyyy-MM-dd'T'HH:mm:ss",
            CultureInfo.InvariantCulture,
            DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
        // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
        Timestamp timestamp = Timestamp.FromDateTime(parsed);
        int nanosToAdd = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            // The scaling table is indexed by the match length, which includes the leading '.'.
            nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
        }
        int secondsToAdd = 0;
        if (offset != "Z")
        {
            // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
            int sign = offset[0] == '-' ? 1 : -1;
            int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
            // Parsed with the invariant culture, consistently with every other numeric parse in this method.
            int minutes = int.Parse(offset.Substring(4, 2), CultureInfo.InvariantCulture);
            int totalMinutes = hours * 60 + minutes;
            if (totalMinutes > 18 * 60)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            if (totalMinutes == 0 && sign == 1)
            {
                // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            // We need to *subtract* the offset from local time to get UTC.
            secondsToAdd = sign * totalMinutes * 60;
        }
        // Ensure we've got the right signs. Currently unnecessary, but easy to do.
        if (secondsToAdd < 0 && nanosToAdd > 0)
        {
            secondsToAdd++;
            nanosToAdd -= Duration.NanosecondsPerSecond;
        }
        if (secondsToAdd != 0 || nanosToAdd != 0)
        {
            timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
            // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
            // anywhere, but we shouldn't parse it.
            if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
        }
        message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
        message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
    }
}

private static void MergeDuration(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Duration");
    }
    var match = DurationRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var sign = match.Groups["sign"].Value;
    var secondsText = match.Groups["int"].Value;
    // Prohibit leading insignificant zeroes
    if (secondsText[0] == '0' && secondsText.Length > 1)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var subseconds = match.Groups["subseconds"].Value;
    var multiplier = sign == "-" ? -1 : 1;

    try
    {
        long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
        int nanos = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
912 int parsedFraction = int.Parse(subseconds.Substring(1)); 913 nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier; 914 } 915 if (!Duration.IsNormalized(seconds, nanos)) 916 { 917 throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}"); 918 } 919 message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds); 920 message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos); 921 } 922 catch (FormatException) 923 { 924 throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}"); 925 } 926 } 927 MergeFieldMask(IMessage message, JsonToken token)928 private static void MergeFieldMask(IMessage message, JsonToken token) 929 { 930 if (token.Type != JsonToken.TokenType.StringValue) 931 { 932 throw new InvalidProtocolBufferException("Expected string value for FieldMask"); 933 } 934 // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"? 935 string[] jsonPaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries); 936 IList messagePaths = (IList) message.Descriptor.Fields[FieldMask.PathsFieldNumber].Accessor.GetValue(message); 937 foreach (var path in jsonPaths) 938 { 939 messagePaths.Add(ToSnakeCase(path)); 940 } 941 } 942 943 // Ported from src/google/protobuf/util/internal/utility.cc ToSnakeCase(string text)944 private static string ToSnakeCase(string text) 945 { 946 var builder = new StringBuilder(text.Length * 2); 947 // Note: this is probably unnecessary now, but currently retained to be as close as possible to the 948 // C++, whilst still throwing an exception on underscores. 
949 bool wasNotUnderscore = false; // Initialize to false for case 1 (below) 950 bool wasNotCap = false; 951 952 for (int i = 0; i < text.Length; i++) 953 { 954 char c = text[i]; 955 if (c >= 'A' && c <= 'Z') // ascii_isupper 956 { 957 // Consider when the current character B is capitalized: 958 // 1) At beginning of input: "B..." => "b..." 959 // (e.g. "Biscuit" => "biscuit") 960 // 2) Following a lowercase: "...aB..." => "...a_b..." 961 // (e.g. "gBike" => "g_bike") 962 // 3) At the end of input: "...AB" => "...ab" 963 // (e.g. "GoogleLAB" => "google_lab") 964 // 4) Followed by a lowercase: "...ABc..." => "...a_bc..." 965 // (e.g. "GBike" => "g_bike") 966 if (wasNotUnderscore && // case 1 out 967 (wasNotCap || // case 2 in, case 3 out 968 (i + 1 < text.Length && // case 3 out 969 (text[i + 1] >= 'a' && text[i + 1] <= 'z')))) // ascii_islower(text[i + 1]) 970 { // case 4 in 971 // We add an underscore for case 2 and case 4. 972 builder.Append('_'); 973 } 974 // ascii_tolower, but we already know that c *is* an upper case ASCII character... 975 builder.Append((char) (c + 'a' - 'A')); 976 wasNotUnderscore = true; 977 wasNotCap = false; 978 } 979 else 980 { 981 builder.Append(c); 982 if (c == '_') 983 { 984 throw new InvalidProtocolBufferException($"Invalid field mask: {text}"); 985 } 986 wasNotUnderscore = true; 987 wasNotCap = true; 988 } 989 } 990 return builder.ToString(); 991 } 992 #endregion 993 994 /// <summary> 995 /// Settings controlling JSON parsing. 996 /// </summary> 997 public sealed class Settings 998 { 999 /// <summary> 1000 /// Default settings, as used by <see cref="JsonParser.Default"/>. This has the same default 1001 /// recursion limit as <see cref="CodedInputStream"/>, and an empty type registry. 1002 /// </summary> 1003 public static Settings Default { get; } 1004 1005 // Workaround for the Mono compiler complaining about XML comments not being on 1006 // valid language elements. 
Settings()1007 static Settings() 1008 { 1009 Default = new Settings(CodedInputStream.DefaultRecursionLimit); 1010 } 1011 1012 /// <summary> 1013 /// The maximum depth of messages to parse. Note that this limit only applies to parsing 1014 /// messages, not collections - so a message within a collection within a message only counts as 1015 /// depth 2, not 3. 1016 /// </summary> 1017 public int RecursionLimit { get; } 1018 1019 /// <summary> 1020 /// The type registry used to parse <see cref="Any"/> messages. 1021 /// </summary> 1022 public TypeRegistry TypeRegistry { get; } 1023 1024 /// <summary> 1025 /// Whether the parser should ignore unknown fields (<c>true</c>) or throw an exception when 1026 /// they are encountered (<c>false</c>). 1027 /// </summary> 1028 public bool IgnoreUnknownFields { get; } 1029 Settings(int recursionLimit, TypeRegistry typeRegistry, bool ignoreUnknownFields)1030 private Settings(int recursionLimit, TypeRegistry typeRegistry, bool ignoreUnknownFields) 1031 { 1032 RecursionLimit = recursionLimit; 1033 TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry)); 1034 IgnoreUnknownFields = ignoreUnknownFields; 1035 } 1036 1037 /// <summary> 1038 /// Creates a new <see cref="Settings"/> object with the specified recursion limit. 1039 /// </summary> 1040 /// <param name="recursionLimit">The maximum depth of messages to parse</param> Settings(int recursionLimit)1041 public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty) 1042 { 1043 } 1044 1045 /// <summary> 1046 /// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry. 
1047 /// </summary> 1048 /// <param name="recursionLimit">The maximum depth of messages to parse</param> 1049 /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages</param> Settings(int recursionLimit, TypeRegistry typeRegistry)1050 public Settings(int recursionLimit, TypeRegistry typeRegistry) : this(recursionLimit, typeRegistry, false) 1051 { 1052 } 1053 1054 /// <summary> 1055 /// Creates a new <see cref="Settings"/> object set to either ignore unknown fields, or throw an exception 1056 /// when unknown fields are encountered. 1057 /// </summary> 1058 /// <param name="ignoreUnknownFields"><c>true</c> if unknown fields should be ignored when parsing; <c>false</c> to throw an exception.</param> 1059 public Settings WithIgnoreUnknownFields(bool ignoreUnknownFields) => new(RecursionLimit, TypeRegistry, ignoreUnknownFields); 1060 1061 /// <summary> 1062 /// Creates a new <see cref="Settings"/> object based on this one, but with the specified recursion limit. 1063 /// </summary> 1064 /// <param name="recursionLimit">The new recursion limit.</param> WithRecursionLimit(int recursionLimit)1065 public Settings WithRecursionLimit(int recursionLimit) => new(recursionLimit, TypeRegistry, IgnoreUnknownFields); 1066 1067 /// <summary> 1068 /// Creates a new <see cref="Settings"/> object based on this one, but with the specified type registry. 1069 /// </summary> 1070 /// <param name="typeRegistry">The new type registry. Must not be null.</param> 1071 public Settings WithTypeRegistry(TypeRegistry typeRegistry) => 1072 new(RecursionLimit, 1073 ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry)), 1074 IgnoreUnknownFields); 1075 } 1076 } 1077 } 1078