1 #region Copyright notice and license 2 // Protocol Buffers - Google's data interchange format 3 // Copyright 2015 Google Inc. All rights reserved. 4 // https://developers.google.com/protocol-buffers/ 5 // 6 // Redistribution and use in source and binary forms, with or without 7 // modification, are permitted provided that the following conditions are 8 // met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above 13 // copyright notice, this list of conditions and the following disclaimer 14 // in the documentation and/or other materials provided with the 15 // distribution. 16 // * Neither the name of Google Inc. nor the names of its 17 // contributors may be used to endorse or promote products derived from 18 // this software without specific prior written permission. 19 // 20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 #endregion 32 33 using Google.Protobuf.Reflection; 34 using Google.Protobuf.WellKnownTypes; 35 using System; 36 using System.Collections; 37 using System.Collections.Generic; 38 using System.Globalization; 39 using System.IO; 40 using System.Linq; 41 using System.Text; 42 using System.Text.RegularExpressions; 43 44 namespace Google.Protobuf 45 { 46 /// <summary> 47 /// Reflection-based converter from JSON to messages. 48 /// </summary> 49 /// <remarks> 50 /// <para> 51 /// Instances of this class are thread-safe, with no mutable state. 52 /// </para> 53 /// <para> 54 /// This is a simple start to get JSON parsing working. As it's reflection-based, 55 /// it's not as quick as baking calls into generated messages - but is a simpler implementation. 56 /// (This code is generally not heavily optimized.) 57 /// </para> 58 /// </remarks> 59 public sealed class JsonParser 60 { 61 // Note: using 0-9 instead of \d to ensure no non-ASCII digits. 62 // This regex isn't a complete validator, but will remove *most* invalid input. We rely on parsing to do the rest. 
// Captures: "datetime" (second precision), optional "subseconds" ('.' followed by 1-9 digits)
// and "offset" (either 'Z' or a signed hh:mm value).
private static readonly Regex TimestampRegex = new Regex(
    @"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$",
    FrameworkPortability.CompiledRegexWhereAvailable);

// Captures: optional "sign" ('-'), "int" (1-12 digits), optional "subseconds" ('.' followed by
// 1-9 digits); the text must end with a literal 's'.
private static readonly Regex DurationRegex = new Regex(
    @"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$",
    FrameworkPortability.CompiledRegexWhereAvailable);

// Multipliers converting a parsed fraction to nanoseconds. MergeTimestamp indexes this by the
// length of the "subseconds" capture, which includes the leading '.'.
private static readonly int[] SubsecondScalingFactors =
{
    0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1
};

private static readonly char[] FieldMaskPathSeparators = { ',' };

// Descriptor of the google.protobuf.NullValue enum, located through its CLR type.
private static readonly EnumDescriptor NullValueDescriptor =
    StructReflection.Descriptor.EnumTypes.Single(candidate => candidate.ClrType == typeof(NullValue));

private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);

// TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth.
// That would simplify these handlers and the signatures of various methods.
//
// Maps the full name of a well-known type to the routine that merges its JSON form;
// Merge(IMessage, JsonTokenizer) consults this table before falling back to plain object parsing.
private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>> WellKnownTypeHandlers =
    new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
    {
        [Timestamp.Descriptor.FullName] = (p, msg, tokens) => MergeTimestamp(msg, tokens.Next()),
        [Duration.Descriptor.FullName] = (p, msg, tokens) => MergeDuration(msg, tokens.Next()),
        [Value.Descriptor.FullName] = (p, msg, tokens) => p.MergeStructValue(msg, tokens),
        [ListValue.Descriptor.FullName] = (p, msg, tokens) =>
            p.MergeRepeatedField(msg, msg.Descriptor.Fields[ListValue.ValuesFieldNumber], tokens),
        [Struct.Descriptor.FullName] = (p, msg, tokens) => p.MergeStruct(msg, tokens),
        [Any.Descriptor.FullName] = (p, msg, tokens) => p.MergeAny(msg, tokens),
        [FieldMask.Descriptor.FullName] = (p, msg, tokens) => MergeFieldMask(msg, tokens.Next()),
        [Int32Value.Descriptor.FullName] = MergeWrapperField,
        [Int64Value.Descriptor.FullName] = MergeWrapperField,
        [UInt32Value.Descriptor.FullName] = MergeWrapperField,
        [UInt64Value.Descriptor.FullName] = MergeWrapperField,
        [FloatValue.Descriptor.FullName] = MergeWrapperField,
        [DoubleValue.Descriptor.FullName] = MergeWrapperField,
        [BytesValue.Descriptor.FullName] = MergeWrapperField,
        [StringValue.Descriptor.FullName] = MergeWrapperField,
        [BoolValue.Descriptor.FullName] = MergeWrapperField
    };

// Single handler shared by every wrapper-type entry in the table above, so the same code
// does not have to be repeated for each of them.
private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
{
    // The value field is looked up by WrappersReflection.WrapperValueFieldNumber and then merged
    // exactly like any other field of the message.
    parser.MergeField(message, message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber], tokenizer);
}

/// <summary>
/// Returns a parser using the default settings.
/// </summary>
public static JsonParser Default { get { return defaultInstance; } }

private readonly Settings settings;

/// <summary>
/// Creates a new parser with the given settings.
/// </summary>
/// <param name="settings">The settings; validated with <c>ProtoPreconditions.CheckNotNull</c>.</param>
public JsonParser(Settings settings)
{
    this.settings = ProtoPreconditions.CheckNotNull(settings, nameof(settings));
}

/// <summary>
/// Parses <paramref name="json"/> and merges the information into the given message.
/// </summary>
/// <param name="message">The message to merge the JSON information into.</param>
/// <param name="json">The JSON to parse; validated with <c>ProtoPreconditions.CheckNotNull</c>.</param>
internal void Merge(IMessage message, string json)
{
    // Validate here, as the public Parse overloads do, so a null argument is reported against
    // this method's own parameter name rather than from inside StringReader.
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    Merge(message, new StringReader(json));
}

/// <summary>
/// Parses JSON read from <paramref name="jsonReader"/> and merges the information into the given message.
/// </summary>
/// <param name="message">The message to merge the JSON information into.</param>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
internal void Merge(IMessage message, TextReader jsonReader)
{
    var source = JsonTokenizer.FromTextReader(jsonReader);
    Merge(message, source);

    // The top-level value has to be the only thing in the document.
    if (source.Next() != JsonToken.EndDocument)
    {
        throw new InvalidProtocolBufferException("Expected end of JSON after object");
    }
}

/// <summary>
/// Populates <paramref name="message"/> from the tokens supplied by <paramref name="tokenizer"/>.
/// Usually the next token opens an object, but well-known types that have an entry in
/// <c>WellKnownTypeHandlers</c> are handed to their handler instead, which may accept other
/// token shapes. Field names are resolved through the descriptor's JSON-name map; an unknown
/// name is skipped or rejected depending on <c>settings.IgnoreUnknownFields</c>.
/// </summary>
/// <param name="message">The message being populated.</param>
/// <param name="tokenizer">Token source positioned at the start of the message's JSON value.</param>
private void Merge(IMessage message, JsonTokenizer tokenizer)
{
    if (tokenizer.ObjectDepth > settings.RecursionLimit)
    {
        throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
    }

    var descriptor = message.Descriptor;

    // Well-known types without a registered handler carry on through the generic path below.
    Action<JsonParser, IMessage, JsonTokenizer> wellKnownHandler;
    if (descriptor.IsWellKnownType &&
        WellKnownTypeHandlers.TryGetValue(descriptor.FullName, out wellKnownHandler))
    {
        wellKnownHandler(this, message, tokenizer);
        return;
    }

    if (tokenizer.Next().Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected an object");
    }

    var fieldsByJsonName = descriptor.Fields.ByJsonName();

    // Oneofs that already received a value; each may be set at most once per object.
    // Allocated on first use because most messages have no oneofs.
    HashSet<OneofDescriptor> oneofsSeen = null;

    for (var current = tokenizer.Next();
         current.Type != JsonToken.TokenType.EndObject;
         current = tokenizer.Next())
    {
        if (current.Type != JsonToken.TokenType.Name)
        {
            throw new InvalidOperationException("Unexpected token type " + current.Type);
        }

        string jsonName = current.StringValue;
        FieldDescriptor target;
        if (!fieldsByJsonName.TryGetValue(jsonName, out target))
        {
            if (!settings.IgnoreUnknownFields)
            {
                throw new InvalidProtocolBufferException("Unknown field: " + jsonName);
            }
            tokenizer.SkipValue();
            continue;
        }

        var oneof = target.ContainingOneof;
        if (oneof != null)
        {
            if (oneofsSeen == null)
            {
                oneofsSeen = new HashSet<OneofDescriptor>();
            }
            if (!oneofsSeen.Add(oneof))
            {
                throw new InvalidProtocolBufferException($"Multiple values specified for oneof {oneof.Name}");
            }
        }

        MergeField(message, target, tokenizer);
    }
}

/// <summary>
/// Reads the JSON value for <paramref name="field"/> and stores it in <paramref name="message"/>,
/// dispatching on whether the field is a map, a repeated field or a singular field.
/// </summary>
private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    var lookahead = tokenizer.Next();
    if (lookahead.Type == JsonToken.TokenType.Null)
    {
        // A JSON null clears the field, except for singular google.protobuf.Value and
        // google.protobuf.NullValue fields, where null is itself a value to be stored.
        // Note: different from Java API, which just ignores it.
        // TODO: Bring it more in line? Discuss...
        bool nullIsAValue = !field.IsMap && !field.IsRepeated &&
            (IsGoogleProtobufValueField(field) || IsGoogleProtobufNullValueField(field));
        if (!nullIsAValue)
        {
            field.Accessor.Clear(message);
            return;
        }
    }
    tokenizer.PushBack(lookahead);

    if (field.IsMap)
    {
        MergeMapField(message, field, tokenizer);
        return;
    }
    if (field.IsRepeated)
    {
        MergeRepeatedField(message, field, tokenizer);
        return;
    }

    var parsed = ParseSingleValue(field, tokenizer);
    field.Accessor.SetValue(message, parsed);
}

/// <summary>
/// Reads a JSON array and appends each element to the list held by <paramref name="field"/>.
/// Null elements are rejected.
/// </summary>
private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    var opening = tokenizer.Next();
    if (opening.Type != JsonToken.TokenType.StartArray)
    {
        throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + opening.Type);
    }

    var elements = (IList) field.Accessor.GetValue(message);
    for (var next = tokenizer.Next();
         next.Type != JsonToken.TokenType.EndArray;
         next = tokenizer.Next())
    {
        // The token peeked at belongs to the element; hand it back before parsing.
        tokenizer.PushBack(next);
        object element = ParseSingleValue(field, tokenizer);
        if (element == null)
        {
            throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
        }
        elements.Add(element);
    }
}

/// <summary>
/// Reads a JSON object and stores each property in the dictionary held by <paramref name="field"/>.
/// Keys are converted by <c>ParseMapKey</c>; null values are rejected.
/// </summary>
private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those.
    if (tokenizer.Next().Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected an object to populate a map");
    }

    // The map entry type is expected to expose its key as field 1 and its value as field 2.
    var entryType = field.MessageType;
    var keyField = entryType.FindFieldByNumber(1);
    var valueField = entryType.FindFieldByNumber(2);
    if (keyField == null || valueField == null)
    {
        throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
    }

    var entries = (IDictionary) field.Accessor.GetValue(message);
    for (var entry = tokenizer.Next();
         entry.Type != JsonToken.TokenType.EndObject;
         entry = tokenizer.Next())
    {
        object key = ParseMapKey(keyField, entry.StringValue);
        object mapped = ParseSingleValue(valueField, tokenizer);
        if (mapped == null)
        {
            throw new InvalidProtocolBufferException("Map values must not be null");
        }
        entries[key] = mapped;
    }
}

// True when the field is a message field whose type is google.protobuf.Value.
private static bool IsGoogleProtobufValueField(FieldDescriptor field) =>
    field.FieldType == FieldType.Message &&
    field.MessageType.FullName == Value.Descriptor.FullName;

// True when the field is an enum field whose type is google.protobuf.NullValue.
private static bool IsGoogleProtobufNullValueField(FieldDescriptor field) =>
    field.FieldType == FieldType.Enum &&
    field.EnumType.FullName == NullValueDescriptor.FullName;

/// <summary>
/// Parses one value for <paramref name="field"/>: a map value, a repeated element or a singular
/// field value. Returns null for a JSON null unless the field is a google.protobuf.Value or
/// google.protobuf.NullValue field, which have their own representation of null.
/// </summary>
private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)
{
    var token = tokenizer.Next();
    if (token.Type == JsonToken.TokenType.Null)
    {
        // TODO: In order to support dynamic messages, we should really build this up
        // dynamically.
        if (IsGoogleProtobufValueField(field))
        {
            return Value.ForNull();
        }
        if (IsGoogleProtobufNullValueField(field))
        {
            return NullValue.NullValue;
        }
        return null;
    }

    var fieldType = field.FieldType;
    if (fieldType == FieldType.Message)
    {
        if (!field.MessageType.IsWrapperType)
        {
            // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
            tokenizer.PushBack(token);
            IMessage nested = NewMessageForField(field);
            Merge(nested, tokenizer);
            return nested;
        }

        // Parse wrapper types as their constituent types.
        // TODO: What does this mean for null?
        field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
        fieldType = field.FieldType;
    }

    var kind = token.Type;
    bool isBoolLiteral = kind == JsonToken.TokenType.True || kind == JsonToken.TokenType.False;
    if (isBoolLiteral && fieldType == FieldType.Bool)
    {
        return kind == JsonToken.TokenType.True;
    }
    if (kind == JsonToken.TokenType.StringValue)
    {
        return ParseSingleStringValue(field, token.StringValue);
    }
    if (kind == JsonToken.TokenType.Number)
    {
        // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
        return ParseSingleNumberValue(field, token);
    }
    if (kind == JsonToken.TokenType.Null)
    {
        // Not reachable: null tokens have already returned at the top of this method.
        throw new NotImplementedException("Haven't worked out what to do for null yet");
    }

    // Everything else, including a boolean literal for a non-bool field, is unsupported.
    throw new InvalidProtocolBufferException("Unsupported JSON token type " + kind + " for field type " + fieldType);
}

/// <summary>
/// Parses <paramref name="json"/> into a new message.
/// </summary>
/// <typeparam name="T">The type of message to create.</typeparam>
/// <param name="json">The JSON to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(string json) where T : IMessage, new()
{
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    var reader = new StringReader(json);
    return Parse<T>(reader);
}

/// <summary>
/// Parses JSON read from <paramref name="jsonReader"/> into a new message.
/// </summary>
/// <typeparam name="T">The type of message to create.</typeparam>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
{
    ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));

    // Start from a freshly constructed message and merge the document into it.
    var result = new T();
    Merge(result, jsonReader);
    return result;
}

/// <summary>
/// Parses <paramref name="json"/> into a new message.
/// </summary>
/// <param name="json">The JSON to parse.</param>
/// <param name="descriptor">Descriptor of message type to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(string json, MessageDescriptor descriptor)
{
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
    var reader = new StringReader(json);
    return Parse(reader, descriptor);
}

/// <summary>
/// Parses JSON read from <paramref name="jsonReader"/> into a new message.
/// </summary>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
/// <param name="descriptor">Descriptor of message type to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
{
    ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
    ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));

    // The descriptor's parser supplies the empty instance that the document is merged into.
    IMessage result = descriptor.Parser.CreateTemplate();
    Merge(result, jsonReader);
    return result;
}

/// <summary>
/// Merges a JSON value of any shape into a google.protobuf.Value message: the type of the
/// first token decides which field of the Value is set.
/// </summary>
private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
{
    var first = tokenizer.Next();
    var valueFields = message.Descriptor.Fields;

    int fieldNumber;
    object payload;
    switch (first.Type)
    {
        case JsonToken.TokenType.Null:
            fieldNumber = Value.NullValueFieldNumber;
            payload = 0;
            break;
        case JsonToken.TokenType.StringValue:
            fieldNumber = Value.StringValueFieldNumber;
            payload = first.StringValue;
            break;
        case JsonToken.TokenType.Number:
            fieldNumber = Value.NumberValueFieldNumber;
            payload = first.NumberValue;
            break;
        case JsonToken.TokenType.False:
        case JsonToken.TokenType.True:
            fieldNumber = Value.BoolValueFieldNumber;
            payload = first.Type == JsonToken.TokenType.True;
            break;
        case JsonToken.TokenType.StartObject:
        case JsonToken.TokenType.StartArray:
            // Objects become a nested Struct and arrays a nested ListValue. Either way the
            // opening token is handed back so the nested merge sees the whole value.
            fieldNumber = first.Type == JsonToken.TokenType.StartObject
                ? Value.StructValueFieldNumber
                : Value.ListValueFieldNumber;
            var nested = NewMessageForField(valueFields[fieldNumber]);
            tokenizer.PushBack(first);
            Merge(nested, tokenizer);
            payload = nested;
            break;
        default:
            throw new InvalidOperationException("Unexpected token type: " + first.Type);
    }

    valueFields[fieldNumber].Accessor.SetValue(message, payload);
}

/// <summary>
/// Merges a JSON object into a google.protobuf.Struct by treating it as the message's map field.
/// </summary>
private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
{
    var opening = tokenizer.Next();
    if (opening.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected object value for Struct");
    }

    // MergeMapField consumes the start-object token itself, so give it back first.
    tokenizer.PushBack(opening);
    var fieldsMap = message.Descriptor.Fields[Struct.FieldsFieldNumber];
    MergeMapField(message, fieldsMap, tokenizer);
}

/// <summary>
/// Merges the JSON form of a google.protobuf.Any. Tokens are buffered until the type URL
/// property is found; its value selects a descriptor from the type registry, the buffered and
/// remaining tokens are replayed to parse the payload, and the payload is stored in serialized
/// form alongside the type URL.
/// </summary>
private void MergeAny(IMessage message, JsonTokenizer tokenizer)
{
    // Everything seen before the @type property; replayed once the payload type is known.
    var buffered = new List<JsonToken>();

    var current = tokenizer.Next();
    if (current.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected object value for Any");
    }
    int anyDepth = tokenizer.ObjectDepth;

    // The depth comparison stops a nested Any's type URL, appearing before the type URL of
    // *this* Any, from being mistaken for ours.
    while (!(current.Type == JsonToken.TokenType.Name &&
             current.StringValue == JsonFormatter.AnyTypeUrlField &&
             tokenizer.ObjectDepth == anyDepth))
    {
        buffered.Add(current);
        current = tokenizer.Next();

        if (tokenizer.ObjectDepth < anyDepth)
        {
            throw new InvalidProtocolBufferException("Any message with no @type");
        }
    }

    // Neither the @type name nor its value is added to the buffered tokens.
    current = tokenizer.Next();
    if (current.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Any.@type");
    }
    string typeUrl = current.StringValue;
    string typeName = Any.GetTypeName(typeUrl);

    MessageDescriptor payloadDescriptor = settings.TypeRegistry.Find(typeName);
    if (payloadDescriptor == null)
    {
        throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
    }

    // Replay what was buffered followed by whatever is left of the object, parsing it as
    // normal. The original tokenizer should end up at the end of the object.
    var replay = JsonTokenizer.FromReplayedTokens(buffered, tokenizer);
    var payload = payloadDescriptor.Parser.CreateTemplate();
    if (payloadDescriptor.IsWellKnownType)
    {
        MergeWellKnownTypeAnyBody(payload, replay);
    }
    else
    {
        Merge(payload, replay);
    }
    var serialized = payload.ToByteString();

    // Pack the result into the Any received as a parameter.
    var anyFields = message.Descriptor.Fields;
    anyFields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
    anyFields[Any.ValueFieldNumber].Accessor.SetValue(message, serialized);
}

// Well-known types end up in a property called "value" in the JSON. As there's no longer a @type property
// in the given JSON token stream, we should *only* have tokens of start-object, name("value"), the value
// itself, and then end-object.
private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
{
    // Discard the start-object token; MergeAny has already verified it.
    tokenizer.Next();

    // TODO: What about an absent Int32Value, for example?
    var nameToken = tokenizer.Next();
    bool isValueProperty = nameToken.Type == JsonToken.TokenType.Name &&
        nameToken.StringValue == JsonFormatter.AnyWellKnownTypeValueField;
    if (!isValueProperty)
    {
        throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
    }

    Merge(body, tokenizer);

    if (tokenizer.Next().Type != JsonToken.TokenType.EndObject)
    {
        throw new InvalidProtocolBufferException("Expected end-object token after @type/value for well-known type");
    }
}

#region Utility methods which don't depend on the state (or settings) of the parser.
/// <summary>
/// Converts the text of a JSON property name into a map key of the type required by
/// <paramref name="field"/>. Bool, string and the integer field types are handled; any other
/// field type is rejected.
/// </summary>
private static object ParseMapKey(FieldDescriptor field, string keyText)
{
    switch (field.FieldType)
    {
        case FieldType.Bool:
            if (keyText == "true")
            {
                return true;
            }
            if (keyText == "false")
            {
                return false;
            }
            throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText);
        case FieldType.String:
            return keyText;
        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString(keyText, int.Parse);
        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString(keyText, uint.Parse);
        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString(keyText, long.Parse);
        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString(keyText, ulong.Parse);
        default:
            throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
    }
}

/// <summary>
/// Converts a JSON number token to the CLR value appropriate for <paramref name="field"/>.
/// Integer and enum fields require an integral value; values that do not fit the target type
/// are reported as <c>InvalidProtocolBufferException</c>.
/// </summary>
private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
{
    double value = token.NumberValue;
    // The casts run in a checked context so that an out-of-range double raises
    // OverflowException, which is translated below, instead of being converted silently.
    checked
    {
        try
        {
            switch (field.FieldType)
            {
                case FieldType.Int32:
                case FieldType.SInt32:
                case FieldType.SFixed32:
                    CheckInteger(value);
                    return (int) value;
                case FieldType.UInt32:
                case FieldType.Fixed32:
                    CheckInteger(value);
                    return (uint) value;
                case FieldType.Int64:
                case FieldType.SInt64:
                case FieldType.SFixed64:
                    CheckInteger(value);
                    return (long) value;
                case FieldType.UInt64:
                case FieldType.Fixed64:
                    CheckInteger(value);
                    return (ulong) value;
                case FieldType.Double:
                    return value;
                case FieldType.Float:
                    if (double.IsNaN(value))
                    {
                        return float.NaN;
                    }
                    if (value > float.MaxValue || value < float.MinValue)
                    {
                        // Infinities map to the float infinities; any other value outside
                        // the float range is an error.
                        if (double.IsPositiveInfinity(value))
                        {
                            return float.PositiveInfinity;
                        }
                        if (double.IsNegativeInfinity(value))
                        {
                            return float.NegativeInfinity;
                        }
                        throw new InvalidProtocolBufferException($"Value out of range: {value}");
                    }
                    return (float) value;
                case FieldType.Enum:
                    CheckInteger(value);
                    // Just return it as an int, and let the CLR convert it.
                    // Note that we deliberately don't check that it's a known value.
                    return (int) value;
                default:
                    throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
            }
        }
        catch (OverflowException)
        {
            throw new InvalidProtocolBufferException($"Value out of range: {value}");
        }
    }
}

/// <summary>
/// Throws <c>InvalidProtocolBufferException</c> unless <paramref name="value"/> is finite and
/// has no fractional part.
/// </summary>
private static void CheckInteger(double value)
{
    if (double.IsInfinity(value) || double.IsNaN(value))
    {
        throw new InvalidProtocolBufferException($"Value not an integer: {value}");
    }
    if (value != Math.Floor(value))
    {
        throw new InvalidProtocolBufferException($"Value not an integer: {value}");
    }
}

/// <summary>
/// Converts the text of a JSON string token to the CLR value appropriate for
/// <paramref name="field"/>: the text itself, base64-decoded bytes, a number parsed from the
/// text, or the number of the enum value with that name.
/// </summary>
private static object ParseSingleStringValue(FieldDescriptor field, string text)
{
    switch (field.FieldType)
    {
        case FieldType.String:
            return text;
        case FieldType.Bytes:
            try
            {
                return ByteString.FromBase64(text);
            }
            catch (FormatException e)
            {
                throw InvalidProtocolBufferException.InvalidBase64(e);
            }
        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString(text, int.Parse);
        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString(text, uint.Parse);
        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString(text, long.Parse);
        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString(text, ulong.Parse);
        case FieldType.Double:
            double d = ParseNumericString(text, double.Parse);
            ValidateInfinityAndNan(text, double.IsPositiveInfinity(d), double.IsNegativeInfinity(d), double.IsNaN(d));
            return d;
        case FieldType.Float:
            float f = ParseNumericString(text, float.Parse);
            ValidateInfinityAndNan(text, float.IsPositiveInfinity(f), float.IsNegativeInfinity(f), float.IsNaN(f));
            return f;
        case FieldType.Enum:
            var enumValue = field.EnumType.FindValueByName(text);
            if (enumValue == null)
            {
                throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
            }
            // Just return it as an int, and let the CLR convert it.
            return enumValue.Number;
        default:
            throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
    }
}

/// <summary>
/// Creates a new instance of the message type for the given field.
/// </summary>
private static IMessage NewMessageForField(FieldDescriptor field)
{
    return field.MessageType.Parser.CreateTemplate();
}

/// <summary>
/// Parses <paramref name="text"/> with <paramref name="parser"/> under the invariant culture,
/// first rejecting a leading '+' and a leading zero that is followed by another digit.
/// Format and overflow failures from the parser are reported as
/// <c>InvalidProtocolBufferException</c>.
/// </summary>
/// <typeparam name="T">The numeric type produced by <paramref name="parser"/>.</typeparam>
/// <param name="text">The text to parse.</param>
/// <param name="parser">A <c>Parse(string, NumberStyles, IFormatProvider)</c> style method.</param>
private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
{
    // The prefix tests are ordinal: the digit checks below index individual chars, so the
    // prefix must be matched char-for-char rather than under the current culture's rules.

    // Can't prohibit this with NumberStyles.
    if (text.StartsWith("+", StringComparison.Ordinal))
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }
    if (text.StartsWith("0", StringComparison.Ordinal) && text.Length > 1)
    {
        if (text[1] >= '0' && text[1] <= '9')
        {
            throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
        }
    }
    else if (text.StartsWith("-0", StringComparison.Ordinal) && text.Length > 2)
    {
        if (text[2] >= '0' && text[2] <= '9')
        {
            throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
        }
    }
    try
    {
        return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
    }
    catch (OverflowException)
    {
        throw new InvalidProtocolBufferException($"Value out of range: {text}");
    }
}

/// <summary>
/// Checks that any infinite/NaN values originated from the correct text.
/// This corrects the lenient whitespace handling of double.Parse/float.Parse, as well as the
/// way that Mono parses out-of-range values as infinity.
/// </summary>
/// <param name="text">The original text that was parsed.</param>
/// <param name="isPositiveInfinity">Whether the parsed value is positive infinity.</param>
/// <param name="isNegativeInfinity">Whether the parsed value is negative infinity.</param>
/// <param name="isNaN">Whether the parsed value is NaN.</param>
private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
{
    // Each special value is accepted only when spelled exactly "Infinity", "-Infinity" or "NaN".
    if ((isPositiveInfinity && text != "Infinity") ||
        (isNegativeInfinity && text != "-Infinity") ||
        (isNaN && text != "NaN"))
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }
}

/// <summary>
/// Merges the string form of a timestamp into a google.protobuf.Timestamp message. The text
/// must match <c>TimestampRegex</c>; the date/time part is parsed as UTC, then the fractional
/// seconds and the UTC offset are applied before the seconds and nanos fields are set.
/// </summary>
/// <param name="message">The Timestamp message to populate.</param>
/// <param name="token">The token holding the timestamp text; must be a string value.</param>
private static void MergeTimestamp(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Timestamp");
    }
    var match = TimestampRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
    }
    var dateTime = match.Groups["datetime"].Value;
    var subseconds = match.Groups["subseconds"].Value;
    var offset = match.Groups["offset"].Value;

    try
    {
        DateTime parsed = DateTime.ParseExact(
            dateTime,
            "yyyy-MM-dd'T'HH:mm:ss",
            CultureInfo.InvariantCulture,
            DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
        // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
        Timestamp timestamp = Timestamp.FromDateTime(parsed);
        int nanosToAdd = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            // The capture still contains the leading '.', hence indexing by its full length.
            nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
        }
        int secondsToAdd = 0;
        if (offset != "Z")
        {
            // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
            int sign = offset[0] == '-' ? 1 : -1;
            int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
            // Parsed with the invariant culture, like every other number in this method.
            int minutes = int.Parse(offset.Substring(4, 2), CultureInfo.InvariantCulture);
            int totalMinutes = hours * 60 + minutes;
            if (totalMinutes > 18 * 60)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            if (totalMinutes == 0 && sign == 1)
            {
                // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            // We need to *subtract* the offset from local time to get UTC.
            secondsToAdd = sign * totalMinutes * 60;
        }
        // Ensure we've got the right signs. Currently unnecessary, but easy to do.
        if (secondsToAdd < 0 && nanosToAdd > 0)
        {
            secondsToAdd++;
            nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond;
        }
        if (secondsToAdd != 0 || nanosToAdd != 0)
        {
            timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
            // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
            // anywhere, but we shouldn't parse it.
            if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
        }
        message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
        message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
    }
}

private static void MergeDuration(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Duration");
    }
    var match = DurationRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var sign = match.Groups["sign"].Value;
    var secondsText = match.Groups["int"].Value;
    // Prohibit leading insignificant zeroes
    if (secondsText[0] == '0' && secondsText.Length > 1)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var subseconds = match.Groups["subseconds"].Value;
    var multiplier = sign == "-" ? -1 : 1;

    try
    {
        long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
        int nanos = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
905 int parsedFraction = int.Parse(subseconds.Substring(1)); 906 nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier; 907 } 908 if (!Duration.IsNormalized(seconds, nanos)) 909 { 910 throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}"); 911 } 912 message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds); 913 message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos); 914 } 915 catch (FormatException) 916 { 917 throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}"); 918 } 919 } 920 MergeFieldMask(IMessage message, JsonToken token)921 private static void MergeFieldMask(IMessage message, JsonToken token) 922 { 923 if (token.Type != JsonToken.TokenType.StringValue) 924 { 925 throw new InvalidProtocolBufferException("Expected string value for FieldMask"); 926 } 927 // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"? 928 string[] jsonPaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries); 929 IList messagePaths = (IList) message.Descriptor.Fields[FieldMask.PathsFieldNumber].Accessor.GetValue(message); 930 foreach (var path in jsonPaths) 931 { 932 messagePaths.Add(ToSnakeCase(path)); 933 } 934 } 935 936 // Ported from src/google/protobuf/util/internal/utility.cc ToSnakeCase(string text)937 private static string ToSnakeCase(string text) 938 { 939 var builder = new StringBuilder(text.Length * 2); 940 // Note: this is probably unnecessary now, but currently retained to be as close as possible to the 941 // C++, whilst still throwing an exception on underscores. 
942 bool wasNotUnderscore = false; // Initialize to false for case 1 (below) 943 bool wasNotCap = false; 944 945 for (int i = 0; i < text.Length; i++) 946 { 947 char c = text[i]; 948 if (c >= 'A' && c <= 'Z') // ascii_isupper 949 { 950 // Consider when the current character B is capitalized: 951 // 1) At beginning of input: "B..." => "b..." 952 // (e.g. "Biscuit" => "biscuit") 953 // 2) Following a lowercase: "...aB..." => "...a_b..." 954 // (e.g. "gBike" => "g_bike") 955 // 3) At the end of input: "...AB" => "...ab" 956 // (e.g. "GoogleLAB" => "google_lab") 957 // 4) Followed by a lowercase: "...ABc..." => "...a_bc..." 958 // (e.g. "GBike" => "g_bike") 959 if (wasNotUnderscore && // case 1 out 960 (wasNotCap || // case 2 in, case 3 out 961 (i + 1 < text.Length && // case 3 out 962 (text[i + 1] >= 'a' && text[i + 1] <= 'z')))) // ascii_islower(text[i + 1]) 963 { // case 4 in 964 // We add an underscore for case 2 and case 4. 965 builder.Append('_'); 966 } 967 // ascii_tolower, but we already know that c *is* an upper case ASCII character... 968 builder.Append((char) (c + 'a' - 'A')); 969 wasNotUnderscore = true; 970 wasNotCap = false; 971 } 972 else 973 { 974 builder.Append(c); 975 if (c == '_') 976 { 977 throw new InvalidProtocolBufferException($"Invalid field mask: {text}"); 978 } 979 wasNotUnderscore = true; 980 wasNotCap = true; 981 } 982 } 983 return builder.ToString(); 984 } 985 #endregion 986 987 /// <summary> 988 /// Settings controlling JSON parsing. 989 /// </summary> 990 public sealed class Settings 991 { 992 /// <summary> 993 /// Default settings, as used by <see cref="JsonParser.Default"/>. This has the same default 994 /// recursion limit as <see cref="CodedInputStream"/>, and an empty type registry. 995 /// </summary> 996 public static Settings Default { get; } 997 998 // Workaround for the Mono compiler complaining about XML comments not being on 999 // valid language elements. 
Settings()1000 static Settings() 1001 { 1002 Default = new Settings(CodedInputStream.DefaultRecursionLimit); 1003 } 1004 1005 /// <summary> 1006 /// The maximum depth of messages to parse. Note that this limit only applies to parsing 1007 /// messages, not collections - so a message within a collection within a message only counts as 1008 /// depth 2, not 3. 1009 /// </summary> 1010 public int RecursionLimit { get; } 1011 1012 /// <summary> 1013 /// The type registry used to parse <see cref="Any"/> messages. 1014 /// </summary> 1015 public TypeRegistry TypeRegistry { get; } 1016 1017 /// <summary> 1018 /// Whether the parser should ignore unknown fields (<c>true</c>) or throw an exception when 1019 /// they are encountered (<c>false</c>). 1020 /// </summary> 1021 public bool IgnoreUnknownFields { get; } 1022 Settings(int recursionLimit, TypeRegistry typeRegistry, bool ignoreUnknownFields)1023 private Settings(int recursionLimit, TypeRegistry typeRegistry, bool ignoreUnknownFields) 1024 { 1025 RecursionLimit = recursionLimit; 1026 TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry)); 1027 IgnoreUnknownFields = ignoreUnknownFields; 1028 } 1029 1030 /// <summary> 1031 /// Creates a new <see cref="Settings"/> object with the specified recursion limit. 1032 /// </summary> 1033 /// <param name="recursionLimit">The maximum depth of messages to parse</param> Settings(int recursionLimit)1034 public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty) 1035 { 1036 } 1037 1038 /// <summary> 1039 /// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry. 
1040 /// </summary> 1041 /// <param name="recursionLimit">The maximum depth of messages to parse</param> 1042 /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages</param> Settings(int recursionLimit, TypeRegistry typeRegistry)1043 public Settings(int recursionLimit, TypeRegistry typeRegistry) : this(recursionLimit, typeRegistry, false) 1044 { 1045 } 1046 1047 /// <summary> 1048 /// Creates a new <see cref="Settings"/> object set to either ignore unknown fields, or throw an exception 1049 /// when unknown fields are encountered. 1050 /// </summary> 1051 /// <param name="ignoreUnknownFields"><c>true</c> if unknown fields should be ignored when parsing; <c>false</c> to throw an exception.</param> 1052 public Settings WithIgnoreUnknownFields(bool ignoreUnknownFields) => 1053 new Settings(RecursionLimit, TypeRegistry, ignoreUnknownFields); 1054 1055 /// <summary> 1056 /// Creates a new <see cref="Settings"/> object based on this one, but with the specified recursion limit. 1057 /// </summary> 1058 /// <param name="recursionLimit">The new recursion limit.</param> WithRecursionLimit(int recursionLimit)1059 public Settings WithRecursionLimit(int recursionLimit) => 1060 new Settings(recursionLimit, TypeRegistry, IgnoreUnknownFields); 1061 1062 /// <summary> 1063 /// Creates a new <see cref="Settings"/> object based on this one, but with the specified type registry. 1064 /// </summary> 1065 /// <param name="typeRegistry">The new type registry. Must not be null.</param> 1066 public Settings WithTypeRegistry(TypeRegistry typeRegistry) => 1067 new Settings( 1068 RecursionLimit, 1069 ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry)), 1070 IgnoreUnknownFields); 1071 } 1072 } 1073 } 1074