• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #region Copyright notice and license
2 // Protocol Buffers - Google's data interchange format
3 // Copyright 2015 Google Inc.  All rights reserved.
4 // https://developers.google.com/protocol-buffers/
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 //     * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #endregion
32 
33 using Google.Protobuf.Reflection;
34 using Google.Protobuf.WellKnownTypes;
35 using System;
36 using System.Collections;
37 using System.Collections.Generic;
38 using System.Globalization;
39 using System.IO;
40 using System.Linq;
41 using System.Text;
42 using System.Text.RegularExpressions;
43 
44 namespace Google.Protobuf
45 {
46     /// <summary>
47     /// Reflection-based converter from JSON to messages.
48     /// </summary>
49     /// <remarks>
50     /// <para>
51     /// Instances of this class are thread-safe, with no mutable state.
52     /// </para>
53     /// <para>
54     /// This is a simple start to get JSON parsing working. As it's reflection-based,
55     /// it's not as quick as baking calls into generated messages - but is a simpler implementation.
56     /// (This code is generally not heavily optimized.)
57     /// </para>
58     /// </remarks>
59     public sealed class JsonParser
60     {
61         // Note: using 0-9 instead of \d to ensure no non-ASCII digits.
62         // This regex isn't a complete validator, but will remove *most* invalid input. We rely on parsing to do the rest.
63         private static readonly Regex TimestampRegex = new Regex(@"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$", FrameworkPortability.CompiledRegexWhereAvailable);
64         private static readonly Regex DurationRegex = new Regex(@"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$", FrameworkPortability.CompiledRegexWhereAvailable);
65         private static readonly int[] SubsecondScalingFactors = { 0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1 };
66         private static readonly char[] FieldMaskPathSeparators = new[] { ',' };
67         private static readonly EnumDescriptor NullValueDescriptor = StructReflection.Descriptor.EnumTypes.Single(ed => ed.ClrType == typeof(NullValue));
68 
69         private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);
70 
71         // TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers
72         // and the signatures of various methods.
73         private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
74             WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
75         {
76             { Timestamp.Descriptor.FullName, (parser, message, tokenizer) => MergeTimestamp(message, tokenizer.Next()) },
77             { Duration.Descriptor.FullName, (parser, message, tokenizer) => MergeDuration(message, tokenizer.Next()) },
78             { Value.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStructValue(message, tokenizer) },
79             { ListValue.Descriptor.FullName, (parser, message, tokenizer) =>
80                 parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) },
81             { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) },
82             { Any.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeAny(message, tokenizer) },
83             { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) },
84             { Int32Value.Descriptor.FullName, MergeWrapperField },
85             { Int64Value.Descriptor.FullName, MergeWrapperField },
86             { UInt32Value.Descriptor.FullName, MergeWrapperField },
87             { UInt64Value.Descriptor.FullName, MergeWrapperField },
88             { FloatValue.Descriptor.FullName, MergeWrapperField },
89             { DoubleValue.Descriptor.FullName, MergeWrapperField },
90             { BytesValue.Descriptor.FullName, MergeWrapperField },
91             { StringValue.Descriptor.FullName, MergeWrapperField },
92             { BoolValue.Descriptor.FullName, MergeWrapperField }
93         };
94 
95         // Convenience method to avoid having to repeat the same code multiple times in the above
96         // dictionary initialization.
MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)97         private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
98         {
99             parser.MergeField(message, message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber], tokenizer);
100         }
101 
102         /// <summary>
103         /// Returns a formatter using the default settings.
104         /// </summary>
105         public static JsonParser Default { get { return defaultInstance; } }
106 
107         private readonly Settings settings;
108 
109         /// <summary>
110         /// Creates a new formatted with the given settings.
111         /// </summary>
112         /// <param name="settings">The settings.</param>
JsonParser(Settings settings)113         public JsonParser(Settings settings)
114         {
115             this.settings = ProtoPreconditions.CheckNotNull(settings, nameof(settings));
116         }
117 
118         /// <summary>
119         /// Parses <paramref name="json"/> and merges the information into the given message.
120         /// </summary>
121         /// <param name="message">The message to merge the JSON information into.</param>
122         /// <param name="json">The JSON to parse.</param>
Merge(IMessage message, string json)123         internal void Merge(IMessage message, string json)
124         {
125             Merge(message, new StringReader(json));
126         }
127 
128         /// <summary>
129         /// Parses JSON read from <paramref name="jsonReader"/> and merges the information into the given message.
130         /// </summary>
131         /// <param name="message">The message to merge the JSON information into.</param>
132         /// <param name="jsonReader">Reader providing the JSON to parse.</param>
Merge(IMessage message, TextReader jsonReader)133         internal void Merge(IMessage message, TextReader jsonReader)
134         {
135             var tokenizer = JsonTokenizer.FromTextReader(jsonReader);
136             Merge(message, tokenizer);
137             var lastToken = tokenizer.Next();
138             if (lastToken != JsonToken.EndDocument)
139             {
140                 throw new InvalidProtocolBufferException("Expected end of JSON after object");
141             }
142         }
143 
144         /// <summary>
145         /// Merges the given message using data from the given tokenizer. In most cases, the next
146         /// token should be a "start object" token, but wrapper types and nullity can invalidate
147         /// that assumption. This is implemented as an LL(1) recursive descent parser over the stream
148         /// of tokens provided by the tokenizer. This token stream is assumed to be valid JSON, with the
149         /// tokenizer performing that validation - but not every token stream is valid "protobuf JSON".
150         /// </summary>
Merge(IMessage message, JsonTokenizer tokenizer)151         private void Merge(IMessage message, JsonTokenizer tokenizer)
152         {
153             if (tokenizer.ObjectDepth > settings.RecursionLimit)
154             {
155                 throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
156             }
157             if (message.Descriptor.IsWellKnownType)
158             {
159                 Action<JsonParser, IMessage, JsonTokenizer> handler;
160                 if (WellKnownTypeHandlers.TryGetValue(message.Descriptor.FullName, out handler))
161                 {
162                     handler(this, message, tokenizer);
163                     return;
164                 }
165                 // Well-known types with no special handling continue in the normal way.
166             }
167             var token = tokenizer.Next();
168             if (token.Type != JsonToken.TokenType.StartObject)
169             {
170                 throw new InvalidProtocolBufferException("Expected an object");
171             }
172             var descriptor = message.Descriptor;
173             var jsonFieldMap = descriptor.Fields.ByJsonName();
174             // All the oneof fields we've already accounted for - we can only see each of them once.
175             // The set is created lazily to avoid the overhead of creating a set for every message
176             // we parsed, when oneofs are relatively rare.
177             HashSet<OneofDescriptor> seenOneofs = null;
178             while (true)
179             {
180                 token = tokenizer.Next();
181                 if (token.Type == JsonToken.TokenType.EndObject)
182                 {
183                     return;
184                 }
185                 if (token.Type != JsonToken.TokenType.Name)
186                 {
187                     throw new InvalidOperationException("Unexpected token type " + token.Type);
188                 }
189                 string name = token.StringValue;
190                 FieldDescriptor field;
191                 if (jsonFieldMap.TryGetValue(name, out field))
192                 {
193                     if (field.ContainingOneof != null)
194                     {
195                         if (seenOneofs == null)
196                         {
197                             seenOneofs = new HashSet<OneofDescriptor>();
198                         }
199                         if (!seenOneofs.Add(field.ContainingOneof))
200                         {
201                             throw new InvalidProtocolBufferException($"Multiple values specified for oneof {field.ContainingOneof.Name}");
202                         }
203                     }
204                     MergeField(message, field, tokenizer);
205                 }
206                 else
207                 {
208                     if (settings.IgnoreUnknownFields)
209                     {
210                         tokenizer.SkipValue();
211                     }
212                     else
213                     {
214                         throw new InvalidProtocolBufferException("Unknown field: " + name);
215                     }
216                 }
217             }
218         }
219 
MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)220         private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
221         {
222             var token = tokenizer.Next();
223             if (token.Type == JsonToken.TokenType.Null)
224             {
225                 // Clear the field if we see a null token, unless it's for a singular field of type
226                 // google.protobuf.Value or google.protobuf.NullValue.
227                 // Note: different from Java API, which just ignores it.
228                 // TODO: Bring it more in line? Discuss...
229                 if (field.IsMap || field.IsRepeated ||
230                     !(IsGoogleProtobufValueField(field) || IsGoogleProtobufNullValueField(field)))
231                 {
232                     field.Accessor.Clear(message);
233                     return;
234                 }
235             }
236             tokenizer.PushBack(token);
237 
238             if (field.IsMap)
239             {
240                 MergeMapField(message, field, tokenizer);
241             }
242             else if (field.IsRepeated)
243             {
244                 MergeRepeatedField(message, field, tokenizer);
245             }
246             else
247             {
248                 var value = ParseSingleValue(field, tokenizer);
249                 field.Accessor.SetValue(message, value);
250             }
251         }
252 
MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)253         private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
254         {
255             var token = tokenizer.Next();
256             if (token.Type != JsonToken.TokenType.StartArray)
257             {
258                 throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + token.Type);
259             }
260 
261             IList list = (IList) field.Accessor.GetValue(message);
262             while (true)
263             {
264                 token = tokenizer.Next();
265                 if (token.Type == JsonToken.TokenType.EndArray)
266                 {
267                     return;
268                 }
269                 tokenizer.PushBack(token);
270                 object value = ParseSingleValue(field, tokenizer);
271                 if (value == null)
272                 {
273                     throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
274                 }
275                 list.Add(value);
276             }
277         }
278 
MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)279         private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
280         {
281             // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those.
282             var token = tokenizer.Next();
283             if (token.Type != JsonToken.TokenType.StartObject)
284             {
285                 throw new InvalidProtocolBufferException("Expected an object to populate a map");
286             }
287 
288             var type = field.MessageType;
289             var keyField = type.FindFieldByNumber(1);
290             var valueField = type.FindFieldByNumber(2);
291             if (keyField == null || valueField == null)
292             {
293                 throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
294             }
295             IDictionary dictionary = (IDictionary) field.Accessor.GetValue(message);
296 
297             while (true)
298             {
299                 token = tokenizer.Next();
300                 if (token.Type == JsonToken.TokenType.EndObject)
301                 {
302                     return;
303                 }
304                 object key = ParseMapKey(keyField, token.StringValue);
305                 object value = ParseSingleValue(valueField, tokenizer);
306                 if (value == null)
307                 {
308                     throw new InvalidProtocolBufferException("Map values must not be null");
309                 }
310                 dictionary[key] = value;
311             }
312         }
313 
IsGoogleProtobufValueField(FieldDescriptor field)314         private static bool IsGoogleProtobufValueField(FieldDescriptor field)
315         {
316             return field.FieldType == FieldType.Message &&
317                 field.MessageType.FullName == Value.Descriptor.FullName;
318         }
319 
IsGoogleProtobufNullValueField(FieldDescriptor field)320         private static bool IsGoogleProtobufNullValueField(FieldDescriptor field)
321         {
322             return field.FieldType == FieldType.Enum &&
323                 field.EnumType.FullName == NullValueDescriptor.FullName;
324         }
325 
ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)326         private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)
327         {
328             var token = tokenizer.Next();
329             if (token.Type == JsonToken.TokenType.Null)
330             {
331                 // TODO: In order to support dynamic messages, we should really build this up
332                 // dynamically.
333                 if (IsGoogleProtobufValueField(field))
334                 {
335                     return Value.ForNull();
336                 }
337                 if (IsGoogleProtobufNullValueField(field))
338                 {
339                     return NullValue.NullValue;
340                 }
341                 return null;
342             }
343 
344             var fieldType = field.FieldType;
345             if (fieldType == FieldType.Message)
346             {
347                 // Parse wrapper types as their constituent types.
348                 // TODO: What does this mean for null?
349                 if (field.MessageType.IsWrapperType)
350                 {
351                     field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
352                     fieldType = field.FieldType;
353                 }
354                 else
355                 {
356                     // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
357                     tokenizer.PushBack(token);
358                     IMessage subMessage = NewMessageForField(field);
359                     Merge(subMessage, tokenizer);
360                     return subMessage;
361                 }
362             }
363 
364             switch (token.Type)
365             {
366                 case JsonToken.TokenType.True:
367                 case JsonToken.TokenType.False:
368                     if (fieldType == FieldType.Bool)
369                     {
370                         return token.Type == JsonToken.TokenType.True;
371                     }
372                     // Fall through to "we don't support this type for this case"; could duplicate the behaviour of the default
373                     // case instead, but this way we'd only need to change one place.
374                     goto default;
375                 case JsonToken.TokenType.StringValue:
376                     return ParseSingleStringValue(field, token.StringValue);
377                 // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
378                 case JsonToken.TokenType.Number:
379                     return ParseSingleNumberValue(field, token);
380                 case JsonToken.TokenType.Null:
381                     throw new NotImplementedException("Haven't worked out what to do for null yet");
382                 default:
383                     throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType);
384             }
385         }
386 
387         /// <summary>
388         /// Parses <paramref name="json"/> into a new message.
389         /// </summary>
390         /// <typeparam name="T">The type of message to create.</typeparam>
391         /// <param name="json">The JSON to parse.</param>
392         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
393         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
394         public T Parse<T>(string json) where T : IMessage, new()
395         {
396             ProtoPreconditions.CheckNotNull(json, nameof(json));
397             return Parse<T>(new StringReader(json));
398         }
399 
400         /// <summary>
401         /// Parses JSON read from <paramref name="jsonReader"/> into a new message.
402         /// </summary>
403         /// <typeparam name="T">The type of message to create.</typeparam>
404         /// <param name="jsonReader">Reader providing the JSON to parse.</param>
405         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
406         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
407         public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
408         {
409             ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
410             T message = new T();
411             Merge(message, jsonReader);
412             return message;
413         }
414 
415         /// <summary>
416         /// Parses <paramref name="json"/> into a new message.
417         /// </summary>
418         /// <param name="json">The JSON to parse.</param>
419         /// <param name="descriptor">Descriptor of message type to parse.</param>
420         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
421         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
Parse(string json, MessageDescriptor descriptor)422         public IMessage Parse(string json, MessageDescriptor descriptor)
423         {
424             ProtoPreconditions.CheckNotNull(json, nameof(json));
425             ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
426             return Parse(new StringReader(json), descriptor);
427         }
428 
429         /// <summary>
430         /// Parses JSON read from <paramref name="jsonReader"/> into a new message.
431         /// </summary>
432         /// <param name="jsonReader">Reader providing the JSON to parse.</param>
433         /// <param name="descriptor">Descriptor of message type to parse.</param>
434         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
435         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
Parse(TextReader jsonReader, MessageDescriptor descriptor)436         public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
437         {
438             ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
439             ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
440             IMessage message = descriptor.Parser.CreateTemplate();
441             Merge(message, jsonReader);
442             return message;
443         }
444 
MergeStructValue(IMessage message, JsonTokenizer tokenizer)445         private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
446         {
447             var firstToken = tokenizer.Next();
448             var fields = message.Descriptor.Fields;
449             switch (firstToken.Type)
450             {
451                 case JsonToken.TokenType.Null:
452                     fields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0);
453                     return;
454                 case JsonToken.TokenType.StringValue:
455                     fields[Value.StringValueFieldNumber].Accessor.SetValue(message, firstToken.StringValue);
456                     return;
457                 case JsonToken.TokenType.Number:
458                     fields[Value.NumberValueFieldNumber].Accessor.SetValue(message, firstToken.NumberValue);
459                     return;
460                 case JsonToken.TokenType.False:
461                 case JsonToken.TokenType.True:
462                     fields[Value.BoolValueFieldNumber].Accessor.SetValue(message, firstToken.Type == JsonToken.TokenType.True);
463                     return;
464                 case JsonToken.TokenType.StartObject:
465                     {
466                         var field = fields[Value.StructValueFieldNumber];
467                         var structMessage = NewMessageForField(field);
468                         tokenizer.PushBack(firstToken);
469                         Merge(structMessage, tokenizer);
470                         field.Accessor.SetValue(message, structMessage);
471                         return;
472                     }
473                 case JsonToken.TokenType.StartArray:
474                     {
475                         var field = fields[Value.ListValueFieldNumber];
476                         var list = NewMessageForField(field);
477                         tokenizer.PushBack(firstToken);
478                         Merge(list, tokenizer);
479                         field.Accessor.SetValue(message, list);
480                         return;
481                     }
482                 default:
483                     throw new InvalidOperationException("Unexpected token type: " + firstToken.Type);
484             }
485         }
486 
MergeStruct(IMessage message, JsonTokenizer tokenizer)487         private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
488         {
489             var token = tokenizer.Next();
490             if (token.Type != JsonToken.TokenType.StartObject)
491             {
492                 throw new InvalidProtocolBufferException("Expected object value for Struct");
493             }
494             tokenizer.PushBack(token);
495 
496             var field = message.Descriptor.Fields[Struct.FieldsFieldNumber];
497             MergeMapField(message, field, tokenizer);
498         }
499 
MergeAny(IMessage message, JsonTokenizer tokenizer)500         private void MergeAny(IMessage message, JsonTokenizer tokenizer)
501         {
502             // Record the token stream until we see the @type property. At that point, we can take the value, consult
503             // the type registry for the relevant message, and replay the stream, omitting the @type property.
504             var tokens = new List<JsonToken>();
505 
506             var token = tokenizer.Next();
507             if (token.Type != JsonToken.TokenType.StartObject)
508             {
509                 throw new InvalidProtocolBufferException("Expected object value for Any");
510             }
511             int typeUrlObjectDepth = tokenizer.ObjectDepth;
512 
513             // The check for the property depth protects us from nested Any values which occur before the type URL
514             // for *this* Any.
515             while (token.Type != JsonToken.TokenType.Name ||
516                 token.StringValue != JsonFormatter.AnyTypeUrlField ||
517                 tokenizer.ObjectDepth != typeUrlObjectDepth)
518             {
519                 tokens.Add(token);
520                 token = tokenizer.Next();
521 
522                 if (tokenizer.ObjectDepth < typeUrlObjectDepth)
523                 {
524                     throw new InvalidProtocolBufferException("Any message with no @type");
525                 }
526             }
527 
528             // Don't add the @type property or its value to the recorded token list
529             token = tokenizer.Next();
530             if (token.Type != JsonToken.TokenType.StringValue)
531             {
532                 throw new InvalidProtocolBufferException("Expected string value for Any.@type");
533             }
534             string typeUrl = token.StringValue;
535             string typeName = Any.GetTypeName(typeUrl);
536 
537             MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName);
538             if (descriptor == null)
539             {
540                 throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
541             }
542 
543             // Now replay the token stream we've already read and anything that remains of the object, just parsing it
544             // as normal. Our original tokenizer should end up at the end of the object.
545             var replay = JsonTokenizer.FromReplayedTokens(tokens, tokenizer);
546             var body = descriptor.Parser.CreateTemplate();
547             if (descriptor.IsWellKnownType)
548             {
549                 MergeWellKnownTypeAnyBody(body, replay);
550             }
551             else
552             {
553                 Merge(body, replay);
554             }
555             var data = body.ToByteString();
556 
557             // Now that we have the message data, we can pack it into an Any (the message received as a parameter).
558             message.Descriptor.Fields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
559             message.Descriptor.Fields[Any.ValueFieldNumber].Accessor.SetValue(message, data);
560         }
561 
562         // Well-known types end up in a property called "value" in the JSON. As there's no longer a @type property
563         // in the given JSON token stream, we should *only* have tokens of start-object, name("value"), the value
564         // itself, and then end-object.
MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)565         private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
566         {
567             var token = tokenizer.Next(); // Definitely start-object; checked in previous method
568             token = tokenizer.Next();
569             // TODO: What about an absent Int32Value, for example?
570             if (token.Type != JsonToken.TokenType.Name || token.StringValue != JsonFormatter.AnyWellKnownTypeValueField)
571             {
572                 throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
573             }
574             Merge(body, tokenizer);
575             token = tokenizer.Next();
576             if (token.Type != JsonToken.TokenType.EndObject)
577             {
578                 throw new InvalidProtocolBufferException($"Expected end-object token after @type/value for well-known type");
579             }
580         }
581 
582         #region Utility methods which don't depend on the state (or settings) of the parser.
/// <summary>
/// Converts the text of a JSON property name into a map key value, using the
/// field type of <paramref name="field"/> to select the conversion.
/// </summary>
/// <param name="field">The descriptor whose <c>FieldType</c> decides how the key is parsed.</param>
/// <param name="keyText">The key text to convert.</param>
/// <returns>The key as a boxed bool, int, uint, long or ulong, or the original string.</returns>
/// <exception cref="InvalidProtocolBufferException">
/// The text is not valid for the key type, or the field type cannot be used as a map key.
/// </exception>
private static object ParseMapKey(FieldDescriptor field, string keyText)
{
    var keyType = field.FieldType;
    switch (keyType)
    {
        case FieldType.String:
            return keyText;

        case FieldType.Bool:
            // Only the exact lower-case literals are accepted.
            switch (keyText)
            {
                case "true":
                    return true;
                case "false":
                    return false;
                default:
                    throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText);
            }

        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString(keyText, int.Parse);

        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString(keyText, uint.Parse);

        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString(keyText, long.Parse);

        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString(keyText, ulong.Parse);

        default:
            throw new InvalidProtocolBufferException("Invalid field type for map: " + keyType);
    }
}
617 
/// <summary>
/// Converts a JSON number token into a boxed value suitable for the given field.
/// </summary>
/// <param name="field">The descriptor whose <c>FieldType</c> selects the target numeric type.</param>
/// <param name="token">The token whose <c>NumberValue</c> is converted.</param>
/// <returns>The converted value, boxed as int, uint, long, ulong, double or float.</returns>
/// <exception cref="InvalidProtocolBufferException">
/// The number is not integral when an integer is required, is out of range for the target type,
/// or the field type cannot be populated from a JSON number.
/// </exception>
private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
{
    double number = token.NumberValue;
    try
    {
        // The integer casts below run in a checked context so that an out-of-range double
        // raises OverflowException, which is translated in the catch block.
        checked
        {
            switch (field.FieldType)
            {
                case FieldType.Int32:
                case FieldType.SInt32:
                case FieldType.SFixed32:
                case FieldType.Enum:
                    // For enums: just return it as an int, and let the CLR convert it.
                    // Note that we deliberately don't check that it's a known value.
                    CheckInteger(number);
                    return (int) number;

                case FieldType.UInt32:
                case FieldType.Fixed32:
                    CheckInteger(number);
                    return (uint) number;

                case FieldType.Int64:
                case FieldType.SInt64:
                case FieldType.SFixed64:
                    CheckInteger(number);
                    return (long) number;

                case FieldType.UInt64:
                case FieldType.Fixed64:
                    CheckInteger(number);
                    return (ulong) number;

                case FieldType.Double:
                    return number;

                case FieldType.Float:
                    // NaN and the infinities map directly onto their float counterparts;
                    // any other value beyond the finite float range is rejected.
                    if (double.IsNaN(number))
                    {
                        return float.NaN;
                    }
                    if (double.IsPositiveInfinity(number))
                    {
                        return float.PositiveInfinity;
                    }
                    if (double.IsNegativeInfinity(number))
                    {
                        return float.NegativeInfinity;
                    }
                    if (number > float.MaxValue || number < float.MinValue)
                    {
                        throw new InvalidProtocolBufferException($"Value out of range: {number}");
                    }
                    return (float) number;

                default:
                    throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
            }
        }
    }
    catch (OverflowException)
    {
        throw new InvalidProtocolBufferException($"Value out of range: {number}");
    }
}
680 
/// <summary>
/// Verifies that <paramref name="value"/> is a finite number with no fractional part.
/// </summary>
/// <param name="value">The number to validate.</param>
/// <exception cref="InvalidProtocolBufferException">
/// The value is NaN, infinite, or not a whole number.
/// </exception>
private static void CheckInteger(double value)
{
    bool isFinite = !double.IsInfinity(value) && !double.IsNaN(value);
    if (!isFinite || Math.Floor(value) != value)
    {
        throw new InvalidProtocolBufferException($"Value not an integer: {value}");
    }
}
692 
/// <summary>
/// Converts the text of a JSON string token into a boxed value suitable for the given field.
/// </summary>
/// <param name="field">The descriptor whose <c>FieldType</c> selects the conversion.</param>
/// <param name="text">The string value to convert.</param>
/// <returns>
/// The original string, a <see cref="ByteString"/> decoded from base64, a parsed number,
/// or the number of the enum value named by <paramref name="text"/>.
/// </returns>
/// <exception cref="InvalidProtocolBufferException">
/// The text is not valid for the field type, or the field type cannot be populated from a JSON string.
/// </exception>
private static object ParseSingleStringValue(FieldDescriptor field, string text)
{
    switch (field.FieldType)
    {
        case FieldType.String:
            return text;

        case FieldType.Bytes:
            try
            {
                return ByteString.FromBase64(text);
            }
            catch (FormatException e)
            {
                throw InvalidProtocolBufferException.InvalidBase64(e);
            }

        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString(text, int.Parse);

        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString(text, uint.Parse);

        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString(text, long.Parse);

        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString(text, ulong.Parse);

        case FieldType.Double:
        {
            double parsedDouble = ParseNumericString(text, double.Parse);
            // Infinity/NaN results are only accepted when the text spelled them out exactly.
            ValidateInfinityAndNan(
                text,
                double.IsPositiveInfinity(parsedDouble),
                double.IsNegativeInfinity(parsedDouble),
                double.IsNaN(parsedDouble));
            return parsedDouble;
        }

        case FieldType.Float:
        {
            float parsedFloat = ParseNumericString(text, float.Parse);
            // Infinity/NaN results are only accepted when the text spelled them out exactly.
            ValidateInfinityAndNan(
                text,
                float.IsPositiveInfinity(parsedFloat),
                float.IsNegativeInfinity(parsedFloat),
                float.IsNaN(parsedFloat));
            return parsedFloat;
        }

        case FieldType.Enum:
        {
            var namedValue = field.EnumType.FindValueByName(text);
            if (namedValue != null)
            {
                // Just return it as an int, and let the CLR convert it.
                return namedValue.Number;
            }
            throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
        }

        default:
            throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
    }
}
742 
/// <summary>
/// Builds an empty message of the type declared by the given field, obtained from
/// the parser of the field's message type.
/// </summary>
/// <param name="field">The field whose <c>MessageType</c> identifies the message to create.</param>
/// <returns>The template instance produced by the message type's parser.</returns>
private static IMessage NewMessageForField(FieldDescriptor field) =>
    field.MessageType.Parser.CreateTemplate();
750 
/// <summary>
/// Parses numeric text with the supplied BCL parser, after rejecting forms that the
/// parser would accept but which are not permitted here: a leading '+' and leading zeroes.
/// </summary>
/// <typeparam name="T">The numeric type produced by <paramref name="parser"/>.</typeparam>
/// <param name="text">The text to parse. Must not be null.</param>
/// <param name="parser">
/// The parse method (for example <c>int.Parse</c>); it is invoked with leading-sign, decimal-point
/// and exponent styles and the invariant culture.
/// </param>
/// <returns>The value returned by <paramref name="parser"/>.</returns>
/// <exception cref="InvalidProtocolBufferException">
/// The text has a leading '+', has a leading zero followed by another digit, is rejected by the
/// parser as malformed, or is out of range for <typeparamref name="T"/>.
/// </exception>
private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
{
    // All prefix checks are ordinal: this is machine-readable text, so the current culture's
    // collation rules (which can ignore some characters) must not influence validation.

    // Can't prohibit this with NumberStyles.
    if (text.StartsWith("+", StringComparison.Ordinal))
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }

    // Reject "0d..." and "-0d..." where d is an ASCII digit (a leading insignificant zero).
    // "0", "-0", "0.5" and "-0.5" remain acceptable and are left to the parser.
    int firstDigitIndex = text.StartsWith("-", StringComparison.Ordinal) ? 1 : 0;
    if (text.Length > firstDigitIndex + 1 && text[firstDigitIndex] == '0')
    {
        char following = text[firstDigitIndex + 1];
        if (following >= '0' && following <= '9')
        {
            throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
        }
    }

    try
    {
        return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
    }
    catch (OverflowException)
    {
        throw new InvalidProtocolBufferException($"Value out of range: {text}");
    }
}
785 
/// <summary>
/// Ensures that a parsed infinite or NaN value came from exactly the expected text
/// ("Infinity", "-Infinity" or "NaN"). This compensates for double.Parse/float.Parse being
/// lenient about whitespace, and for Mono parsing out-of-range values as infinity.
/// </summary>
/// <param name="text">The original text that was parsed.</param>
/// <param name="isPositiveInfinity">Whether the parsed value is positive infinity.</param>
/// <param name="isNegativeInfinity">Whether the parsed value is negative infinity.</param>
/// <param name="isNaN">Whether the parsed value is NaN.</param>
/// <exception cref="InvalidProtocolBufferException">
/// A flag is set but <paramref name="text"/> is not the exact matching literal.
/// </exception>
private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
{
    bool badPositiveInfinity = isPositiveInfinity && text != "Infinity";
    bool badNegativeInfinity = isNegativeInfinity && text != "-Infinity";
    bool badNaN = isNaN && text != "NaN";

    if (badPositiveInfinity || badNegativeInfinity || badNaN)
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }
}
800 
/// <summary>
/// Parses the JSON string representation of a Timestamp and stores the resulting
/// seconds and nanos in <paramref name="message"/> via its field accessors.
/// </summary>
/// <param name="message">The message whose Timestamp seconds/nanos fields are set.</param>
/// <param name="token">The token to parse; it must be a string value.</param>
/// <exception cref="InvalidProtocolBufferException">
/// The token is not a string, does not match the timestamp pattern, has an invalid date/time or
/// UTC offset, or the offset-adjusted result is outside the supported range.
/// </exception>
private static void MergeTimestamp(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Timestamp");
    }
    var match = TimestampRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
    }
    var dateTime = match.Groups["datetime"].Value;
    var subseconds = match.Groups["subseconds"].Value;
    var offset = match.Groups["offset"].Value;

    try
    {
        // The date/time part is interpreted as UTC here; any explicit offset is applied afterwards.
        DateTime parsed = DateTime.ParseExact(
            dateTime,
            "yyyy-MM-dd'T'HH:mm:ss",
            CultureInfo.InvariantCulture,
            DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
        // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
        Timestamp timestamp = Timestamp.FromDateTime(parsed);
        int nanosToAdd = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            // The captured group includes the leading '.', hence Substring(1) and the
            // lookup of the scaling factor by the full group length.
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
        }
        int secondsToAdd = 0;
        if (offset != "Z")
        {
            // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
            int sign = offset[0] == '-' ? 1 : -1;
            int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
            // Parsed with the invariant culture, like every other numeric component of the timestamp.
            int minutes = int.Parse(offset.Substring(4, 2), CultureInfo.InvariantCulture);
            int totalMinutes = hours * 60 + minutes;
            // Offsets of more than 18 hours are rejected.
            if (totalMinutes > 18 * 60)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            if (totalMinutes == 0 && sign == 1)
            {
                // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            // We need to *subtract* the offset from local time to get UTC.
            secondsToAdd = sign * totalMinutes * 60;
        }
        // Ensure we've got the right signs. Currently unnecessary, but easy to do.
        if (secondsToAdd < 0 && nanosToAdd > 0)
        {
            secondsToAdd++;
            nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond;
        }
        if (secondsToAdd != 0 || nanosToAdd != 0)
        {
            timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
            // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
            // anywhere, but we shouldn't parse it.
            if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
        }
        message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
        message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
    }
    catch (FormatException)
    {
        // Raised by DateTime.ParseExact / int.Parse for text that matched the regex but is not a valid value.
        throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
    }
}
876 
/// <summary>
/// Parses the JSON string representation of a Duration and stores the resulting
/// seconds and nanos in <paramref name="message"/> via its field accessors.
/// </summary>
/// <param name="message">The message whose Duration seconds/nanos fields are set.</param>
/// <param name="token">The token to parse; it must be a string value.</param>
/// <exception cref="InvalidProtocolBufferException">
/// The token is not a string, does not match the duration pattern, has leading insignificant
/// zeroes in the seconds part, or the parsed seconds/nanos pair is rejected by
/// <c>Duration.IsNormalized</c>.
/// </exception>
private static void MergeDuration(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Duration");
    }
    var match = DurationRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var sign = match.Groups["sign"].Value;
    var secondsText = match.Groups["int"].Value;
    // Prohibit leading insignificant zeroes
    if (secondsText[0] == '0' && secondsText.Length > 1)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var subseconds = match.Groups["subseconds"].Value;
    // The sign applies to both the seconds and the nanos components.
    var multiplier = sign == "-" ? -1 : 1;

    try
    {
        long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
        int nanos = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            // The captured group includes the leading '.', hence Substring(1) and the
            // lookup of the scaling factor by the full group length.
            // Parsed with the invariant culture, like the seconds component above.
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier;
        }
        if (!Duration.IsNormalized(seconds, nanos))
        {
            throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
        }
        message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds);
        message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
    }
}
920 
/// <summary>
/// Parses the JSON string representation of a FieldMask and appends each path,
/// converted to snake case, to the paths list of <paramref name="message"/>.
/// </summary>
/// <param name="message">The message whose FieldMask paths field receives the parsed paths.</param>
/// <param name="token">The token to parse; it must be a string value.</param>
/// <exception cref="InvalidProtocolBufferException">
/// The token is not a string, or a path is rejected by <c>ToSnakeCase</c>.
/// </exception>
private static void MergeFieldMask(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for FieldMask");
    }

    // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"?
    string[] camelCasePaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries);

    var pathsAccessor = message.Descriptor.Fields[FieldMask.PathsFieldNumber].Accessor;
    IList targetPaths = (IList) pathsAccessor.GetValue(message);
    foreach (string camelCasePath in camelCasePaths)
    {
        targetPaths.Add(ToSnakeCase(camelCasePath));
    }
}
935 
// Ported from src/google/protobuf/util/internal/utility.cc
/// <summary>
/// Converts camel-case text to snake case: each ASCII upper-case letter is lower-cased, and an
/// underscore is inserted before it when it starts a new word. Text which already contains an
/// underscore is rejected.
/// </summary>
/// <param name="text">The text to convert. Must not be null.</param>
/// <returns>The snake-case form of <paramref name="text"/>.</returns>
/// <exception cref="InvalidProtocolBufferException"><paramref name="text"/> contains an underscore.</exception>
private static string ToSnakeCase(string text)
{
    var result = new StringBuilder(text.Length * 2);
    // True when the previously processed character was not an ASCII capital.
    bool previousWasNotCap = false;

    for (int index = 0; index < text.Length; index++)
    {
        char current = text[index];
        if (current == '_')
        {
            throw new InvalidProtocolBufferException($"Invalid field mask: {text}");
        }

        bool isAsciiUpper = current >= 'A' && current <= 'Z';
        if (!isAsciiUpper)
        {
            result.Append(current);
            previousWasNotCap = true;
            continue;
        }

        // Consider when the current character B is capitalized:
        // 1) At beginning of input:   "B..." => "b..."
        //    (e.g. "Biscuit" => "biscuit")
        // 2) Following a lowercase:   "...aB..." => "...a_b..."
        //    (e.g. "gBike" => "g_bike")
        // 3) At the end of input:     "...AB" => "...ab"
        //    (e.g. "GoogleLAB" => "google_lab")
        // 4) Followed by a lowercase: "...ABc..." => "...a_bc..."
        //    (e.g. "GBike" => "g_bike")
        bool nextIsAsciiLower = index + 1 < text.Length
            && text[index + 1] >= 'a'
            && text[index + 1] <= 'z';
        // "index > 0" excludes case 1; the underscore is added for cases 2 and 4 only.
        if (index > 0 && (previousWasNotCap || nextIsAsciiLower))
        {
            result.Append('_');
        }

        // We already know the character is an upper-case ASCII letter, so shift it to lower case directly.
        result.Append((char) (current + 'a' - 'A'));
        previousWasNotCap = false;
    }
    return result.ToString();
}
985         #endregion
986 
/// <summary>
/// Settings controlling JSON parsing. All properties are read-only; the <c>With...</c>
/// methods return a new instance rather than modifying the existing one.
/// </summary>
public sealed class Settings
{
    /// <summary>
    /// The settings used by <see cref="JsonParser.Default"/>: the same default recursion
    /// limit as <see cref="CodedInputStream"/>, and an empty type registry.
    /// </summary>
    public static Settings Default { get; }

    /// <summary>
    /// The maximum message nesting depth accepted while parsing. Only messages count towards
    /// this limit, not collections - so a message within a collection within a message is
    /// at depth 2, not 3.
    /// </summary>
    public int RecursionLimit { get; }

    /// <summary>
    /// The type registry consulted when parsing <see cref="Any"/> messages.
    /// </summary>
    public TypeRegistry TypeRegistry { get; }

    /// <summary>
    /// <c>true</c> if unknown fields are silently ignored by the parser; <c>false</c> if
    /// encountering one causes an exception to be thrown.
    /// </summary>
    public bool IgnoreUnknownFields { get; }

    // Workaround for the Mono compiler complaining about XML comments not being on
    // valid language elements.
    static Settings()
    {
        Default = new Settings(CodedInputStream.DefaultRecursionLimit);
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object with the given recursion limit and an empty type registry.
    /// </summary>
    /// <param name="recursionLimit">The maximum depth of messages to parse</param>
    public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty)
    {
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object with the given recursion limit and type registry.
    /// Unknown fields are not ignored.
    /// </summary>
    /// <param name="recursionLimit">The maximum depth of messages to parse</param>
    /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages. Must not be null.</param>
    public Settings(int recursionLimit, TypeRegistry typeRegistry) : this(recursionLimit, typeRegistry, false)
    {
    }

    // The single constructor which actually assigns state; every other constructor and
    // With... method funnels through here, so the null check applies to all of them.
    private Settings(int recursionLimit, TypeRegistry typeRegistry, bool ignoreUnknownFields)
    {
        RecursionLimit = recursionLimit;
        TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
        IgnoreUnknownFields = ignoreUnknownFields;
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object based on this one, but set to either ignore
    /// unknown fields or throw an exception when they are encountered.
    /// </summary>
    /// <param name="ignoreUnknownFields"><c>true</c> if unknown fields should be ignored when parsing; <c>false</c> to throw an exception.</param>
    public Settings WithIgnoreUnknownFields(bool ignoreUnknownFields)
    {
        return new Settings(RecursionLimit, TypeRegistry, ignoreUnknownFields);
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object based on this one, but with the given recursion limit.
    /// </summary>
    /// <param name="recursionLimit">The new recursion limit.</param>
    public Settings WithRecursionLimit(int recursionLimit)
    {
        return new Settings(recursionLimit, TypeRegistry, IgnoreUnknownFields);
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object based on this one, but with the given type registry.
    /// </summary>
    /// <param name="typeRegistry">The new type registry. Must not be null.</param>
    public Settings WithTypeRegistry(TypeRegistry typeRegistry)
    {
        var checkedRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
        return new Settings(RecursionLimit, checkedRegistry, IgnoreUnknownFields);
    }
}
1072     }
1073 }
1074