• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #region Copyright notice and license
2 // Protocol Buffers - Google's data interchange format
3 // Copyright 2015 Google Inc.  All rights reserved.
4 //
5 // Use of this source code is governed by a BSD-style
6 // license that can be found in the LICENSE file or at
7 // https://developers.google.com/open-source/licenses/bsd
8 #endregion
9 
10 using Google.Protobuf.Reflection;
11 using Google.Protobuf.WellKnownTypes;
12 using System;
13 using System.Collections;
14 using System.Collections.Generic;
15 using System.Globalization;
16 using System.IO;
17 using System.Linq;
18 using System.Text;
19 using System.Text.RegularExpressions;
20 
21 namespace Google.Protobuf
22 {
23     /// <summary>
24     /// Reflection-based converter from JSON to messages.
25     /// </summary>
26     /// <remarks>
27     /// <para>
28     /// Instances of this class are thread-safe, with no mutable state.
29     /// </para>
30     /// <para>
31     /// This is a simple start to get JSON parsing working. As it's reflection-based,
32     /// it's not as quick as baking calls into generated messages - but is a simpler implementation.
33     /// (This code is generally not heavily optimized.)
34     /// </para>
35     /// </remarks>
36     public sealed class JsonParser
37     {
38         // Note: using 0-9 instead of \d to ensure no non-ASCII digits.
39         // This regex isn't a complete validator, but will remove *most* invalid input. We rely on parsing to do the rest.
40         private static readonly Regex TimestampRegex = new Regex(@"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$", FrameworkPortability.CompiledRegexWhereAvailable);
41         private static readonly Regex DurationRegex = new Regex(@"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$", FrameworkPortability.CompiledRegexWhereAvailable);
42         private static readonly int[] SubsecondScalingFactors = { 0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1 };
43         private static readonly char[] FieldMaskPathSeparators = new[] { ',' };
44         private static readonly EnumDescriptor NullValueDescriptor = StructReflection.Descriptor.EnumTypes.Single(ed => ed.ClrType == typeof(NullValue));
45 
46         private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);
47 
48         // TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers
49         // and the signatures of various methods.
50         private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>> WellKnownTypeHandlers = new()
51         {
52             { Timestamp.Descriptor.FullName, (parser, message, tokenizer) => MergeTimestamp(message, tokenizer.Next()) },
53             { Duration.Descriptor.FullName, (parser, message, tokenizer) => MergeDuration(message, tokenizer.Next()) },
54             { Value.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStructValue(message, tokenizer) },
55             { ListValue.Descriptor.FullName, (parser, message, tokenizer) =>
56                 parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) },
57             { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) },
58             { Any.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeAny(message, tokenizer) },
59             { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) },
60             { Int32Value.Descriptor.FullName, MergeWrapperField },
61             { Int64Value.Descriptor.FullName, MergeWrapperField },
62             { UInt32Value.Descriptor.FullName, MergeWrapperField },
63             { UInt64Value.Descriptor.FullName, MergeWrapperField },
64             { FloatValue.Descriptor.FullName, MergeWrapperField },
65             { DoubleValue.Descriptor.FullName, MergeWrapperField },
66             { BytesValue.Descriptor.FullName, MergeWrapperField },
67             { StringValue.Descriptor.FullName, MergeWrapperField },
68             { BoolValue.Descriptor.FullName, MergeWrapperField }
69         };
70 
71         // Convenience method to avoid having to repeat the same code multiple times in the above
72         // dictionary initialization.
MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)73         private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
74         {
75             parser.MergeField(message, message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber], tokenizer);
76         }
77 
78         /// <summary>
79         /// Returns a formatter using the default settings.
80         /// </summary>
81         public static JsonParser Default { get { return defaultInstance; } }
82 
83         private readonly Settings settings;
84 
85         /// <summary>
86         /// Creates a new formatted with the given settings.
87         /// </summary>
88         /// <param name="settings">The settings.</param>
JsonParser(Settings settings)89         public JsonParser(Settings settings)
90         {
91             this.settings = ProtoPreconditions.CheckNotNull(settings, nameof(settings));
92         }
93 
94         /// <summary>
95         /// Parses <paramref name="json"/> and merges the information into the given message.
96         /// </summary>
97         /// <param name="message">The message to merge the JSON information into.</param>
98         /// <param name="json">The JSON to parse.</param>
Merge(IMessage message, string json)99         internal void Merge(IMessage message, string json)
100         {
101             Merge(message, new StringReader(json));
102         }
103 
104         /// <summary>
105         /// Parses JSON read from <paramref name="jsonReader"/> and merges the information into the given message.
106         /// </summary>
107         /// <param name="message">The message to merge the JSON information into.</param>
108         /// <param name="jsonReader">Reader providing the JSON to parse.</param>
Merge(IMessage message, TextReader jsonReader)109         internal void Merge(IMessage message, TextReader jsonReader)
110         {
111             var tokenizer = JsonTokenizer.FromTextReader(jsonReader);
112             Merge(message, tokenizer);
113             var lastToken = tokenizer.Next();
114             if (lastToken != JsonToken.EndDocument)
115             {
116                 throw new InvalidProtocolBufferException("Expected end of JSON after object");
117             }
118         }
119 
120         /// <summary>
121         /// Merges the given message using data from the given tokenizer. In most cases, the next
122         /// token should be a "start object" token, but wrapper types and nullity can invalidate
123         /// that assumption. This is implemented as an LL(1) recursive descent parser over the stream
124         /// of tokens provided by the tokenizer. This token stream is assumed to be valid JSON, with the
125         /// tokenizer performing that validation - but not every token stream is valid "protobuf JSON".
126         /// </summary>
Merge(IMessage message, JsonTokenizer tokenizer)127         private void Merge(IMessage message, JsonTokenizer tokenizer)
128         {
129             if (tokenizer.ObjectDepth > settings.RecursionLimit)
130             {
131                 throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
132             }
133             if (message.Descriptor.IsWellKnownType)
134             {
135                 if (WellKnownTypeHandlers.TryGetValue(message.Descriptor.FullName, out Action<JsonParser, IMessage, JsonTokenizer> handler))
136                 {
137                     handler(this, message, tokenizer);
138                     return;
139                 }
140                 // Well-known types with no special handling continue in the normal way.
141             }
142             var token = tokenizer.Next();
143             if (token.Type != JsonToken.TokenType.StartObject)
144             {
145                 throw new InvalidProtocolBufferException("Expected an object");
146             }
147             var descriptor = message.Descriptor;
148             var jsonFieldMap = descriptor.Fields.ByJsonName();
149             // All the oneof fields we've already accounted for - we can only see each of them once.
150             // The set is created lazily to avoid the overhead of creating a set for every message
151             // we parsed, when oneofs are relatively rare.
152             HashSet<OneofDescriptor> seenOneofs = null;
153             while (true)
154             {
155                 token = tokenizer.Next();
156                 if (token.Type == JsonToken.TokenType.EndObject)
157                 {
158                     return;
159                 }
160                 if (token.Type != JsonToken.TokenType.Name)
161                 {
162                     throw new InvalidOperationException("Unexpected token type " + token.Type);
163                 }
164                 string name = token.StringValue;
165                 if (jsonFieldMap.TryGetValue(name, out FieldDescriptor field))
166                 {
167                     if (field.ContainingOneof != null)
168                     {
169                         if (seenOneofs == null)
170                         {
171                             seenOneofs = new HashSet<OneofDescriptor>();
172                         }
173                         if (!seenOneofs.Add(field.ContainingOneof))
174                         {
175                             throw new InvalidProtocolBufferException($"Multiple values specified for oneof {field.ContainingOneof.Name}");
176                         }
177                     }
178                     MergeField(message, field, tokenizer);
179                 }
180                 else
181                 {
182                     if (settings.IgnoreUnknownFields)
183                     {
184                         tokenizer.SkipValue();
185                     }
186                     else
187                     {
188                         throw new InvalidProtocolBufferException("Unknown field: " + name);
189                     }
190                 }
191             }
192         }
193 
MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)194         private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
195         {
196             var token = tokenizer.Next();
197             if (token.Type == JsonToken.TokenType.Null)
198             {
199                 // Clear the field if we see a null token, unless it's for a singular field of type
200                 // google.protobuf.Value or google.protobuf.NullValue.
201                 // Note: different from Java API, which just ignores it.
202                 // TODO: Bring it more in line? Discuss...
203                 if (field.IsMap || field.IsRepeated ||
204                     !(IsGoogleProtobufValueField(field) || IsGoogleProtobufNullValueField(field)))
205                 {
206                     field.Accessor.Clear(message);
207                     return;
208                 }
209             }
210             tokenizer.PushBack(token);
211 
212             if (field.IsMap)
213             {
214                 MergeMapField(message, field, tokenizer);
215             }
216             else if (field.IsRepeated)
217             {
218                 MergeRepeatedField(message, field, tokenizer);
219             }
220             else
221             {
222                 if (TryParseSingleValue(field, tokenizer, out var value))
223                 {
224                     field.Accessor.SetValue(message, value);
225                 }
226             }
227         }
228 
MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)229         private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
230         {
231             var token = tokenizer.Next();
232             if (token.Type != JsonToken.TokenType.StartArray)
233             {
234                 throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + token.Type);
235             }
236 
237             IList list = (IList) field.Accessor.GetValue(message);
238             while (true)
239             {
240                 token = tokenizer.Next();
241                 if (token.Type == JsonToken.TokenType.EndArray)
242                 {
243                     return;
244                 }
245                 tokenizer.PushBack(token);
246                 if (TryParseSingleValue(field, tokenizer, out object value))
247                 {
248                     if (value == null)
249                     {
250                         throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
251                     }
252                     list.Add(value);
253                 }
254             }
255         }
256 
MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)257         private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
258         {
259             // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those.
260             var token = tokenizer.Next();
261             if (token.Type != JsonToken.TokenType.StartObject)
262             {
263                 throw new InvalidProtocolBufferException("Expected an object to populate a map");
264             }
265 
266             var type = field.MessageType;
267             var keyField = type.FindFieldByNumber(1);
268             var valueField = type.FindFieldByNumber(2);
269             if (keyField == null || valueField == null)
270             {
271                 throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
272             }
273             IDictionary dictionary = (IDictionary) field.Accessor.GetValue(message);
274 
275             while (true)
276             {
277                 token = tokenizer.Next();
278                 if (token.Type == JsonToken.TokenType.EndObject)
279                 {
280                     return;
281                 }
282                 object key = ParseMapKey(keyField, token.StringValue);
283                 if (TryParseSingleValue(valueField, tokenizer, out object value))
284                 {
285                     dictionary[key] = value ?? throw new InvalidProtocolBufferException("Map values must not be null");
286                 }
287             }
288         }
289 
IsGoogleProtobufValueField(FieldDescriptor field)290         private static bool IsGoogleProtobufValueField(FieldDescriptor field)
291         {
292             return field.FieldType == FieldType.Message &&
293                 field.MessageType.FullName == Value.Descriptor.FullName;
294         }
295 
IsGoogleProtobufNullValueField(FieldDescriptor field)296         private static bool IsGoogleProtobufNullValueField(FieldDescriptor field)
297         {
298             return field.FieldType == FieldType.Enum &&
299                 field.EnumType.FullName == NullValueDescriptor.FullName;
300         }
301 
302         /// <summary>
303         /// Attempts to parse a single value from the JSON. When the value is completely invalid,
304         /// this will still throw an exception; when it's "conditionally invalid" (currently meaning
305         /// "when there's an unknown enum string value") the method returns false instead.
306         /// </summary>
307         /// <returns>
308         /// true if the value was parsed successfully; false for an ignorable parse failure.
309         /// </returns>
TryParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer, out object value)310         private bool TryParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer, out object value)
311         {
312             var token = tokenizer.Next();
313             if (token.Type == JsonToken.TokenType.Null)
314             {
315                 // TODO: In order to support dynamic messages, we should really build this up
316                 // dynamically.
317                 if (IsGoogleProtobufValueField(field))
318                 {
319                     value = Value.ForNull();
320                 }
321                 else if (IsGoogleProtobufNullValueField(field))
322                 {
323                     value = NullValue.NullValue;
324                 }
325                 else
326                 {
327                     value = null;
328                 }
329                 return true;
330             }
331 
332             var fieldType = field.FieldType;
333             if (fieldType == FieldType.Message || fieldType == FieldType.Group)
334             {
335                 // Parse wrapper types as their constituent types.
336                 // TODO: What does this mean for null?
337                 if (field.MessageType.IsWrapperType)
338                 {
339                     field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
340                     fieldType = field.FieldType;
341                 }
342                 else
343                 {
344                     // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
345                     tokenizer.PushBack(token);
346                     IMessage subMessage = NewMessageForField(field);
347                     Merge(subMessage, tokenizer);
348                     value = subMessage;
349                     return true;
350                 }
351             }
352 
353             switch (token.Type)
354             {
355                 case JsonToken.TokenType.True:
356                 case JsonToken.TokenType.False:
357                     if (fieldType == FieldType.Bool)
358                     {
359                         value = token.Type == JsonToken.TokenType.True;
360                         return true;
361                     }
362                     // Fall through to "we don't support this type for this case"; could duplicate the behaviour of the default
363                     // case instead, but this way we'd only need to change one place.
364                     goto default;
365                 case JsonToken.TokenType.StringValue:
366                     if (field.FieldType != FieldType.Enum)
367                     {
368                         value = ParseSingleStringValue(field, token.StringValue);
369                         return true;
370                     }
371                     else
372                     {
373                         return TryParseEnumStringValue(field, token.StringValue, out value);
374                     }
375                 // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
376                 case JsonToken.TokenType.Number:
377                     value = ParseSingleNumberValue(field, token);
378                     return true;
379                 default:
380                     throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType);
381             }
382         }
383 
384         /// <summary>
385         /// Parses <paramref name="json"/> into a new message.
386         /// </summary>
387         /// <typeparam name="T">The type of message to create.</typeparam>
388         /// <param name="json">The JSON to parse.</param>
389         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
390         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
391         public T Parse<T>(string json) where T : IMessage, new()
392         {
393             ProtoPreconditions.CheckNotNull(json, nameof(json));
394             return Parse<T>(new StringReader(json));
395         }
396 
397         /// <summary>
398         /// Parses JSON read from <paramref name="jsonReader"/> into a new message.
399         /// </summary>
400         /// <typeparam name="T">The type of message to create.</typeparam>
401         /// <param name="jsonReader">Reader providing the JSON to parse.</param>
402         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
403         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
404         public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
405         {
406             ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
407             T message = new T();
408             Merge(message, jsonReader);
409             return message;
410         }
411 
412         /// <summary>
413         /// Parses <paramref name="json"/> into a new message.
414         /// </summary>
415         /// <param name="json">The JSON to parse.</param>
416         /// <param name="descriptor">Descriptor of message type to parse.</param>
417         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
418         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
Parse(string json, MessageDescriptor descriptor)419         public IMessage Parse(string json, MessageDescriptor descriptor)
420         {
421             ProtoPreconditions.CheckNotNull(json, nameof(json));
422             ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
423             return Parse(new StringReader(json), descriptor);
424         }
425 
426         /// <summary>
427         /// Parses JSON read from <paramref name="jsonReader"/> into a new message.
428         /// </summary>
429         /// <param name="jsonReader">Reader providing the JSON to parse.</param>
430         /// <param name="descriptor">Descriptor of message type to parse.</param>
431         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
432         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
Parse(TextReader jsonReader, MessageDescriptor descriptor)433         public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
434         {
435             ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
436             ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
437             IMessage message = descriptor.Parser.CreateTemplate();
438             Merge(message, jsonReader);
439             return message;
440         }
441 
MergeStructValue(IMessage message, JsonTokenizer tokenizer)442         private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
443         {
444             var firstToken = tokenizer.Next();
445             var fields = message.Descriptor.Fields;
446             switch (firstToken.Type)
447             {
448                 case JsonToken.TokenType.Null:
449                     fields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0);
450                     return;
451                 case JsonToken.TokenType.StringValue:
452                     fields[Value.StringValueFieldNumber].Accessor.SetValue(message, firstToken.StringValue);
453                     return;
454                 case JsonToken.TokenType.Number:
455                     fields[Value.NumberValueFieldNumber].Accessor.SetValue(message, firstToken.NumberValue);
456                     return;
457                 case JsonToken.TokenType.False:
458                 case JsonToken.TokenType.True:
459                     fields[Value.BoolValueFieldNumber].Accessor.SetValue(message, firstToken.Type == JsonToken.TokenType.True);
460                     return;
461                 case JsonToken.TokenType.StartObject:
462                     {
463                         var field = fields[Value.StructValueFieldNumber];
464                         var structMessage = NewMessageForField(field);
465                         tokenizer.PushBack(firstToken);
466                         Merge(structMessage, tokenizer);
467                         field.Accessor.SetValue(message, structMessage);
468                         return;
469                     }
470                 case JsonToken.TokenType.StartArray:
471                     {
472                         var field = fields[Value.ListValueFieldNumber];
473                         var list = NewMessageForField(field);
474                         tokenizer.PushBack(firstToken);
475                         Merge(list, tokenizer);
476                         field.Accessor.SetValue(message, list);
477                         return;
478                     }
479                 default:
480                     throw new InvalidOperationException("Unexpected token type: " + firstToken.Type);
481             }
482         }
483 
MergeStruct(IMessage message, JsonTokenizer tokenizer)484         private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
485         {
486             var token = tokenizer.Next();
487             if (token.Type != JsonToken.TokenType.StartObject)
488             {
489                 throw new InvalidProtocolBufferException("Expected object value for Struct");
490             }
491             tokenizer.PushBack(token);
492 
493             var field = message.Descriptor.Fields[Struct.FieldsFieldNumber];
494             MergeMapField(message, field, tokenizer);
495         }
496 
MergeAny(IMessage message, JsonTokenizer tokenizer)497         private void MergeAny(IMessage message, JsonTokenizer tokenizer)
498         {
499             // Record the token stream until we see the @type property. At that point, we can take the value, consult
500             // the type registry for the relevant message, and replay the stream, omitting the @type property.
501             var tokens = new List<JsonToken>();
502 
503             var token = tokenizer.Next();
504             if (token.Type != JsonToken.TokenType.StartObject)
505             {
506                 throw new InvalidProtocolBufferException("Expected object value for Any");
507             }
508             int typeUrlObjectDepth = tokenizer.ObjectDepth;
509 
510             // The check for the property depth protects us from nested Any values which occur before the type URL
511             // for *this* Any.
512             while (token.Type != JsonToken.TokenType.Name ||
513                 token.StringValue != JsonFormatter.AnyTypeUrlField ||
514                 tokenizer.ObjectDepth != typeUrlObjectDepth)
515             {
516                 tokens.Add(token);
517                 token = tokenizer.Next();
518 
519                 if (tokenizer.ObjectDepth < typeUrlObjectDepth)
520                 {
521                     throw new InvalidProtocolBufferException("Any message with no @type");
522                 }
523             }
524 
525             // Don't add the @type property or its value to the recorded token list
526             token = tokenizer.Next();
527             if (token.Type != JsonToken.TokenType.StringValue)
528             {
529                 throw new InvalidProtocolBufferException("Expected string value for Any.@type");
530             }
531             string typeUrl = token.StringValue;
532             string typeName = Any.GetTypeName(typeUrl);
533 
534             MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName);
535             if (descriptor == null)
536             {
537                 throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
538             }
539 
540             // Now replay the token stream we've already read and anything that remains of the object, just parsing it
541             // as normal. Our original tokenizer should end up at the end of the object.
542             var replay = JsonTokenizer.FromReplayedTokens(tokens, tokenizer);
543             var body = descriptor.Parser.CreateTemplate();
544             if (descriptor.IsWellKnownType)
545             {
546                 MergeWellKnownTypeAnyBody(body, replay);
547             }
548             else
549             {
550                 Merge(body, replay);
551             }
552             var data = body.ToByteString();
553 
554             // Now that we have the message data, we can pack it into an Any (the message received as a parameter).
555             message.Descriptor.Fields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
556             message.Descriptor.Fields[Any.ValueFieldNumber].Accessor.SetValue(message, data);
557         }
558 
559         // Well-known types end up in a property called "value" in the JSON. As there's no longer a @type property
560         // in the given JSON token stream, we should *only* have tokens of start-object, name("value"), the value
561         // itself, and then end-object.
MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)562         private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
563         {
564             var token = tokenizer.Next(); // Definitely start-object; checked in previous method
565             token = tokenizer.Next();
566             // TODO: What about an absent Int32Value, for example?
567             if (token.Type != JsonToken.TokenType.Name || token.StringValue != JsonFormatter.AnyWellKnownTypeValueField)
568             {
569                 throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
570             }
571             Merge(body, tokenizer);
572             token = tokenizer.Next();
573             if (token.Type != JsonToken.TokenType.EndObject)
574             {
575                 throw new InvalidProtocolBufferException($"Expected end-object token after @type/value for well-known type");
576             }
577         }
578 
579         #region Utility methods which don't depend on the state (or settings) of the parser.
ParseMapKey(FieldDescriptor field, string keyText)580         private static object ParseMapKey(FieldDescriptor field, string keyText)
581         {
582             switch (field.FieldType)
583             {
584                 case FieldType.Bool:
585                     if (keyText == "true")
586                     {
587                         return true;
588                     }
589                     if (keyText == "false")
590                     {
591                         return false;
592                     }
593                     throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText);
594                 case FieldType.String:
595                     return keyText;
596                 case FieldType.Int32:
597                 case FieldType.SInt32:
598                 case FieldType.SFixed32:
599                     return ParseNumericString(keyText, int.Parse);
600                 case FieldType.UInt32:
601                 case FieldType.Fixed32:
602                     return ParseNumericString(keyText, uint.Parse);
603                 case FieldType.Int64:
604                 case FieldType.SInt64:
605                 case FieldType.SFixed64:
606                     return ParseNumericString(keyText, long.Parse);
607                 case FieldType.UInt64:
608                 case FieldType.Fixed64:
609                     return ParseNumericString(keyText, ulong.Parse);
610                 default:
611                     throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
612             }
613         }
614 
615         private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
616         {
617             double value = token.NumberValue;
618             checked
619             {
620                 try
621                 {
622                     switch (field.FieldType)
623                     {
624                         case FieldType.Int32:
625                         case FieldType.SInt32:
626                         case FieldType.SFixed32:
627                             CheckInteger(value);
628                             return (int) value;
629                         case FieldType.UInt32:
630                         case FieldType.Fixed32:
631                             CheckInteger(value);
632                             return (uint) value;
633                         case FieldType.Int64:
634                         case FieldType.SInt64:
635                         case FieldType.SFixed64:
636                             CheckInteger(value);
637                             return (long) value;
638                         case FieldType.UInt64:
639                         case FieldType.Fixed64:
640                             CheckInteger(value);
641                             return (ulong) value;
642                         case FieldType.Double:
643                             return value;
644                         case FieldType.Float:
645                             if (double.IsNaN(value))
646                             {
647                                 return float.NaN;
648                             }
649                             float converted = (float) value;
650                             // If the value is out of range of float, the cast representation will be infinite.
651                             // If the original value was infinite as well, that's fine - we'll return the 32-bit
652                             // version (with the correct sign).
653                             if (float.IsInfinity(converted) && !double.IsInfinity(value))
654                             {
655                                 throw new InvalidProtocolBufferException($"Value out of range: {value}");
656                             }
657                             return converted;
658                         case FieldType.Enum:
659                             CheckInteger(value);
660                             // Just return it as an int, and let the CLR convert it.
661                             // Note that we deliberately don't check that it's a known value.
662                             return (int) value;
663                         default:
664                             throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
665                     }
666                 }
667                 catch (OverflowException)
668                 {
669                     throw new InvalidProtocolBufferException($"Value out of range: {value}");
670                 }
671             }
672         }
673 
674         private static void CheckInteger(double value)
675         {
676             if (double.IsInfinity(value) || double.IsNaN(value))
677             {
678                 throw new InvalidProtocolBufferException($"Value not an integer: {value}");
679             }
680             if (value != Math.Floor(value))
681             {
682                 throw new InvalidProtocolBufferException($"Value not an integer: {value}");
683             }
684         }
685 
686         private static object ParseSingleStringValue(FieldDescriptor field, string text)
687         {
688             switch (field.FieldType)
689             {
690                 case FieldType.String:
691                     return text;
692                 case FieldType.Bytes:
693                     try
694                     {
695                         return ByteString.FromBase64(text);
696                     }
697                     catch (FormatException e)
698                     {
699                         throw InvalidProtocolBufferException.InvalidBase64(e);
700                     }
701                 case FieldType.Int32:
702                 case FieldType.SInt32:
703                 case FieldType.SFixed32:
704                     return ParseNumericString(text, int.Parse);
705                 case FieldType.UInt32:
706                 case FieldType.Fixed32:
707                     return ParseNumericString(text, uint.Parse);
708                 case FieldType.Int64:
709                 case FieldType.SInt64:
710                 case FieldType.SFixed64:
711                     return ParseNumericString(text, long.Parse);
712                 case FieldType.UInt64:
713                 case FieldType.Fixed64:
714                     return ParseNumericString(text, ulong.Parse);
715                 case FieldType.Double:
716                     double d = ParseNumericString(text, double.Parse);
717                     ValidateInfinityAndNan(text, double.IsPositiveInfinity(d), double.IsNegativeInfinity(d), double.IsNaN(d));
718                     return d;
719                 case FieldType.Float:
720                     float f = ParseNumericString(text, float.Parse);
721                     ValidateInfinityAndNan(text, float.IsPositiveInfinity(f), float.IsNegativeInfinity(f), float.IsNaN(f));
722                     return f;
723                 case FieldType.Enum:
724                     throw new InvalidOperationException($"Use TryParseEnumStringValue for enums");
725                 default:
726                     throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
727             }
728         }
729 
730         private bool TryParseEnumStringValue(FieldDescriptor field, string text, out object value)
731         {
732             var enumValue = field.EnumType.FindValueByName(text);
733             if (enumValue == null)
734             {
735                 if (settings.IgnoreUnknownFields)
736                 {
737                     value = null;
738                     return false;
739                 }
740                 else
741                 {
742                     throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
743                 }
744             }
745             // Just return it as an int, and let the CLR convert it.
746             value = enumValue.Number;
747             return true;
748         }
749 
750         /// <summary>
751         /// Creates a new instance of the message type for the given field.
752         /// </summary>
753         private static IMessage NewMessageForField(FieldDescriptor field)
754         {
755             return field.MessageType.Parser.CreateTemplate();
756         }
757 
758         private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
759         {
760             // Can't prohibit this with NumberStyles.
761             if (text.StartsWith("+"))
762             {
763                 throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
764             }
765             if (text.StartsWith("0") && text.Length > 1)
766             {
767                 if (text[1] >= '0' && text[1] <= '9')
768                 {
769                     throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
770                 }
771             }
772             else if (text.StartsWith("-0") && text.Length > 2)
773             {
774                 if (text[2] >= '0' && text[2] <= '9')
775                 {
776                     throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
777                 }
778             }
779             try
780             {
781                 return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
782             }
783             catch (FormatException)
784             {
785                 throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
786             }
787             catch (OverflowException)
788             {
789                 throw new InvalidProtocolBufferException($"Value out of range: {text}");
790             }
791         }
792 
793         /// <summary>
794         /// Checks that any infinite/NaN values originated from the correct text.
795         /// This corrects the lenient whitespace handling of double.Parse/float.Parse, as well as the
796         /// way that Mono parses out-of-range values as infinity.
797         /// </summary>
ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)798         private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
799         {
800             if ((isPositiveInfinity && text != "Infinity") ||
801                 (isNegativeInfinity && text != "-Infinity") ||
802                 (isNaN && text != "NaN"))
803             {
804                 throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
805             }
806         }
807 
MergeTimestamp(IMessage message, JsonToken token)808         private static void MergeTimestamp(IMessage message, JsonToken token)
809         {
810             if (token.Type != JsonToken.TokenType.StringValue)
811             {
812                 throw new InvalidProtocolBufferException("Expected string value for Timestamp");
813             }
814             var match = TimestampRegex.Match(token.StringValue);
815             if (!match.Success)
816             {
817                 throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
818             }
819             var dateTime = match.Groups["datetime"].Value;
820             var subseconds = match.Groups["subseconds"].Value;
821             var offset = match.Groups["offset"].Value;
822 
823             try
824             {
825                 DateTime parsed = DateTime.ParseExact(
826                     dateTime,
827                     "yyyy-MM-dd'T'HH:mm:ss",
828                     CultureInfo.InvariantCulture,
829                     DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
830                 // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
831                 Timestamp timestamp = Timestamp.FromDateTime(parsed);
832                 int nanosToAdd = 0;
833                 if (subseconds != "")
834                 {
835                     // This should always work, as we've got 1-9 digits.
836                     int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
837                     nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
838                 }
839                 int secondsToAdd = 0;
840                 if (offset != "Z")
841                 {
842                     // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
843                     int sign = offset[0] == '-' ? 1 : -1;
844                     int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
845                     int minutes = int.Parse(offset.Substring(4, 2));
846                     int totalMinutes = hours * 60 + minutes;
847                     if (totalMinutes > 18 * 60)
848                     {
849                         throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
850                     }
851                     if (totalMinutes == 0 && sign == 1)
852                     {
853                         // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
854                         throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
855                     }
856                     // We need to *subtract* the offset from local time to get UTC.
857                     secondsToAdd = sign * totalMinutes * 60;
858                 }
859                 // Ensure we've got the right signs. Currently unnecessary, but easy to do.
860                 if (secondsToAdd < 0 && nanosToAdd > 0)
861                 {
862                     secondsToAdd++;
863                     nanosToAdd -= Duration.NanosecondsPerSecond;
864                 }
865                 if (secondsToAdd != 0 || nanosToAdd != 0)
866                 {
867                     timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
868                     // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
869                     // anywhere, but we shouldn't parse it.
870                     if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
871                     {
872                         throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
873                     }
874                 }
875                 message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
876                 message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
877             }
878             catch (FormatException)
879             {
880                 throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
881             }
882         }
883 
MergeDuration(IMessage message, JsonToken token)884         private static void MergeDuration(IMessage message, JsonToken token)
885         {
886             if (token.Type != JsonToken.TokenType.StringValue)
887             {
888                 throw new InvalidProtocolBufferException("Expected string value for Duration");
889             }
890             var match = DurationRegex.Match(token.StringValue);
891             if (!match.Success)
892             {
893                 throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
894             }
895             var sign = match.Groups["sign"].Value;
896             var secondsText = match.Groups["int"].Value;
897             // Prohibit leading insignficant zeroes
898             if (secondsText[0] == '0' && secondsText.Length > 1)
899             {
900                 throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
901             }
902             var subseconds = match.Groups["subseconds"].Value;
903             var multiplier = sign == "-" ? -1 : 1;
904 
905             try
906             {
907                 long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
908                 int nanos = 0;
909                 if (subseconds != "")
910                 {
911                     // This should always work, as we've got 1-9 digits.
912                     int parsedFraction = int.Parse(subseconds.Substring(1));
913                     nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier;
914                 }
915                 if (!Duration.IsNormalized(seconds, nanos))
916                 {
917                     throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
918                 }
919                 message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds);
920                 message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos);
921             }
922             catch (FormatException)
923             {
924                 throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
925             }
926         }
927 
MergeFieldMask(IMessage message, JsonToken token)928         private static void MergeFieldMask(IMessage message, JsonToken token)
929         {
930             if (token.Type != JsonToken.TokenType.StringValue)
931             {
932                 throw new InvalidProtocolBufferException("Expected string value for FieldMask");
933             }
934             // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"?
935             string[] jsonPaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries);
936             IList messagePaths = (IList) message.Descriptor.Fields[FieldMask.PathsFieldNumber].Accessor.GetValue(message);
937             foreach (var path in jsonPaths)
938             {
939                 messagePaths.Add(ToSnakeCase(path));
940             }
941         }
942 
943         // Ported from src/google/protobuf/util/internal/utility.cc
ToSnakeCase(string text)944         private static string ToSnakeCase(string text)
945         {
946             var builder = new StringBuilder(text.Length * 2);
947             // Note: this is probably unnecessary now, but currently retained to be as close as possible to the
948             // C++, whilst still throwing an exception on underscores.
949             bool wasNotUnderscore = false;  // Initialize to false for case 1 (below)
950             bool wasNotCap = false;
951 
952             for (int i = 0; i < text.Length; i++)
953             {
954                 char c = text[i];
955                 if (c >= 'A' && c <= 'Z') // ascii_isupper
956                 {
957                     // Consider when the current character B is capitalized:
958                     // 1) At beginning of input:   "B..." => "b..."
959                     //    (e.g. "Biscuit" => "biscuit")
960                     // 2) Following a lowercase:   "...aB..." => "...a_b..."
961                     //    (e.g. "gBike" => "g_bike")
962                     // 3) At the end of input:     "...AB" => "...ab"
963                     //    (e.g. "GoogleLAB" => "google_lab")
964                     // 4) Followed by a lowercase: "...ABc..." => "...a_bc..."
965                     //    (e.g. "GBike" => "g_bike")
966                     if (wasNotUnderscore &&               //            case 1 out
967                         (wasNotCap ||                     // case 2 in, case 3 out
968                          (i + 1 < text.Length &&         //            case 3 out
969                           (text[i + 1] >= 'a' && text[i + 1] <= 'z')))) // ascii_islower(text[i + 1])
970                     {  // case 4 in
971                        // We add an underscore for case 2 and case 4.
972                         builder.Append('_');
973                     }
974                     // ascii_tolower, but we already know that c *is* an upper case ASCII character...
975                     builder.Append((char) (c + 'a' - 'A'));
976                     wasNotUnderscore = true;
977                     wasNotCap = false;
978                 }
979                 else
980                 {
981                     builder.Append(c);
982                     if (c == '_')
983                     {
984                         throw new InvalidProtocolBufferException($"Invalid field mask: {text}");
985                     }
986                     wasNotUnderscore = true;
987                     wasNotCap = true;
988                 }
989             }
990             return builder.ToString();
991         }
992         #endregion
993 
994         /// <summary>
995         /// Settings controlling JSON parsing.
996         /// </summary>
997         public sealed class Settings
998         {
999             /// <summary>
1000             /// Default settings, as used by <see cref="JsonParser.Default"/>. This has the same default
1001             /// recursion limit as <see cref="CodedInputStream"/>, and an empty type registry.
1002             /// </summary>
1003             public static Settings Default { get; }
1004 
1005             // Workaround for the Mono compiler complaining about XML comments not being on
1006             // valid language elements.
Settings()1007             static Settings()
1008             {
1009                 Default = new Settings(CodedInputStream.DefaultRecursionLimit);
1010             }
1011 
1012             /// <summary>
1013             /// The maximum depth of messages to parse. Note that this limit only applies to parsing
1014             /// messages, not collections - so a message within a collection within a message only counts as
1015             /// depth 2, not 3.
1016             /// </summary>
1017             public int RecursionLimit { get; }
1018 
1019             /// <summary>
1020             /// The type registry used to parse <see cref="Any"/> messages.
1021             /// </summary>
1022             public TypeRegistry TypeRegistry { get; }
1023 
1024             /// <summary>
1025             /// Whether the parser should ignore unknown fields (<c>true</c>) or throw an exception when
1026             /// they are encountered (<c>false</c>).
1027             /// </summary>
1028             public bool IgnoreUnknownFields { get; }
1029 
Settings(int recursionLimit, TypeRegistry typeRegistry, bool ignoreUnknownFields)1030             private Settings(int recursionLimit, TypeRegistry typeRegistry, bool ignoreUnknownFields)
1031             {
1032                 RecursionLimit = recursionLimit;
1033                 TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
1034                 IgnoreUnknownFields = ignoreUnknownFields;
1035             }
1036 
1037             /// <summary>
1038             /// Creates a new <see cref="Settings"/> object with the specified recursion limit.
1039             /// </summary>
1040             /// <param name="recursionLimit">The maximum depth of messages to parse</param>
Settings(int recursionLimit)1041             public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty)
1042             {
1043             }
1044 
1045             /// <summary>
1046             /// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry.
1047             /// </summary>
1048             /// <param name="recursionLimit">The maximum depth of messages to parse</param>
1049             /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages</param>
Settings(int recursionLimit, TypeRegistry typeRegistry)1050             public Settings(int recursionLimit, TypeRegistry typeRegistry) : this(recursionLimit, typeRegistry, false)
1051             {
1052             }
1053 
1054             /// <summary>
1055             /// Creates a new <see cref="Settings"/> object set to either ignore unknown fields, or throw an exception
1056             /// when unknown fields are encountered.
1057             /// </summary>
1058             /// <param name="ignoreUnknownFields"><c>true</c> if unknown fields should be ignored when parsing; <c>false</c> to throw an exception.</param>
1059             public Settings WithIgnoreUnknownFields(bool ignoreUnknownFields) => new(RecursionLimit, TypeRegistry, ignoreUnknownFields);
1060 
1061             /// <summary>
1062             /// Creates a new <see cref="Settings"/> object based on this one, but with the specified recursion limit.
1063             /// </summary>
1064             /// <param name="recursionLimit">The new recursion limit.</param>
WithRecursionLimit(int recursionLimit)1065             public Settings WithRecursionLimit(int recursionLimit) => new(recursionLimit, TypeRegistry, IgnoreUnknownFields);
1066 
1067             /// <summary>
1068             /// Creates a new <see cref="Settings"/> object based on this one, but with the specified type registry.
1069             /// </summary>
1070             /// <param name="typeRegistry">The new type registry. Must not be null.</param>
1071             public Settings WithTypeRegistry(TypeRegistry typeRegistry) =>
1072                 new(RecursionLimit,
1073                     ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry)),
1074                     IgnoreUnknownFields);
1075         }
1076     }
1077 }
1078