• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #region Copyright notice and license
2 // Protocol Buffers - Google's data interchange format
3 // Copyright 2015 Google Inc.  All rights reserved.
4 // https://developers.google.com/protocol-buffers/
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 //     * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #endregion
32 
33 using Google.Protobuf.Reflection;
34 using Google.Protobuf.WellKnownTypes;
35 using System;
36 using System.Collections;
37 using System.Collections.Generic;
38 using System.Globalization;
39 using System.IO;
40 using System.Text;
41 using System.Text.RegularExpressions;
42 
43 namespace Google.Protobuf
44 {
45     /// <summary>
46     /// Reflection-based converter from JSON to messages.
47     /// </summary>
48     /// <remarks>
49     /// <para>
50     /// Instances of this class are thread-safe, with no mutable state.
51     /// </para>
52     /// <para>
53     /// This is a simple start to get JSON parsing working. As it's reflection-based,
54     /// it's not as quick as baking calls into generated messages - but is a simpler implementation.
55     /// (This code is generally not heavily optimized.)
56     /// </para>
57     /// </remarks>
58     public sealed class JsonParser
59     {
60         // Note: using 0-9 instead of \d to ensure no non-ASCII digits.
61         // This regex isn't a complete validator, but will remove *most* invalid input. We rely on parsing to do the rest.
62         private static readonly Regex TimestampRegex = new Regex(@"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$", FrameworkPortability.CompiledRegexWhereAvailable);
63         private static readonly Regex DurationRegex = new Regex(@"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$", FrameworkPortability.CompiledRegexWhereAvailable);
64         private static readonly int[] SubsecondScalingFactors = { 0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1 };
65         private static readonly char[] FieldMaskPathSeparators = new[] { ',' };
66 
67         private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);
68 
69         // TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers
70         // and the signatures of various methods.
71         private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
72             WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
73         {
74             { Timestamp.Descriptor.FullName, (parser, message, tokenizer) => MergeTimestamp(message, tokenizer.Next()) },
75             { Duration.Descriptor.FullName, (parser, message, tokenizer) => MergeDuration(message, tokenizer.Next()) },
76             { Value.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStructValue(message, tokenizer) },
77             { ListValue.Descriptor.FullName, (parser, message, tokenizer) =>
78                 parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) },
79             { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) },
80             { Any.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeAny(message, tokenizer) },
81             { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) },
82             { Int32Value.Descriptor.FullName, MergeWrapperField },
83             { Int64Value.Descriptor.FullName, MergeWrapperField },
84             { UInt32Value.Descriptor.FullName, MergeWrapperField },
85             { UInt64Value.Descriptor.FullName, MergeWrapperField },
86             { FloatValue.Descriptor.FullName, MergeWrapperField },
87             { DoubleValue.Descriptor.FullName, MergeWrapperField },
88             { BytesValue.Descriptor.FullName, MergeWrapperField },
89             { StringValue.Descriptor.FullName, MergeWrapperField },
90             { BoolValue.Descriptor.FullName, MergeWrapperField }
91         };
92 
93         // Convenience method to avoid having to repeat the same code multiple times in the above
94         // dictionary initialization.
MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)95         private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
96         {
97             parser.MergeField(message, message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber], tokenizer);
98         }
99 
100         /// <summary>
101         /// Returns a formatter using the default settings.
102         /// </summary>
103         public static JsonParser Default { get { return defaultInstance; } }
104 
105         private readonly Settings settings;
106 
107         /// <summary>
108         /// Creates a new formatted with the given settings.
109         /// </summary>
110         /// <param name="settings">The settings.</param>
JsonParser(Settings settings)111         public JsonParser(Settings settings)
112         {
113             this.settings = settings;
114         }
115 
116         /// <summary>
117         /// Parses <paramref name="json"/> and merges the information into the given message.
118         /// </summary>
119         /// <param name="message">The message to merge the JSON information into.</param>
120         /// <param name="json">The JSON to parse.</param>
Merge(IMessage message, string json)121         internal void Merge(IMessage message, string json)
122         {
123             Merge(message, new StringReader(json));
124         }
125 
126         /// <summary>
127         /// Parses JSON read from <paramref name="jsonReader"/> and merges the information into the given message.
128         /// </summary>
129         /// <param name="message">The message to merge the JSON information into.</param>
130         /// <param name="jsonReader">Reader providing the JSON to parse.</param>
Merge(IMessage message, TextReader jsonReader)131         internal void Merge(IMessage message, TextReader jsonReader)
132         {
133             var tokenizer = JsonTokenizer.FromTextReader(jsonReader);
134             Merge(message, tokenizer);
135             var lastToken = tokenizer.Next();
136             if (lastToken != JsonToken.EndDocument)
137             {
138                 throw new InvalidProtocolBufferException("Expected end of JSON after object");
139             }
140         }
141 
142         /// <summary>
143         /// Merges the given message using data from the given tokenizer. In most cases, the next
144         /// token should be a "start object" token, but wrapper types and nullity can invalidate
145         /// that assumption. This is implemented as an LL(1) recursive descent parser over the stream
146         /// of tokens provided by the tokenizer. This token stream is assumed to be valid JSON, with the
147         /// tokenizer performing that validation - but not every token stream is valid "protobuf JSON".
148         /// </summary>
Merge(IMessage message, JsonTokenizer tokenizer)149         private void Merge(IMessage message, JsonTokenizer tokenizer)
150         {
151             if (tokenizer.ObjectDepth > settings.RecursionLimit)
152             {
153                 throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
154             }
155             if (message.Descriptor.IsWellKnownType)
156             {
157                 Action<JsonParser, IMessage, JsonTokenizer> handler;
158                 if (WellKnownTypeHandlers.TryGetValue(message.Descriptor.FullName, out handler))
159                 {
160                     handler(this, message, tokenizer);
161                     return;
162                 }
163                 // Well-known types with no special handling continue in the normal way.
164             }
165             var token = tokenizer.Next();
166             if (token.Type != JsonToken.TokenType.StartObject)
167             {
168                 throw new InvalidProtocolBufferException("Expected an object");
169             }
170             var descriptor = message.Descriptor;
171             var jsonFieldMap = descriptor.Fields.ByJsonName();
172             // All the oneof fields we've already accounted for - we can only see each of them once.
173             // The set is created lazily to avoid the overhead of creating a set for every message
174             // we parsed, when oneofs are relatively rare.
175             HashSet<OneofDescriptor> seenOneofs = null;
176             while (true)
177             {
178                 token = tokenizer.Next();
179                 if (token.Type == JsonToken.TokenType.EndObject)
180                 {
181                     return;
182                 }
183                 if (token.Type != JsonToken.TokenType.Name)
184                 {
185                     throw new InvalidOperationException("Unexpected token type " + token.Type);
186                 }
187                 string name = token.StringValue;
188                 FieldDescriptor field;
189                 if (jsonFieldMap.TryGetValue(name, out field))
190                 {
191                     if (field.ContainingOneof != null)
192                     {
193                         if (seenOneofs == null)
194                         {
195                             seenOneofs = new HashSet<OneofDescriptor>();
196                         }
197                         if (!seenOneofs.Add(field.ContainingOneof))
198                         {
199                             throw new InvalidProtocolBufferException($"Multiple values specified for oneof {field.ContainingOneof.Name}");
200                         }
201                     }
202                     MergeField(message, field, tokenizer);
203                 }
204                 else
205                 {
206                     if (settings.IgnoreUnknownFields)
207                     {
208                         tokenizer.SkipValue();
209                     }
210                     else
211                     {
212                         throw new InvalidProtocolBufferException("Unknown field: " + name);
213                     }
214                 }
215             }
216         }
217 
MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)218         private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
219         {
220             var token = tokenizer.Next();
221             if (token.Type == JsonToken.TokenType.Null)
222             {
223                 // Clear the field if we see a null token, unless it's for a singular field of type
224                 // google.protobuf.Value.
225                 // Note: different from Java API, which just ignores it.
226                 // TODO: Bring it more in line? Discuss...
227                 if (field.IsMap || field.IsRepeated || !IsGoogleProtobufValueField(field))
228                 {
229                     field.Accessor.Clear(message);
230                     return;
231                 }
232             }
233             tokenizer.PushBack(token);
234 
235             if (field.IsMap)
236             {
237                 MergeMapField(message, field, tokenizer);
238             }
239             else if (field.IsRepeated)
240             {
241                 MergeRepeatedField(message, field, tokenizer);
242             }
243             else
244             {
245                 var value = ParseSingleValue(field, tokenizer);
246                 field.Accessor.SetValue(message, value);
247             }
248         }
249 
MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)250         private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
251         {
252             var token = tokenizer.Next();
253             if (token.Type != JsonToken.TokenType.StartArray)
254             {
255                 throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + token.Type);
256             }
257 
258             IList list = (IList) field.Accessor.GetValue(message);
259             while (true)
260             {
261                 token = tokenizer.Next();
262                 if (token.Type == JsonToken.TokenType.EndArray)
263                 {
264                     return;
265                 }
266                 tokenizer.PushBack(token);
267                 object value = ParseSingleValue(field, tokenizer);
268                 if (value == null)
269                 {
270                     throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
271                 }
272                 list.Add(value);
273             }
274         }
275 
MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)276         private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
277         {
278             // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those.
279             var token = tokenizer.Next();
280             if (token.Type != JsonToken.TokenType.StartObject)
281             {
282                 throw new InvalidProtocolBufferException("Expected an object to populate a map");
283             }
284 
285             var type = field.MessageType;
286             var keyField = type.FindFieldByNumber(1);
287             var valueField = type.FindFieldByNumber(2);
288             if (keyField == null || valueField == null)
289             {
290                 throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
291             }
292             IDictionary dictionary = (IDictionary) field.Accessor.GetValue(message);
293 
294             while (true)
295             {
296                 token = tokenizer.Next();
297                 if (token.Type == JsonToken.TokenType.EndObject)
298                 {
299                     return;
300                 }
301                 object key = ParseMapKey(keyField, token.StringValue);
302                 object value = ParseSingleValue(valueField, tokenizer);
303                 if (value == null)
304                 {
305                     throw new InvalidProtocolBufferException("Map values must not be null");
306                 }
307                 dictionary[key] = value;
308             }
309         }
310 
IsGoogleProtobufValueField(FieldDescriptor field)311         private static bool IsGoogleProtobufValueField(FieldDescriptor field)
312         {
313             return field.FieldType == FieldType.Message &&
314                 field.MessageType.FullName == Value.Descriptor.FullName;
315         }
316 
ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)317         private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)
318         {
319             var token = tokenizer.Next();
320             if (token.Type == JsonToken.TokenType.Null)
321             {
322                 // TODO: In order to support dynamic messages, we should really build this up
323                 // dynamically.
324                 if (IsGoogleProtobufValueField(field))
325                 {
326                     return Value.ForNull();
327                 }
328                 return null;
329             }
330 
331             var fieldType = field.FieldType;
332             if (fieldType == FieldType.Message)
333             {
334                 // Parse wrapper types as their constituent types.
335                 // TODO: What does this mean for null?
336                 if (field.MessageType.IsWrapperType)
337                 {
338                     field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
339                     fieldType = field.FieldType;
340                 }
341                 else
342                 {
343                     // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
344                     tokenizer.PushBack(token);
345                     IMessage subMessage = NewMessageForField(field);
346                     Merge(subMessage, tokenizer);
347                     return subMessage;
348                 }
349             }
350 
351             switch (token.Type)
352             {
353                 case JsonToken.TokenType.True:
354                 case JsonToken.TokenType.False:
355                     if (fieldType == FieldType.Bool)
356                     {
357                         return token.Type == JsonToken.TokenType.True;
358                     }
359                     // Fall through to "we don't support this type for this case"; could duplicate the behaviour of the default
360                     // case instead, but this way we'd only need to change one place.
361                     goto default;
362                 case JsonToken.TokenType.StringValue:
363                     return ParseSingleStringValue(field, token.StringValue);
364                 // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
365                 case JsonToken.TokenType.Number:
366                     return ParseSingleNumberValue(field, token);
367                 case JsonToken.TokenType.Null:
368                     throw new NotImplementedException("Haven't worked out what to do for null yet");
369                 default:
370                     throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType);
371             }
372         }
373 
374         /// <summary>
375         /// Parses <paramref name="json"/> into a new message.
376         /// </summary>
377         /// <typeparam name="T">The type of message to create.</typeparam>
378         /// <param name="json">The JSON to parse.</param>
379         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
380         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
381         public T Parse<T>(string json) where T : IMessage, new()
382         {
383             ProtoPreconditions.CheckNotNull(json, nameof(json));
384             return Parse<T>(new StringReader(json));
385         }
386 
387         /// <summary>
388         /// Parses JSON read from <paramref name="jsonReader"/> into a new message.
389         /// </summary>
390         /// <typeparam name="T">The type of message to create.</typeparam>
391         /// <param name="jsonReader">Reader providing the JSON to parse.</param>
392         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
393         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
394         public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
395         {
396             ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
397             T message = new T();
398             Merge(message, jsonReader);
399             return message;
400         }
401 
402         /// <summary>
403         /// Parses <paramref name="json"/> into a new message.
404         /// </summary>
405         /// <param name="json">The JSON to parse.</param>
406         /// <param name="descriptor">Descriptor of message type to parse.</param>
407         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
408         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
Parse(string json, MessageDescriptor descriptor)409         public IMessage Parse(string json, MessageDescriptor descriptor)
410         {
411             ProtoPreconditions.CheckNotNull(json, nameof(json));
412             ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
413             return Parse(new StringReader(json), descriptor);
414         }
415 
416         /// <summary>
417         /// Parses JSON read from <paramref name="jsonReader"/> into a new message.
418         /// </summary>
419         /// <param name="jsonReader">Reader providing the JSON to parse.</param>
420         /// <param name="descriptor">Descriptor of message type to parse.</param>
421         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
422         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
Parse(TextReader jsonReader, MessageDescriptor descriptor)423         public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
424         {
425             ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
426             ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
427             IMessage message = descriptor.Parser.CreateTemplate();
428             Merge(message, jsonReader);
429             return message;
430         }
431 
MergeStructValue(IMessage message, JsonTokenizer tokenizer)432         private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
433         {
434             var firstToken = tokenizer.Next();
435             var fields = message.Descriptor.Fields;
436             switch (firstToken.Type)
437             {
438                 case JsonToken.TokenType.Null:
439                     fields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0);
440                     return;
441                 case JsonToken.TokenType.StringValue:
442                     fields[Value.StringValueFieldNumber].Accessor.SetValue(message, firstToken.StringValue);
443                     return;
444                 case JsonToken.TokenType.Number:
445                     fields[Value.NumberValueFieldNumber].Accessor.SetValue(message, firstToken.NumberValue);
446                     return;
447                 case JsonToken.TokenType.False:
448                 case JsonToken.TokenType.True:
449                     fields[Value.BoolValueFieldNumber].Accessor.SetValue(message, firstToken.Type == JsonToken.TokenType.True);
450                     return;
451                 case JsonToken.TokenType.StartObject:
452                     {
453                         var field = fields[Value.StructValueFieldNumber];
454                         var structMessage = NewMessageForField(field);
455                         tokenizer.PushBack(firstToken);
456                         Merge(structMessage, tokenizer);
457                         field.Accessor.SetValue(message, structMessage);
458                         return;
459                     }
460                 case JsonToken.TokenType.StartArray:
461                     {
462                         var field = fields[Value.ListValueFieldNumber];
463                         var list = NewMessageForField(field);
464                         tokenizer.PushBack(firstToken);
465                         Merge(list, tokenizer);
466                         field.Accessor.SetValue(message, list);
467                         return;
468                     }
469                 default:
470                     throw new InvalidOperationException("Unexpected token type: " + firstToken.Type);
471             }
472         }
473 
MergeStruct(IMessage message, JsonTokenizer tokenizer)474         private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
475         {
476             var token = tokenizer.Next();
477             if (token.Type != JsonToken.TokenType.StartObject)
478             {
479                 throw new InvalidProtocolBufferException("Expected object value for Struct");
480             }
481             tokenizer.PushBack(token);
482 
483             var field = message.Descriptor.Fields[Struct.FieldsFieldNumber];
484             MergeMapField(message, field, tokenizer);
485         }
486 
MergeAny(IMessage message, JsonTokenizer tokenizer)487         private void MergeAny(IMessage message, JsonTokenizer tokenizer)
488         {
489             // Record the token stream until we see the @type property. At that point, we can take the value, consult
490             // the type registry for the relevant message, and replay the stream, omitting the @type property.
491             var tokens = new List<JsonToken>();
492 
493             var token = tokenizer.Next();
494             if (token.Type != JsonToken.TokenType.StartObject)
495             {
496                 throw new InvalidProtocolBufferException("Expected object value for Any");
497             }
498             int typeUrlObjectDepth = tokenizer.ObjectDepth;
499 
500             // The check for the property depth protects us from nested Any values which occur before the type URL
501             // for *this* Any.
502             while (token.Type != JsonToken.TokenType.Name ||
503                 token.StringValue != JsonFormatter.AnyTypeUrlField ||
504                 tokenizer.ObjectDepth != typeUrlObjectDepth)
505             {
506                 tokens.Add(token);
507                 token = tokenizer.Next();
508 
509                 if (tokenizer.ObjectDepth < typeUrlObjectDepth)
510                 {
511                     throw new InvalidProtocolBufferException("Any message with no @type");
512                 }
513             }
514 
515             // Don't add the @type property or its value to the recorded token list
516             token = tokenizer.Next();
517             if (token.Type != JsonToken.TokenType.StringValue)
518             {
519                 throw new InvalidProtocolBufferException("Expected string value for Any.@type");
520             }
521             string typeUrl = token.StringValue;
522             string typeName = Any.GetTypeName(typeUrl);
523 
524             MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName);
525             if (descriptor == null)
526             {
527                 throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
528             }
529 
530             // Now replay the token stream we've already read and anything that remains of the object, just parsing it
531             // as normal. Our original tokenizer should end up at the end of the object.
532             var replay = JsonTokenizer.FromReplayedTokens(tokens, tokenizer);
533             var body = descriptor.Parser.CreateTemplate();
534             if (descriptor.IsWellKnownType)
535             {
536                 MergeWellKnownTypeAnyBody(body, replay);
537             }
538             else
539             {
540                 Merge(body, replay);
541             }
542             var data = body.ToByteString();
543 
544             // Now that we have the message data, we can pack it into an Any (the message received as a parameter).
545             message.Descriptor.Fields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
546             message.Descriptor.Fields[Any.ValueFieldNumber].Accessor.SetValue(message, data);
547         }
548 
549         // Well-known types end up in a property called "value" in the JSON. As there's no longer a @type property
550         // in the given JSON token stream, we should *only* have tokens of start-object, name("value"), the value
551         // itself, and then end-object.
MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)552         private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
553         {
554             var token = tokenizer.Next(); // Definitely start-object; checked in previous method
555             token = tokenizer.Next();
556             // TODO: What about an absent Int32Value, for example?
557             if (token.Type != JsonToken.TokenType.Name || token.StringValue != JsonFormatter.AnyWellKnownTypeValueField)
558             {
559                 throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
560             }
561             Merge(body, tokenizer);
562             token = tokenizer.Next();
563             if (token.Type != JsonToken.TokenType.EndObject)
564             {
565                 throw new InvalidProtocolBufferException($"Expected end-object token after @type/value for well-known type");
566             }
567         }
568 
569         #region Utility methods which don't depend on the state (or settings) of the parser.
/// <summary>
/// Converts the textual form of a map key into a boxed value of the type
/// required by the map's key field.
/// </summary>
/// <param name="field">The key field of the map entry; its <c>FieldType</c> selects the conversion.</param>
/// <param name="keyText">The key text to convert.</param>
/// <returns>The key as a boxed bool, string, int, uint, long or ulong.</returns>
/// <exception cref="InvalidProtocolBufferException">
/// The text is not valid for the key type, or the field type is not one handled here.
/// </exception>
private static object ParseMapKey(FieldDescriptor field, string keyText)
{
    switch (field.FieldType)
    {
        case FieldType.String:
            // String keys are used verbatim.
            return keyText;

        case FieldType.Bool:
            // Only the exact literals "true" and "false" are accepted.
            switch (keyText)
            {
                case "true":
                    return true;
                case "false":
                    return false;
                default:
                    throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText);
            }

        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString<int>(keyText, int.Parse);

        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString<uint>(keyText, uint.Parse);

        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString<long>(keyText, long.Parse);

        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString<ulong>(keyText, ulong.Parse);

        default:
            throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
    }
}
604 
/// <summary>
/// Converts a JSON number token into a boxed value suitable for the given field.
/// </summary>
/// <param name="field">The field being populated; its <c>FieldType</c> selects the conversion.</param>
/// <param name="token">The token whose <c>NumberValue</c> is converted.</param>
/// <returns>The converted value, boxed as int, uint, long, ulong, double or float.</returns>
/// <exception cref="InvalidProtocolBufferException">
/// The number is not a whole number where one is required, is out of range for the
/// target type, or the field type cannot be populated from a JSON number.
/// </exception>
private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
{
    double value = token.NumberValue;
    try
    {
        switch (field.FieldType)
        {
            case FieldType.Double:
                return value;

            case FieldType.Float:
                if (double.IsNaN(value))
                {
                    return float.NaN;
                }
                // Infinities are preserved; any other value beyond the float range is rejected.
                if (double.IsPositiveInfinity(value))
                {
                    return float.PositiveInfinity;
                }
                if (double.IsNegativeInfinity(value))
                {
                    return float.NegativeInfinity;
                }
                if (value > float.MaxValue || value < float.MinValue)
                {
                    throw new InvalidProtocolBufferException($"Value out of range: {value}");
                }
                return (float) value;

            case FieldType.Int32:
            case FieldType.SInt32:
            case FieldType.SFixed32:
            case FieldType.Enum:
                // Enums are just returned as an int, letting the CLR convert it.
                // Note that we deliberately don't check that it's a known enum value.
                CheckInteger(value);
                return checked((int) value);

            case FieldType.UInt32:
            case FieldType.Fixed32:
                CheckInteger(value);
                return checked((uint) value);

            case FieldType.Int64:
            case FieldType.SInt64:
            case FieldType.SFixed64:
                CheckInteger(value);
                return checked((long) value);

            case FieldType.UInt64:
            case FieldType.Fixed64:
                CheckInteger(value);
                return checked((ulong) value);

            default:
                throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
        }
    }
    catch (OverflowException)
    {
        // Raised by the checked integral conversions above when the value doesn't fit.
        throw new InvalidProtocolBufferException($"Value out of range: {value}");
    }
}
667 
/// <summary>
/// Verifies that a double holds a finite whole number.
/// </summary>
/// <param name="value">The value to check.</param>
/// <exception cref="InvalidProtocolBufferException">
/// The value is infinite, NaN, or has a fractional part.
/// </exception>
private static void CheckInteger(double value)
{
    bool isFinite = !double.IsInfinity(value) && !double.IsNaN(value);
    if (isFinite && value == Math.Floor(value))
    {
        return;
    }
    throw new InvalidProtocolBufferException($"Value not an integer: {value}");
}
679 
/// <summary>
/// Converts a JSON string value into a boxed value suitable for the given field.
/// </summary>
/// <param name="field">The field being populated; its <c>FieldType</c> selects the conversion.</param>
/// <param name="text">The string value to convert.</param>
/// <returns>
/// The text itself for string fields, a <see cref="ByteString"/> for bytes fields,
/// a boxed number for numeric fields, or the boxed enum number for enum fields.
/// </returns>
/// <exception cref="InvalidProtocolBufferException">
/// The text cannot be converted to the field's type, or the field type cannot be
/// populated from a JSON string.
/// </exception>
private static object ParseSingleStringValue(FieldDescriptor field, string text)
{
    switch (field.FieldType)
    {
        case FieldType.String:
        {
            return text;
        }
        case FieldType.Bytes:
        {
            try
            {
                return ByteString.FromBase64(text);
            }
            catch (FormatException e)
            {
                throw InvalidProtocolBufferException.InvalidBase64(e);
            }
        }
        case FieldType.Enum:
        {
            // Enum values are looked up by name within the field's enum type.
            var descriptor = field.EnumType.FindValueByName(text);
            if (descriptor == null)
            {
                throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
            }
            // Just return it as an int, and let the CLR convert it.
            return descriptor.Number;
        }
        case FieldType.Double:
        {
            double parsedDouble = ParseNumericString<double>(text, double.Parse);
            ValidateInfinityAndNan(
                text,
                double.IsPositiveInfinity(parsedDouble),
                double.IsNegativeInfinity(parsedDouble),
                double.IsNaN(parsedDouble));
            return parsedDouble;
        }
        case FieldType.Float:
        {
            float parsedSingle = ParseNumericString<float>(text, float.Parse);
            ValidateInfinityAndNan(
                text,
                float.IsPositiveInfinity(parsedSingle),
                float.IsNegativeInfinity(parsedSingle),
                float.IsNaN(parsedSingle));
            return parsedSingle;
        }
        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
        {
            return ParseNumericString<int>(text, int.Parse);
        }
        case FieldType.UInt32:
        case FieldType.Fixed32:
        {
            return ParseNumericString<uint>(text, uint.Parse);
        }
        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
        {
            return ParseNumericString<long>(text, long.Parse);
        }
        case FieldType.UInt64:
        case FieldType.Fixed64:
        {
            return ParseNumericString<ulong>(text, ulong.Parse);
        }
        default:
        {
            throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
        }
    }
}
729 
/// <summary>
/// Obtains a fresh message instance for a message-typed field by asking the
/// parser of the field's message type for a template.
/// </summary>
/// <param name="field">The field whose <c>MessageType</c> determines the message to create.</param>
/// <returns>The newly created message.</returns>
private static IMessage NewMessageForField(FieldDescriptor field) =>
    field.MessageType.Parser.CreateTemplate();
737 
/// <summary>
/// Parses numeric text with the supplied parser, after rejecting forms that the
/// parser's <see cref="NumberStyles"/> cannot exclude: a leading '+', and a leading
/// zero (optionally preceded by '-') that is immediately followed by another digit.
/// </summary>
/// <typeparam name="T">The numeric type produced by <paramref name="parser"/>.</typeparam>
/// <param name="text">The text to parse.</param>
/// <param name="parser">
/// The parse method to use (for example <c>int.Parse</c>); it is invoked with
/// leading-sign, decimal-point and exponent styles and the invariant culture.
/// </param>
/// <returns>The parsed value.</returns>
/// <exception cref="InvalidProtocolBufferException">
/// The text has a prohibited form, is not valid for the type, or is out of range.
/// </exception>
private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
{
    // All prefix checks are ordinal: the single-argument StartsWith(string) overload is
    // culture-sensitive, which can treat ignorable characters as matching and would then
    // misalign the index-based digit checks below.

    // Can't prohibit this with NumberStyles.
    if (text.StartsWith("+", StringComparison.Ordinal))
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }
    if (text.StartsWith("0", StringComparison.Ordinal) && text.Length > 1)
    {
        // "0" followed by another digit, e.g. "01".
        if (text[1] >= '0' && text[1] <= '9')
        {
            throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
        }
    }
    else if (text.StartsWith("-0", StringComparison.Ordinal) && text.Length > 2)
    {
        // "-0" followed by another digit, e.g. "-01".
        if (text[2] >= '0' && text[2] <= '9')
        {
            throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
        }
    }
    try
    {
        return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
    }
    catch (OverflowException)
    {
        throw new InvalidProtocolBufferException($"Value out of range: {text}");
    }
}
772 
/// <summary>
/// Ensures that an infinite or NaN parse result came from exactly the text
/// "Infinity", "-Infinity" or "NaN" respectively.
/// This corrects the lenient whitespace handling of double.Parse/float.Parse, as well as the
/// way that Mono parses out-of-range values as infinity.
/// </summary>
/// <param name="text">The original text that was parsed.</param>
/// <param name="isPositiveInfinity">Whether the parsed value is positive infinity.</param>
/// <param name="isNegativeInfinity">Whether the parsed value is negative infinity.</param>
/// <param name="isNaN">Whether the parsed value is NaN.</param>
/// <exception cref="InvalidProtocolBufferException">
/// A flag is set but the text is not the corresponding literal.
/// </exception>
private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
{
    // Each flag, when set, demands its own exact literal; an unset flag imposes nothing.
    bool positiveInfinityOk = !isPositiveInfinity || text == "Infinity";
    bool negativeInfinityOk = !isNegativeInfinity || text == "-Infinity";
    bool nanOk = !isNaN || text == "NaN";

    if (positiveInfinityOk && negativeInfinityOk && nanOk)
    {
        return;
    }
    throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
}
787 
/// <summary>
/// Parses a JSON string token as a timestamp and stores the resulting seconds and
/// nanoseconds in the given message via its descriptor's field accessors.
/// </summary>
/// <remarks>
/// The text is matched against <c>TimestampRegex</c> (defined elsewhere in this class) and
/// split into a date/time part parsed as <c>yyyy-MM-dd'T'HH:mm:ss</c>, an optional
/// sub-second part, and an offset that is either "Z" or a signed hours/minutes value.
/// All numeric components are parsed with the invariant culture.
/// </remarks>
/// <param name="message">The message to populate.</param>
/// <param name="token">The token to parse; must be a string value.</param>
/// <exception cref="InvalidProtocolBufferException">
/// The token is not a string, the text is not a valid timestamp, the offset is
/// larger than 18 hours or is "-00:00", or applying the offset and sub-seconds takes
/// the result outside the supported range.
/// </exception>
private static void MergeTimestamp(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Timestamp");
    }
    var match = TimestampRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
    }
    var dateTime = match.Groups["datetime"].Value;
    var subseconds = match.Groups["subseconds"].Value;
    var offset = match.Groups["offset"].Value;

    try
    {
        DateTime parsed = DateTime.ParseExact(
            dateTime,
            "yyyy-MM-dd'T'HH:mm:ss",
            CultureInfo.InvariantCulture,
            DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
        // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
        Timestamp timestamp = Timestamp.FromDateTime(parsed);
        int nanosToAdd = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            // The captured group includes its leading separator, which Substring(1) skips.
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
        }
        int secondsToAdd = 0;
        if (offset != "Z")
        {
            // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
            int sign = offset[0] == '-' ? 1 : -1;
            int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
            // Parsed with the invariant culture, like the hours above.
            int minutes = int.Parse(offset.Substring(4, 2), CultureInfo.InvariantCulture);
            int totalMinutes = hours * 60 + minutes;
            if (totalMinutes > 18 * 60)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            if (totalMinutes == 0 && sign == 1)
            {
                // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            // We need to *subtract* the offset from local time to get UTC.
            secondsToAdd = sign * totalMinutes * 60;
        }
        // Ensure we've got the right signs. Currently unnecessary, but easy to do.
        if (secondsToAdd < 0 && nanosToAdd > 0)
        {
            secondsToAdd++;
            nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond;
        }
        if (secondsToAdd != 0 || nanosToAdd != 0)
        {
            timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
            // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
            // anywhere, but we shouldn't parse it.
            if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
        }
        message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
        message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
    }
    catch (FormatException)
    {
        // Raised by DateTime.ParseExact / int.Parse when a component is not parseable.
        throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
    }
}
863 
/// <summary>
/// Parses a JSON string token as a duration and stores the resulting seconds and
/// nanoseconds in the given message via its descriptor's field accessors.
/// </summary>
/// <remarks>
/// The text is matched against <c>DurationRegex</c> (defined elsewhere in this class), which
/// supplies an optional sign, an integer seconds part and an optional sub-second part.
/// A "-" sign negates both the seconds and the nanoseconds. All numeric components
/// are parsed with the invariant culture.
/// </remarks>
/// <param name="message">The message to populate.</param>
/// <param name="token">The token to parse; must be a string value.</param>
/// <exception cref="InvalidProtocolBufferException">
/// The token is not a string, the text is not a valid duration, the seconds part has
/// an insignificant leading zero, or the result is not a normalized duration.
/// </exception>
private static void MergeDuration(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Duration");
    }
    var match = DurationRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var sign = match.Groups["sign"].Value;
    var secondsText = match.Groups["int"].Value;
    // Prohibit leading insignificant zeroes
    if (secondsText[0] == '0' && secondsText.Length > 1)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var subseconds = match.Groups["subseconds"].Value;
    var multiplier = sign == "-" ? -1 : 1;

    try
    {
        long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
        int nanos = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            // Parsed with the invariant culture, like the seconds above; the captured
            // group includes its leading separator, which Substring(1) skips.
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier;
        }
        if (!Duration.IsNormalized(seconds, nanos))
        {
            throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
        }
        message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds);
        message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos);
    }
    catch (FormatException)
    {
        // Raised by long.Parse / int.Parse when a component is not parseable.
        throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
    }
}
907 
/// <summary>
/// Parses a JSON string token as a field mask: the text is split on
/// <c>FieldMaskPathSeparators</c>, and each non-empty piece is converted with
/// <c>ToSnakeCase</c> and appended to the message's paths list.
/// </summary>
/// <param name="message">The message whose paths field receives the parsed paths.</param>
/// <param name="token">The token to parse; must be a string value.</param>
/// <exception cref="InvalidProtocolBufferException">
/// The token is not a string, or a path is rejected by <c>ToSnakeCase</c>.
/// </exception>
private static void MergeFieldMask(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for FieldMask");
    }

    // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"?
    string[] pieces = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries);

    var pathsField = message.Descriptor.Fields[FieldMask.PathsFieldNumber];
    IList target = (IList) pathsField.Accessor.GetValue(message);
    for (int index = 0; index < pieces.Length; index++)
    {
        target.Add(ToSnakeCase(pieces[index]));
    }
}
922 
// Ported from src/google/protobuf/util/internal/utility.cc
/// <summary>
/// Converts text to snake case: each ASCII upper-case letter is lower-cased, and an
/// underscore is inserted before it when it is not the first character and either
/// the previous character was not an ASCII upper-case letter or the next character
/// is an ASCII lower-case letter. All other characters are copied unchanged.
/// </summary>
/// <param name="text">The text to convert.</param>
/// <returns>The converted text.</returns>
/// <exception cref="InvalidProtocolBufferException">The text contains an underscore.</exception>
private static string ToSnakeCase(string text)
{
    var result = new StringBuilder(text.Length * 2);
    bool previousWasNotUpper = false;

    for (int index = 0; index < text.Length; index++)
    {
        char current = text[index];
        if (current == '_')
        {
            // Underscores are never valid in the input.
            throw new InvalidProtocolBufferException($"Invalid field mask: {text}");
        }

        bool isAsciiUpper = current >= 'A' && current <= 'Z';
        if (!isAsciiUpper)
        {
            result.Append(current);
            previousWasNotUpper = true;
            continue;
        }

        // Consider when the current character B is capitalized:
        // 1) At beginning of input:   "B..." => "b..."
        //    (e.g. "Biscuit" => "biscuit")
        // 2) Following a lowercase:   "...aB..." => "...a_b..."
        //    (e.g. "gBike" => "g_bike")
        // 3) At the end of input:     "...AB" => "...ab"
        //    (e.g. "GoogleLAB" => "google_lab")
        // 4) Followed by a lowercase: "...ABc..." => "...a_bc..."
        //    (e.g. "GBike" => "g_bike")
        bool isFirstCharacter = index == 0;
        bool nextIsAsciiLower =
            index + 1 < text.Length &&
            text[index + 1] >= 'a' &&
            text[index + 1] <= 'z';

        // An underscore is added for case 2 and case 4 only.
        if (!isFirstCharacter && (previousWasNotUpper || nextIsAsciiLower))
        {
            result.Append('_');
        }

        // We already know the character is an upper-case ASCII letter, so shift it to lower case.
        result.Append((char) (current + 'a' - 'A'));
        previousWasNotUpper = false;
    }

    return result.ToString();
}
972         #endregion
973 
/// <summary>
/// Immutable settings controlling JSON parsing: the recursion limit, the type
/// registry, and how unknown fields are handled.
/// </summary>
public sealed class Settings
{
    /// <summary>
    /// Default settings, as used by <see cref="JsonParser.Default"/>. This has the same default
    /// recursion limit as <see cref="CodedInputStream"/>, and an empty type registry.
    /// </summary>
    public static Settings Default { get; }

    // Workaround for the Mono compiler complaining about XML comments not being on
    // valid language elements.
    static Settings()
    {
        Default = new Settings(CodedInputStream.DefaultRecursionLimit);
    }

    /// <summary>
    /// The maximum depth of messages to parse. Note that this limit only applies to parsing
    /// messages, not collections - so a message within a collection within a message only counts as
    /// depth 2, not 3.
    /// </summary>
    public int RecursionLimit { get; }

    /// <summary>
    /// The type registry used to parse <see cref="Any"/> messages.
    /// </summary>
    public TypeRegistry TypeRegistry { get; }

    /// <summary>
    /// Whether the parser should ignore unknown fields (<c>true</c>) or throw an exception when
    /// they are encountered (<c>false</c>).
    /// </summary>
    public bool IgnoreUnknownFields { get; }

    // The single constructor that actually assigns state; every public constructor and
    // With* method funnels through here, so the null check on the registry always applies.
    private Settings(int recursionLimit, TypeRegistry typeRegistry, bool ignoreUnknownFields)
    {
        TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
        RecursionLimit = recursionLimit;
        IgnoreUnknownFields = ignoreUnknownFields;
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object with the specified recursion limit,
    /// an empty type registry, and unknown fields not ignored.
    /// </summary>
    /// <param name="recursionLimit">The maximum depth of messages to parse</param>
    public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty)
    {
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry,
    /// and unknown fields not ignored.
    /// </summary>
    /// <param name="recursionLimit">The maximum depth of messages to parse</param>
    /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages. Must not be null.</param>
    public Settings(int recursionLimit, TypeRegistry typeRegistry) : this(recursionLimit, typeRegistry, false)
    {
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object based on this one, but set to either ignore
    /// unknown fields or throw an exception when they are encountered.
    /// </summary>
    /// <param name="ignoreUnknownFields"><c>true</c> if unknown fields should be ignored when parsing; <c>false</c> to throw an exception.</param>
    /// <returns>The new settings; this instance is not modified.</returns>
    public Settings WithIgnoreUnknownFields(bool ignoreUnknownFields)
    {
        return new Settings(RecursionLimit, TypeRegistry, ignoreUnknownFields);
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object based on this one, but with the specified recursion limit.
    /// </summary>
    /// <param name="recursionLimit">The new recursion limit.</param>
    /// <returns>The new settings; this instance is not modified.</returns>
    public Settings WithRecursionLimit(int recursionLimit)
    {
        return new Settings(recursionLimit, TypeRegistry, IgnoreUnknownFields);
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object based on this one, but with the specified type registry.
    /// </summary>
    /// <param name="typeRegistry">The new type registry. Must not be null.</param>
    /// <returns>The new settings; this instance is not modified.</returns>
    public Settings WithTypeRegistry(TypeRegistry typeRegistry)
    {
        TypeRegistry checkedRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
        return new Settings(RecursionLimit, checkedRegistry, IgnoreUnknownFields);
    }
}
1059     }
1060 }
1061