#!/usr/bin/env python
#
# Copyright 2010 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
17
"""URL encoding support for message types.

Protocol support for URL encoded form parameters.

Nested Fields:
  Nested fields are represented by dot separated names.  For example, consider
  the following messages:

    class WebPage(Message):

      title = StringField(1)
      tags = StringField(2, repeated=True)

    class WebSite(Message):

      name = StringField(1)
      home = MessageField(WebPage, 2)
      pages = MessageField(WebPage, 3, repeated=True)

  And consider the object:

    page = WebPage()
    page.title = 'Welcome to NewSite 2010'

    site = WebSite()
    site.name = 'NewSite 2010'
    site.home = page

  The URL encoded representation of this constellation of objects is:

    name=NewSite+2010&home.title=Welcome+to+NewSite+2010

  An object that exists but does not have any state can be represented with
  a reference to its name alone with no value assigned to it.  For example:

    site = WebSite()
    site.name = 'My Empty Site'
    site.home = WebPage()

  is represented as:

    name=My+Empty+Site&home=

  This represents a site with an empty uninitialized home page.

Repeated Fields:
  Repeated fields are represented by the name of the field and the index of
  each value, separated by a dash.  For example, consider the following
  message:

    home = WebPage()
    home.title = 'Home'

    news = WebPage()
    news.title = 'News'
    news.tags = ['news', 'articles']

    instance = WebSite()
    instance.name = 'Super fun site'
    instance.pages = [home, news]

  An instance of this message can be represented as:

    name=Super+fun+site&pages-0.title=Home&pages-1.title=News&...
    pages-1.tags-0=news&pages-1.tags-1=articles

Helper classes:

  URLEncodedRequestBuilder: Used for encapsulating the logic used for building
    a request message from a URL encoded RPC.
"""
__author__ = 'rafek@google.com (Rafe Kaplan)'

import cgi
import re
import urllib

import six

from . import message_types
from . import messages
from . import util

# Names exported by `from <module> import *`.
__all__ = ['CONTENT_TYPE',
           'URLEncodedRequestBuilder',
           'encode_message',
           'decode_message',
           ]

# Content type identifying URL encoded form data.
CONTENT_TYPE = 'application/x-www-form-urlencoded'

# Matches one dot-separated segment of a parameter name: an identifier
# (group 1) optionally followed by '-<decimal index>' (group 2).
_FIELD_NAME_REGEX = re.compile(r'^([a-zA-Z_][a-zA-Z_0-9]*)(?:-([0-9]+))?$')
109
110
class URLEncodedRequestBuilder(object):
  """Helper that encapsulates the logic used for building URL encoded messages.

  This helper is used to map query parameters from a URL encoded RPC to a
  message instance.
  """

  @util.positional(2)
  def __init__(self, message, prefix=''):
    """Constructor.

    Args:
      message: Message instance to build from parameters.
      prefix: Prefix expected at the start of valid parameters.
    """
    self.__parameter_prefix = prefix

    # The empty tuple indicates the root message, which has no path.
    # __messages is a full cache that makes it very easy to look up message
    # instances by their paths.  See make_path for details about what a path
    # is.
    self.__messages = {(): message}

    # This is a cache that stores paths which have been checked for
    # correctness.  Correctness means that an index is present for repeated
    # fields on the path and absent for non-repeated fields.  The cache is
    # also used to check that indexes are added in the right order so that
    # discontiguous ranges of indexes are ignored.
    self.__checked_indexes = set([()])

  def make_path(self, parameter_name):
    """Parse a parameter name and build a full path to a message value.

    The path of a message is a tuple of 2-tuples describing the names and
    indexes within repeated fields from the root message (the message being
    constructed by the builder) to an arbitrarily nested message within it.

    Each 2-tuple node of a path (name, index) is:
      name: The name of the field that refers to the message instance.
      index: The index within a repeated field that refers to the message
        instance, None if not a repeated field.

    For example, consider:

      class VeryInner(messages.Message):
        ...

      class Inner(messages.Message):

        very_inner = messages.MessageField(VeryInner, 1, repeated=True)

      class Outer(messages.Message):

        inner = messages.MessageField(Inner, 1)

    If this builder is building an instance of Outer, that instance is
    referred to in the URL encoded parameters without a path.  Therefore
    its path is ().

    The child 'inner' is referred to by its path (('inner', None),).

    The first child of repeated field 'very_inner' on the Inner instance
    is referred to by (('inner', None), ('very_inner', 0)).

    Examples:
      # Correct reference to model where nation is a Message, district is
      # repeated Message and county is any not repeated field type.
      >>> make_path('nation.district-2.county')
      (('nation', None), ('district', 2), ('county', None))

      # Field is not part of model.
      >>> make_path('nation.made_up_field')
      None

      # nation field is not repeated and index provided.
      >>> make_path('nation-1')
      None

      # district field is repeated and no index provided.
      >>> make_path('nation.district')
      None

    Args:
      parameter_name: Name of query parameter as passed in from the request.
        In order to make a path, this parameter_name must point to a valid
        field within the message structure.  Nodes of the path that refer to
        repeated fields must be indexed with a number, non repeated nodes must
        not have an index.

    Returns:
      Parsed version of the parameter_name as a tuple of tuples:
        attribute: Name of attribute associated with path.
        index: Non-negative integer index when it is a repeated field, else
          None.
      Will return None if the parameter_name does not have the right prefix,
      does not point to a field within the message structure, does not have
      an index if it is a repeated field or has an index but is not a repeated
      field.
    """
    if not parameter_name.startswith(self.__parameter_prefix):
      return None
    parameter_name = parameter_name[len(self.__parameter_prefix):]

    path = []
    message_type = type(self.__messages[()])  # Get root message.

    for item in parameter_name.split('.'):
      # The previous segment was not a message field, so nothing can be
      # nested below it.  This will catch
      # sub_message.real_message_field.not_real_field
      if not message_type:
        return None

      item_match = _FIELD_NAME_REGEX.match(item)
      if not item_match:
        return None
      attribute = item_match.group(1)
      index = item_match.group(2)
      # group(2) is None when no '-<index>' suffix is present; a matched index
      # is a non-empty digit string (so '0' is still truthy here).
      if index:
        index = int(index)

      try:
        field = message_type.field_by_name(attribute)
      except KeyError:
        return None

      # Repeated fields require an index, non-repeated fields forbid one.
      if field.repeated != (index is not None):
        return None

      if isinstance(field, messages.MessageField):
        message_type = field.message_type
      else:
        message_type = None

      # Path is valid so far.  Append node and continue.
      path.append((attribute, index))

    return tuple(path)

  def __check_index(self, parent_path, name, index):
    """Check correct index use and value relative to a given path.

    Check that for a given path the index is present for repeated fields
    and that it is in range for the existing list that it will be inserted
    in to or appended to.

    Args:
      parent_path: Path to check against name and index.
      name: Name of field to check for existence.
      index: Index to check.  If field is repeated, should be a number within
        range of the length of the field, or point to the next item for
        appending.

    Returns:
      True if index is None or 0, or if it refers to an existing element or
      to the tail of the list currently stored on the parent message; else
      False.
    """
    # Don't worry about non-repeated fields.
    # It's also ok if index is 0 because that means next insert will append.
    if not index:
      return True

    parent = self.__messages.get(parent_path, None)
    value_list = getattr(parent, name, None)
    # If the list does not exist then the index should be 0.  Since it is
    # not, path is not valid.
    if not value_list:
      return False

    # The index must either point to an element of the list or to the tail.
    return len(value_list) >= index

  def __check_indexes(self, path):
    """Check that all indexes are valid and in the right order.

    This method must iterate over the path and check that all references
    to indexes point to an existing message or to the end of the list, meaning
    the next value should be appended to the repeated field.

    Args:
      path: Path to check indexes for.  Tuple of 2-tuples (name, index).  See
        make_path for more information.

    Returns:
      True if all the indexes of the path are within range, else False.
    """
    if path in self.__checked_indexes:
      return True

    # Start with the root message.
    parent_path = ()

    for name, index in path:
      next_path = parent_path + ((name, index),)
      # First look in the checked indexes cache.
      if next_path not in self.__checked_indexes:
        if not self.__check_index(parent_path, name, index):
          return False
        self.__checked_indexes.add(next_path)

      parent_path = next_path

    return True

  def __get_or_create_path(self, path):
    """Get a message from the messages cache or create it and add it.

    This method will also create any parent messages based on the path.

    When a new instance of a given message is created, it is stored in
    __messages by its path.

    Args:
      path: Path of message to get.  Path must be valid, in other words
        __check_indexes(path) returns true.  Tuple of 2-tuples (name, index).
        See make_path for more information.  Every node that is not already
        cached is instantiated through field.message_type(), so each node is
        expected to refer to a message field.

    Returns:
      Message instance referred to by the path; the root message when the
      path is empty.
    """
    message = self.__messages.get(path, None)
    if message:
      return message

    parent_path = ()
    parent = self.__messages[()]  # Get the root object

    for name, index in path:
      field = parent.field_by_name(name)
      next_path = parent_path + ((name, index),)
      next_message = self.__messages.get(next_path, None)
      if next_message is None:
        next_message = field.message_type()
        self.__messages[next_path] = next_message
        if not field.repeated:
          setattr(parent, field.name, next_message)
        else:
          list_value = getattr(parent, field.name, None)
          if list_value is None:
            setattr(parent, field.name, [next_message])
          else:
            list_value.append(next_message)

      parent_path = next_path
      parent = next_message

    return parent

  def add_parameter(self, parameter, values):
    """Add a single parameter.

    Adds a single parameter and its value to the request message.

    Args:
      parameter: Query string parameter to map to request.
      values: List of values to assign to request message.

    Returns:
      True if parameter was valid and added to the message, else False.

    Raises:
      DecodeError if the parameter refers to a valid field, and the values
      parameter does not have one and only one value.  Non-valid query
      parameters may have multiple values and should not cause an error.
      DecodeError is also raised when a date-time value cannot be decoded or
      when the field's type constructor rejects the value with a TypeError.
      A ValueError raised by int() for an integer field is not converted and
      propagates to the caller.
    """
    path = self.make_path(parameter)

    if not path:
      return False

    # Must check that all indexes of all items in the path are correct before
    # instantiating any of them.  For example, consider:
    #
    #   class Repeated(object):
    #     ...
    #
    #   class Inner(object):
    #
    #     repeated = messages.MessageField(Repeated, 1, repeated=True)
    #
    #   class Outer(object):
    #
    #     inner = messages.MessageField(Inner, 1)
    #
    #   instance = Outer()
    #   builder = URLEncodedRequestBuilder(instance)
    #   builder.add_parameter('inner.repeated')
    #
    #   assert not hasattr(instance, 'inner')
    #
    # The check is done relative to the instance of Outer passed in to the
    # constructor of the builder.  This instance is not referred to at all
    # because all names are assumed to be relative to it.
    #
    # The 'repeated' part of the path is not correct because it is missing an
    # index.  Because it is missing an index, it should not create an instance
    # of Repeated.  In this case add_parameter will return False and have no
    # side effects.
    #
    # A correct path that would cause a new Inner instance to be inserted at
    # instance.inner and a new Repeated instance to be appended to the
    # instance.inner.repeated list would be 'inner.repeated-0'.
    if not self.__check_indexes(path):
      return False

    # Ok to build objects.
    parent_path = path[:-1]
    parent = self.__get_or_create_path(parent_path)
    name, index = path[-1]
    field = parent.field_by_name(name)

    if len(values) != 1:
      raise messages.DecodeError(
          'Found repeated values for field %s.' % field.name)

    value = values[0]

    if isinstance(field, messages.IntegerField):
      converted_value = int(value)
    elif isinstance(field, message_types.DateTimeField):
      try:
        converted_value = util.decode_datetime(value)
      except ValueError as e:
        raise messages.DecodeError(e)
    elif isinstance(field, messages.MessageField):
      # Just make sure it's instantiated.  Assignment to field or
      # appending to list is done in __get_or_create_path.
      self.__get_or_create_path(path)
      return True
    elif isinstance(field, messages.StringField):
      # Only byte strings need decoding; text strings have no usable
      # decode() (it does not exist on Python 3 str) and are kept as they are.
      if isinstance(value, six.binary_type):
        converted_value = value.decode('utf-8')
      else:
        converted_value = value
    elif isinstance(field, messages.BooleanField):
      # Only the case-insensitive literal 'true' maps to True.
      converted_value = value.lower() == 'true'
    else:
      try:
        converted_value = field.type(value)
      except TypeError:
        raise messages.DecodeError('Invalid enum value "%s"' % value)

    if field.repeated:
      value_list = getattr(parent, field.name, None)
      if value_list is None:
        setattr(parent, field.name, [converted_value])
      else:
        if index == len(value_list):
          value_list.append(converted_value)
        else:
          # Index should never be above len(value_list) because it was
          # verified during the index check above.
          value_list[index] = converted_value
    else:
      setattr(parent, field.name, converted_value)

    return True
461
462
@util.positional(1)
def encode_message(message, prefix=''):
  """Encode Message instance to url-encoded string.

  Args:
    message: Message instance to encode in to url-encoded string.
    prefix: Prefix to prepend to field names of contained values.

  Returns:
    String encoding of Message in URL encoded format.

  Raises:
    messages.ValidationError if message is not initialized.
  """
  message.check_initialized()

  # (name, value) pairs accumulated by build_message in field-number order.
  parameters = []

  def build_message(parent, prefix):
    """Recursively build parameter list for URL response.

    Args:
      parent: Message to build parameters for.
      prefix: Prefix to prepend to field names of contained values.

    Returns:
      True if some value of parent was added to the parameters list,
      else False, meaning the object contained no values.
    """
    has_any_values = False
    for field in sorted(parent.all_fields(), key=lambda f: f.number):
      next_value = parent.get_assigned_value(field.name)
      if next_value is None:
        continue

      # Found a value.  Ultimate return value should be True.
      has_any_values = True

      # Normalize all values in to a list.
      if not field.repeated:
        next_value = [next_value]

      for index, item in enumerate(next_value):
        # Create a name with an index if it is a repeated field.
        if field.repeated:
          field_name = '%s%s-%s' % (prefix, field.name, index)
        else:
          field_name = prefix + field.name

        if isinstance(field, message_types.DateTimeField):
          # Date-time values are emitted in their isoformat() representation.
          parameters.append((field_name, item.isoformat()))
        elif isinstance(field, messages.MessageField):
          # Message fields must be recursed in to in order to construct
          # their component parameter values.
          if not build_message(item, field_name + '.'):
            # The nested message is empty.  Append an empty value to
            # represent it.
            parameters.append((field_name, ''))
        elif isinstance(field, messages.BooleanField):
          parameters.append((field_name, 'true' if item else 'false'))
        else:
          # On Python 2, str() of non-ASCII unicode text fails, so encode it
          # to UTF-8 bytes first.  On Python 3 the text is passed through
          # as is; encoding it there would make str() produce "b'...'".
          if six.PY2 and isinstance(item, six.text_type):
            item = item.encode('utf-8')
          parameters.append((field_name, str(item)))

    return has_any_values

  build_message(message, prefix)

  # Also add any unrecognized values from the decoded string.
  for key in message.all_unrecognized_fields():
    values, _ = message.get_unrecognized_field_info(key)
    if not isinstance(values, (list, tuple)):
      values = (values,)
    for value in values:
      parameters.append((key, value))

  # six.moves resolves to urllib.urlencode on Python 2 and to
  # urllib.parse.urlencode on Python 3.
  return six.moves.urllib.parse.urlencode(parameters)
541
542
def _parameter_sort_key(parameter_name):
  """Build a sort key that orders the indexes inside a name numerically.

  Args:
    parameter_name: Query parameter name, e.g. 'pages-10.title'.

  Returns:
    List alternating text pieces and integer values of the digit runs, so
    that 'pages-2' sorts before 'pages-10'.
  """
  # With a single capture group, re.split yields text at even positions and
  # the captured digit runs at odd positions, so keys compare type-safely.
  pieces = re.split(r'([0-9]+)', parameter_name)
  return [int(piece) if position % 2 else piece
          for position, piece in enumerate(pieces)]


def decode_message(message_type, encoded_message, **kwargs):
  """Decode urlencoded content to message.

  Args:
    message_type: Message class to instantiate and fill from the URL encoded
      content.
    encoded_message: URL encoded message.
    **kwargs: Keyword arguments forwarded to URLEncodedRequestBuilder, for
      example prefix, the prefix expected at the start of valid parameter
      names.

  Returns:
    Decoded instance of message_type.
  """
  message = message_type()
  builder = URLEncodedRequestBuilder(message, **kwargs)
  # six.moves resolves to urlparse.parse_qs on Python 2 (the function that
  # the deprecated cgi.parse_qs delegates to) and urllib.parse.parse_qs on
  # Python 3.
  arguments = six.moves.urllib.parse.parse_qs(encoded_message,
                                              keep_blank_values=True)
  # Repeated values must be added in ascending index order because the
  # builder only accepts an index that refers to an existing element or to
  # the tail of the list.  A plain string sort would visit 'x-10' before
  # 'x-2' and cause it to be rejected, so indexes are compared numerically.
  ordered_arguments = sorted(
      six.iteritems(arguments),
      key=lambda name_and_values: _parameter_sort_key(name_and_values[0]))
  for argument, values in ordered_arguments:
    added = builder.add_parameter(argument, values)
    # Save off any unknown values, so they're still accessible.
    if not added:
      message.set_unrecognized_field(argument, values, messages.Variant.STRING)
  message.check_initialized()
  return message
564