#!/usr/bin/env python
#
# Copyright 2010 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""URL encoding support for messages types.

Protocol support for URL encoded form parameters.

Nested Fields:
  Nested fields are represented by dot separated names.  For example,
  consider the following messages:

    class WebPage(Message):

      title = StringField(1)
      tags = StringField(2, repeated=True)

    class WebSite(Message):

      name = StringField(1)
      home = MessageField(WebPage, 2)
      pages = MessageField(WebPage, 3, repeated=True)

  And consider the object:

    page = WebPage()
    page.title = 'Welcome to NewSite 2010'

    site = WebSite()
    site.name = 'NewSite 2010'
    site.home = page

  The URL encoded representation of this constellation of objects is:

    name=NewSite+2010&home.title=Welcome+to+NewSite+2010

  An object that exists but does not have any state can be represented with
  a reference to its name alone with no value assigned to it.  For example:

    site = WebSite()
    site.name = 'My Empty Site'
    site.home = WebPage()

  is represented as:

    name=My+Empty+Site&home=

  This represents a site with an empty uninitialized home page.

Repeated Fields:
  Repeated fields are represented by the name of the field and the index of
  each value, separated by a dash.  For example, consider the following
  message:

    home = WebPage()
    home.title = 'Home'

    news = WebPage()
    news.title = 'News'
    news.tags = ['news', 'articles']

    instance = WebSite()
    instance.name = 'Super fun site'
    instance.pages = [home, news]

  An instance of this message can be represented as:

    name=Super+fun+site&pages-0.title=Home&pages-1.title=News&...
      pages-1.tags-0=news&pages-1.tags-1=articles

Helper classes:

  URLEncodedRequestBuilder: Used for encapsulating the logic used for
    building a request message from a URL encoded RPC.
"""

__author__ = 'rafek@google.com (Rafe Kaplan)'

import re
import urllib

try:
    # Retained only so the name stays available to existing users of this
    # module; the module itself no longer calls it.  ``cgi`` was removed from
    # the standard library in Python 3.13, so its absence must not be fatal.
    import cgi
except ImportError:
    cgi = None

import six
from six.moves.urllib import parse as urllib_parse

from . import message_types
from . import messages
from . import util

__all__ = [
    'CONTENT_TYPE',
    'URLEncodedRequestBuilder',
    'encode_message',
    'decode_message',
]

CONTENT_TYPE = 'application/x-www-form-urlencoded'

# One dot-separated component of a parameter name: an identifier optionally
# followed by "-<index>" (the index is only legal on repeated fields).
_FIELD_NAME_REGEX = re.compile(r'^([a-zA-Z_][a-zA-Z_0-9]*)(?:-([0-9]+))?$')


class URLEncodedRequestBuilder(object):
    """Helper that encapsulates the logic for building URL encoded messages.

    This helper is used to map query parameters from a URL encoded RPC to a
    message instance.
    """

    @util.positional(2)
    def __init__(self, message, prefix=''):
        """Constructor.

        Args:
          message: Message instance to build from parameters.
          prefix: Prefix expected at the start of valid parameters.
        """
        self.__parameter_prefix = prefix

        # The empty tuple indicates the root message, which has no path.
        # __messages is a full cache that makes it very easy to look up
        # message instances by their paths.  See make_path for details about
        # what a path is.
        self.__messages = {(): message}

        # This is a cache that stores paths which have been checked for
        # correctness.  Correctness means that an index is present for
        # repeated fields on the path and absent for non-repeated fields.
        # The cache is also used to check that indexes are added in the
        # right order so that discontiguous ranges of indexes are ignored.
        self.__checked_indexes = {()}

    def make_path(self, parameter_name):
        """Parse a parameter name and build a full path to a message value.

        The path of a message is a tuple of 2-tuples describing the names
        and indexes within repeated fields from the root message (the
        message being constructed by the builder) to an arbitrarily nested
        message within it.

        Each 2-tuple node of a path (name, index) is:
          name: The name of the field that refers to the message instance.
          index: The index within a repeated field that refers to the
            message instance, None if not a repeated field.

        For example, consider:

          class VeryInner(messages.Message):
            ...

          class Inner(messages.Message):

            very_inner = messages.MessageField(VeryInner, 1, repeated=True)

          class Outer(messages.Message):

            inner = messages.MessageField(Inner, 1)

        If this builder is building an instance of Outer, that instance is
        referred to in the URL encoded parameters without a path.
        Therefore its path is ().

        The child 'inner' is referred to by its path (('inner', None),).

        The first child of repeated field 'very_inner' on the Inner
        instance is referred to by (('inner', None), ('very_inner', 0)).

        Examples:
          # Correct reference to model where nation is a Message, district
          # is repeated Message and county is any not repeated field type.
          >>> make_path('nation.district-2.county')
          (('nation', None), ('district', 2), ('county', None))

          # Field is not part of model.
          >>> make_path('nation.made_up_field')
          None

          # nation field is not repeated and index provided.
          >>> make_path('nation-1')
          None

          # district field is repeated and no index provided.
          >>> make_path('nation.district')
          None

        Args:
          parameter_name: Name of query parameter as passed in from the
            request.  In order to make a path, this parameter_name must
            point to a valid field within the message structure.  Nodes of
            the path that refer to repeated fields must be indexed with a
            number, non repeated nodes must not have an index.

        Returns:
          Parsed version of the parameter_name as a tuple of tuples:
            attribute: Name of attribute associated with path.
            index: Non-negative integer index when it is a repeated field,
              else None.
          Will return None if the parameter_name does not have the right
          prefix, does not point to a field within the message structure,
          does not have an index if it is a repeated field or has an index
          but is not a repeated field.
        """
        if not parameter_name.startswith(self.__parameter_prefix):
            return None
        parameter_name = parameter_name[len(self.__parameter_prefix):]

        path = []
        message_type = type(self.__messages[()])  # Get root message.

        for item in parameter_name.split('.'):
            # This will catch sub_message.real_message_field.not_real_field:
            # the previous node was not a message, so nothing can be nested
            # below it.
            if not message_type:
                return None

            item_match = _FIELD_NAME_REGEX.match(item)
            if not item_match:
                return None
            attribute = item_match.group(1)
            index = item_match.group(2)
            if index is not None:
                index = int(index)

            try:
                field = message_type.field_by_name(attribute)
            except KeyError:
                return None

            # Repeated fields require an index; singular fields forbid one.
            if field.repeated != (index is not None):
                return None

            if isinstance(field, messages.MessageField):
                message_type = field.message_type
            else:
                message_type = None

            # Path is valid so far.  Append node and continue.
            path.append((attribute, index))

        return tuple(path)

    def __check_index(self, parent_path, name, index):
        """Check correct index use and value relative to a given path.

        Check that for a given path the index is present for repeated
        fields and that it is in range for the existing list that it will
        be inserted in to or appended to.

        Args:
          parent_path: Path to check against name and index.
          name: Name of field to check for existence.
          index: Index to check.  If field is repeated, should be a number
            within range of the length of the field, or point to the next
            item for appending.

        Returns:
          True if the index is None, 0, or no greater than the current
          length of the list stored on the parent; otherwise False.
        """
        # Don't worry about non-repeated fields.
        # It's also ok if index is 0 because that means next insert will
        # append.
        if not index:
            return True

        parent = self.__messages.get(parent_path, None)
        value_list = getattr(parent, name, None)
        # If the list does not exist then the index should be 0.  Since it
        # is not, path is not valid.
        if not value_list:
            return False

        # The index must either point to an element of the list or to the
        # tail.
        return len(value_list) >= index

    def __check_indexes(self, path):
        """Check that all indexes are valid and in the right order.

        This method must iterate over the path and check that all
        references to indexes point to an existing message or to the end of
        the list, meaning the next value should be appended to the repeated
        field.

        Args:
          path: Path to check indexes for.  Tuple of 2-tuples (name, index).
            See make_path for more information.

        Returns:
          True if all the indexes of the path are within range, else False.
        """
        if path in self.__checked_indexes:
            return True

        # Start with the root message.
        parent_path = ()

        for name, index in path:
            next_path = parent_path + ((name, index),)
            # First look in the checked indexes cache.
            if next_path not in self.__checked_indexes:
                if not self.__check_index(parent_path, name, index):
                    return False
                self.__checked_indexes.add(next_path)

            parent_path = next_path

        return True

    def __get_or_create_path(self, path):
        """Get a message from the messages cache or create it and add it.

        This method will also create any parent messages based on the path.

        When a new instance of a given message is created, it is stored in
        __messages by its path.

        Args:
          path: Path of message to get.  Path must be valid, in other words
            __check_indexes(path) returns true.  Tuple of 2-tuples
            (name, index).  See make_path for more information.

        Returns:
          The message instance found or created at the given path.
        """
        message = self.__messages.get(path, None)
        if message is not None:
            return message

        parent_path = ()
        parent = self.__messages[()]  # Get the root object.

        for name, index in path:
            field = parent.field_by_name(name)
            next_path = parent_path + ((name, index),)
            next_message = self.__messages.get(next_path, None)
            if next_message is None:
                next_message = field.message_type()
                self.__messages[next_path] = next_message
                if not field.repeated:
                    setattr(parent, field.name, next_message)
                else:
                    # Indexes were validated to be contiguous, so a message
                    # not yet in the cache always belongs at the tail.
                    list_value = getattr(parent, field.name, None)
                    if list_value is None:
                        setattr(parent, field.name, [next_message])
                    else:
                        list_value.append(next_message)

            parent_path = next_path
            parent = next_message

        return parent

    def add_parameter(self, parameter, values):
        """Add a single parameter.

        Adds a single parameter and its value to the request message.

        Args:
          parameter: Query string parameter to map to request.
          values: List of values to assign to request message.

        Returns:
          True if parameter was valid and added to the message, else False.

        Raises:
          DecodeError if the parameter refers to a valid field, and the
          values parameter does not have one and only one value.  Non-valid
          query parameters may have multiple values and should not cause an
          error.  DecodeError is also raised when a date-time value fails
          to parse or when converting a value with the field's type raises
          TypeError.  Other conversion errors (for example the ValueError
          raised by int() on a malformed integer) propagate unchanged.
        """
        path = self.make_path(parameter)

        if not path:
            return False

        # Must check that all indexes of all items in the path are correct
        # before instantiating any of them.  For example, consider:
        #
        #   class Repeated(object):
        #     ...
        #
        #   class Inner(object):
        #
        #     repeated = messages.MessageField(Repeated, 1, repeated=True)
        #
        #   class Outer(object):
        #
        #     inner = messages.MessageField(Inner, 1)
        #
        #   instance = Outer()
        #   builder = URLEncodedRequestBuilder(instance)
        #   builder.add_parameter('inner.repeated', [''])
        #
        #   assert not hasattr(instance, 'inner')
        #
        # The check is done relative to the instance of Outer passed in to
        # the constructor of the builder.  This instance is not referred to
        # at all because all names are assumed to be relative to it.
        #
        # The 'repeated' part of the path is not correct because it is
        # missing an index.  Because it is missing an index, it should not
        # create an instance of Repeated.  In this case add_parameter will
        # return False and have no side effects.
        #
        # A correct path that would cause a new Inner instance to be
        # inserted at instance.inner and a new Repeated instance to be
        # appended to the instance.inner.repeated list would be
        # 'inner.repeated-0'.
        if not self.__check_indexes(path):
            return False

        # Ok to build objects.
        parent_path = path[:-1]
        parent = self.__get_or_create_path(parent_path)
        name, index = path[-1]
        field = parent.field_by_name(name)

        if len(values) != 1:
            raise messages.DecodeError(
                'Found repeated values for field %s.' % field.name)

        value = values[0]

        if isinstance(field, messages.IntegerField):
            converted_value = int(value)
        elif isinstance(field, message_types.DateTimeField):
            try:
                converted_value = util.decode_datetime(value)
            except ValueError as e:
                raise messages.DecodeError(e)
        elif isinstance(field, messages.MessageField):
            # Just make sure it's instantiated.  Assignment to field or
            # appending to list is done in __get_or_create_path.
            self.__get_or_create_path(path)
            return True
        elif isinstance(field, messages.StringField):
            # Query-string parsing yields byte strings on Python 2 and text
            # on Python 3; only byte strings need (or support) decoding.
            if isinstance(value, six.binary_type):
                converted_value = value.decode('utf-8')
            else:
                converted_value = value
        elif isinstance(field, messages.BooleanField):
            converted_value = value.lower() == 'true'
        else:
            # Remaining field kinds are converted by calling the field's
            # declared type on the raw string.
            try:
                converted_value = field.type(value)
            except TypeError:
                raise messages.DecodeError(
                    'Invalid enum value "%s"' % value)

        if field.repeated:
            value_list = getattr(parent, field.name, None)
            if value_list is None:
                setattr(parent, field.name, [converted_value])
            elif index == len(value_list):
                value_list.append(converted_value)
            else:
                # Index should never be above len(value_list) because it
                # was verified during the index check above.
                value_list[index] = converted_value
        else:
            setattr(parent, field.name, converted_value)

        return True


@util.positional(1)
def encode_message(message, prefix=''):
    """Encode Message instance to url-encoded string.

    Args:
      message: Message instance to encode in to url-encoded string.
      prefix: Prefix to prepend to field names of contained values.

    Returns:
      String encoding of Message in URL encoded format.

    Raises:
      messages.ValidationError if message is not initialized.
    """
    message.check_initialized()

    parameters = []

    def build_message(parent, prefix):
        """Recursively build parameter list for URL response.

        Args:
          parent: Message to build parameters for.
          prefix: Prefix to prepend to field names of contained values.

        Returns:
          True if some value of parent was added to the parameters list,
          else False, meaning the object contained no values.
        """
        has_any_values = False
        for field in sorted(parent.all_fields(), key=lambda f: f.number):
            next_value = parent.get_assigned_value(field.name)
            if next_value is None:
                continue

            # Found a value.  Ultimate return value should be True.
            has_any_values = True

            # Normalize all values in to a list.
            if not field.repeated:
                next_value = [next_value]

            for index, item in enumerate(next_value):
                # Create a name with an index if it is a repeated field.
                if field.repeated:
                    field_name = '%s%s-%s' % (prefix, field.name, index)
                else:
                    field_name = prefix + field.name

                if isinstance(field, message_types.DateTimeField):
                    # Date-times are emitted in ISO 8601 form.
                    parameters.append((field_name, item.isoformat()))
                elif isinstance(field, messages.MessageField):
                    # Message fields must be recursed in to in order to
                    # construct their component parameter values.
                    if not build_message(item, field_name + '.'):
                        # The nested message is empty.  Append an empty
                        # value to represent it.
                        parameters.append((field_name, ''))
                elif isinstance(field, messages.BooleanField):
                    parameters.append(
                        (field_name, 'true' if item else 'false'))
                else:
                    # Text is sent as UTF-8 bytes.  Byte strings must be
                    # handed to urlencode untouched: calling str() on bytes
                    # under Python 3 would produce the repr "b'...'".
                    if isinstance(item, six.text_type):
                        item = item.encode('utf-8')
                    elif not isinstance(item, six.binary_type):
                        item = str(item)
                    parameters.append((field_name, item))

        return has_any_values

    build_message(message, prefix)

    # Also add any unrecognized values from the decoded string.
    for key in message.all_unrecognized_fields():
        values, _ = message.get_unrecognized_field_info(key)
        if not isinstance(values, (list, tuple)):
            values = (values,)
        for value in values:
            parameters.append((key, value))

    # urllib.urlencode only exists on Python 2; six resolves the right one.
    return urllib_parse.urlencode(parameters)


def decode_message(message_type, encoded_message, **kwargs):
    """Decode urlencoded content to message.

    Args:
      message_type: Message class to instantiate and fill from the URL
        encoded content.
      encoded_message: URL encoded message.
      **kwargs: Passed through to URLEncodedRequestBuilder; supports:
        prefix: Prefix expected at the start of valid parameter names.

    Returns:
      Decoded instance of message_type.

    Raises:
      messages.DecodeError if a recognized parameter cannot be decoded.
      messages.ValidationError if the decoded message is not initialized.
    """
    message = message_type()
    builder = URLEncodedRequestBuilder(message, **kwargs)
    # cgi.parse_qs was a deprecated alias of this function and has been
    # removed from modern Python versions.
    arguments = urllib_parse.parse_qs(encoded_message,
                                      keep_blank_values=True)
    # Sorting guarantees repeated-field indexes arrive in ascending order
    # for a given field (within the same number of digits).
    for argument, values in sorted(six.iteritems(arguments)):
        added = builder.add_parameter(argument, values)
        # Save off any unknown values, so they're still accessible.
        if not added:
            message.set_unrecognized_field(
                argument, values, messages.Variant.STRING)
    message.check_initialized()
    return message