1#!/usr/bin/env python 2# 3# Copyright 2010 Google Inc. 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16# 17 18"""Common utility library.""" 19 20from __future__ import with_statement 21import six 22 23__author__ = ['rafek@google.com (Rafe Kaplan)', 24 'guido@google.com (Guido van Rossum)', 25] 26 27import cgi 28import datetime 29import inspect 30import os 31import re 32import sys 33 34__all__ = ['AcceptItem', 35 'AcceptError', 36 'Error', 37 'choose_content_type', 38 'decode_datetime', 39 'get_package_for_module', 40 'pad_string', 41 'parse_accept_header', 42 'positional', 43 'PROTORPC_PROJECT_URL', 44 'TimeZoneOffset', 45 'total_seconds', 46] 47 48 49class Error(Exception): 50 """Base class for protorpc exceptions.""" 51 52 53class AcceptError(Error): 54 """Raised when there is an error parsing the accept header.""" 55 56 57PROTORPC_PROJECT_URL = 'http://code.google.com/p/google-protorpc' 58 59_TIME_ZONE_RE_STRING = r""" 60 # Examples: 61 # +01:00 62 # -05:30 63 # Z12:00 64 ((?P<z>Z) | (?P<sign>[-+]) 65 (?P<hours>\d\d) : 66 (?P<minutes>\d\d))$ 67""" 68_TIME_ZONE_RE = re.compile(_TIME_ZONE_RE_STRING, re.IGNORECASE | re.VERBOSE) 69 70 71def pad_string(string): 72 """Pad a string for safe HTTP error responses. 73 74 Prevents Internet Explorer from displaying their own error messages 75 when sent as the content of error responses. 76 77 Args: 78 string: A string. 79 80 Returns: 81 Formatted string left justified within a 512 byte field. 82 """ 83 return string.ljust(512) 84 85 86def positional(max_positional_args): 87 """A decorator to declare that only the first N arguments may be positional. 88 89 This decorator makes it easy to support Python 3 style keyword-only 90 parameters. For example, in Python 3 it is possible to write: 91 92 def fn(pos1, *, kwonly1=None, kwonly1=None): 93 ... 94 95 All named parameters after * must be a keyword: 96 97 fn(10, 'kw1', 'kw2') # Raises exception. 98 fn(10, kwonly1='kw1') # Ok. 99 100 Example: 101 To define a function like above, do: 102 103 @positional(1) 104 def fn(pos1, kwonly1=None, kwonly2=None): 105 ... 106 107 If no default value is provided to a keyword argument, it becomes a required 108 keyword argument: 109 110 @positional(0) 111 def fn(required_kw): 112 ... 113 114 This must be called with the keyword parameter: 115 116 fn() # Raises exception. 117 fn(10) # Raises exception. 118 fn(required_kw=10) # Ok. 119 120 When defining instance or class methods always remember to account for 121 'self' and 'cls': 122 123 class MyClass(object): 124 125 @positional(2) 126 def my_method(self, pos1, kwonly1=None): 127 ... 128 129 @classmethod 130 @positional(2) 131 def my_method(cls, pos1, kwonly1=None): 132 ... 133 134 One can omit the argument to 'positional' altogether, and then no 135 arguments with default values may be passed positionally. This 136 would be equivalent to placing a '*' before the first argument 137 with a default value in Python 3. If there are no arguments with 138 default values, and no argument is given to 'positional', an error 139 is raised. 140 141 @positional 142 def fn(arg1, arg2, required_kw1=None, required_kw2=0): 143 ... 144 145 fn(1, 3, 5) # Raises exception. 146 fn(1, 3) # Ok. 147 fn(1, 3, required_kw1=5) # Ok. 148 149 Args: 150 max_positional_arguments: Maximum number of positional arguments. All 151 parameters after the this index must be keyword only. 152 153 Returns: 154 A decorator that prevents using arguments after max_positional_args from 155 being used as positional parameters. 156 157 Raises: 158 TypeError if a keyword-only argument is provided as a positional parameter. 159 ValueError if no maximum number of arguments is provided and the function 160 has no arguments with default values. 161 """ 162 def positional_decorator(wrapped): 163 def positional_wrapper(*args, **kwargs): 164 if len(args) > max_positional_args: 165 plural_s = '' 166 if max_positional_args != 1: 167 plural_s = 's' 168 raise TypeError('%s() takes at most %d positional argument%s ' 169 '(%d given)' % (wrapped.__name__, 170 max_positional_args, 171 plural_s, len(args))) 172 return wrapped(*args, **kwargs) 173 return positional_wrapper 174 175 if isinstance(max_positional_args, six.integer_types): 176 return positional_decorator 177 else: 178 args, _, _, defaults = inspect.getargspec(max_positional_args) 179 if defaults is None: 180 raise ValueError( 181 'Functions with no keyword arguments must specify ' 182 'max_positional_args') 183 return positional(len(args) - len(defaults))(max_positional_args) 184 185 186# TODO(rafek): Support 'level' from the Accept header standard. 187class AcceptItem(object): 188 """Encapsulate a single entry of an Accept header. 189 190 Parses and extracts relevent values from an Accept header and implements 191 a sort order based on the priority of each requested type as defined 192 here: 193 194 http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html 195 196 Accept headers are normally a list of comma separated items. Each item 197 has the format of a normal HTTP header. For example: 198 199 Accept: text/plain, text/html, text/*, */* 200 201 This header means to prefer plain text over HTML, HTML over any other 202 kind of text and text over any other kind of supported format. 203 204 This class does not attempt to parse the list of items from the Accept header. 205 The constructor expects the unparsed sub header and the index within the 206 Accept header that the fragment was found. 207 208 Properties: 209 index: The index that this accept item was found in the Accept header. 210 main_type: The main type of the content type. 211 sub_type: The sub type of the content type. 212 q: The q value extracted from the header as a float. If there is no q 213 value, defaults to 1.0. 214 values: All header attributes parsed form the sub-header. 215 sort_key: A tuple (no_main_type, no_sub_type, q, no_values, index): 216 no_main_type: */* has the least priority. 217 no_sub_type: Items with no sub-type have less priority. 218 q: Items with lower q value have less priority. 219 no_values: Items with no values have less priority. 220 index: Index of item in accept header is the last priority. 221 """ 222 223 __CONTENT_TYPE_REGEX = re.compile(r'^([^/]+)/([^/]+)$') 224 225 def __init__(self, accept_header, index): 226 """Parse component of an Accept header. 227 228 Args: 229 accept_header: Unparsed sub-expression of accept header. 230 index: The index that this accept item was found in the Accept header. 231 """ 232 accept_header = accept_header.lower() 233 content_type, values = cgi.parse_header(accept_header) 234 match = self.__CONTENT_TYPE_REGEX.match(content_type) 235 if not match: 236 raise AcceptError('Not valid Accept header: %s' % accept_header) 237 self.__index = index 238 self.__main_type = match.group(1) 239 self.__sub_type = match.group(2) 240 self.__q = float(values.get('q', 1)) 241 self.__values = values 242 243 if self.__main_type == '*': 244 self.__main_type = None 245 246 if self.__sub_type == '*': 247 self.__sub_type = None 248 249 self.__sort_key = (not self.__main_type, 250 not self.__sub_type, 251 -self.__q, 252 not self.__values, 253 self.__index) 254 255 @property 256 def index(self): 257 return self.__index 258 259 @property 260 def main_type(self): 261 return self.__main_type 262 263 @property 264 def sub_type(self): 265 return self.__sub_type 266 267 @property 268 def q(self): 269 return self.__q 270 271 @property 272 def values(self): 273 """Copy the dictionary of values parsed from the header fragment.""" 274 return dict(self.__values) 275 276 @property 277 def sort_key(self): 278 return self.__sort_key 279 280 def match(self, content_type): 281 """Determine if the given accept header matches content type. 282 283 Args: 284 content_type: Unparsed content type string. 285 286 Returns: 287 True if accept header matches content type, else False. 288 """ 289 content_type, _ = cgi.parse_header(content_type) 290 match = self.__CONTENT_TYPE_REGEX.match(content_type.lower()) 291 if not match: 292 return False 293 294 main_type, sub_type = match.group(1), match.group(2) 295 if not(main_type and sub_type): 296 return False 297 298 return ((self.__main_type is None or self.__main_type == main_type) and 299 (self.__sub_type is None or self.__sub_type == sub_type)) 300 301 302 def __cmp__(self, other): 303 """Comparison operator based on sort keys.""" 304 if not isinstance(other, AcceptItem): 305 return NotImplemented 306 return cmp(self.sort_key, other.sort_key) 307 308 def __str__(self): 309 """Rebuilds Accept header.""" 310 content_type = '%s/%s' % (self.__main_type or '*', self.__sub_type or '*') 311 values = self.values 312 313 if values: 314 value_strings = ['%s=%s' % (i, v) for i, v in values.items()] 315 return '%s; %s' % (content_type, '; '.join(value_strings)) 316 else: 317 return content_type 318 319 def __repr__(self): 320 return 'AcceptItem(%r, %d)' % (str(self), self.__index) 321 322 323def parse_accept_header(accept_header): 324 """Parse accept header. 325 326 Args: 327 accept_header: Unparsed accept header. Does not include name of header. 328 329 Returns: 330 List of AcceptItem instances sorted according to their priority. 331 """ 332 accept_items = [] 333 for index, header in enumerate(accept_header.split(',')): 334 accept_items.append(AcceptItem(header, index)) 335 return sorted(accept_items) 336 337 338def choose_content_type(accept_header, supported_types): 339 """Choose most appropriate supported type based on what client accepts. 340 341 Args: 342 accept_header: Unparsed accept header. Does not include name of header. 343 supported_types: List of content-types supported by the server. The index 344 of the supported types determines which supported type is prefered by 345 the server should the accept header match more than one at the same 346 priority. 347 348 Returns: 349 The preferred supported type if the accept header matches any, else None. 350 """ 351 for accept_item in parse_accept_header(accept_header): 352 for supported_type in supported_types: 353 if accept_item.match(supported_type): 354 return supported_type 355 return None 356 357 358@positional(1) 359def get_package_for_module(module): 360 """Get package name for a module. 361 362 Helper calculates the package name of a module. 363 364 Args: 365 module: Module to get name for. If module is a string, try to find 366 module in sys.modules. 367 368 Returns: 369 If module contains 'package' attribute, uses that as package name. 370 Else, if module is not the '__main__' module, the module __name__. 371 Else, the base name of the module file name. Else None. 372 """ 373 if isinstance(module, six.string_types): 374 try: 375 module = sys.modules[module] 376 except KeyError: 377 return None 378 379 try: 380 return six.text_type(module.package) 381 except AttributeError: 382 if module.__name__ == '__main__': 383 try: 384 file_name = module.__file__ 385 except AttributeError: 386 pass 387 else: 388 base_name = os.path.basename(file_name) 389 split_name = os.path.splitext(base_name) 390 if len(split_name) == 1: 391 return six.text_type(base_name) 392 else: 393 return u'.'.join(split_name[:-1]) 394 395 return six.text_type(module.__name__) 396 397 398def total_seconds(offset): 399 """Backport of offset.total_seconds() from python 2.7+.""" 400 seconds = offset.days * 24 * 60 * 60 + offset.seconds 401 microseconds = seconds * 10**6 + offset.microseconds 402 return microseconds / (10**6 * 1.0) 403 404 405class TimeZoneOffset(datetime.tzinfo): 406 """Time zone information as encoded/decoded for DateTimeFields.""" 407 408 def __init__(self, offset): 409 """Initialize a time zone offset. 410 411 Args: 412 offset: Integer or timedelta time zone offset, in minutes from UTC. This 413 can be negative. 414 """ 415 super(TimeZoneOffset, self).__init__() 416 if isinstance(offset, datetime.timedelta): 417 offset = total_seconds(offset) / 60 418 self.__offset = offset 419 420 def utcoffset(self, dt): 421 """Get the a timedelta with the time zone's offset from UTC. 422 423 Returns: 424 The time zone offset from UTC, as a timedelta. 425 """ 426 return datetime.timedelta(minutes=self.__offset) 427 428 def dst(self, dt): 429 """Get the daylight savings time offset. 430 431 The formats that ProtoRPC uses to encode/decode time zone information don't 432 contain any information about daylight savings time. So this always 433 returns a timedelta of 0. 434 435 Returns: 436 A timedelta of 0. 437 """ 438 return datetime.timedelta(0) 439 440 441def decode_datetime(encoded_datetime): 442 """Decode a DateTimeField parameter from a string to a python datetime. 443 444 Args: 445 encoded_datetime: A string in RFC 3339 format. 446 447 Returns: 448 A datetime object with the date and time specified in encoded_datetime. 449 450 Raises: 451 ValueError: If the string is not in a recognized format. 452 """ 453 # Check if the string includes a time zone offset. Break out the 454 # part that doesn't include time zone info. Convert to uppercase 455 # because all our comparisons should be case-insensitive. 456 time_zone_match = _TIME_ZONE_RE.search(encoded_datetime) 457 if time_zone_match: 458 time_string = encoded_datetime[:time_zone_match.start(1)].upper() 459 else: 460 time_string = encoded_datetime.upper() 461 462 if '.' in time_string: 463 format_string = '%Y-%m-%dT%H:%M:%S.%f' 464 else: 465 format_string = '%Y-%m-%dT%H:%M:%S' 466 467 decoded_datetime = datetime.datetime.strptime(time_string, format_string) 468 469 if not time_zone_match: 470 return decoded_datetime 471 472 # Time zone info was included in the parameter. Add a tzinfo 473 # object to the datetime. Datetimes can't be changed after they're 474 # created, so we'll need to create a new one. 475 if time_zone_match.group('z'): 476 offset_minutes = 0 477 else: 478 sign = time_zone_match.group('sign') 479 hours, minutes = [int(value) for value in 480 time_zone_match.group('hours', 'minutes')] 481 offset_minutes = hours * 60 + minutes 482 if sign == '-': 483 offset_minutes *= -1 484 485 return datetime.datetime(decoded_datetime.year, 486 decoded_datetime.month, 487 decoded_datetime.day, 488 decoded_datetime.hour, 489 decoded_datetime.minute, 490 decoded_datetime.second, 491 decoded_datetime.microsecond, 492 TimeZoneOffset(offset_minutes)) 493