# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Provides functionality for encoding tokenized messages."""

import argparse
import base64
import struct
import sys
from typing import Sequence, Union

from pw_tokenizer import tokens

_INT32_MIN = -(2**31)
_INT32_MAX = 2**31 - 1
_UINT32_MAX = 2**32 - 1
BASE64_PREFIX = '$'


def _zig_zag_encode(value: int) -> int:
    """Encodes signed integers to give a compact varint encoding.

    Non-negative values map to even numbers (n -> 2n) and negative values map
    to odd numbers (n -> -2n - 1), so 0, -1, 1, -2, ... become 0, 1, 2, 3, ...
    """
    return value << 1 if value >= 0 else (value << 1) ^ (~0)


def _little_endian_base128_encode(integer: int) -> bytearray:
    """Encodes a non-negative integer as a little-endian base-128 varint.

    Each output byte carries 7 bits of the value, least significant group
    first. The top bit is set on every byte except the last.

    Raises:
      ValueError: integer is negative. A negative value never shifts down to
        zero, so the loop below would not terminate.
    """
    if integer < 0:
        raise ValueError(
            f'Cannot varint-encode {integer}: the value must be non-negative'
        )

    data = bytearray()

    while True:
        # Grab 7 bits; the eighth bit is set to 1 to indicate more data coming.
        data.append((integer & 0x7F) | 0x80)
        integer >>= 7

        if not integer:
            break

    data[-1] &= 0x7F  # clear the top bit of the last byte
    return data


def _encode_int32(arg: int) -> bytearray:
    """Encodes a 32-bit integer as a zig-zag encoded varint.

    Values above the signed 32-bit maximum are first reinterpreted as the
    signed value with the same 32-bit pattern (e.g. 0xFFFFFFFF becomes -1).
    """
    # Convert large unsigned numbers into their corresponding signed values.
    if arg > _INT32_MAX:
        arg -= 2**32

    return _little_endian_base128_encode(_zig_zag_encode(arg))


def _encode_string(arg: bytes) -> bytes:
    """Encodes a string as a one-byte size prefix followed by its bytes.

    Strings shorter than 128 bytes are emitted whole, prefixed with their
    length. Longer strings are cut to their first 127 bytes and prefixed with
    0xFF (127 with the top bit set).
    """
    size_byte = len(arg) if len(arg) < 128 else 0xFF
    return struct.pack('B', size_byte) + arg[:127]


def encode_args(*args: Union[int, float, bytes, str]) -> bytes:
    """Encodes a list of arguments to their on-wire representation.

    Integers are zig-zag varint encoded, floats are packed as little-endian
    32-bit floats, and str/bytes are encoded as size-prefixed strings (str
    arguments are encoded with str.encode() first).

    Args:
      *args: The arguments to encode, in order.

    Returns:
      The concatenated encoding of every argument.

    Raises:
      ValueError: An integer is outside of the range representable in 32 bits
        (signed or unsigned), or an argument has an unsupported type.
    """

    data = bytearray(b'')
    for arg in args:
        if isinstance(arg, int):
            # Accept anything that fits in an int32_t or a uint32_t. Checking
            # bit_length() alone would let negative values below -2**31
            # through, since bit_length() ignores the sign.
            if not _INT32_MIN <= arg <= _UINT32_MAX:
                raise ValueError(
                    f'Cannot encode {arg}: only 32-bit integers may be encoded'
                )
            data += _encode_int32(arg)
        elif isinstance(arg, float):
            data += struct.pack('<f', arg)
        elif isinstance(arg, str):
            data += _encode_string(arg.encode())
        elif isinstance(arg, bytes):
            data += _encode_string(arg)
        else:
            raise ValueError(
                f'{arg} has type {type(arg)}, which is not supported'
            )
    return bytes(data)


def encode_token_and_args(
    token: int, *args: Union[int, float, bytes, str]
) -> bytes:
    """Encodes a tokenized message given its token and arguments.

    This function assumes that the token represents a format string with
    conversion specifiers that correspond with the provided argument types.
    Currently, only 32-bit integers are supported.

    Args:
      token: The token, as an unsigned 32-bit integer.
      *args: The arguments for the tokenized format string.

    Returns:
      The token packed as a little-endian 32-bit integer, followed by the
      encoded arguments.

    Raises:
      ValueError: The token is not an unsigned 32-bit integer, or an argument
        cannot be encoded (see encode_args).
    """

    if token < 0 or token > _UINT32_MAX:
        raise ValueError(
            f'The token ({token}) must be an unsigned 32-bit integer'
        )

    return struct.pack('<I', token) + encode_args(*args)


def prefixed_base64(data: bytes, prefix: str = BASE64_PREFIX) -> str:
    """Encodes a tokenized message as prefixed Base64.

    Args:
      data: The binary tokenized message.
      prefix: Text to place before the Base64 data; defaults to BASE64_PREFIX.

    Returns:
      The prefix followed by the standard Base64 encoding of data.
    """
    return prefix + base64.b64encode(data).decode()


def _parse_user_input(string: str):
    """Evaluates a string as Python code or returns it as a literal string.

    Returns the evaluated value if it is an int, float, or str (so '"9"'
    yields the string 9 without the quotes). Otherwise, including when the
    string cannot be evaluated at all, returns the string unmodified.
    """
    # NOTE: eval() with empty builtins is NOT a sandbox. This is only
    # acceptable because the input comes from the command line of the user
    # running this tool; never pass untrusted data to this function.
    try:
        value = eval(string, dict(__builtins__={}))  # pylint: disable=eval-used
    except Exception:  # pylint: disable=broad-except
        # Anything that fails to evaluate (invalid syntax, unknown names,
        # division by zero, bad indexing, ...) is treated as a plain string
        # rather than crashing the tool.
        return string

    return value if isinstance(value, (int, float, str)) else string


def _main(format_string_list: Sequence[str], raw_args: Sequence[str]) -> int:
    """Tokenizes and encodes a format string and arguments, then prints them.

    Args:
      format_string_list: A sequence containing exactly one format string.
      raw_args: The unparsed command line arguments for the format string.

    Returns:
      The process exit code, which is always 0.
    """
    (format_string,) = format_string_list
    token = tokens.pw_tokenizer_65599_hash(format_string)
    args = tuple(_parse_user_input(a) for a in raw_args)

    data = encode_token_and_args(token, *args)
    # Read the token back out of the encoded message so that the value shown
    # is exactly what was packed into the first four bytes.
    token = int.from_bytes(data[:4], 'little')
    binary = ' '.join(f'{b:02x}' for b in data)

    print(f'      Raw input: {format_string!r} % {args!r}')
    print(f'Formatted input: {format_string % args}')
    print(f'          Token: 0x{token:08x}')
    print(f'        Encoded: {data!r} ({binary}) [{len(data)} bytes]')
    print(f'Prefixed Base64: {prefixed_base64(data)}')

    return 0


def _parse_args() -> dict:
    """Parses the command line and returns the arguments as a dict.

    The keys of the returned dict match the parameter names of _main.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        'format_string_list',
        metavar='FORMAT_STRING',
        nargs=1,
        help='Format string with optional %%-style arguments.',
    )
    parser.add_argument(
        'raw_args',
        metavar='ARG',
        nargs='*',
        help=(
            'Arguments for the format string, if any. Arguments are parsed '
            'as Python expressions, with no builtins (e.g. 9 is the number '
            '9 and \'"9"\' is the string "9"). Arguments that are not valid '
            'Python are treated as string literals.'
        ),
    )
    return vars(parser.parse_args())


if __name__ == '__main__':
    sys.exit(_main(**_parse_args()))