#!/usr/bin/env python3
# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Tests for detokenize."""

import base64
import datetime as dt
import io
import os
from pathlib import Path
import struct
import tempfile
import unittest
from unittest import mock

from pw_tokenizer import database
from pw_tokenizer import detokenize
from pw_tokenizer import elf_reader
from pw_tokenizer import tokens


# This function is not part of this test. It was used to generate the binary
# strings for EMPTY_ELF and ELF_WITH_TOKENIZER_SECTIONS. It takes a path and
# returns a Python byte string suitable for copying into Python source code.
def path_to_byte_string(path):
    with open(path, 'rb') as fd:
        data = fd.read()

    output = []
    indices = iter(range(len(data)))

    while True:
        line = ''

        while len(line) < 70:
            try:
                i = next(indices)
            except StopIteration:
                break

            line += repr(data[i:i + 1])[2:-1].replace("'", r'\'')

        if not line:
            return ''.join(output)

        output.append("    b'{}'\n".format(''.join(line)))


# This is an empty ELF file. It was created from the ELF file for
# tokenize_test.cc with the command:
#
#   arm-none-eabi-objcopy -S --only-section NO_SECTIONS_PLEASE <ELF> <OUTPUT>
#
# The resulting ELF was converted to a Python binary string using the
# path_to_byte_string function above.
EMPTY_ELF = (
    b'\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00(\x00\x01'
    b'\x00\x00\x00\xd1\x83\x00\x084\x00\x00\x00\xe0\x00\x00\x00\x00\x04\x00\x05'
    b'4\x00 \x00\x05\x00(\x00\x02\x00\x01\x00\x01\x00\x00\x00\xd4\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x07\x00'
    b'\x00\x00\x00\x00\x01\x00\x01\x00\x00\x00\xd4\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00'
    b'\x01\x00\x01\x00\x00\x00\xd4\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x01\x00\x01\x00'
    b'\x00\x00\xd4\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x01\x00\x01\x00\x00\x00\xd4\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x06\x00\x00\x00\x00\x00\x01\x00\x00.shstrtab\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01'
    b'\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd4\x00\x00'
    b'\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00'
    b'\x00\x00\x00')

# This is an ELF file with only the pw_tokenizer sections. It was created
# from a tokenize_test binary built for the STM32F429i Discovery board. The
# pw_tokenizer sections were extracted with this command:
#
#   arm-none-eabi-objcopy -S --only-section ".pw_tokenizer*" <ELF> <OUTPUT>
#
ELF_WITH_TOKENIZER_SECTIONS = Path(__file__).parent.joinpath(
    'example_binary_with_tokenized_strings.elf').read_bytes()

TOKENS_IN_ELF = 22

# 0x2e668cd6 is 'Jello, world!' (which is also used in database_test.py).
JELLO_WORLD_TOKEN = b'\xd6\x8c\x66\x2e'
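
# Sanity check: a binary token is the 32-bit token value packed little-endian,
# so 0x2e668cd6 ('Jello, world!') corresponds to the four bytes above.
assert JELLO_WORLD_TOKEN == struct.pack('<I', 0x2e668cd6)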


class DetokenizeTest(unittest.TestCase):
    """Tests the detokenize.Detokenizer."""
    def test_simple(self):
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(0xcdab,
                                            '%02d %s %c%%',
                                            date_removed=dt.datetime.now())
            ]))
        self.assertEqual(str(detok.detokenize(b'\xab\xcd\0\0\x02\x03Two\x66')),
                         '01 Two 3%')
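
    # A note on the payloads in these tests: arguments follow the 4-byte
    # little-endian token. Integer args (including %c) appear to be encoded as
    # zigzag varints and string args carry a length-prefix byte, so above
    # b'\x02' decodes to 1 ('01'), b'\x03Two' to 'Two', and b'\x66' (102,
    # zigzag-decoded to 51) to '3', producing '01 Two 3%'.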

    def test_detokenize_extra_data_is_unsuccessful(self):
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(1,
                                            'no args',
                                            date_removed=dt.datetime(1, 1, 1))
            ]))

        result = detok.detokenize(b'\x01\0\0\0\x04args')
        self.assertEqual(len(result.failures), 1)
        string, args, remaining = result.failures[0]
        self.assertEqual('no args', string)
        self.assertFalse(args)
        self.assertEqual(b'\x04args', remaining)
        self.assertEqual('no args', string)
        self.assertEqual('no args', str(result))

    def test_detokenize_missing_data_is_unsuccessful(self):
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(2,
                                            '%s',
                                            date_removed=dt.datetime(1, 1, 1))
            ]))

        result = detok.detokenize(b'\x02\0\0\0')
        string, args, remaining = result.failures[0]
        self.assertEqual('%s', string)
        self.assertEqual(len(args), 1)
        self.assertEqual(b'', remaining)
        self.assertEqual(len(result.failures), 1)
        self.assertEqual('%s', str(result))

    def test_detokenize_missing_data_with_errors_is_unsuccessful(self):
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(2,
                                            '%s',
                                            date_removed=dt.datetime(1, 1, 1))
            ]),
            show_errors=True)

        result = detok.detokenize(b'\x02\0\0\0')
        string, args, remaining = result.failures[0]
        self.assertIn('%s MISSING', string)
        self.assertEqual(len(args), 1)
        self.assertEqual(b'', remaining)
        self.assertEqual(len(result.failures), 1)
        self.assertIn('%s MISSING', str(result))

    def test_unparsed_data(self):
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(1,
                                            'no args',
                                            date_removed=dt.datetime(
                                                100, 1, 1)),
            ]))
        result = detok.detokenize(b'\x01\0\0\0o_o')
        self.assertFalse(result.ok())
        self.assertEqual('no args', str(result))
        self.assertIn('o_o', repr(result))
        self.assertIn('decoding failed', result.error_message())

    def test_empty_db(self):
        detok = detokenize.Detokenizer(io.BytesIO(EMPTY_ELF))
        self.assertFalse(detok.detokenize(b'\x12\x34\0\0').ok())
        self.assertIn('unknown token',
                      detok.detokenize(b'1234').error_message())
        self.assertIn('unknown token', repr(detok.detokenize(b'1234')))

        self.assertEqual('$' + base64.b64encode(b'1234').decode(),
                         str(detok.detokenize(b'1234')))

        self.assertIsNone(detok.detokenize(b'').token)

    def test_empty_db_show_errors(self):
        detok = detokenize.Detokenizer(io.BytesIO(EMPTY_ELF), show_errors=True)
        self.assertFalse(detok.detokenize(b'\x12\x34\0\0').ok())
        self.assertIn('unknown token',
                      detok.detokenize(b'1234').error_message())
        self.assertIn('unknown token', repr(detok.detokenize(b'1234')))
        self.assertIn('unknown token', str(detok.detokenize(b'1234')))

        self.assertIsNone(detok.detokenize(b'').token)

    def test_missing_token_show_errors(self):
        detok = detokenize.Detokenizer(io.BytesIO(EMPTY_ELF), show_errors=True)
        self.assertIn('missing token', detok.detokenize(b'').error_message())
        self.assertIn('missing token', str(detok.detokenize(b'')))
        self.assertIn('missing token', repr(detok.detokenize(b'123')))

        self.assertIn('missing token', detok.detokenize(b'1').error_message())
        self.assertIn('missing token', str(detok.detokenize(b'1')))
        self.assertIn('missing token', repr(detok.detokenize(b'1')))

        self.assertIn('missing token',
                      detok.detokenize(b'123').error_message())
        self.assertIn('missing token', str(detok.detokenize(b'123')))
        self.assertIn('missing token', repr(detok.detokenize(b'123')))

    def test_missing_token(self):
        detok = detokenize.Detokenizer(io.BytesIO(EMPTY_ELF))
        self.assertIn('missing token', detok.detokenize(b'').error_message())
        self.assertEqual('$', str(detok.detokenize(b'')))
        self.assertIn('missing token', repr(detok.detokenize(b'123')))

        self.assertIn('missing token', detok.detokenize(b'1').error_message())
        self.assertEqual('$' + base64.b64encode(b'1').decode(),
                         str(detok.detokenize(b'1')))
        self.assertIn('missing token', repr(detok.detokenize(b'1')))

        self.assertIn('missing token',
                      detok.detokenize(b'123').error_message())
        self.assertEqual('$' + base64.b64encode(b'123').decode(),
                         str(detok.detokenize(b'123')))
        self.assertIn('missing token', repr(detok.detokenize(b'123')))

    def test_decode_from_elf_data(self):
        detok = detokenize.Detokenizer(io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))

        self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())
        self.assertEqual(str(detok.detokenize(JELLO_WORLD_TOKEN)),
                         'Jello, world!')

        undecoded_args = detok.detokenize(JELLO_WORLD_TOKEN + b'some junk')
        self.assertFalse(undecoded_args.ok())
        self.assertEqual(str(undecoded_args), 'Jello, world!')

        self.assertTrue(detok.detokenize(b'\0\0\0\0').ok())
        self.assertEqual(str(detok.detokenize(b'\0\0\0\0')), '')

    def test_decode_from_elf_file(self):
        """Test decoding from an elf file."""
        detok = detokenize.Detokenizer(io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        expected_tokens = frozenset(detok.database.token_to_entries.keys())

        with tempfile.NamedTemporaryFile('wb', delete=False) as elf:
            try:
                elf.write(ELF_WITH_TOKENIZER_SECTIONS)
                elf.close()

                # Open ELF by file object
                with open(elf.name, 'rb') as fd:
                    detok = detokenize.Detokenizer(fd)

                self.assertEqual(
                    expected_tokens,
                    frozenset(detok.database.token_to_entries.keys()))

                # Open ELF by path
                detok = detokenize.Detokenizer(elf.name)
                self.assertEqual(
                    expected_tokens,
                    frozenset(detok.database.token_to_entries.keys()))

                # Open ELF by elf_reader.Elf
                with open(elf.name, 'rb') as fd:
                    detok = detokenize.Detokenizer(elf_reader.Elf(fd))

                self.assertEqual(
                    expected_tokens,
                    frozenset(detok.database.token_to_entries.keys()))
            finally:
                os.unlink(elf.name)

    def test_decode_from_csv_file(self):
        detok = detokenize.Detokenizer(io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        expected_tokens = frozenset(detok.database.token_to_entries.keys())

        csv_database = str(detok.database)
        self.assertEqual(len(csv_database.splitlines()), TOKENS_IN_ELF)

        with tempfile.NamedTemporaryFile('w', delete=False) as csv_file:
            try:
                csv_file.write(csv_database)
                csv_file.close()

                # Open CSV by path
                detok = detokenize.Detokenizer(csv_file.name)
                self.assertEqual(
                    expected_tokens,
                    frozenset(detok.database.token_to_entries.keys()))

                # Open CSV by file object
                with open(csv_file.name) as fd:
                    detok = detokenize.Detokenizer(fd)

                self.assertEqual(
                    expected_tokens,
                    frozenset(detok.database.token_to_entries.keys()))
            finally:
                os.unlink(csv_file.name)

    def test_create_detokenizer_with_token_database(self):
        detok = detokenize.Detokenizer(io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        expected_tokens = frozenset(detok.database.token_to_entries.keys())

        detok = detokenize.Detokenizer(detok.database)
        self.assertEqual(expected_tokens,
                         frozenset(detok.database.token_to_entries.keys()))


class DetokenizeWithCollisions(unittest.TestCase):
    """Tests collision resolution."""
    def setUp(self):
        super().setUp()
        token = 0xbaad

        # Database with several conflicting tokens.
        self.detok = detokenize.Detokenizer(tokens.Database([
            tokens.TokenizedStringEntry(
                token, 'REMOVED', date_removed=dt.datetime(9, 1, 1)),
            tokens.TokenizedStringEntry(token, 'newer'),
            tokens.TokenizedStringEntry(
                token, 'A: %d', date_removed=dt.datetime(30, 5, 9)),
            tokens.TokenizedStringEntry(
                token, 'B: %c', date_removed=dt.datetime(30, 5, 10)),
            tokens.TokenizedStringEntry(token, 'C: %s'),
            tokens.TokenizedStringEntry(token, '%d%u'),
            tokens.TokenizedStringEntry(token, '%s%u %d'),
            tokens.TokenizedStringEntry(1, '%s'),
            tokens.TokenizedStringEntry(1, '%d'),
            tokens.TokenizedStringEntry(2, 'Three %s %s %s'),
            tokens.TokenizedStringEntry(2, 'Five %d %d %d %d %s'),
        ]))  # yapf: disable
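
    # The tests below exercise how a result is chosen when several strings
    # share one token: successful decodes beat failures, decodes that consume
    # all of the data and more arguments rank higher, and remaining ties favor
    # the entry most recently present in the database (no removal date).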

    def test_collision_no_args_favors_most_recently_present(self):
        no_args = self.detok.detokenize(b'\xad\xba\0\0')
        self.assertFalse(no_args.ok())
        self.assertEqual(len(no_args.successes), 2)
        self.assertEqual(len(no_args.failures), 5)
        self.assertEqual(len(no_args.matches()), 7)
        self.assertEqual(str(no_args), 'newer')
        self.assertEqual(len(no_args.best_result()[1]), 0)
        self.assertEqual(no_args.best_result()[0], 'newer')

    def test_collision_one_integer_arg_favors_most_recently_present(self):
        multiple_correct = self.detok.detokenize(b'\xad\xba\0\0\x7a')
        self.assertFalse(multiple_correct.ok())
        self.assertIn('ERROR', repr(multiple_correct))
        self.assertEqual(len(multiple_correct.successes), 2)
        self.assertEqual(len(multiple_correct.failures), 5)
        self.assertEqual(len(multiple_correct.matches()), 7)
        self.assertEqual(str(multiple_correct), 'B: =')

    def test_collision_one_integer_arg_favors_successful_decode(self):
        # One string decodes successfully, since the arg is out of range for
        # %c.
        int_arg = self.detok.detokenize(b'\xad\xba\0\0\xfe\xff\xff\xff\x0f')
        self.assertTrue(int_arg.ok())
        self.assertEqual(str(int_arg), 'A: 2147483647')

    def test_collision_one_string_arg_favors_successful_decode(self):
        # One string decodes successfully, since decoding the argument as an
        # integer does not decode all the data.
        string_arg = self.detok.detokenize(b'\xad\xba\0\0\x02Hi')
        self.assertTrue(string_arg.ok())
        self.assertEqual(str(string_arg), 'C: Hi')

    def test_collision_one_string_arg_favors_decoding_all_data(self):
        result = self.detok.detokenize(b'\1\0\0\0\x83hi')
        self.assertEqual(len(result.failures), 2)
        # Should resolve to the string since %d would leave one byte behind.
        self.assertEqual(str(result), '%s')

    def test_collision_multiple_args_favors_decoding_more_arguments(self):
        result = self.detok.detokenize(b'\2\0\0\0\1\2\1\4\5')
        self.assertEqual(len(result.matches()), 2)
        self.assertEqual(result.matches()[0][0], 'Five -1 1 -1 2 %s')
        self.assertEqual(result.matches()[1][0], 'Three \2 \4 %s')

    def test_collision_multiple_args_favors_decoding_all_arguments(self):
        unambiguous = self.detok.detokenize(b'\xad\xba\0\0\x01#\x00\x01')
        self.assertTrue(unambiguous.ok())
        self.assertEqual(len(unambiguous.matches()), 7)
        self.assertEqual('#0 -1', str(unambiguous))
        self.assertIn('#0 -1', repr(unambiguous))
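

# AutoUpdatingDetokenizer watches its database file and reloads it when the
# file's modification time changes. The tests below pass min_poll_period_s=0
# so every detokenize call polls, and mock os.path.getmtime to control when a
# reload appears necessary.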
@mock.patch('os.path.getmtime')
class AutoUpdatingDetokenizerTest(unittest.TestCase):
    """Tests the AutoUpdatingDetokenizer class."""
    def test_update(self, mock_getmtime):
        """Tests the update command."""

        db = database.load_token_database(
            io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        self.assertEqual(len(db), TOKENS_IN_ELF)

        the_time = [100]

        def move_back_time_if_file_exists(path):
            if os.path.exists(path):
                the_time[0] -= 1
                return the_time[0]

            raise FileNotFoundError

        mock_getmtime.side_effect = move_back_time_if_file_exists

        with tempfile.NamedTemporaryFile('wb', delete=False) as file:
            try:
                file.close()

                detok = detokenize.AutoUpdatingDetokenizer(
                    file.name, min_poll_period_s=0)
                self.assertFalse(detok.detokenize(JELLO_WORLD_TOKEN).ok())

                with open(file.name, 'wb') as fd:
                    tokens.write_binary(db, fd)

                self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())
            finally:
                os.unlink(file.name)

        # The database stays around if the file is deleted.
        self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())

    def test_no_update_if_time_is_same(self, mock_getmtime):
        mock_getmtime.return_value = 100

        with tempfile.NamedTemporaryFile('wb', delete=False) as file:
            try:
                tokens.write_csv(
                    database.load_token_database(
                        io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS)), file)
                file.close()

                detok = detokenize.AutoUpdatingDetokenizer(
                    file, min_poll_period_s=0)
                self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())

                # Empty the database, but keep the mock modified time the same.
                with open(file.name, 'wb'):
                    pass

                self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())
                self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())

                # Move back time so the now-empty file is reloaded.
                mock_getmtime.return_value = 50
                self.assertFalse(detok.detokenize(JELLO_WORLD_TOKEN).ok())
            finally:
                os.unlink(file.name)
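

# PrefixedMessageDecoder('$', 'abcdefg') picks out '$'-prefixed messages made
# of the characters a-g and passes each one (prefix included) through the
# supplied transform, copying all other bytes through unchanged. _next_char
# increments every byte so the transformed spans are easy to spot.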
def _next_char(message: bytes) -> bytes:
    return bytes(b + 1 for b in message)


class PrefixedMessageDecoderTest(unittest.TestCase):
    def setUp(self):
        super().setUp()
        self.decode = detokenize.PrefixedMessageDecoder('$', 'abcdefg')

    def test_transform_single_message(self):
        self.assertEqual(
            b'%bcde',
            b''.join(self.decode.transform(io.BytesIO(b'$abcd'), _next_char)))

    def test_transform_message_amidst_other_only_affects_message(self):
        self.assertEqual(
            b'%%WHAT?%bcd%WHY? is this %ok %', b''.join(
                self.decode.transform(
                    io.BytesIO(b'$$WHAT?$abc$WHY? is this $ok $'),
                    _next_char)))

    def test_transform_empty_message(self):
        self.assertEqual(
            b'%1%',
            b''.join(self.decode.transform(io.BytesIO(b'$1$'), _next_char)))

    def test_transform_sequential_messages(self):
        self.assertEqual(
            b'%bcd%efghh', b''.join(
                self.decode.transform(io.BytesIO(b'$abc$defgh'), _next_char)))


class DetokenizeBase64(unittest.TestCase):
    """Tests detokenizing Base64 messages."""

    JELLO = b'$' + base64.b64encode(JELLO_WORLD_TOKEN)

    RECURSION_STRING = f'The secret message is "{JELLO.decode()}"'
    RECURSION = b'$' + base64.b64encode(
        struct.pack('I', tokens.default_hash(RECURSION_STRING)))

    RECURSION_STRING_2 = f"'{RECURSION.decode()}', said the spy."
    RECURSION_2 = b'$' + base64.b64encode(
        struct.pack('I', tokens.default_hash(RECURSION_STRING_2)))

    TEST_CASES = (
        (b'', b''),
        (b'nothing here', b'nothing here'),
        (JELLO, b'Jello, world!'),
        (JELLO + b'a', b'Jello, world!a'),
        (JELLO + b'abc', b'Jello, world!abc'),
        (JELLO + b'abc=', b'Jello, world!abc='),
        (b'$a' + JELLO + b'a', b'$aJello, world!a'),
        (b'Hello ' + JELLO + b'?', b'Hello Jello, world!?'),
        (b'$' + JELLO, b'$Jello, world!'),
        (JELLO + JELLO, b'Jello, world!Jello, world!'),
        (JELLO + b'$' + JELLO, b'Jello, world!$Jello, world!'),
        (JELLO + b'$a' + JELLO + b'bcd', b'Jello, world!$aJello, world!bcd'),
        (b'$3141', b'$3141'),
        (JELLO + b'$3141', b'Jello, world!$3141'),
        (RECURSION, b'The secret message is "Jello, world!"'),
        (RECURSION_2,
         b'\'The secret message is "Jello, world!"\', said the spy.'),
    )

    def setUp(self):
        super().setUp()
        db = database.load_token_database(
            io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        db.add(
            tokens.TokenizedStringEntry(tokens.default_hash(s), s)
            for s in [self.RECURSION_STRING, self.RECURSION_STRING_2])
        self.detok = detokenize.Detokenizer(db)

    def test_detokenize_base64_live(self):
        for data, expected in self.TEST_CASES:
            output = io.BytesIO()
            self.detok.detokenize_base64_live(io.BytesIO(data), output, '$')

            self.assertEqual(expected, output.getvalue())

    def test_detokenize_base64_to_file(self):
        for data, expected in self.TEST_CASES:
            output = io.BytesIO()
            self.detok.detokenize_base64_to_file(data, output, '$')

            self.assertEqual(expected, output.getvalue())

    def test_detokenize_base64(self):
        for data, expected in self.TEST_CASES:
            self.assertEqual(expected,
                             self.detok.detokenize_base64(data, b'$'))

    def test_detokenize_base64_str(self):
        for data, expected in self.TEST_CASES:
            self.assertEqual(expected.decode(),
                             self.detok.detokenize_base64(data.decode()))
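

# The token database below is deliberately cyclic: token 0 expands to its own
# Base64 form and tokens 1, 2, and 3 expand to one another's, so recursive
# Base64 detokenization must stop after a bounded number of passes (the
# recursion argument) rather than expanding forever.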
class DetokenizeBase64InfiniteRecursion(unittest.TestCase):
    """Tests that infinite Base64 token recursion resolves."""
    def setUp(self):
        super().setUp()
        self.detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(0, '$AAAAAA=='),  # token for 0
                tokens.TokenizedStringEntry(1, '$AgAAAA=='),  # token for 2
                tokens.TokenizedStringEntry(2, '$AwAAAA=='),  # token for 3
                tokens.TokenizedStringEntry(3, '$AgAAAA=='),  # token for 2
            ]))

    def test_detokenize_self_recursion(self):
        for depth in range(5):
            self.assertEqual(
                self.detok.detokenize_base64(b'This one is deep: $AAAAAA==',
                                             recursion=depth),
                b'This one is deep: $AAAAAA==')

    def test_detokenize_self_recursion_default(self):
        self.assertEqual(
            self.detok.detokenize_base64(b'This one is deep: $AAAAAA=='),
            b'This one is deep: $AAAAAA==')

    def test_detokenize_cyclic_recursion_even(self):
        self.assertEqual(
            self.detok.detokenize_base64(b'I said "$AQAAAA=="', recursion=2),
            b'I said "$AgAAAA=="')

    def test_detokenize_cyclic_recursion_odd(self):
        self.assertEqual(
            self.detok.detokenize_base64(b'I said "$AQAAAA=="', recursion=3),
            b'I said "$AwAAAA=="')


if __name__ == '__main__':
    unittest.main()