#!/usr/bin/env python3
# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Generates test data for hash_test.cc."""

import datetime
import os
import random
from typing import Iterator, Sequence, Union

from pw_tokenizer import tokens

# Fixed hash lengths for which test cases (and C++ macro includes) are emitted.
HASH_LENGTHS = 80, 96, 128

# Name pattern of the C++ hashing macro; '{}' is replaced by the hash length.
HASH_MACRO = 'PW_TOKENIZER_65599_FIXED_LENGTH_{}_HASH'

# Banner written at the top of every generated file. Formatted with `year` and
# `script`.
SHARED_HEADER = """\
// Copyright {year} The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

// AUTOGENERATED - DO NOT EDIT
//
// This file was generated by {script}.
// To make changes, update the script and run it to generate new files.
"""

# The header templates below are passed through str.format(), so literal braces
# are doubled. The footers are written verbatim and use single braces.
CPP_HEADER = """\
#pragma once

#include <cstddef>
#include <cstdint>
#include <string_view>

{includes}

namespace pw::tokenizer {{

// Test a series of generated test cases.
inline constexpr struct {{
  std::string_view string;
  size_t hash_length;
  uint32_t python_calculated_hash;
  uint32_t macro_calculated_hash;  // clang-format off
}} kHashTests[] = {{

"""

CPP_FOOTER = """
};  // kHashTests

// clang-format on

}  // namespace pw::tokenizer
"""

_CPP_TEST_CASE = """{{
  std::string_view("{str}", {string_length}u),  // NOLINT(bugprone-string-constructor)
  {hash_length}u,  // fixed hash length
  UINT32_C({hash}),  // Python-calculated hash
  {macro}("{str}"),  // macro-calculated hash
}},
"""

RUST_HEADER = """
fn test_cases() -> Vec<TestCase> {{
  vec![
"""

RUST_FOOTER = """
  ]
}
"""

_RUST_TEST_CASE = """  TestCase{{
    string: b"{str}",
    hash_length: {hash_length},
    hash: {hash},
  }},
"""


def _include_paths(lengths: Sequence[int]) -> str:
    """Returns sorted, newline-joined #include lines for the hash macros.

    Args:
      lengths: hash lengths; one include line is produced per length.
    """
    return '\n'.join(
        sorted(
            '#include "pw_tokenizer/internal/'
            'pw_tokenizer_65599_fixed_length_{}_hash_macro.h"'.format(length)
            for length in lengths
        )
    )


def _test_case_at_length(
    test_case_template: str, data: Union[str, bytes], hash_length: int
) -> str:
    """Generates a test case for a particular hash length.

    Args:
      test_case_template: format string accepting the keyword fields `str`,
        `string_length`, `hash_length`, `hash` and `macro` (unused fields are
        ignored by str.format).
      data: the string to hash; str input is encoded to bytes first.
      hash_length: the fixed hash length passed to tokens.c_hash.

    Returns:
      The template filled in for this string and hash length.
    """
    if isinstance(data, str):
        data = data.encode()

    if all(ord(' ') <= b <= ord('~') for b in data):
        # Printable ASCII is emitted as-is for readability. Backslashes must be
        # escaped BEFORE quotes: '\' is itself in the printable range, and an
        # unescaped one would start an escape sequence in the generated string
        # literal, making it differ from the bytes that were hashed.
        escaped_str = (
            data.decode().replace('\\', r'\\').replace('"', r'\"')
        )
    else:
        # Anything else is written entirely as \xNN escapes, one per byte.
        escaped_str = ''.join(r'\x{:02x}'.format(b) for b in data)

    return test_case_template.format(
        str=escaped_str,
        string_length=len(data),
        hash_length=hash_length,
        hash=tokens.c_hash(data, hash_length),
        macro=HASH_MACRO.format(hash_length),
    )


def test_case(test_case_template: str, data: Union[str, bytes]) -> str:
    """Returns the test cases for `data` at every length in HASH_LENGTHS."""
    return ''.join(
        _test_case_at_length(test_case_template, data, length)
        for length in HASH_LENGTHS
    )


def generate_test_cases(test_case_template: str) -> Iterator[str]:
    """Yields formatted test cases: fixed strings first, then random bytes.

    The random module is seeded with a constant, so repeated runs yield the
    same sequence of cases. The order of the random_string() calls below
    determines the generated data and must not be changed casually.
    """
    yield test_case(test_case_template, '')
    yield test_case(test_case_template, b'\xa1')
    yield test_case(test_case_template, b'\xff')
    yield test_case(test_case_template, '\0')
    yield test_case(test_case_template, '\0\0')
    yield test_case(test_case_template, 'a')
    yield test_case(test_case_template, 'A')
    yield test_case(test_case_template, 'hello, "world"')
    yield test_case(test_case_template, 'YO' * 100)

    random.seed(600613)

    def random_string(size: int) -> bytes:
        """Returns `size` bytes drawn from the seeded random generator."""
        return bytes(random.randrange(256) for _ in range(size))

    # Two random strings for each short length from 1 to 15 bytes.
    for i in range(1, 16):
        yield test_case(test_case_template, random_string(i))
        yield test_case(test_case_template, random_string(i))

    # Strings one byte shorter than, equal to, and one byte longer than each
    # fixed hash length.
    for length in HASH_LENGTHS:
        yield test_case(test_case_template, random_string(length - 1))
        yield test_case(test_case_template, random_string(length))
        yield test_case(test_case_template, random_string(length + 1))


def generate_file(
    path_array, header_template, footer_template, test_case_template
):
    """Writes one generated test-data file.

    Args:
      path_array: path components, relative to this script's directory, of the
        file to write.
      header_template: format string written after the shared banner; it is
        formatted with an `includes` field.
      footer_template: text written verbatim after the test cases (it is NOT
        passed through str.format).
      test_case_template: per-test-case format string; see
        _test_case_at_length.
    """
    path = os.path.realpath(
        os.path.join(os.path.dirname(__file__), *path_array)
    )

    # Explicit encoding so the output does not depend on the locale default.
    with open(path, 'w', encoding='utf-8') as output:
        output.write(
            SHARED_HEADER.format(
                year=datetime.date.today().year,
                script=os.path.basename(__file__),
            )
        )
        output.write(
            header_template.format(
                includes=_include_paths(HASH_LENGTHS),
            )
        )

        for case in generate_test_cases(test_case_template):
            output.write(case)

        output.write(footer_template)

    print('Wrote test data to', path)


if __name__ == '__main__':
    generate_file(
        [
            '..',
            'pw_tokenizer_private',
            'generated_hash_test_cases.h',
        ],
        CPP_HEADER,
        CPP_FOOTER,
        _CPP_TEST_CASE,
    )
    generate_file(
        [
            '..',
            'rust',
            'pw_tokenizer_core_test_cases.rs',
        ],
        RUST_HEADER,
        RUST_FOOTER,
        _RUST_TEST_CASE,
    )