#!/usr/bin/env python3
# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Generates test data for hash_test.cc."""

import datetime
import os
import random

from pw_tokenizer import tokens

# Fixed hash lengths to generate cases for. Each length gets its own macro
# header include and one entry per test string.
HASH_LENGTHS = 80, 96, 128
HASH_MACRO = 'PW_TOKENIZER_65599_FIXED_LENGTH_{}_HASH'

# Seed for the pseudo-random test strings. It is fixed so that regenerating
# the header produces the same test cases every time.
_RANDOM_SEED = 600613

FILE_HEADER = """\
// Copyright {year} The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

// AUTOGENERATED - DO NOT EDIT
//
// This file was generated by {script}.
// To make changes, update the script and run it to generate new files.
#pragma once

#include <cstddef>
#include <cstdint>
#include <string_view>

{includes}

namespace pw::tokenizer {{

// Test a series of generated test cases.
inline constexpr struct {{
  std::string_view string;
  size_t hash_length;
  uint32_t python_calculated_hash;
  uint32_t macro_calculated_hash;  // clang-format off
}} kHashTests[] = {{

"""

FILE_FOOTER = """
};  // kHashTests

// clang-format on

}  // namespace pw::tokenizer
"""

_TEST_CASE = """{{
  std::string_view("{str}", {string_length}u),
  {hash_length}u,  // fixed hash length
  UINT32_C({hash}),  // Python-calculated hash
  {macro}("{str}"),  // macro-calculated hash
}},
"""


def _include_paths(lengths):
    """Returns the #include lines for the hash macro headers.

    Args:
      lengths: iterable of fixed hash lengths; one include is produced for
        each.

    Returns:
      The include lines sorted as strings and joined with newlines.
    """
    return '\n'.join(
        sorted(
            '#include "pw_tokenizer/internal/'
            'pw_tokenizer_65599_fixed_length_{}_hash_macro.h"'.format(length)
            for length in lengths))


def _escape_cpp_string(data):
    """Returns the contents of a C++ string literal that represents data.

    Args:
      data: the bytes to represent.

    Returns:
      If every byte is printable ASCII, the text itself with backslashes and
      double quotes escaped; otherwise every byte as a \\xNN hex escape.
    """
    if all(ord(' ') <= b <= ord('~') for b in data):
        # The backslash must be escaped first; otherwise the backslashes added
        # for the quotes would themselves be doubled.
        return data.decode().replace('\\', r'\\').replace('"', r'\"')

    return ''.join(r'\x{:02x}'.format(b) for b in data)


def _test_case_at_length(data, hash_length):
    """Generates a test case for a particular hash length.

    Args:
      data: the string to hash, as str or bytes; str is encoded to bytes.
      hash_length: the fixed hash length passed to the Python hash function
        and used to select the hash macro.

    Returns:
      One formatted kHashTests initializer entry.
    """
    if isinstance(data, str):
        data = data.encode()

    return _TEST_CASE.format(
        str=_escape_cpp_string(data),
        string_length=len(data),
        hash_length=hash_length,
        hash=tokens.pw_tokenizer_65599_hash(data, hash_length),
        macro=HASH_MACRO.format(hash_length))


def test_case(data):
    """Returns the test case entries for data at every length in HASH_LENGTHS."""
    return ''.join(
        _test_case_at_length(data, length) for length in HASH_LENGTHS)


def generate_test_cases():
    """Yields the text of each group of test cases.

    A set of hand-picked strings comes first, followed by pseudo-random byte
    strings of lengths 1 through 15 (two of each) and of lengths just below,
    at, and just above each entry in HASH_LENGTHS.
    """
    yield test_case('')
    yield test_case(b'\xa1')
    yield test_case(b'\xff')
    yield test_case('\0')
    yield test_case('\0\0')
    yield test_case('a')
    yield test_case('A')
    yield test_case('hello, "world"')
    yield test_case('YO' * 100)

    # A private generator yields the same sequence as seeding the module-level
    # functions, without disturbing the global random state of the caller.
    rng = random.Random(_RANDOM_SEED)

    def random_string(size):
        return bytes(rng.randrange(256) for _ in range(size))

    for i in range(1, 16):
        yield test_case(random_string(i))
        yield test_case(random_string(i))

    for length in HASH_LENGTHS:
        yield test_case(random_string(length - 1))
        yield test_case(random_string(length))
        yield test_case(random_string(length + 1))


def _main():
    """Writes the generated header and prints the path it was written to."""
    path = os.path.realpath(
        os.path.join(os.path.dirname(__file__), '..', 'pw_tokenizer_private',
                     'generated_hash_test_cases.h'))

    with open(path, 'w', encoding='utf-8') as output:
        output.write(
            FILE_HEADER.format(year=datetime.date.today().year,
                               script=os.path.basename(__file__),
                               includes=_include_paths(HASH_LENGTHS)))
        output.writelines(generate_test_cases())
        output.write(FILE_FOOTER)

    print('Wrote test data to', path)


if __name__ == '__main__':
    _main()