#!/usr/bin/env python3
# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Generates test data for hash_test.cc."""

import datetime
import os
import random

from pw_tokenizer import tokens

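# Fixed hash lengths to generate test data for. A C++ macro named per
# HASH_MACRO (e.g. PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH) is expected to
# exist for each length; _include_paths() emits the corresponding headers.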
HASH_LENGTHS = 80, 96, 128
HASH_MACRO = 'PW_TOKENIZER_65599_FIXED_LENGTH_{}_HASH'

FILE_HEADER = """\
// Copyright {year} The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

// AUTOGENERATED - DO NOT EDIT
//
// This file was generated by {script}.
// To make changes, update the script and run it to generate new files.
#pragma once

#include <cstddef>
#include <cstdint>
#include <string_view>

{includes}

namespace pw::tokenizer {{

// Test a series of generated test cases.
inline constexpr struct {{
  std::string_view string;
  size_t hash_length;
  uint32_t python_calculated_hash;
  uint32_t macro_calculated_hash;  // clang-format off
}} kHashTests[] = {{

"""

FILE_FOOTER = """
};  // kHashTests

// clang-format on

}  // namespace pw::tokenizer
"""

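# Template for a single kHashTests entry. For example, the string "a" at hash
# length 80 expands to roughly the following (Python-calculated value elided):
#   {
#     std::string_view("a", 1u),
#     80u,  // fixed hash length
#     UINT32_C(...),  // Python-calculated hash
#     PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("a"),  // macro-calculated hash
#   },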
_TEST_CASE = """{{
  std::string_view("{str}", {string_length}u),
  {hash_length}u,  // fixed hash length
  UINT32_C({hash}),  // Python-calculated hash
  {macro}("{str}"),  // macro-calculated hash
}},
"""


def _include_paths(lengths):
    """Returns #include directives for each fixed-length hash macro header.

    For example, length 80 maps to
    "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_80_hash_macro.h".
    """
    return '\n'.join(
        sorted(
            '#include "pw_tokenizer/internal/'
            'pw_tokenizer_65599_fixed_length_{}_hash_macro.h"'.format(length)
            for length in lengths))


def _test_case_at_length(data, hash_length):
    """Generates a test case for a particular hash length."""

    if isinstance(data, str):
        data = data.encode()

    # Printable ASCII is emitted directly, escaping backslashes and quotes so
    # the C++ string literal matches the raw bytes; anything else is emitted
    # entirely as hex escapes.
    if all(ord(' ') <= b <= ord('~') for b in data):
        escaped_str = data.decode().replace('\\', r'\\').replace('"', r'\"')
    else:
        escaped_str = ''.join(r'\x{:02x}'.format(b) for b in data)

    return _TEST_CASE.format(str=escaped_str,
                             string_length=len(data),
                             hash_length=hash_length,
                             hash=tokens.pw_tokenizer_65599_hash(
                                 data, hash_length),
                             macro=HASH_MACRO.format(hash_length))


def test_case(data):
    """Generates test cases for the given data at each fixed hash length."""
    return ''.join(
        _test_case_at_length(data, length) for length in HASH_LENGTHS)


def generate_test_cases():
    """Yields test cases for fixed strings and pseudorandom byte strings."""
    yield test_case('')
    yield test_case(b'\xa1')
    yield test_case(b'\xff')
    yield test_case('\0')
    yield test_case('\0\0')
    yield test_case('a')
    yield test_case('A')
    yield test_case('hello, "world"')
    yield test_case('YO' * 100)

    # Seed the RNG with a fixed value so the generated data is reproducible.
    random.seed(600613)

    def random_string(size):
        return bytes(random.randrange(256) for _ in range(size))

    for i in range(1, 16):
        yield test_case(random_string(i))
        yield test_case(random_string(i))

    # Cover strings just below, at, and just above each fixed hash length.
    for length in HASH_LENGTHS:
        yield test_case(random_string(length - 1))
        yield test_case(random_string(length))
        yield test_case(random_string(length + 1))


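# Running this script directly rewrites the generated header consumed by
# hash_test.cc, at the path constructed below.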
if __name__ == '__main__':
    path = os.path.realpath(
        os.path.join(os.path.dirname(__file__), '..', 'pw_tokenizer_private',
                     'generated_hash_test_cases.h'))

    with open(path, 'w') as output:
        output.write(
            FILE_HEADER.format(year=datetime.date.today().year,
                               script=os.path.basename(__file__),
                               includes=_include_paths(HASH_LENGTHS)))

        for case in generate_test_cases():
            output.write(case)

        output.write(FILE_FOOTER)

    print('Wrote test data to', path)