• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#!/usr/bin/env python3
# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Tests for the database module."""
16
17import json
18import io
19from pathlib import Path
20import shutil
21import sys
22import tempfile
23import unittest
24from unittest import mock
25
26from pw_tokenizer import database
27
# This is an ELF file with only the pw_tokenizer sections. It was created
# from a tokenize_test binary built for the STM32F429i Discovery board. The
# pw_tokenizer sections were extracted with this command:
#
#   arm-none-eabi-objcopy -S --only-section ".pw_tokenize*" <ELF> <OUTPUT>
#
# Both test ELFs are looked up next to this test file.
TOKENIZED_ENTRIES_ELF = Path(
    __file__).parent / 'example_binary_with_tokenized_strings.elf'
LEGACY_PLAIN_STRING_ELF = Path(
    __file__).parent / 'example_legacy_binary_with_tokenized_strings.elf'
38
# Expected CSV database for the default domain of the test ELF. Each row is
# "<8 hex digit token>,<removal date or 10 spaces>,<quoted string>".
CSV_DEFAULT_DOMAIN = '''\
00000000,          ,""
141c35d5,          ,"The answer: ""%s"""
29aef586,          ,"1234"
2b78825f,          ,"[:-)"
2e668cd6,          ,"Jello, world!"
31631781,          ,"%d"
61fd1e26,          ,"%ld"
68ab92da,          ,"%s there are %x (%.2f) of them%c"
7b940e2a,          ,"Hello %s! %hd %e"
7da55d52,          ,">:-[]"
7f35a9a5,          ,"TestName"
851beeb6,          ,"%u %d"
881436a0,          ,"The answer is: %s"
88808930,          ,"%u%d%02x%X%hu%hhd%d%ld%lu%lld%llu%c%c%c"
92723f44,          ,"???"
a09d6698,          ,"won-won-won-wonderful"
aa9ffa66,          ,"void pw::tokenizer::{anonymous}::TestName()"
ad002c97,          ,"%llx"
b3653e13,          ,"Jello!"
cc6d3131,          ,"Jello?"
e13b0f94,          ,"%llu"
e65aefef,          ,"Won't fit : %s%d"
'''
63
# Expected CSV database for the TEST_DOMAIN domain of the test ELF.
CSV_TEST_DOMAIN = """\
17fa86d3,          ,"hello"
18c5017c,          ,"yes"
59b2701c,          ,"The answer was: %s"
881436a0,          ,"The answer is: %s"
d18ada0f,          ,"something"
"""
71
# Expected CSV database when every domain of the test ELF is included: the
# default-domain and TEST_DOMAIN entries merged and ordered by token.
CSV_ALL_DOMAINS = '''\
00000000,          ,""
141c35d5,          ,"The answer: ""%s"""
17fa86d3,          ,"hello"
18c5017c,          ,"yes"
29aef586,          ,"1234"
2b78825f,          ,"[:-)"
2e668cd6,          ,"Jello, world!"
31631781,          ,"%d"
59b2701c,          ,"The answer was: %s"
61fd1e26,          ,"%ld"
68ab92da,          ,"%s there are %x (%.2f) of them%c"
7b940e2a,          ,"Hello %s! %hd %e"
7da55d52,          ,">:-[]"
7f35a9a5,          ,"TestName"
851beeb6,          ,"%u %d"
881436a0,          ,"The answer is: %s"
88808930,          ,"%u%d%02x%X%hu%hhd%d%ld%lu%lld%llu%c%c%c"
92723f44,          ,"???"
a09d6698,          ,"won-won-won-wonderful"
aa9ffa66,          ,"void pw::tokenizer::{anonymous}::TestName()"
ad002c97,          ,"%llx"
b3653e13,          ,"Jello!"
cc6d3131,          ,"Jello?"
d18ada0f,          ,"something"
e13b0f94,          ,"%llu"
e65aefef,          ,"Won't fit : %s%d"
'''
100
# JSON array of plain strings used as a database source by test_json_strings.
JSON_SOURCE_STRINGS = '''\
[
  "pigweed/pw_polyfill/standard_library_public/pw_polyfill/standard_library/assert.h",
  "protocol_buffer/gen/pigweed/pw_protobuf/common_protos.proto_library/nanopb/pw_protobuf_protos/status.pb.h",
  "pigweed/pw_rpc/client_server.cc",
  "pigweed/pw_rpc/public/pw_rpc/client_server.h",
  "This is a very long string that will produce two tokens; one for C++ and one for C. This is because this string exceeds the default C hash length."
]
'''
110
# Expected CSV database created from JSON_SOURCE_STRINGS. The long string
# appears twice, with two different tokens.
CSV_STRINGS = '''\
2cbf627a,          ,"pigweed/pw_rpc/client_server.cc"
666562a1,          ,"protocol_buffer/gen/pigweed/pw_protobuf/common_protos.proto_library/nanopb/pw_protobuf_protos/status.pb.h"
6c1e6eb3,          ,"pigweed/pw_rpc/public/pw_rpc/client_server.h"
b25a9932,          ,"This is a very long string that will produce two tokens; one for C++ and one for C. This is because this string exceeds the default C hash length."
eadf017f,          ,"pigweed/pw_polyfill/standard_library_public/pw_polyfill/standard_library/assert.h"
f815dc5c,          ,"This is a very long string that will produce two tokens; one for C++ and one for C. This is because this string exceeds the default C hash length."
'''
119
# Expected JSON output of the "report" command for TOKENIZED_ENTRIES_ELF,
# keyed by ELF path and then by domain name ('' is the default domain).
# Tests must not modify this shared constant; copy it before adjusting values.
EXPECTED_REPORT = {
    str(TOKENIZED_ENTRIES_ELF): {
        '': {
            'present_entries': 22,
            'present_size_bytes': 289,
            'total_entries': 22,
            'total_size_bytes': 289,
            'collisions': {}
        },
        'TEST_DOMAIN': {
            'present_entries': 5,
            'present_size_bytes': 57,
            'total_entries': 5,
            'total_size_bytes': 57,
            'collisions': {}
        }
    }
}
138
139
def run_cli(*args) -> None:
    """Runs the database.py command line interface in-process.

    Temporarily replaces sys.argv with 'database.py' followed by the string
    form of each argument, then calls database._main with the values returned
    by database._parse_args.

    Args:
      *args: Command line arguments. Each one is converted with str(), so
        Path objects may be passed directly.

    Raises:
      Any exception raised by database._parse_args or database._main. The
      original sys.argv and log handlers are restored before it propagates.
    """
    # pylint: disable=protected-access
    original_argv = sys.argv
    handlers_before = list(database._LOG.handlers)
    sys.argv = ['database.py', *(str(a) for a in args)]
    try:
        database._main(*database._parse_args())
    finally:
        # Remove the log handler(s) added during this call to avoid duplicate
        # logs. Only handlers that were not registered beforehand are removed,
        # so a failure before _main adds its handler cannot drop an unrelated
        # one.
        for handler in list(database._LOG.handlers):
            if handler not in handlers_before:
                database._LOG.removeHandler(handler)

        sys.argv = original_argv
    # pylint: enable=protected-access
153
154
155def _mock_output() -> io.TextIOWrapper:
156    output = io.BytesIO()
157    output.name = '<fake stdout>'
158    return io.TextIOWrapper(output, write_through=True)
159
160
class DatabaseCommandLineTest(unittest.TestCase):
    """Tests the database.py command line interface."""
    def setUp(self):
        """Creates a temporary directory and selects the ELF under test."""
        self._dir = Path(tempfile.mkdtemp('_pw_tokenizer_test'))
        self._csv = self._dir / 'db.csv'
        self._elf = TOKENIZED_ENTRIES_ELF

        # Subclasses override this when their ELF yields different entries.
        self._csv_test_domain = CSV_TEST_DOMAIN

    def tearDown(self):
        """Deletes the temporary directory and everything in it."""
        shutil.rmtree(self._dir)

    def test_create_csv(self):
        """Creating a CSV from the ELF yields the default domain."""
        run_cli('create', '--database', self._csv, self._elf)

        self.assertEqual(CSV_DEFAULT_DOMAIN.splitlines(),
                         self._csv.read_text().splitlines())

    def test_create_csv_test_domain(self):
        """The '#TEST_DOMAIN' suffix selects only that domain."""
        run_cli('create', '--database', self._csv, f'{self._elf}#TEST_DOMAIN')

        self.assertEqual(self._csv_test_domain.splitlines(),
                         self._csv.read_text().splitlines())

    def test_create_csv_all_domains(self):
        """The '#.*' suffix selects every domain."""
        run_cli('create', '--database', self._csv, f'{self._elf}#.*')

        self.assertEqual(CSV_ALL_DOMAINS.splitlines(),
                         self._csv.read_text().splitlines())

    def test_create_force(self):
        """Create refuses to overwrite a database unless --force is given."""
        self._csv.write_text(CSV_ALL_DOMAINS)

        with self.assertRaises(FileExistsError):
            run_cli('create', '--database', self._csv, self._elf)

        # The refused create must leave the existing database untouched.
        self.assertEqual(CSV_ALL_DOMAINS.splitlines(),
                         self._csv.read_text().splitlines())

        run_cli('create', '--force', '--database', self._csv, self._elf)

        # With --force the old contents are replaced by the new database.
        self.assertEqual(CSV_DEFAULT_DOMAIN.splitlines(),
                         self._csv.read_text().splitlines())

    def test_create_binary(self):
        """A binary database round-trips back to the expected CSV."""
        binary = self._dir / 'db.bin'
        run_cli('create', '--type', 'binary', '--database', binary, self._elf)

        # Write the binary database as CSV to verify its contents.
        run_cli('create', '--database', self._csv, binary)

        self.assertEqual(CSV_DEFAULT_DOMAIN.splitlines(),
                         self._csv.read_text().splitlines())

    def test_add_does_not_recalculate_tokens(self):
        """Adding a CSV keeps its tokens instead of rehashing the strings."""
        db_with_custom_token = '01234567,          ,"hello"'

        to_add = self._dir / 'add_this.csv'
        to_add.write_text(db_with_custom_token + '\n')
        self._csv.touch()

        run_cli('add', '--database', self._csv, to_add)
        self.assertEqual(db_with_custom_token.splitlines(),
                         self._csv.read_text().splitlines())

    def test_mark_removed(self):
        """Entries missing from the ELF's default domain get the given date."""
        self._csv.write_text(CSV_ALL_DOMAINS)

        run_cli('mark_removed', '--database', self._csv, '--date',
                '1998-09-04', self._elf)

        # Add the removal date to the four tokens not in the default domain
        new_csv = CSV_ALL_DOMAINS
        new_csv = new_csv.replace('17fa86d3,          ,"hello"',
                                  '17fa86d3,1998-09-04,"hello"')
        new_csv = new_csv.replace('18c5017c,          ,"yes"',
                                  '18c5017c,1998-09-04,"yes"')
        new_csv = new_csv.replace('59b2701c,          ,"The answer was: %s"',
                                  '59b2701c,1998-09-04,"The answer was: %s"')
        new_csv = new_csv.replace('d18ada0f,          ,"something"',
                                  'd18ada0f,1998-09-04,"something"')
        # Guard against the replacements above silently matching nothing.
        self.assertNotEqual(CSV_ALL_DOMAINS, new_csv)

        self.assertEqual(new_csv.splitlines(),
                         self._csv.read_text().splitlines())

    def test_purge(self):
        """Purging deletes every entry that has been marked removed."""
        self._csv.write_text(CSV_ALL_DOMAINS)

        # Mark everything not in TEST_DOMAIN as removed.
        run_cli('mark_removed', '--database', self._csv,
                f'{self._elf}#TEST_DOMAIN')

        # Delete all entries except those in TEST_DOMAIN.
        run_cli('purge', '--database', self._csv)

        self.assertEqual(self._csv_test_domain.splitlines(),
                         self._csv.read_text().splitlines())

    @mock.patch('sys.stdout', new_callable=_mock_output)
    def test_report(self, mock_stdout):
        """The report command prints the expected JSON to stdout."""
        run_cli('report', self._elf)

        self.assertEqual(json.loads(mock_stdout.buffer.getvalue()),
                         EXPECTED_REPORT)

    def test_replace(self):
        """--replace applies a regex substitution to the stored strings."""
        sub = 'replace/ment'
        run_cli('create', '--database', self._csv, self._elf, '--replace',
                r'(?i)\b[jh]ello\b/' + sub)
        self.assertEqual(
            CSV_DEFAULT_DOMAIN.replace('Jello', sub).replace('Hello', sub),
            self._csv.read_text())

    def test_json_strings(self):
        """A JSON list of strings can be used as a database source."""
        strings_file = self._dir / 'strings.json'
        strings_file.write_text(JSON_SOURCE_STRINGS)

        run_cli('create', '--force', '--database', self._csv, strings_file)
        self.assertEqual(CSV_STRINGS.splitlines(),
                         self._csv.read_text().splitlines())
278
279
class LegacyDatabaseCommandLineTest(DatabaseCommandLineTest):
    """Test an ELF with the legacy plain string storage format."""
    def setUp(self):
        """Switches the inherited tests over to the legacy-format ELF."""
        super().setUp()
        self._elf = LEGACY_PLAIN_STRING_ELF

        # The legacy approach for storing tokenized strings in an ELF always
        # adds an entry for "", even if the empty string was never tokenized.
        self._csv_test_domain = '00000000,          ,""\n' + CSV_TEST_DOMAIN

    @mock.patch('sys.stdout', new_callable=_mock_output)
    def test_report(self, mock_stdout):
        """The report for the legacy ELF matches the adjusted expectation."""
        run_cli('report', self._elf)

        # Copy each per-domain dict before adjusting it. A shallow copy of
        # the outer dict would share the nested dicts with EXPECTED_REPORT,
        # so the increments below would corrupt the constant for other tests.
        expected = EXPECTED_REPORT[str(TOKENIZED_ENTRIES_ELF)]
        report = {domain: dict(stats) for domain, stats in expected.items()}

        # Count the implicitly added "" entry in TEST_DOMAIN.
        for key in ('present_entries', 'present_size_bytes', 'total_entries',
                    'total_size_bytes'):
            report['TEST_DOMAIN'][key] += 1

        # Rename "" to the legacy name "default"
        report['default'] = report.pop('')

        self.assertEqual({str(LEGACY_PLAIN_STRING_ELF): report},
                         json.loads(mock_stdout.buffer.getvalue()))
308
309
310if __name__ == '__main__':
311    unittest.main()
312