1 /*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9 #include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
10 #include <executorch/runtime/platform/runtime.h>
11 #include <gtest/gtest.h>
12 #include <vector>
13
14 using namespace ::testing;
15
16 using ::executorch::extension::llm::BPETokenizer;
17 using ::executorch::extension::llm::Tokenizer;
18 using ::executorch::runtime::Error;
19 using ::executorch::runtime::Result;
20
21 class TokenizerExtensionTest : public Test {
22 public:
SetUp()23 void SetUp() override {
24 executorch::runtime::runtime_init();
25 tokenizer_ = std::make_unique<BPETokenizer>();
26 modelPath_ =
27 std::getenv("RESOURCES_PATH") + std::string("/test_bpe_tokenizer.bin");
28 }
29
30 std::unique_ptr<Tokenizer> tokenizer_;
31 std::string modelPath_;
32 };
33
// encode() on a tokenizer that never had load() called must be rejected.
TEST_F(TokenizerExtensionTest, EncodeWithoutLoadFails) {
  // The fixture only constructs the tokenizer, so no model is loaded here.
  Result<std::vector<uint64_t>> encoded =
      tokenizer_->encode("hello world", 0, 0);

  EXPECT_EQ(encoded.error(), Error::NotSupported);
}
38
// decode() on a tokenizer that never had load() called must be rejected.
TEST_F(TokenizerExtensionTest, DecodeWithoutLoadFails) {
  // The fixture only constructs the tokenizer, so no model is loaded here.
  auto decoded = tokenizer_->decode(0, 0);

  EXPECT_EQ(decoded.error(), Error::NotSupported);
}
43
// Decoding a token id beyond the vocabulary of a loaded model must fail.
TEST_F(TokenizerExtensionTest, DecodeOutOfRangeFails) {
  // Loading is a hard precondition: DecodeWithoutLoadFails expects the very
  // same Error::NotSupported from an unloaded tokenizer, so continuing after a
  // failed load would let the range check below pass for the wrong reason.
  ASSERT_EQ(tokenizer_->load(modelPath_.c_str()), Error::Ok);

  // Token 64000 is expected to lie outside the model's vocabulary.
  // NOTE(review): this was documented as a 32000-entry vocabulary, while
  // TokenizerMetadataIsExpected asserts vocab_size() == 0 -- confirm which
  // figure is current.
  auto result = tokenizer_->decode(0, 64000);
  EXPECT_EQ(result.error(), Error::NotSupported);
}
51
// After a successful load, the tokenizer reports the metadata stored in the
// test model file.
TEST_F(TokenizerExtensionTest, TokenizerMetadataIsExpected) {
  // The metadata checks are only meaningful for a loaded model, so abort the
  // test right away if loading fails instead of reporting follow-on failures.
  ASSERT_EQ(tokenizer_->load(modelPath_.c_str()), Error::Ok);

  // test_bpe_tokenizer.bin has vocab_size 0, bos_id 0, eos_id 0 recorded.
  EXPECT_EQ(tokenizer_->vocab_size(), 0);
  EXPECT_EQ(tokenizer_->bos_tok(), 0);
  EXPECT_EQ(tokenizer_->eos_tok(), 0);
}
60
// Destroying a tokenizer must be safe both after a successful load() and when
// load() was never called.
TEST_F(TokenizerExtensionTest, SafeToDestruct) {
  // Safe to destruct an initialized tokenizer. The load result is checked so
  // that this half of the test really exercises a loaded tokenizer rather than
  // silently degrading to the uninitialized case below.
  ASSERT_EQ(tokenizer_->load(modelPath_), Error::Ok);
  tokenizer_.reset();

  // Safe to destruct an uninitialized tokenizer.
  tokenizer_ = std::make_unique<BPETokenizer>();
  tokenizer_.reset();
}
70