• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
#include <executorch/runtime/platform/runtime.h>
#include <gtest/gtest.h>
#include <cstdlib>
#include <memory>
#include <string>
#include <vector>
13 
14 using namespace ::testing;
15 
16 using ::executorch::extension::llm::BPETokenizer;
17 using ::executorch::extension::llm::Tokenizer;
18 using ::executorch::runtime::Error;
19 using ::executorch::runtime::Result;
20 
21 class TokenizerExtensionTest : public Test {
22  public:
SetUp()23   void SetUp() override {
24     executorch::runtime::runtime_init();
25     tokenizer_ = std::make_unique<BPETokenizer>();
26     modelPath_ =
27         std::getenv("RESOURCES_PATH") + std::string("/test_bpe_tokenizer.bin");
28   }
29 
30   std::unique_ptr<Tokenizer> tokenizer_;
31   std::string modelPath_;
32 };
33 
// Encoding before load() has been called must be rejected.
TEST_F(TokenizerExtensionTest, EncodeWithoutLoadFails) {
  const auto encoded = tokenizer_->encode("hello world", 0, 0);
  EXPECT_EQ(encoded.error(), Error::NotSupported);
}
38 
// Decoding before load() has been called must be rejected.
TEST_F(TokenizerExtensionTest, DecodeWithoutLoadFails) {
  const auto decoded = tokenizer_->decode(0, 0);
  EXPECT_EQ(decoded.error(), Error::NotSupported);
}
43 
// Decoding a token id beyond the vocabulary must fail even after a
// successful load.
TEST_F(TokenizerExtensionTest, DecodeOutOfRangeFails) {
  const Error loadStatus = tokenizer_->load(modelPath_.c_str());
  EXPECT_EQ(loadStatus, Error::Ok);
  // Token 64000 is assumed to be outside the model's vocab range
  // (original comment cites a 32000-entry vocab — verify against the
  // test_bpe_tokenizer.bin fixture).
  const auto decoded = tokenizer_->decode(0, 64000);
  EXPECT_EQ(decoded.error(), Error::NotSupported);
}
51 
// After loading, the metadata fields must match what the fixture file
// records: test_bpe_tokenizer.bin stores vocab_size 0, bos_id 0, eos_id 0.
TEST_F(TokenizerExtensionTest, TokenizerMetadataIsExpected) {
  const Error loadStatus = tokenizer_->load(modelPath_.c_str());
  EXPECT_EQ(loadStatus, Error::Ok);
  EXPECT_EQ(tokenizer_->vocab_size(), 0);
  EXPECT_EQ(tokenizer_->bos_tok(), 0);
  EXPECT_EQ(tokenizer_->eos_tok(), 0);
}
60 
// Destruction must be safe in both states: after a load attempt and when
// the tokenizer was never loaded at all.
TEST_F(TokenizerExtensionTest, SafeToDestruct) {
  // Loaded (or at least load-attempted) tokenizer.
  tokenizer_->load(modelPath_);
  tokenizer_.reset();

  // Never-loaded tokenizer.
  tokenizer_ = std::make_unique<BPETokenizer>();
  tokenizer_.reset();
}
70