• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_SUPPORT_CC_TASK_TEXT_NLCLASSIFIER_BERT_NL_CLASSIFIER_H_
17 #define TENSORFLOW_LITE_SUPPORT_CC_TASK_TEXT_NLCLASSIFIER_BERT_NL_CLASSIFIER_H_
18 
19 #include <stddef.h>
20 
21 #include <memory>
22 #include <string>
23 #include <vector>
24 
25 #include "absl/status/status.h"
26 #include "tensorflow/lite/c/common.h"
27 #include "tensorflow/lite/core/api/op_resolver.h"
28 #include "tensorflow/lite/kernels/register.h"
29 #include "tensorflow/lite/string_type.h"
30 #include "tensorflow_lite_support/cc/task/core/category.h"
31 #include "tensorflow_lite_support/cc/task/text/nlclassifier/nl_classifier.h"
32 #include "tensorflow_lite_support/cc/text/tokenizers/tokenizer.h"
33 
34 namespace tflite {
35 namespace task {
36 namespace text {
37 namespace nlclassifier {
38 
39 // Classifier API for NLClassification tasks with Bert models; categorizes an
40 // input string into different classes.
41 //
42 // The API expects a Bert based TFLite model with metadata populated.
43 // The metadata should contain the following information:
44 //   - input_process_units for Wordpiece/Sentencepiece Tokenizer
45 //   - 3 input tensors with names "ids", "mask" and "segment_ids"
46 //   - 1 output tensor of type float32[1, 2], with an optionally attached label
47 //     file. If a label file is attached, the file should be a plain text file
48 //     with one label per line, the number of labels should match the number of
49 //     categories the model outputs.
50 
51 class BertNLClassifier : public NLClassifier {
   // Publicly derives from NLClassifier, overriding its Preprocess and
   // Postprocess hooks and adding a tokenizer member.
   //
   // NOTE(review): absl::make_unique and tflite::support::StatusOr are used
   // below, but this header does not directly include absl/memory/memory.h or
   // a header declaring StatusOr; presumably both arrive transitively (e.g.
   // via nl_classifier.h) -- confirm, or include them directly.
52  public:
     // Inherit every NLClassifier constructor as-is.
53   using NLClassifier::NLClassifier;
54   // Max number of tokens to pass to the model.
55   static constexpr int kMaxSeqLen = 128;
56 
57   // Factory function to create a BertNLClassifier from TFLite model with
58   // metadata.
     //
     // path_to_model_with_metadata: path of the model file to load.
     // resolver: op resolver, taken by value as a std::unique_ptr, so ownership
     //   is handed over to this call. When omitted, a new BuiltinOpResolver is
     //   created for the call.
     // Returns a StatusOr holding a unique_ptr to the new classifier
     //   (presumably a non-OK status when creation fails -- confirm in the
     //   implementation).
59   static tflite::support::StatusOr<std::unique_ptr<BertNLClassifier>>
60   CreateFromFile(
61       const std::string& path_to_model_with_metadata,
62       std::unique_ptr<tflite::OpResolver> resolver =
63           absl::make_unique<tflite::ops::builtin::BuiltinOpResolver>());
64 
65   // Factory function to create a BertNLClassifier from in memory buffer of a
66   // TFLite model with metadata.
     //
     // model_with_metadata_buffer_data: pointer to the first byte of the model.
     // model_with_metadata_buffer_size: size of that buffer (size_t).
     // resolver: same contract as in CreateFromFile (ownership handed over,
     //   defaults to a new BuiltinOpResolver).
     // NOTE(review): whether the buffer must outlive the returned classifier is
     //   not visible in this header -- confirm against the implementation.
67   static tflite::support::StatusOr<std::unique_ptr<BertNLClassifier>>
68   CreateFromBuffer(
69       const char* model_with_metadata_buffer_data,
70       size_t model_with_metadata_buffer_size,
71       std::unique_ptr<tflite::OpResolver> resolver =
72           absl::make_unique<tflite::ops::builtin::BuiltinOpResolver>());
73 
74   // Factory function to create a BertNLClassifier from the file descriptor of a
75   // TFLite model with metadata.
     //
     // fd: file descriptor referring to the model.
     // resolver: same contract as in CreateFromFile (ownership handed over,
     //   defaults to a new BuiltinOpResolver).
     // NOTE(review): whether fd is closed by this call or must stay open
     //   afterwards is not visible in this header -- confirm.
76   static tflite::support::StatusOr<std::unique_ptr<BertNLClassifier>>
77   CreateFromFd(
78       int fd, std::unique_ptr<tflite::OpResolver> resolver =
79                   absl::make_unique<tflite::ops::builtin::BuiltinOpResolver>());
80 
81  protected:
82   // Run tokenization on input text and construct three input tensors ids, mask
83   // and segment_ids for the model input.
     //
     // input_tensors: the model's input tensors to be filled in.
     // input: the text to classify.
     // Returns an absl::Status describing the outcome. Overrides a base-class
     //   virtual.
84   absl::Status Preprocess(const std::vector<TfLiteTensor*>& input_tensors,
85                           const std::string& input) override;
86 
87   // Extract model output and create results with label file attached in
88   // metadata. If no label file is attached, use output score index as labels.
     //
     // output_tensors: the model's output tensors (read-only).
     // input: the text that was classified.
     // Returns a StatusOr holding the resulting categories. Overrides a
     //   base-class virtual.
89   tflite::support::StatusOr<std::vector<core::Category>> Postprocess(
90       const std::vector<const TfLiteTensor*>& output_tensors,
91       const std::string& input) override;
92 
93  private:
94   // Initialize the API with the tokenizer and label files set in the metadata.
     // Returns an absl::Status describing the outcome.
95   absl::Status InitializeFromMetadata();
96 
     // Tokenizer owned by this classifier (presumably set up by
     // InitializeFromMetadata and used by Preprocess -- confirm in the .cc).
97   std::unique_ptr<tflite::support::text::tokenizer::Tokenizer> tokenizer_;
98 };
99 
100 }  // namespace nlclassifier
101 }  // namespace text
102 }  // namespace task
103 }  // namespace tflite
104 
105 #endif  // TENSORFLOW_LITE_SUPPORT_CC_TASK_TEXT_NLCLASSIFIER_BERT_NL_CLASSIFIER_H_
106