1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_LITE_SUPPORT_CC_TASK_TEXT_NLCLASSIFIER_BERT_NL_CLASSIFIER_H_ 17 #define TENSORFLOW_LITE_SUPPORT_CC_TASK_TEXT_NLCLASSIFIER_BERT_NL_CLASSIFIER_H_ 18 19 #include <stddef.h> 20 21 #include <memory> 22 #include <string> 23 #include <vector> 24 25 #include "absl/status/status.h" 26 #include "tensorflow/lite/c/common.h" 27 #include "tensorflow/lite/core/api/op_resolver.h" 28 #include "tensorflow/lite/kernels/register.h" 29 #include "tensorflow/lite/string_type.h" 30 #include "tensorflow_lite_support/cc/task/core/category.h" 31 #include "tensorflow_lite_support/cc/task/text/nlclassifier/nl_classifier.h" 32 #include "tensorflow_lite_support/cc/text/tokenizers/tokenizer.h" 33 34 namespace tflite { 35 namespace task { 36 namespace text { 37 namespace nlclassifier { 38 39 // Classifier API for NLClassification tasks with Bert models, categorizes 40 // string into different classes. 41 // 42 // The API expects a Bert based TFLite model with metadata populated. 43 // The metadata should contain the following information: 44 // - input_process_units for Wordpiece/Sentencepiece Tokenizer 45 // - 3 input tensors with names "ids", "mask" and "segment_ids" 46 // - 1 output tensor of type float32[1, 2], with a optionally attached label 47 // file. If a label file is attached, the file should be a plain text file 48 // with one label per line, the number of labels should match the number of 49 // categories the model outputs. 50 51 class BertNLClassifier : public NLClassifier { 52 public: 53 using NLClassifier::NLClassifier; 54 // Max number of tokens to pass to the model. 55 static constexpr int kMaxSeqLen = 128; 56 57 // Factory function to create a BertNLClassifier from TFLite model with 58 // metadata. 59 static tflite::support::StatusOr<std::unique_ptr<BertNLClassifier>> 60 CreateFromFile( 61 const std::string& path_to_model_with_metadata, 62 std::unique_ptr<tflite::OpResolver> resolver = 63 absl::make_unique<tflite::ops::builtin::BuiltinOpResolver>()); 64 65 // Factory function to create a BertNLClassifier from in memory buffer of a 66 // TFLite model with metadata. 67 static tflite::support::StatusOr<std::unique_ptr<BertNLClassifier>> 68 CreateFromBuffer( 69 const char* model_with_metadata_buffer_data, 70 size_t model_with_metadata_buffer_size, 71 std::unique_ptr<tflite::OpResolver> resolver = 72 absl::make_unique<tflite::ops::builtin::BuiltinOpResolver>()); 73 74 // Factory function to create a BertNLClassifier from the file descriptor of a 75 // TFLite model with metadata. 76 static tflite::support::StatusOr<std::unique_ptr<BertNLClassifier>> 77 CreateFromFd( 78 int fd, std::unique_ptr<tflite::OpResolver> resolver = 79 absl::make_unique<tflite::ops::builtin::BuiltinOpResolver>()); 80 81 protected: 82 // Run tokenization on input text and construct three input tensors ids, mask 83 // and segment_ids for the model input. 84 absl::Status Preprocess(const std::vector<TfLiteTensor*>& input_tensors, 85 const std::string& input) override; 86 87 // Extract model output and create results with label file attached in 88 // metadata. If no label file is attached, use output score index as labels. 89 tflite::support::StatusOr<std::vector<core::Category>> Postprocess( 90 const std::vector<const TfLiteTensor*>& output_tensors, 91 const std::string& input) override; 92 93 private: 94 // Initialize the API with the tokenizer and label files set in the metadata. 95 absl::Status InitializeFromMetadata(); 96 97 std::unique_ptr<tflite::support::text::tokenizer::Tokenizer> tokenizer_; 98 }; 99 100 } // namespace nlclassifier 101 } // namespace text 102 } // namespace task 103 } // namespace tflite 104 105 #endif // TENSORFLOW_LITE_SUPPORT_CC_TASK_TEXT_NLCLASSIFIER_BERT_NL_CLASSIFIER_H_ 106