1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_ 18 #define LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_ 19 20 #include "common/memory_image/data-store.h" 21 #include "common/task-spec.pb.h" 22 #include "util/strings/stringpiece.h" 23 24 namespace libtextclassifier { 25 namespace nlp_core { 26 27 // In-memory representation of data for a Saft model. Provides access to a 28 // TaskSpec object (produced by the "spec" stage of the Saft training model) and 29 // to the bytes of the TaskInputs mentioned in that spec (all these bytes are in 30 // memory, no file I/O required). 31 // 32 // Technically, an InMemoryModelData is a DataStore that maps the special string 33 // kTaskSpecDataStoreEntryName to the binary serialization of a TaskSpec. For 34 // each TaskInput (of the TaskSpec) with a file_pattern that starts with 35 // kFilePatternPrefix (see below), the same DataStore maps file_pattern to some 36 // content bytes. This way, it is possible to have all TaskInputs in memory, 37 // while still allowing classic, on-disk TaskInputs. 38 class InMemoryModelData { 39 public: 40 // Name for the DataStore entry that stores the serialized TaskSpec for the 41 // entire model. 42 static const char kTaskSpecDataStoreEntryName[]; 43 44 // Returns prefix for TaskInput::Part::file_pattern, to distinguish those 45 // "files" from other files. 46 static const char kFilePatternPrefix[]; 47 48 // Constructs an InMemoryModelData based on a chunk of bytes. Those bytes 49 // should have been produced by a DataStoreBuilder. InMemoryModelData(StringPiece bytes)50 explicit InMemoryModelData(StringPiece bytes) : data_store_(bytes) {} 51 52 // Fills *task_spec with a TaskSpec similar to the one used by 53 // DataStoreBuilder (when building the bytes used to construct this 54 // InMemoryModelData) except that each file name 55 // (TaskInput::Part::file_pattern) is replaced with a name that can be used to 56 // retrieve the corresponding file content bytes via GetBytesForInputFile(). 57 // 58 // Returns true on success, false otherwise. 59 bool GetTaskSpec(TaskSpec *task_spec) const; 60 61 // Gets content bytes for a file. The file_name argument should be the 62 // file_pattern for a TaskInput from the TaskSpec (see GetTaskSpec()). 63 // Returns a StringPiece indicating a memory area with the content bytes. On 64 // error, returns StringPiece(nullptr, 0). 65 StringPiece GetBytesForInputFile(const std::string &file_name) const; 66 67 private: 68 const memory_image::DataStore data_store_; 69 }; 70 71 } // namespace nlp_core 72 } // namespace libtextclassifier 73 74 #endif // LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_ 75