1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "lang_id/common/flatbuffers/model-utils.h"
18
19 #include <string.h>
20
21 #include <string>
22
23 #include "lang_id/common/lite_base/logging.h"
24 #include "lang_id/common/math/checksum.h"
25
26 namespace libtextclassifier3 {
27 namespace saft_fbs {
28
29 namespace {
30
31 // Returns true if we have clear evidence that |model| fails its checksum.
32 //
33 // E.g., if |model| has the crc32 field, and the value of that field does not
34 // match the checksum, then this function returns true. If there is no crc32
35 // field, then we don't know what the original (at build time) checksum was, so
36 // we don't know anything clear and this function returns false.
ClearlyFailsChecksum(const Model & model)37 bool ClearlyFailsChecksum(const Model &model) {
38 if (!flatbuffers::IsFieldPresent(&model, Model::VT_CRC32)) {
39 SAFTM_LOG(WARNING)
40 << "No CRC32, most likely an old model; skip CRC32 check";
41 return false;
42 }
43 const mobile::uint32 expected_crc32 = model.crc32();
44 const mobile::uint32 actual_crc32 = ComputeCrc2Checksum(&model);
45 if (actual_crc32 != expected_crc32) {
46 SAFTM_LOG(ERROR) << "Corrupt model: different CRC32: " << actual_crc32
47 << " vs " << expected_crc32;
48 return true;
49 }
50 SAFTM_DLOG(INFO) << "Successfully checked CRC32 " << actual_crc32;
51 return false;
52 }
53 } // namespace
54
GetVerifiedModelFromBytes(const char * data,size_t num_bytes)55 const Model *GetVerifiedModelFromBytes(const char *data, size_t num_bytes) {
56 if ((data == nullptr) || (num_bytes == 0)) {
57 SAFTM_LOG(ERROR) << "GetModel called on an empty sequence of bytes";
58 return nullptr;
59 }
60 const uint8_t *start = reinterpret_cast<const uint8_t *>(data);
61 flatbuffers::Verifier verifier(start, num_bytes);
62 if (!VerifyModelBuffer(verifier)) {
63 SAFTM_LOG(ERROR) << "Not a valid Model flatbuffer";
64 return nullptr;
65 }
66 const Model *model = GetModel(start);
67 if (model == nullptr) {
68 return nullptr;
69 }
70 if (ClearlyFailsChecksum(*model)) {
71 return nullptr;
72 }
73 return model;
74 }
75
GetInputByName(const Model * model,const std::string & name)76 const ModelInput *GetInputByName(const Model *model, const std::string &name) {
77 if (model == nullptr) {
78 SAFTM_LOG(ERROR) << "GetInputByName called with model == nullptr";
79 return nullptr;
80 }
81 const auto *inputs = model->inputs();
82 if (inputs == nullptr) {
83 // We should always have a list of inputs; maybe an empty one, if no inputs,
84 // but the list should be there.
85 SAFTM_LOG(ERROR) << "null inputs";
86 return nullptr;
87 }
88 for (const ModelInput *input : *inputs) {
89 if (input != nullptr) {
90 const flatbuffers::String *input_name = input->name();
91 if (input_name && input_name->str() == name) {
92 return input;
93 }
94 }
95 }
96 return nullptr;
97 }
98
GetInputBytes(const ModelInput * input)99 mobile::StringPiece GetInputBytes(const ModelInput *input) {
100 if ((input == nullptr) || (input->data() == nullptr)) {
101 SAFTM_LOG(ERROR) << "ModelInput has no content";
102 return mobile::StringPiece(nullptr, 0);
103 }
104 const flatbuffers::Vector<uint8_t> *input_data = input->data();
105 if (input_data == nullptr) {
106 SAFTM_LOG(ERROR) << "null input data";
107 return mobile::StringPiece(nullptr, 0);
108 }
109 return mobile::StringPiece(reinterpret_cast<const char *>(input_data->data()),
110 input_data->size());
111 }
112
FillParameters(const Model & model,mobile::TaskContext * context)113 bool FillParameters(const Model &model, mobile::TaskContext *context) {
114 if (context == nullptr) {
115 SAFTM_LOG(ERROR) << "null context";
116 return false;
117 }
118 const auto *parameters = model.parameters();
119 if (parameters == nullptr) {
120 // We should always have a list of parameters; maybe an empty one, if no
121 // parameters, but the list should be there.
122 SAFTM_LOG(ERROR) << "null list of parameters";
123 return false;
124 }
125 for (const ModelParameter *p : *parameters) {
126 if (p == nullptr) {
127 SAFTM_LOG(ERROR) << "null parameter";
128 return false;
129 }
130 if (p->name() == nullptr) {
131 SAFTM_LOG(ERROR) << "null parameter name";
132 return false;
133 }
134 const std::string name = p->name()->str();
135 if (name.empty()) {
136 SAFTM_LOG(ERROR) << "empty parameter name";
137 return false;
138 }
139 if (p->value() == nullptr) {
140 SAFTM_LOG(ERROR) << "null parameter name";
141 return false;
142 }
143 context->SetParameter(name, p->value()->str());
144 }
145 return true;
146 }
147
148 namespace {
149 // Updates |*crc| with the information from |s|. Auxiliary for
150 // ComputeCrc2Checksum.
151 //
152 // The bytes from |info| are also used to update the CRC32 checksum. |info|
153 // should be a brief tag that indicates what |s| represents. The idea is to add
154 // some structure to the information that goes into the CRC32 computation.
155 template <typename T>
UpdateCrc(mobile::Crc32 * crc,const flatbuffers::Vector<T> * s,mobile::StringPiece info)156 void UpdateCrc(mobile::Crc32 *crc, const flatbuffers::Vector<T> *s,
157 mobile::StringPiece info) {
158 crc->Update("|");
159 crc->Update(info.data(), info.size());
160 crc->Update(":");
161 if (s == nullptr) {
162 crc->Update("empty");
163 } else {
164 crc->Update(reinterpret_cast<const char *>(s->data()),
165 s->size() * sizeof(T));
166 }
167 }
168 } // namespace
169
ComputeCrc2Checksum(const Model * model)170 mobile::uint32 ComputeCrc2Checksum(const Model *model) {
171 // Implementation note: originally, I (salcianu@) thought we can just compute
172 // a CRC32 checksum of the model bytes. Unfortunately, the expected checksum
173 // is there too (and because we don't control the flatbuffer format, we can't
174 // "arrange" for it to be placed at the head / tail of those bytes). Instead,
175 // we traverse |model| and feed into the CRC32 computation those parts we are
176 // interested in (which excludes the crc32 field).
177 //
178 // Note: storing the checksum outside the Model would be too disruptive for
179 // the way we currently ship our models.
180 mobile::Crc32 crc;
181 if (model == nullptr) {
182 return crc.Get();
183 }
184 crc.Update("|Parameters:");
185 const auto *parameters = model->parameters();
186 if (parameters != nullptr) {
187 for (const ModelParameter *p : *parameters) {
188 if (p != nullptr) {
189 UpdateCrc(&crc, p->name(), "name");
190 UpdateCrc(&crc, p->value(), "value");
191 }
192 }
193 }
194 crc.Update("|Inputs:");
195 const auto *inputs = model->inputs();
196 if (inputs != nullptr) {
197 for (const ModelInput *input : *inputs) {
198 if (input != nullptr) {
199 UpdateCrc(&crc, input->name(), "name");
200 UpdateCrc(&crc, input->type(), "type");
201 UpdateCrc(&crc, input->sub_type(), "sub-type");
202 UpdateCrc(&crc, input->data(), "data");
203 }
204 }
205 }
206 return crc.Get();
207 }
208
209 } // namespace saft_fbs
}  // namespace libtextclassifier3
211