/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/util/tensor_slice_writer.h"

#include <array>

#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/framework/versions.pb.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/public/version.h"
#include "tensorflow/core/util/saved_tensor_slice_util.h"
#include "tensorflow/core/util/tensor_slice_reader.h"

namespace tensorflow {

namespace checkpoint {

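// Test-only helper that inspects the raw table produced by TensorSliceWriter:
// GetData looks up a single SavedSlice by tensor name and slice spec, and
// CheckEntries verifies the metadata block plus every data block.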
class TensorSliceWriteTestHelper {
 public:
  static void CheckEntries(const string& fname);
  static void GetData(TensorSliceReader::Table* table, const string& name,
                      const TensorSlice& slice, SavedSlice* ss);
};

namespace {

// Testing that an array is what is expected
void ExpectIdenticalFloatArrays(const float* expected, int size,
                                const float* actual) {
  // TODO(yangke): copy some of the Dump* functions over
  //  LOG(INFO) << "Expected = " << DumpFloatArray(expected, size);
  //  LOG(INFO) << "Actual   = " << DumpFloatArray(actual, size);
  for (int i = 0; i < size; ++i) {
    EXPECT_NEAR(expected[i], actual[i], 1e-6);
  }
}

template <typename T, typename U>
void ExpectIdenticalIntArrays(const T* expected, int size, const U* actual) {
  for (int i = 0; i < size; ++i) {
    EXPECT_EQ(expected[i], static_cast<T>(actual[i]));
  }
}

// Nifty routine to get the size of an array
template <typename T, unsigned SIZE>
inline size_t ArraySize(const T (&v)[SIZE]) {
  return SIZE;
}

// A simple test on writing a few tensor slices
// TODO(yangke): refactor into smaller tests: will do as we add more stuff to
// the writer.
TEST(TensorSliceWriteTest, SimpleWrite) {
  const string filename = io::JoinPath(testing::TmpDir(), "checkpoint");

  TensorSliceWriter writer(filename, CreateTableTensorSliceBuilder);

  // Add some int32 tensor slices
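  // (Slice specs are colon-separated per dimension: "-" keeps the full
  // extent, and "start,length" keeps a sub-range, so "-:0,1" is all of
  // dimension 0 and the single column starting at offset 0.)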
  {
    TensorShape shape({5, 10});
    TensorSlice slice = TensorSlice::ParseOrDie("-:0,1");
    const int32 data[] = {0, 1, 2, 3, 4};
    TF_CHECK_OK(writer.Add("test", shape, slice, data));
  }

  // Two slices share the same tensor name
  {
    TensorShape shape({5, 10});
    TensorSlice slice = TensorSlice::ParseOrDie("-:3,1");
    const int32 data[] = {10, 11, 12, 13, 14};
    TF_CHECK_OK(writer.Add("test", shape, slice, data));
  }

  // Another slice from a different float tensor -- it has a different name and
  // should be inserted in front of the previous tensor
  {
    TensorShape shape({3, 2});
    TensorSlice slice = TensorSlice::ParseOrDie("-:-");
    const float data[] = {1.2, 1.3, 1.4, 2.1, 2.2, 2.3};
    TF_CHECK_OK(writer.Add("AA", shape, slice, data));
  }

  // A slice with int64 data
  {
    TensorShape shape({5, 10});
    TensorSlice slice = TensorSlice::ParseOrDie("-:3,1");
    const int64 data[] = {10, 11, 12, 13, 14};
    TF_CHECK_OK(writer.Add("int64", shape, slice, data));
  }

  // A slice with int16 data
  {
    TensorShape shape({5, 10});
    TensorSlice slice = TensorSlice::ParseOrDie("-:3,1");
    const int16 data[] = {10, 11, 12, 13, 14};
    TF_CHECK_OK(writer.Add("int16", shape, slice, data));
  }

  TF_CHECK_OK(writer.Finish());

  // Now we examine the checkpoint file manually.
  TensorSliceWriteTestHelper::CheckEntries(filename);
}

}  // namespace

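// Fetches the data block keyed by (name, slice), parses it as a
// SavedTensorSlices message, and hands back the embedded SavedSlice after
// checking that its name and slice spec match the request.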
void TensorSliceWriteTestHelper::GetData(TensorSliceReader::Table* table,
                                         const string& name,
                                         const TensorSlice& slice,
                                         SavedSlice* ss) {
  string key = EncodeTensorNameSlice(name, slice);
  string value;
  EXPECT_TRUE(table->Get(key, &value));
  SavedTensorSlices sts;
  EXPECT_TRUE(ParseProtoUnlimited(&sts, value));
  EXPECT_FALSE(sts.has_meta());
  *ss = sts.data();
  EXPECT_EQ(name, ss->name());
  TensorSlice slice2(ss->slice());
  EXPECT_EQ(slice.DebugString(), slice2.DebugString());
}

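// Opens the checkpoint written by SimpleWrite and verifies the metadata
// block (tensor names, shapes, dtypes, slices, version info) followed by
// each of the five data blocks.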
void TensorSliceWriteTestHelper::CheckEntries(const string& fname) {
  TensorSliceReader::Table* tptr;
  TF_CHECK_OK(OpenTableTensorSliceReader(fname, &tptr));
  std::unique_ptr<TensorSliceReader::Table> table(tptr);
  CHECK_NOTNULL(table.get());

  // We expect a block of SavedTensorSlices
  string value;
  ASSERT_TRUE(table->Get(kSavedTensorSlicesKey, &value));
  {
    SavedTensorSlices sts;
    EXPECT_TRUE(ParseProtoUnlimited(&sts, value));
    // We also expect metadata entries for the four tensors
    EXPECT_TRUE(sts.has_meta());
    EXPECT_EQ(4, sts.meta().tensor_size());
    // We should have written nontrivial version information
    EXPECT_LT(0, TF_CHECKPOINT_VERSION);
    EXPECT_EQ(TF_CHECKPOINT_VERSION, sts.meta().versions().producer());
    EXPECT_EQ(TF_CHECKPOINT_VERSION_MIN_CONSUMER,
              sts.meta().versions().min_consumer());
    // We don't expect any data in the first block.
    EXPECT_FALSE(sts.has_data());
    // The tensors should be stored in the same order as they are first
    // created.
    {
      // The two slices of the "test" tensor
      const SavedSliceMeta& ssm = sts.meta().tensor(0);
      EXPECT_EQ("test", ssm.name());
      EXPECT_EQ(
          "dim { size: 5 } "
          "dim { size: 10 }",
          ssm.shape().ShortDebugString());
      EXPECT_EQ(DT_INT32, ssm.type());
      EXPECT_EQ(2, ssm.slice_size());
      TensorSlice s0(ssm.slice(0));
      TensorSlice s1(ssm.slice(1));
      EXPECT_EQ("-:0,1", s0.DebugString());
      EXPECT_EQ("-:3,1", s1.DebugString());
    }
    {
      // The "AA" tensor
      const SavedSliceMeta& ssm = sts.meta().tensor(1);
      EXPECT_EQ("AA", ssm.name());
      EXPECT_EQ(
          "dim { size: 3 } "
          "dim { size: 2 }",
          ssm.shape().ShortDebugString());
      EXPECT_EQ(DT_FLOAT, ssm.type());
      EXPECT_EQ(1, ssm.slice_size());
      TensorSlice s0(ssm.slice(0));
      EXPECT_EQ("-:-", s0.DebugString());
    }
    {
      // The "int64" tensor
      const SavedSliceMeta& ssm = sts.meta().tensor(2);
      EXPECT_EQ("int64", ssm.name());
      EXPECT_EQ(
          "dim { size: 5 } "
          "dim { size: 10 }",
          ssm.shape().ShortDebugString());
      EXPECT_EQ(DT_INT64, ssm.type());
      EXPECT_EQ(1, ssm.slice_size());
      TensorSlice s0(ssm.slice(0));
      EXPECT_EQ("-:3,1", s0.DebugString());
    }
    {
      // The "int16" tensor
      const SavedSliceMeta& ssm = sts.meta().tensor(3);
      EXPECT_EQ("int16", ssm.name());
      EXPECT_EQ(
          "dim { size: 5 } "
          "dim { size: 10 }",
          ssm.shape().ShortDebugString());
      EXPECT_EQ(DT_INT16, ssm.type());
      EXPECT_EQ(1, ssm.slice_size());
      TensorSlice s0(ssm.slice(0));
      EXPECT_EQ("-:3,1", s0.DebugString());
    }
  }

  // We expect 5 blocks of tensor data
  {
    // Block 1: we expect it to be the full slice of the "AA" tensor
    SavedSlice ss;
    GetData(table.get(), "AA", TensorSlice(2), &ss);
    const float data[] = {1.2, 1.3, 1.4, 2.1, 2.2, 2.3};
    EXPECT_EQ(ArraySize(data), ss.data().float_val_size());
    ExpectIdenticalFloatArrays(data, ArraySize(data),
                               ss.data().float_val().data());
  }

  {
    // Block 2: we expect it to be the first slice of the "test" tensor
    SavedSlice ss;
    GetData(table.get(), "test", TensorSlice({{0, -1}, {0, 1}}), &ss);
    const int32 data[] = {0, 1, 2, 3, 4};
    EXPECT_EQ(ArraySize(data), ss.data().int_val_size());
    ExpectIdenticalIntArrays(data, ArraySize(data), ss.data().int_val().data());
  }

  {
    // Block 3: we expect it to be the second slice of the "test" tensor
    SavedSlice ss;
    GetData(table.get(), "test", TensorSlice({{0, -1}, {3, 1}}), &ss);
    const int32 data[] = {10, 11, 12, 13, 14};
    EXPECT_EQ(ArraySize(data), ss.data().int_val_size());
    ExpectIdenticalIntArrays(data, ArraySize(data), ss.data().int_val().data());
  }

  {
    // Block 4: we expect it to be the slice of the "int64" tensor
    SavedSlice ss;
    GetData(table.get(), "int64", TensorSlice({{0, -1}, {3, 1}}), &ss);
    const int64 data[] = {10, 11, 12, 13, 14};
    EXPECT_EQ(ArraySize(data), ss.data().int64_val_size());
    ExpectIdenticalIntArrays(data, ArraySize(data),
                             ss.data().int64_val().data());
  }

  {
    // Block 5: we expect it to be the slice of the "int16" tensor
    SavedSlice ss;
    GetData(table.get(), "int16", TensorSlice({{0, -1}, {3, 1}}), &ss);
    const int16 data[] = {10, 11, 12, 13, 14};
    EXPECT_EQ(ArraySize(data), ss.data().int_val_size());
    ExpectIdenticalIntArrays(data, ArraySize(data), ss.data().int_val().data());
  }
}

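// Measures the serialized cost per element for type DT: save a 1-element
// slice and a 1001-element slice of the same value, then divide the size
// difference by the element-count difference so the fixed per-message
// overhead cancels out.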
template <typename DT>
size_t BytesPerElementHelper(DT value) {
  SavedSlice ss;
  std::array<DT, 1> lo_data;
  std::fill(lo_data.begin(), lo_data.end(), value);
  TF_EXPECT_OK(
      TensorSliceWriter::SaveData(lo_data.data(), lo_data.size(), &ss));
  size_t lo_byte_size = ss.ByteSizeLong();

  std::array<DT, 1001> hi_data;
  std::fill(hi_data.begin(), hi_data.end(), value);
  TF_EXPECT_OK(
      TensorSliceWriter::SaveData(hi_data.data(), hi_data.size(), &ss));
  size_t hi_byte_size = ss.ByteSizeLong();

  return (hi_byte_size - lo_byte_size) / (hi_data.size() - lo_data.size());
}

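// For each supported dtype, the per-element size measured with a worst-case
// value should match the writer's MaxBytesPerElement bound.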
TEST(TensorSliceWriteTest, CheckpointSize) {
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_BOOL),
            BytesPerElementHelper<bool>(false));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_BOOL),
            BytesPerElementHelper<bool>(true));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_FLOAT),
            BytesPerElementHelper<float>(-1.0));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_DOUBLE),
            BytesPerElementHelper<double>(-1.0));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_COMPLEX64),
            BytesPerElementHelper<complex64>(-1.0));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_COMPLEX128),
            BytesPerElementHelper<complex128>(-1.0));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_INT32),
            BytesPerElementHelper<int32>(-1));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_INT64),
            BytesPerElementHelper<int64>(-1));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_UINT16),
            BytesPerElementHelper<uint16>(std::numeric_limits<uint16>::max()));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_UINT8),
            BytesPerElementHelper<uint8>(std::numeric_limits<uint8>::max()));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_INT8),
            BytesPerElementHelper<int8>(-1));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_INT16),
            BytesPerElementHelper<int16>(-1));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_QINT8),
            BytesPerElementHelper<qint8>(-1));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_QUINT8),
            BytesPerElementHelper<quint8>(std::numeric_limits<uint8>::max()));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_QINT32),
            BytesPerElementHelper<qint32>(-1));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_HALF),
            BytesPerElementHelper<Eigen::half>(Eigen::half(-1.0)));
}

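// Slices whose serialized form would be too large must be rejected with an
// InvalidArgument error rather than written.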
TEST(TensorSliceWriteTest, SizeErrors) {
  const string filename = io::JoinPath(testing::TmpDir(), "checkpoint");

  TensorSliceWriter writer(filename, CreateTableTensorSliceBuilder);

  // Add a 300MB int8 tensor slice, which will fail because it expands to 3GB.
  {
    TensorShape shape({300, 1000000});
    TensorSlice slice = TensorSlice::ParseOrDie("-:-");
    const std::vector<int8> data(300000000, -1);
    Status s = writer.Add("test1", shape, slice, data.data());
    EXPECT_EQ(s.code(), error::INVALID_ARGUMENT);
    EXPECT_TRUE(absl::StrContains(s.error_message(),
                                  "Tensor slice is too large to serialize"));
  }

  // Add a large string tensor slice, which will fail.
  {
    TensorShape shape({256, 1024});
    TensorSlice slice = TensorSlice::ParseOrDie("-:-");
    const std::vector<tstring> data(256 * 1024, std::string(8192, 'f'));
    Status s = writer.Add("test2", shape, slice, data.data());
    EXPECT_EQ(s.code(), error::INVALID_ARGUMENT);
    EXPECT_TRUE(absl::StrContains(s.error_message(),
                                  "Tensor slice is too large to serialize"));
  }
}

}  // namespace checkpoint

}  // namespace tensorflow