1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
#include <limits>
#include <memory>

#include "tensorflow/core/lib/io/format.h"

#include "tensorflow/core/lib/core/coding.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/hash/crc32c.h"
#include "tensorflow/core/lib/io/block.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/snappy.h"
26
27 namespace tensorflow {
28 namespace table {
29
EncodeTo(string * dst) const30 void BlockHandle::EncodeTo(string* dst) const {
31 // Sanity check that all fields have been set
32 assert(offset_ != ~static_cast<uint64>(0));
33 assert(size_ != ~static_cast<uint64>(0));
34 core::PutVarint64(dst, offset_);
35 core::PutVarint64(dst, size_);
36 }
37
DecodeFrom(StringPiece * input)38 Status BlockHandle::DecodeFrom(StringPiece* input) {
39 if (core::GetVarint64(input, &offset_) && core::GetVarint64(input, &size_)) {
40 return Status::OK();
41 } else {
42 return errors::DataLoss("bad block handle");
43 }
44 }
45
EncodeTo(string * dst) const46 void Footer::EncodeTo(string* dst) const {
47 #ifndef NDEBUG
48 const size_t original_size = dst->size();
49 #endif
50 metaindex_handle_.EncodeTo(dst);
51 index_handle_.EncodeTo(dst);
52 dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding
53 core::PutFixed32(dst, static_cast<uint32>(kTableMagicNumber & 0xffffffffu));
54 core::PutFixed32(dst, static_cast<uint32>(kTableMagicNumber >> 32));
55 assert(dst->size() == original_size + kEncodedLength);
56 }
57
DecodeFrom(StringPiece * input)58 Status Footer::DecodeFrom(StringPiece* input) {
59 const char* magic_ptr = input->data() + kEncodedLength - 8;
60 const uint32 magic_lo = core::DecodeFixed32(magic_ptr);
61 const uint32 magic_hi = core::DecodeFixed32(magic_ptr + 4);
62 const uint64 magic =
63 ((static_cast<uint64>(magic_hi) << 32) | (static_cast<uint64>(magic_lo)));
64 if (magic != kTableMagicNumber) {
65 return errors::DataLoss("not an sstable (bad magic number)");
66 }
67
68 Status result = metaindex_handle_.DecodeFrom(input);
69 if (result.ok()) {
70 result = index_handle_.DecodeFrom(input);
71 }
72 if (result.ok()) {
73 // We skip over any leftover data (just padding for now) in "input"
74 const char* end = magic_ptr + 8;
75 *input = StringPiece(end, input->data() + input->size() - end);
76 }
77 return result;
78 }
79
ReadBlock(RandomAccessFile * file,const BlockHandle & handle,BlockContents * result)80 Status ReadBlock(RandomAccessFile* file, const BlockHandle& handle,
81 BlockContents* result) {
82 result->data = StringPiece();
83 result->cachable = false;
84 result->heap_allocated = false;
85
86 // Read the block contents as well as the type/crc footer.
87 // See table_builder.cc for the code that built this structure.
88 size_t n = static_cast<size_t>(handle.size());
89
90 if (kBlockTrailerSize > std::numeric_limits<size_t>::max() - n) {
91 return errors::DataLoss("handle.size() too big");
92 }
93
94 char* buf = new char[n + kBlockTrailerSize];
95 StringPiece contents;
96 Status s = file->Read(handle.offset(), n + kBlockTrailerSize, &contents, buf);
97 if (!s.ok()) {
98 delete[] buf;
99 return s;
100 }
101 if (contents.size() != n + kBlockTrailerSize) {
102 delete[] buf;
103 return errors::DataLoss("truncated block read");
104 }
105
106 // Check the crc of the type and the block contents
107 const char* data = contents.data(); // Pointer to where Read put the data
108 // This checksum verification is optional. We leave it on for now
109 const bool verify_checksum = true;
110 if (verify_checksum) {
111 const uint32 crc = crc32c::Unmask(core::DecodeFixed32(data + n + 1));
112 const uint32 actual = crc32c::Value(data, n + 1);
113 if (actual != crc) {
114 delete[] buf;
115 s = errors::DataLoss("block checksum mismatch");
116 return s;
117 }
118 }
119
120 switch (data[n]) {
121 case kNoCompression:
122 if (data != buf) {
123 // File implementation gave us pointer to some other data.
124 // Use it directly under the assumption that it will be live
125 // while the file is open.
126 delete[] buf;
127 result->data = StringPiece(data, n);
128 result->heap_allocated = false;
129 result->cachable = false; // Do not double-cache
130 } else {
131 result->data = StringPiece(buf, n);
132 result->heap_allocated = true;
133 result->cachable = true;
134 }
135
136 // Ok
137 break;
138 case kSnappyCompression: {
139 size_t ulength = 0;
140 if (!port::Snappy_GetUncompressedLength(data, n, &ulength)) {
141 delete[] buf;
142 return errors::DataLoss("corrupted compressed block contents");
143 }
144 char* ubuf = new char[ulength];
145 if (!port::Snappy_Uncompress(data, n, ubuf)) {
146 delete[] buf;
147 delete[] ubuf;
148 return errors::DataLoss("corrupted compressed block contents");
149 }
150 delete[] buf;
151 result->data = StringPiece(ubuf, ulength);
152 result->heap_allocated = true;
153 result->cachable = true;
154 break;
155 }
156 default:
157 delete[] buf;
158 return errors::DataLoss("bad block type");
159 }
160
161 return Status::OK();
162 }
163
164 } // namespace table
165 } // namespace tensorflow
166