1 // Copyright 2015 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 ////////////////////////////////////////////////////////////////////////////////
16 //
// This file implements the image type recognition algorithm. The functions
// that check each individual image type are implemented by comparing magic
// numbers or signature strings. Other checks (e.g. endianness, the general
// TIFF magic number "42", etc.) may also be used in some of those functions
// to make the type recognition more stable. Those checks are designed
// according to the format specifications and our own experiments. Note that
// the magic numbers and signature strings may have different binary values
// depending on the endianness.
25 #include "src/image_type_recognition/image_type_recognition_lite.h"
26
27 #include <algorithm>
28 #include <cassert>
29 #include <string>
30 #include <vector>
31
32 #include "src/binary_parse/range_checked_byte_ptr.h"
33
34 namespace piex {
35 namespace image_type_recognition {
36 namespace {
37
38 using std::string;
39 using binary_parse::MemoryStatus;
40 using binary_parse::RangeCheckedBytePtr;
41
42 // Base class for checking image type. For each image type, one should create an
43 // inherited class and do the implementation.
44 class TypeChecker {
45 public:
46 // Comparing function, whihc is used for sorting.
Compare(const TypeChecker * a,const TypeChecker * b)47 static bool Compare(const TypeChecker* a, const TypeChecker* b) {
48 assert(a);
49 assert(b);
50 return a->RequestedSize() < b->RequestedSize();
51 }
52
~TypeChecker()53 virtual ~TypeChecker() {}
54
55 // Returns the type of current checker.
56 virtual RawImageTypes Type() const = 0;
57
58 // Returns the requested data size (in bytes) for current checker. The checker
59 // guarantees that it will not read more than this size.
60 virtual size_t RequestedSize() const = 0;
61
62 // Checks if source data belongs to current checker type.
63 virtual bool IsMyType(const RangeCheckedBytePtr& source) const = 0;
64
65 protected:
66 // Limits the source length to the RequestedSize(), using it guarantees that
67 // we will not read more than this size from the source.
LimitSource(const RangeCheckedBytePtr & source) const68 RangeCheckedBytePtr LimitSource(const RangeCheckedBytePtr& source) const {
69 return source.pointerToSubArray(0 /* pos */, RequestedSize());
70 }
71 };
72
73 // Check if the uint16 value at (source + offset) is equal to the target value.
CheckUInt16Value(const RangeCheckedBytePtr & source,const size_t source_offset,const bool use_big_endian,const unsigned short target_value)74 bool CheckUInt16Value(const RangeCheckedBytePtr& source,
75 const size_t source_offset, const bool use_big_endian,
76 const unsigned short target_value) { // NOLINT
77 MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS;
78 const unsigned short value = binary_parse::Get16u( // NOLINT
79 source + source_offset, use_big_endian, &status);
80 if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) {
81 return false;
82 }
83 return (target_value == value);
84 }
85
86 // Check if the uint32 value at (source + offset) is equal to the target value.
CheckUInt32Value(const RangeCheckedBytePtr & source,const size_t source_offset,const bool use_big_endian,const unsigned int target_value)87 bool CheckUInt32Value(const RangeCheckedBytePtr& source,
88 const size_t source_offset, const bool use_big_endian,
89 const unsigned int target_value) {
90 MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS;
91 const unsigned int value =
92 binary_parse::Get32u(source + source_offset, use_big_endian, &status);
93 if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) {
94 return false;
95 }
96 return (target_value == value);
97 }
98
99 // Determine the endianness. The return value is NOT the endianness indicator,
100 // it's just that this function was successful.
DetermineEndianness(const RangeCheckedBytePtr & source,bool * is_big_endian)101 bool DetermineEndianness(const RangeCheckedBytePtr& source,
102 bool* is_big_endian) {
103 if (source.remainingLength() < 2) {
104 return false;
105 }
106
107 if (source[0] == 0x49 && source[1] == 0x49) {
108 *is_big_endian = false;
109 } else if (source[0] == 0x4D && source[1] == 0x4D) {
110 *is_big_endian = true;
111 } else {
112 return false;
113 }
114 return true;
115 }
116
117 // Check if signature string can match to the same length string start from
118 // (source + offset). The signature string will be used as longer magic number
119 // series.
IsSignatureMatched(const RangeCheckedBytePtr & source,const size_t source_offset,const string & signature)120 bool IsSignatureMatched(const RangeCheckedBytePtr& source,
121 const size_t source_offset, const string& signature) {
122 return source.substr(source_offset, signature.size()) == signature;
123 }
124
125 // Check if signature is found in [source + offset, source + offset + range].
IsSignatureFound(const RangeCheckedBytePtr & source,const size_t search_offset,const size_t search_range,const string & signature,size_t * first_matched)126 bool IsSignatureFound(const RangeCheckedBytePtr& source,
127 const size_t search_offset, const size_t search_range,
128 const string& signature, size_t* first_matched) {
129 if (source.remainingLength() < search_offset + search_range) {
130 return false;
131 }
132
133 // The index must be in range [offset, offset + range - sizeof(signature)], so
134 // that it can guarantee that it will not read outside of range.
135 for (size_t i = search_offset;
136 i < search_offset + search_range - signature.size(); ++i) {
137 if (IsSignatureMatched(source, i, signature)) {
138 if (first_matched) {
139 *first_matched = i;
140 }
141 return true;
142 }
143 }
144 return false;
145 }
146
147 // Sony RAW format.
148 class ArwTypeChecker : public TypeChecker {
149 public:
Type() const150 virtual RawImageTypes Type() const { return kArwImage; }
151
RequestedSize() const152 virtual size_t RequestedSize() const { return 10000; }
153
154 // Check multiple points:
155 // 1. valid endianness at the beginning of the file;
156 // 2. correct tiff magic number at the (offset == 8) position of the file;
157 // 3. signature "SONY" in first requested bytes;
158 // 4. correct signature for (section + version) in first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const159 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
160 RangeCheckedBytePtr limited_source = LimitSource(source);
161
162 bool use_big_endian;
163 if (!DetermineEndianness(limited_source, &use_big_endian)) {
164 return false;
165 }
166
167 const unsigned short kTiffMagic = 0x2A; // NOLINT
168 const unsigned int kTiffOffset = 8;
169 if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
170 kTiffMagic) ||
171 !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
172 kTiffOffset)) {
173 return false;
174 }
175
176 // Search for kSignatureSony in first requested bytes
177 const string kSignatureSony("SONY");
178 if (!IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
179 kSignatureSony, NULL)) {
180 return false;
181 }
182
183 // Search for (kSignatureFileTypeSection + kSignatureVersions[i]) in first
184 // requested bytes
185 const string kSignatureSection("\x00\xb0\x01\x00\x04\x00\x00\x00", 8);
186 const int kSignatureVersionsSize = 5;
187 const string kSignatureVersions[kSignatureVersionsSize] = {
188 string("\x02\x00", 2), // ARW 1.0
189 string("\x03\x00", 2), // ARW 2.0
190 string("\x03\x01", 2), // ARW 2.1
191 string("\x03\x02", 2), // ARW 2.2
192 string("\x03\x03", 2), // ARW 2.3
193 };
194 bool matched = false;
195 for (int i = 0; i < kSignatureVersionsSize; ++i) {
196 matched = matched || IsSignatureFound(
197 limited_source, 0 /* offset */, RequestedSize(),
198 kSignatureSection + kSignatureVersions[i], NULL);
199 }
200 return matched;
201 }
202 };
203
204 // Canon RAW (CR2 extension).
205 class Cr2TypeChecker : public TypeChecker {
206 public:
Type() const207 virtual RawImageTypes Type() const { return kCr2Image; }
208
RequestedSize() const209 virtual size_t RequestedSize() const { return 16; }
210
211 // Check multiple points:
212 // 1. valid endianness at the beginning of the file;
213 // 2. magic number "42" at the (offset == 2) position of the file;
214 // 3. signature "CR2" at the (offset == 8) position of the file.
IsMyType(const RangeCheckedBytePtr & source) const215 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
216 RangeCheckedBytePtr limited_source = LimitSource(source);
217
218 bool use_big_endian;
219 if (!DetermineEndianness(limited_source, &use_big_endian)) {
220 return false;
221 }
222
223 const unsigned short kTag = 42; // NOLINT
224 if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
225 kTag)) {
226 return false;
227 }
228
229 const string kSignature("CR\2\0", 4);
230 return IsSignatureMatched(limited_source, 8 /* offset */, kSignature);
231 }
232 };
233
234 // Canon RAW (CRW extension).
235 class CrwTypeChecker : public TypeChecker {
236 public:
Type() const237 virtual RawImageTypes Type() const { return kCrwImage; }
238
RequestedSize() const239 virtual size_t RequestedSize() const { return 14; }
240
241 // Check only the signature at the (offset == 6) position of the file.
IsMyType(const RangeCheckedBytePtr & source) const242 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
243 RangeCheckedBytePtr limited_source = LimitSource(source);
244
245 bool use_big_endian;
246 if (!DetermineEndianness(limited_source, &use_big_endian)) {
247 return false;
248 }
249
250 string signature;
251 if (use_big_endian) {
252 signature = string("\x00\x10\xba\xb0\xac\xbb\x00\x02", 8);
253 } else {
254 signature = string("HEAPCCDR");
255 }
256 return IsSignatureMatched(limited_source, 6 /* offset */, signature);
257 }
258 };
259
260 // Kodak RAW.
261 class DcrTypeChecker : public TypeChecker {
262 public:
Type() const263 virtual RawImageTypes Type() const { return kDcrImage; }
264
RequestedSize() const265 virtual size_t RequestedSize() const { return 5000; }
266
267 // Check two different cases, only need to fulfill one of the two:
268 // 1. signature at the (offset == 16) position of the file;
269 // 2. two tags (OriginalFileName and FirmwareVersion) can be found in the
270 // first requested bytes of the file.
IsMyType(const RangeCheckedBytePtr & source) const271 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
272 RangeCheckedBytePtr limited_source = LimitSource(source);
273
274 bool use_big_endian;
275 if (!DetermineEndianness(limited_source, &use_big_endian)) {
276 return false;
277 }
278
279 // Case 1: has signature
280 const string kSignature(
281 "\x4b\x4f\x44\x41\x4b\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20", 16);
282 if (IsSignatureMatched(limited_source, 16 /* offset */, kSignature)) {
283 return true;
284 }
285
286 // Case 2: search for tags in first requested bytes
287 string kIfdTags[2];
288 if (use_big_endian) {
289 kIfdTags[0] = string("\x03\xe9\x00\x02", 4); // OriginalFileName
290 kIfdTags[1] = string("\x0c\xe5\x00\x02", 4); // FirmwareVersion
291 } else {
292 kIfdTags[0] = string("\xe9\x03\x02\x00", 4); // OriginalFileName
293 kIfdTags[1] = string("\xe5\x0c\x02\x00", 4); // FirmwareVersion
294 }
295 return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
296 kIfdTags[0], NULL) &&
297 IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
298 kIfdTags[1], NULL);
299 }
300 };
301
302 // Digital Negative RAW.
303 class DngTypeChecker : public TypeChecker {
304 public:
Type() const305 virtual RawImageTypes Type() const { return kDngImage; }
306
RequestedSize() const307 virtual size_t RequestedSize() const { return 1024; }
308
309 // Check multiple points:
310 // 1. valid endianness at the beginning of the file;
311 // 2. at least two dng specific tags in the first requested bytes of the
312 // file
IsMyType(const RangeCheckedBytePtr & source) const313 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
314 RangeCheckedBytePtr limited_source = LimitSource(source);
315
316 bool use_big_endian;
317 if (!DetermineEndianness(limited_source, &use_big_endian)) {
318 return false;
319 }
320
321 // Search tags in first requested bytes and verify the order of them.
322 const int kTagsCount = 5;
323 string dng_tags[kTagsCount];
324 if (use_big_endian) {
325 dng_tags[0] =
326 string("\xc6\x12\x00\x01\x00\x00\x00\x04", 8); // tag: 50706
327 dng_tags[1] =
328 string("\xc6\x13\x00\x01\x00\x00\x00\x04", 8); // tag: 50707
329 dng_tags[2] = string("\xc6\x14\x00\x02", 4); // tag: 50708
330 dng_tags[3] = string("\xc6\x20", 2); // tag: 50720
331 dng_tags[4] =
332 string("\xc6\x2d\x00\x04\x00\x00\x00\x01", 8); // tag: 50733
333 } else {
334 dng_tags[0] =
335 string("\x12\xc6\x01\x00\x04\x00\x00\x00", 8); // tag: 50706
336 dng_tags[1] =
337 string("\x13\xc6\x01\x00\x04\x00\x00\x00", 8); // tag: 50707
338 dng_tags[2] = string("\x14\xc6\x02\x00", 4); // tag: 50708
339 dng_tags[3] = string("\x20\xc6", 2); // tag: 50720
340 dng_tags[4] =
341 string("\x2d\xc6\x04\x00\x01\x00\x00\x00", 8); // tag: 50733
342 }
343 int tags_found = 0;
344 for (int i = 0; i < kTagsCount; ++i) {
345 if (IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
346 dng_tags[i], NULL)) {
347 tags_found++;
348 }
349 }
350 return tags_found >= 2;
351 }
352 };
353
354 // Kodak RAW.
355 class KdcTypeChecker : public TypeChecker {
356 public:
Type() const357 virtual RawImageTypes Type() const { return kKdcImage; }
358
RequestedSize() const359 virtual size_t RequestedSize() const { return 5000; }
360
361 // Check two points:
362 // 1. valid endianness at the beginning of the file;
363 // 2. two tags (WhiteBalance and SerialNumber) in the first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const364 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
365 RangeCheckedBytePtr limited_source = LimitSource(source);
366
367 bool use_big_endian;
368 if (!DetermineEndianness(limited_source, &use_big_endian)) {
369 return false;
370 }
371
372 // Search in first requested bytes
373 const size_t kIfdTagsSize = 2;
374 string kIfdTags[kIfdTagsSize];
375 if (use_big_endian) {
376 kIfdTags[0] = string("\xfa\x0d\x00\x01", 4); // WhiteBalance
377 kIfdTags[1] = string("\xfa\x00\x00\x02", 4); // SerialNumber
378 } else {
379 kIfdTags[0] = string("\x0d\xfa\x01\x00", 4); // WhiteBalance
380 kIfdTags[1] = string("\x00\xfa\x02\x00", 4); // SerialNumber
381 }
382
383 return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
384 kIfdTags[0], NULL) &&
385 IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
386 kIfdTags[1], NULL);
387 }
388 };
389
390 // Leaf RAW.
391 class MosTypeChecker : public TypeChecker {
392 public:
Type() const393 virtual RawImageTypes Type() const { return kMosImage; }
394
RequestedSize() const395 virtual size_t RequestedSize() const { return 5000; }
396
397 // Check two points:
398 // 1. valid endianness at the beginning of the file;
399 // 2. signature "PKTS " in the first requested bytes. Note the
400 // "whitespace". It's important as they are special binary values.
IsMyType(const RangeCheckedBytePtr & source) const401 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
402 RangeCheckedBytePtr limited_source = LimitSource(source);
403
404 bool use_big_endian;
405 if (!DetermineEndianness(source, &use_big_endian)) {
406 return false;
407 }
408
409 // Search kSignaturePKTS in first requested bytes
410 const string kSignaturePKTS("PKTS\x00\x00\x00\x001", 8);
411 return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
412 kSignaturePKTS, NULL);
413 }
414 };
415
416 // Minolta RAW.
417 class MrwTypeChecker : public TypeChecker {
418 public:
Type() const419 virtual RawImageTypes Type() const { return kMrwImage; }
420
RequestedSize() const421 virtual size_t RequestedSize() const { return 4; }
422
423 // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const424 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
425 // Limits the source length to the RequestedSize(), using it guarantees that
426 // we will not read more than this size from the source.
427 RangeCheckedBytePtr limited_source =
428 source.pointerToSubArray(0 /* pos */, RequestedSize());
429
430 const string kSignature("\0MRM", 4);
431 return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
432 }
433 };
434
435 // Check if the file contains a NRW signature "NRW " in the first requested
436 // bytes. Note the "whitespace". It's important as they are special binary
437 // values.
438 const size_t kRequestedSizeForNrwSignature = 4000;
ContainsNrwSignature(const RangeCheckedBytePtr & source)439 bool ContainsNrwSignature(const RangeCheckedBytePtr& source) {
440 // Search for kSignatureNrw.
441 const string kSignatureNrw("NRW\x20\x20\x20", 6);
442 return IsSignatureFound(source, 0 /* offset */, kRequestedSizeForNrwSignature,
443 kSignatureNrw, NULL);
444 }
445
446 // Checks if the file contains the signatures for Nikon formats:
447 // * the general Nikon singature "NIKON" string.
448 // * the ReferenceBlackWhite tag.
449 const size_t kRequestedSizeForNikonSignatures = 4000;
ContainsNikonSignatures(const RangeCheckedBytePtr & source,const bool use_big_endian)450 bool ContainsNikonSignatures(const RangeCheckedBytePtr& source,
451 const bool use_big_endian) {
452 const string kSignatureNikon("NIKON");
453 const string kReferenceBlackWhiteTag = use_big_endian
454 ? string("\x02\x14\x00\x05", 4)
455 : string("\x14\x02\x05\x00", 4);
456 const std::vector<string> kSignatures = {kSignatureNikon,
457 kReferenceBlackWhiteTag};
458 for (auto const& signature : kSignatures) {
459 if (!IsSignatureFound(source, 0, kRequestedSizeForNikonSignatures,
460 signature, NULL)) {
461 return false;
462 }
463 }
464 return true;
465 }
466
467 // Nikon RAW (NEF extension).
468 class NefTypeChecker : public TypeChecker {
469 public:
Type() const470 virtual RawImageTypes Type() const { return kNefImage; }
471
RequestedSize() const472 virtual size_t RequestedSize() const {
473 return std::max(kRequestedSizeForNikonSignatures,
474 kRequestedSizeForNrwSignature);
475 }
476
477 // Check multiple points:
478 // 1. valid endianness at the beginning of the file;
479 // 2. magic number at the (offset == 2) position of the file;
480 // 3. the signature "NIKON" in the requested bytes of the file;
481 // 4. the ReferenceBlackWhite tag in the requested bytes of the file;
482 // 5. does not contain the NRW signature. We may also check a special
483 // signature "RAW " similar to the NRW case, but we got issues in some
484 // special images that the signature locates in the middle of the file, and it
485 // costs too long time to check;
IsMyType(const RangeCheckedBytePtr & source) const486 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
487 RangeCheckedBytePtr limited_source = LimitSource(source);
488
489 bool use_big_endian;
490 if (!DetermineEndianness(limited_source, &use_big_endian)) {
491 return false;
492 }
493
494 const unsigned short kTiffMagic = 0x2A; // NOLINT
495 if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
496 kTiffMagic)) {
497 return false;
498 }
499
500 return ContainsNikonSignatures(limited_source, use_big_endian) &&
501 !ContainsNrwSignature(limited_source); // not NRW
502 }
503 };
504
505 // Nikon RAW (NRW extension).
506 class NrwTypeChecker : public TypeChecker {
507 public:
Type() const508 virtual RawImageTypes Type() const { return kNrwImage; }
509
RequestedSize() const510 virtual size_t RequestedSize() const {
511 return std::max(kRequestedSizeForNikonSignatures,
512 kRequestedSizeForNrwSignature);
513 }
514
515 // Check multiple points:
516 // 1. valid endianness at the beginning of the file;
517 // 2. magic numbers at the (offset == 2 and offset == 4) positions of the
518 // file;
519 // 3. the signature "NIKON" in the first requested bytes of the file;
520 // 4. the ReferenceBlackWhite tag in the requested bytes of the file;
521 // 5. contains the NRW signature;
IsMyType(const RangeCheckedBytePtr & source) const522 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
523 RangeCheckedBytePtr limited_source = LimitSource(source);
524
525 bool use_big_endian;
526 if (!DetermineEndianness(limited_source, &use_big_endian)) {
527 return false;
528 }
529
530 const unsigned short kTiffMagic = 0x2A; // NOLINT
531 const unsigned int kTiffOffset = 8;
532 if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
533 kTiffMagic) ||
534 !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
535 kTiffOffset)) {
536 return false;
537 }
538
539 return ContainsNikonSignatures(limited_source, use_big_endian) &&
540 ContainsNrwSignature(limited_source);
541 }
542 };
543
544 // Olympus RAW.
545 class OrfTypeChecker : public TypeChecker {
546 public:
Type() const547 virtual RawImageTypes Type() const { return kOrfImage; }
548
RequestedSize() const549 virtual size_t RequestedSize() const { return 3000; }
550
551 // Check multiple points:
552 // 1. valid endianness at the beginning of the file;
553 // 2. tag at the (offset == 2) position of the file;
554 // 3. signature "OLYMP" in the first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const555 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
556 RangeCheckedBytePtr limited_source = LimitSource(source);
557
558 bool use_big_endian;
559 if (!DetermineEndianness(limited_source, &use_big_endian)) {
560 return false;
561 }
562
563 const size_t kTagSize = 2;
564 const unsigned short kTag[kTagSize] = {0x4F52, 0x5352}; // NOLINT
565 if (!(CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
566 kTag[0]) ||
567 CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
568 kTag[1]))) {
569 return false;
570 }
571
572 // Search for kSignatureOlymp in first requested bytes
573 const string kSignatureOlymp("OLYMP");
574 return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
575 kSignatureOlymp, NULL);
576 }
577 };
578
579 // Pentax RAW.
580 class PefTypeChecker : public TypeChecker {
581 public:
Type() const582 virtual RawImageTypes Type() const { return kPefImage; }
583
RequestedSize() const584 virtual size_t RequestedSize() const { return 1280; }
585
586 // Check multiple points:
587 // 1. valid big endianness at the beginning of the file;
588 // 2. magic numbers at the (offset == 2 and offset==4) positions of the file;
589 // 3. signature "AOC " or "PENTAX " in first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const590 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
591 RangeCheckedBytePtr limited_source = LimitSource(source);
592
593 bool use_big_endian;
594 if (!DetermineEndianness(limited_source, &use_big_endian)) {
595 return false;
596 }
597
598 const unsigned short kTiffMagic = 0x2A; // NOLINT
599 const unsigned int kTiffOffset = 8;
600 if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
601 kTiffMagic) ||
602 !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
603 kTiffOffset)) {
604 return false;
605 }
606
607 // Search for kSignatureAOC or kSignaturePENTAX in first requested bytes
608 const string kSignatureAOC("\x41\x4f\x43\x00\x4d\x4d", 6);
609 const string kSignaturePENTAX("\x50\x45\x4e\x54\x41\x58\x20\x00", 8);
610 return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
611 kSignatureAOC, NULL) ||
612 IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
613 kSignaturePENTAX, NULL);
614 }
615 };
616
617 // Apple format.
618 class QtkTypeChecker : public TypeChecker {
619 public:
Type() const620 virtual RawImageTypes Type() const { return kQtkImage; }
621
RequestedSize() const622 virtual size_t RequestedSize() const { return 8; }
623
624 // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const625 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
626 RangeCheckedBytePtr limited_source = LimitSource(source);
627
628 const size_t kSignatureSize = 2;
629 const string kSignature[kSignatureSize] = {
630 string("qktk\x00\x00\x00\x08", 8), string("qktn\x00\x00\x00\x08", 8),
631 };
632 return IsSignatureMatched(limited_source, 0 /* offset */, kSignature[0]) ||
633 IsSignatureMatched(limited_source, 0 /* offset */, kSignature[1]);
634 }
635 };
636
637 // Fuji RAW.
638 class RafTypeChecker : public TypeChecker {
639 public:
Type() const640 virtual RawImageTypes Type() const { return kRafImage; }
641
RequestedSize() const642 virtual size_t RequestedSize() const { return 8; }
643
644 // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const645 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
646 RangeCheckedBytePtr limited_source = LimitSource(source);
647
648 const string kSignature("FUJIFILM");
649 return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
650 }
651 };
652
653 // Contax N RAW.
654 class RawContaxNTypeChecker : public TypeChecker {
655 public:
Type() const656 virtual RawImageTypes Type() const { return kRawContaxNImage; }
657
RequestedSize() const658 virtual size_t RequestedSize() const { return 36; }
659
660 // Check only the signature at the (offset == 25) position of the
661 // file.
IsMyType(const RangeCheckedBytePtr & source) const662 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
663 RangeCheckedBytePtr limited_source = LimitSource(source);
664
665 const string kSignature("ARECOYK");
666 return IsSignatureMatched(limited_source, 25, kSignature);
667 }
668 };
669
670 // Panasonic RAW.
671 class Rw2TypeChecker : public TypeChecker {
672 public:
Type() const673 virtual RawImageTypes Type() const { return kRw2Image; }
674
RequestedSize() const675 virtual size_t RequestedSize() const { return 4; }
676
677 // Check two points: 1. valid endianness at the beginning of the
678 // file; 2. tag at the (offset == 2) position of the file.
IsMyType(const RangeCheckedBytePtr & source) const679 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
680 RangeCheckedBytePtr limited_source = LimitSource(source);
681
682 bool use_big_endian;
683 if (!DetermineEndianness(source, &use_big_endian)) {
684 return false;
685 }
686
687 const unsigned short kTag = 0x55; // NOLINT
688 return CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
689 kTag);
690 }
691 };
692
693 // Samsung RAW.
694 class SrwTypeChecker : public TypeChecker {
695 public:
Type() const696 virtual RawImageTypes Type() const { return kSrwImage; }
697
RequestedSize() const698 virtual size_t RequestedSize() const { return 256; }
699
700 // Check multiple points:
701 // 1. valid big endianness at the beginning of the file;
702 // 2. magic numbers at the (offset == 2 and offset==4) positions of the file;
703 // 3. the signature "SAMSUNG" in the requested bytes of the file;
IsMyType(const RangeCheckedBytePtr & source) const704 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
705 RangeCheckedBytePtr limited_source = LimitSource(source);
706
707 bool use_big_endian;
708 if (!DetermineEndianness(source, &use_big_endian)) {
709 return false;
710 }
711
712 const unsigned short kTiffMagic = 0x2A; // NOLINT
713 const unsigned int kTiffOffset = 8;
714 if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
715 kTiffMagic) ||
716 !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
717 kTiffOffset)) {
718 return false;
719 }
720
721 const string kSignature("SAMSUNG");
722 if (!IsSignatureFound(source, 0, RequestedSize(), kSignature, NULL)) {
723 return false;
724 }
725 return true;
726 }
727 };
728
729 // Sigma / Polaroid RAW.
730 class X3fTypeChecker : public TypeChecker {
731 public:
Type() const732 virtual RawImageTypes Type() const { return kX3fImage; }
733
RequestedSize() const734 virtual size_t RequestedSize() const { return 4; }
735
736 // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const737 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
738 RangeCheckedBytePtr limited_source = LimitSource(source);
739
740 const string kSignature("FOVb", 4);
741 return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
742 }
743 };
744
745 // This class contains the list of all type checkers. One should used this list
746 // as a whole to execute the image type recognition.
747 class TypeCheckerList {
748 public:
TypeCheckerList()749 TypeCheckerList() {
750 // Add all supported RAW type checkers here.
751 checkers_.push_back(new ArwTypeChecker());
752 checkers_.push_back(new Cr2TypeChecker());
753 checkers_.push_back(new CrwTypeChecker());
754 checkers_.push_back(new DcrTypeChecker());
755 checkers_.push_back(new DngTypeChecker());
756 checkers_.push_back(new KdcTypeChecker());
757 checkers_.push_back(new MosTypeChecker());
758 checkers_.push_back(new MrwTypeChecker());
759 checkers_.push_back(new NefTypeChecker());
760 checkers_.push_back(new NrwTypeChecker());
761 checkers_.push_back(new OrfTypeChecker());
762 checkers_.push_back(new PefTypeChecker());
763 checkers_.push_back(new QtkTypeChecker());
764 checkers_.push_back(new RafTypeChecker());
765 checkers_.push_back(new RawContaxNTypeChecker());
766 checkers_.push_back(new Rw2TypeChecker());
767 checkers_.push_back(new SrwTypeChecker());
768 checkers_.push_back(new X3fTypeChecker());
769
770 // Sort the checkers by the ascending RequestedSize() to get better
771 // performance when checking type.
772 std::sort(checkers_.begin(), checkers_.end(), TypeChecker::Compare);
773 }
774
~TypeCheckerList()775 ~TypeCheckerList() {
776 for (size_t i = 0; i < checkers_.size(); ++i) {
777 delete checkers_[i];
778 checkers_[i] = NULL;
779 }
780 }
781
782 // Returns the type of source data. If it can not be identified, returns
783 // kNonRawImage.
GetType(const RangeCheckedBytePtr & source) const784 RawImageTypes GetType(const RangeCheckedBytePtr& source) const {
785 for (size_t i = 0; i < checkers_.size(); ++i) {
786 if (checkers_[i]->IsMyType(source)) {
787 return checkers_[i]->Type();
788 }
789 }
790 return kNonRawImage;
791 }
792
793 // Returns the maximum size of requested size of data for identifying image
794 // type using this class. The class guarantees that it will not read more than
795 // this size.
RequestedSize() const796 size_t RequestedSize() const {
797 assert(!checkers_.empty());
798 // The checkers_ is ascending sorted. The last element is the maximum.
799 return checkers_.back()->RequestedSize();
800 }
801
IsOfType(const RangeCheckedBytePtr & source,const RawImageTypes type)802 bool IsOfType(const RangeCheckedBytePtr& source, const RawImageTypes type) {
803 const TypeChecker* type_checker = GetTypeCheckerForType(type);
804 if (type_checker) {
805 return type_checker->IsMyType(source);
806 } else {
807 return false;
808 }
809 }
810
RequestedSizeForType(const RawImageTypes type)811 size_t RequestedSizeForType(const RawImageTypes type) {
812 const TypeChecker* type_checker = GetTypeCheckerForType(type);
813 if (type_checker) {
814 return type_checker->RequestedSize();
815 } else {
816 return 0;
817 }
818 }
819
820 private:
GetTypeCheckerForType(const RawImageTypes type)821 const TypeChecker* GetTypeCheckerForType(const RawImageTypes type) {
822 for (const auto* type_checker : checkers_) {
823 if (type_checker->Type() == type) {
824 return type_checker;
825 }
826 }
827 return nullptr;
828 }
829
830 std::vector<TypeChecker*> checkers_;
831 };
832
833 } // namespace
834
IsRaw(const RawImageTypes type)835 bool IsRaw(const RawImageTypes type) {
836 switch (type) {
837 // Non-RAW-image type
838 case kNonRawImage: {
839 return false;
840 }
841
842 // Raw image types
843 case kArwImage:
844 case kCr2Image:
845 case kCrwImage:
846 case kDcrImage:
847 case kDngImage:
848 case kKdcImage:
849 case kMosImage:
850 case kMrwImage:
851 case kNefImage:
852 case kNrwImage:
853 case kOrfImage:
854 case kPefImage:
855 case kQtkImage:
856 case kRafImage:
857 case kRawContaxNImage:
858 case kRw2Image:
859 case kSrwImage:
860 case kX3fImage: {
861 return true;
862 }
863
864 default: {
865 // Unsupported type!
866 assert(false);
867 }
868 }
869 return false;
870 }
871
IsOfType(const RangeCheckedBytePtr & source,const RawImageTypes type)872 bool IsOfType(const RangeCheckedBytePtr& source, const RawImageTypes type) {
873 return TypeCheckerList().IsOfType(source, type);
874 }
875
RecognizeRawImageTypeLite(const RangeCheckedBytePtr & source)876 RawImageTypes RecognizeRawImageTypeLite(const RangeCheckedBytePtr& source) {
877 return TypeCheckerList().GetType(source);
878 }
879
GetNumberOfBytesForIsRawLite()880 size_t GetNumberOfBytesForIsRawLite() {
881 return TypeCheckerList().RequestedSize();
882 }
883
GetNumberOfBytesForIsOfType(const RawImageTypes type)884 size_t GetNumberOfBytesForIsOfType(const RawImageTypes type) {
885 return TypeCheckerList().RequestedSizeForType(type);
886 }
887
IsRawLite(const RangeCheckedBytePtr & source)888 bool IsRawLite(const RangeCheckedBytePtr& source) {
889 return IsRaw(RecognizeRawImageTypeLite(source));
890 }
891
892 } // namespace image_type_recognition
893 } // namespace piex
894