1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_
18 #define LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_
19
20 #include <algorithm>
21 #include <string>
22 #include <vector>
23
24 #include "base.h"
25 #include "util/base/logging.h"
26
27 namespace libtextclassifier {
28 namespace nlp_core {
29
// Reverses, in place, the order of the sizeof(T) bytes that start at addr.
// E.g., for sizeof(T) == 4, (b0, b1, b2, b3) becomes (b3, b2, b1, b0).  This is
// the basic step of any little endian <-> big endian conversion.
template <class T>
void SwapBytes(T *addr) {
  char *front = reinterpret_cast<char *>(addr);
  char *back = front + sizeof(T);
  // Walk inward from both ends, exchanging one pair of bytes per step.  For an
  // odd sizeof(T), the middle byte ends up exchanged with itself (a no-op).
  while (front < back) {
    --back;
    const char saved = *front;
    *front = *back;
    *back = saved;
    ++front;
  }
}
38
39 // Assuming addr points to a piece of data of type T, with its bytes in the
40 // little/big endian order specific to the machine this code runs on, this
41 // method will re-arrange the bytes (in place) in little-endian order.
42 template <class T>
HostToLittleEndian(T * addr)43 void HostToLittleEndian(T *addr) {
44 if (LittleEndian::IsLittleEndian()) {
45 // Do nothing: current machine is little-endian.
46 } else {
47 SwapBytes(addr);
48 }
49 }
50
// Inverse of HostToLittleEndian: takes the sizeof(T) bytes at addr, stored in
// little-endian order, and rearranges them (in place) into the byte order of
// the machine this code runs on.
template <class T>
void LittleEndianToHost(T *addr) {
  // Both directions are the very same transformation: nothing to do on a
  // little-endian host (source and target formats coincide), a byte swap
  // otherwise.  Hence we simply delegate.
  HostToLittleEndian(addr);
}
58
// Returns the string obtained by concatenating the bytes of the elements of
// vector v (in order: v[0], v[1], etc).  If the type T requires more than one
// byte, the bytes of each element are first converted to little-endian order.
//
// The result always has exactly v.size() * sizeof(T) bytes; v itself is not
// modified.
template <typename T>
std::string GetDataBytesInLittleEndianOrder(const std::vector<T> &v) {
  std::string data_bytes;
  // The final size is known up front; reserving it avoids repeated
  // reallocation while appending.
  data_bytes.reserve(v.size() * sizeof(T));
  // Each element is taken by value on purpose: the loop variable is the
  // scratch copy whose bytes get reordered in place before being appended.
  for (T little_endian_element : v) {
    HostToLittleEndian(&little_endian_element);
    data_bytes.append(
        reinterpret_cast<const char *>(&little_endian_element),
        sizeof(T));
  }
  return data_bytes;
}
74
75 // Performs reverse of GetDataBytesInLittleEndianOrder.
76 //
77 // I.e., decodes the data bytes from parameter bytes into num_elements Ts, and
78 // places them in the vector v (previous content of that vector is erased).
79 //
80 // We expect bytes to contain the concatenation of the bytes for exactly
81 // num_elements elements of type T. If the type T requires more than one byte,
82 // those bytes should be arranged in little-endian form.
83 //
84 // Returns true on success and false otherwise (e.g., bytes has the wrong size).
85 // Note: we do not want to crash on corrupted data (some clients, e..g, GMSCore,
86 // have asked us not to do so). Instead, we report the error and let the client
87 // decide what to do. On error, we also fill the vector with zeros, such that
88 // at least the dimension of v matches expectations.
89 template<typename T>
FillVectorFromDataBytesInLittleEndian(const std::string & bytes,int num_elements,std::vector<T> * v)90 bool FillVectorFromDataBytesInLittleEndian(
91 const std::string &bytes, int num_elements, std::vector<T> *v) {
92 if (bytes.size() != num_elements * sizeof(T)) {
93 TC_LOG(ERROR) << "Wrong number of bytes: actual " << bytes.size()
94 << " vs expected " << num_elements
95 << " elements of sizeof(element) = " << sizeof(T)
96 << " bytes each ; will fill vector with zeros";
97 v->assign(num_elements, static_cast<T>(0));
98 return false;
99 }
100 v->clear();
101 v->reserve(num_elements);
102 const T *start = reinterpret_cast<const T *>(bytes.data());
103 if (LittleEndian::IsLittleEndian() || (sizeof(T) == 1)) {
104 // Fast in the common case ([almost] all hardware today is little-endian):
105 // if same endianness (or type T requires a single byte and endianness
106 // irrelevant), just use the bytes.
107 v->assign(start, start + num_elements);
108 } else {
109 // Slower (but very rare case): this code runs on a big endian machine and
110 // the type T requires more than one byte. Hence, some conversion is
111 // necessary.
112 for (int i = 0; i < num_elements; ++i) {
113 T temp = start[i];
114 SwapBytes(&temp);
115 v->push_back(temp);
116 }
117 }
118 return true;
119 }
120
121 } // namespace nlp_core
122 } // namespace libtextclassifier
123
124 #endif // LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_
125