/* * Copyright (C) 2017 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_ #define LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_ #include #include #include #include "base.h" #include "util/base/logging.h" namespace libtextclassifier { namespace nlp_core { // Swaps the sizeof(T) bytes that start at addr. E.g., if sizeof(T) == 2, // then (addr[0], addr[1]) -> (addr[1], addr[0]). Useful for little endian // <-> big endian conversions. template void SwapBytes(T *addr) { char *char_ptr = reinterpret_cast(addr); std::reverse(char_ptr, char_ptr + sizeof(T)); } // Assuming addr points to a piece of data of type T, with its bytes in the // little/big endian order specific to the machine this code runs on, this // method will re-arrange the bytes (in place) in little-endian order. template void HostToLittleEndian(T *addr) { if (LittleEndian::IsLittleEndian()) { // Do nothing: current machine is little-endian. } else { SwapBytes(addr); } } // Reverse of HostToLittleEndian. template void LittleEndianToHost(T *addr) { // It turns out it's the same function: on little-endian machines, do nothing // (source and target formats are identical). Otherwise, swap bytes. HostToLittleEndian(addr); } // Returns string obtained by concatenating the bytes of the elements from a // vector (in order: v[0], v[1], etc). If the type T requires more than one // byte, the byte for each element are first converted to little-endian format. template std::string GetDataBytesInLittleEndianOrder(const std::vector &v) { std::string data_bytes; for (const T element : v) { T little_endian_element = element; HostToLittleEndian(&little_endian_element); data_bytes.append( reinterpret_cast(&little_endian_element), sizeof(T)); } return data_bytes; } // Performs reverse of GetDataBytesInLittleEndianOrder. // // I.e., decodes the data bytes from parameter bytes into num_elements Ts, and // places them in the vector v (previous content of that vector is erased). // // We expect bytes to contain the concatenation of the bytes for exactly // num_elements elements of type T. If the type T requires more than one byte, // those bytes should be arranged in little-endian form. // // Returns true on success and false otherwise (e.g., bytes has the wrong size). // Note: we do not want to crash on corrupted data (some clients, e..g, GMSCore, // have asked us not to do so). Instead, we report the error and let the client // decide what to do. On error, we also fill the vector with zeros, such that // at least the dimension of v matches expectations. template bool FillVectorFromDataBytesInLittleEndian( const std::string &bytes, int num_elements, std::vector *v) { if (bytes.size() != num_elements * sizeof(T)) { TC_LOG(ERROR) << "Wrong number of bytes: actual " << bytes.size() << " vs expected " << num_elements << " elements of sizeof(element) = " << sizeof(T) << " bytes each ; will fill vector with zeros"; v->assign(num_elements, static_cast(0)); return false; } v->clear(); v->reserve(num_elements); const T *start = reinterpret_cast(bytes.data()); if (LittleEndian::IsLittleEndian() || (sizeof(T) == 1)) { // Fast in the common case ([almost] all hardware today is little-endian): // if same endianness (or type T requires a single byte and endianness // irrelevant), just use the bytes. v->assign(start, start + num_elements); } else { // Slower (but very rare case): this code runs on a big endian machine and // the type T requires more than one byte. Hence, some conversion is // necessary. for (int i = 0; i < num_elements; ++i) { T temp = start[i]; SwapBytes(&temp); v->push_back(temp); } } return true; } } // namespace nlp_core } // namespace libtextclassifier #endif // LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_