• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LATINIME_BYTE_ARRAY_UTILS_H
18 #define LATINIME_BYTE_ARRAY_UTILS_H
19 
20 #include <cstdint>
21 
22 #include "defines.h"
23 
24 namespace latinime {
25 
26 /**
27  * Utility methods for reading byte arrays.
28  */
29 class ByteArrayUtils {
30  public:
31     /**
32      * Integer writing
33      *
34      * Each method write a corresponding size integer in a big endian manner.
35      */
writeUintAndAdvancePosition(uint8_t * const buffer,const uint32_t data,const int size,int * const pos)36     static AK_FORCE_INLINE void writeUintAndAdvancePosition(uint8_t *const buffer,
37             const uint32_t data, const int size, int *const pos) {
38         // size must be in 1 to 4.
39         ASSERT(size >= 1 && size <= 4);
40         switch (size) {
41             case 1:
42                 ByteArrayUtils::writeUint8AndAdvancePosition(buffer, data, pos);
43                 return;
44             case 2:
45                 ByteArrayUtils::writeUint16AndAdvancePosition(buffer, data, pos);
46                 return;
47             case 3:
48                 ByteArrayUtils::writeUint24AndAdvancePosition(buffer, data, pos);
49                 return;
50             case 4:
51                 ByteArrayUtils::writeUint32AndAdvancePosition(buffer, data, pos);
52                 return;
53             default:
54                 break;
55         }
56     }
57 
58     /**
59      * Integer reading
60      *
61      * Each method read a corresponding size integer in a big endian manner.
62      */
readUint32(const uint8_t * const buffer,const int pos)63     static AK_FORCE_INLINE uint32_t readUint32(const uint8_t *const buffer, const int pos) {
64         return (buffer[pos] << 24) ^ (buffer[pos + 1] << 16)
65                 ^ (buffer[pos + 2] << 8) ^ buffer[pos + 3];
66     }
67 
readUint24(const uint8_t * const buffer,const int pos)68     static AK_FORCE_INLINE uint32_t readUint24(const uint8_t *const buffer, const int pos) {
69         return (buffer[pos] << 16) ^ (buffer[pos + 1] << 8) ^ buffer[pos + 2];
70     }
71 
readUint16(const uint8_t * const buffer,const int pos)72     static AK_FORCE_INLINE uint16_t readUint16(const uint8_t *const buffer, const int pos) {
73         return (buffer[pos] << 8) ^ buffer[pos + 1];
74     }
75 
readUint8(const uint8_t * const buffer,const int pos)76     static AK_FORCE_INLINE uint8_t readUint8(const uint8_t *const buffer, const int pos) {
77         return buffer[pos];
78     }
79 
readUint32AndAdvancePosition(const uint8_t * const buffer,int * const pos)80     static AK_FORCE_INLINE uint32_t readUint32AndAdvancePosition(
81             const uint8_t *const buffer, int *const pos) {
82         const uint32_t value = readUint32(buffer, *pos);
83         *pos += 4;
84         return value;
85     }
86 
readSint24AndAdvancePosition(const uint8_t * const buffer,int * const pos)87     static AK_FORCE_INLINE int readSint24AndAdvancePosition(
88             const uint8_t *const buffer, int *const pos) {
89         const uint8_t value = readUint8(buffer, *pos);
90         if (value < 0x80) {
91             return readUint24AndAdvancePosition(buffer, pos);
92         } else {
93             (*pos)++;
94             return -(((value & 0x7F) << 16) ^ readUint16AndAdvancePosition(buffer, pos));
95         }
96     }
97 
readUint24AndAdvancePosition(const uint8_t * const buffer,int * const pos)98     static AK_FORCE_INLINE uint32_t readUint24AndAdvancePosition(
99             const uint8_t *const buffer, int *const pos) {
100         const uint32_t value = readUint24(buffer, *pos);
101         *pos += 3;
102         return value;
103     }
104 
readUint16AndAdvancePosition(const uint8_t * const buffer,int * const pos)105     static AK_FORCE_INLINE uint16_t readUint16AndAdvancePosition(
106             const uint8_t *const buffer, int *const pos) {
107         const uint16_t value = readUint16(buffer, *pos);
108         *pos += 2;
109         return value;
110     }
111 
readUint8AndAdvancePosition(const uint8_t * const buffer,int * const pos)112     static AK_FORCE_INLINE uint8_t readUint8AndAdvancePosition(
113             const uint8_t *const buffer, int *const pos) {
114         return buffer[(*pos)++];
115     }
116 
readUint(const uint8_t * const buffer,const int size,const int pos)117     static AK_FORCE_INLINE int readUint(const uint8_t *const buffer,
118             const int size, const int pos) {
119         // size must be in 1 to 4.
120         ASSERT(size >= 1 && size <= 4);
121         switch (size) {
122             case 1:
123                 return ByteArrayUtils::readUint8(buffer, pos);
124             case 2:
125                 return ByteArrayUtils::readUint16(buffer, pos);
126             case 3:
127                 return ByteArrayUtils::readUint24(buffer, pos);
128             case 4:
129                 return ByteArrayUtils::readUint32(buffer, pos);
130             default:
131                 return 0;
132         }
133     }
134 
135     /**
136      * Code Point Reading
137      *
138      * 1 byte = bbbbbbbb match
139      * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
140      * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
141      *       unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
142      *       00011111 would be outside unicode.
143      * else: iso-latin-1 code
144      * This allows for the whole unicode range to be encoded, including chars outside of
145      * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
146      * characters which should never happen anyway (and still work, but take 3 bytes).
147      */
readCodePoint(const uint8_t * const buffer,const int pos)148     static AK_FORCE_INLINE int readCodePoint(const uint8_t *const buffer, const int pos) {
149         int p = pos;
150         return readCodePointAndAdvancePosition(buffer, &p);
151     }
152 
readCodePointAndAdvancePosition(const uint8_t * const buffer,int * const pos)153     static AK_FORCE_INLINE int readCodePointAndAdvancePosition(
154             const uint8_t *const buffer, int *const pos) {
155         const uint8_t firstByte = readUint8(buffer, *pos);
156         if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) {
157             if (firstByte == CHARACTER_ARRAY_TERMINATOR) {
158                 *pos += 1;
159                 return NOT_A_CODE_POINT;
160             } else {
161                 return readUint24AndAdvancePosition(buffer, pos);
162             }
163         } else {
164             *pos += 1;
165             return firstByte;
166         }
167     }
168 
169     /**
170      * String (array of code points) Reading
171      *
172      * Reads code points until the terminator is found.
173      */
174     // Returns the length of the string.
readStringAndAdvancePosition(const uint8_t * const buffer,const int maxLength,int * const outBuffer,int * const pos)175     static int readStringAndAdvancePosition(const uint8_t *const buffer,
176             const int maxLength, int *const outBuffer, int *const pos) {
177         int length = 0;
178         int codePoint = readCodePointAndAdvancePosition(buffer, pos);
179         while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
180             outBuffer[length++] = codePoint;
181             codePoint = readCodePointAndAdvancePosition(buffer, pos);
182         }
183         return length;
184     }
185 
186     // Advances the position and returns the length of the string.
advancePositionToBehindString(const uint8_t * const buffer,const int maxLength,int * const pos)187     static int advancePositionToBehindString(
188             const uint8_t *const buffer, const int maxLength, int *const pos) {
189         int length = 0;
190         int codePoint = readCodePointAndAdvancePosition(buffer, pos);
191         while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
192             codePoint = readCodePointAndAdvancePosition(buffer, pos);
193             length++;
194         }
195         return length;
196     }
197 
198     /**
199      * String (array of code points) Writing
200      */
writeCodePointsAndAdvancePosition(uint8_t * const buffer,const int * const codePoints,const int codePointCount,const bool writesTerminator,int * const pos)201     static void writeCodePointsAndAdvancePosition(uint8_t *const buffer,
202             const int *const codePoints, const int codePointCount, const bool writesTerminator,
203             int *const pos) {
204         for (int i = 0; i < codePointCount; ++i) {
205             const int codePoint = codePoints[i];
206             if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) {
207                 break;
208             } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE
209                     || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
210                 // three bytes character.
211                 writeUint24AndAdvancePosition(buffer, codePoint, pos);
212             } else {
213                 // one byte character.
214                 writeUint8AndAdvancePosition(buffer, codePoint, pos);
215             }
216         }
217         if (writesTerminator) {
218             writeUint8AndAdvancePosition(buffer, CHARACTER_ARRAY_TERMINATOR, pos);
219         }
220     }
221 
calculateRequiredByteCountToStoreCodePoints(const int * const codePoints,const int codePointCount,const bool writesTerminator)222     static int calculateRequiredByteCountToStoreCodePoints(const int *const codePoints,
223             const int codePointCount, const bool writesTerminator) {
224         int byteCount = 0;
225         for (int i = 0; i < codePointCount; ++i) {
226             const int codePoint = codePoints[i];
227             if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) {
228                 break;
229             } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE
230                     || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
231                 // three bytes character.
232                 byteCount += 3;
233             } else {
234                 // one byte character.
235                 byteCount += 1;
236             }
237         }
238         if (writesTerminator) {
239             // The terminator is one byte.
240             byteCount += 1;
241         }
242         return byteCount;
243     }
244 
245  private:
246     DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils);
247 
248     static const uint8_t MINIMUM_ONE_BYTE_CHARACTER_VALUE;
249     static const uint8_t MAXIMUM_ONE_BYTE_CHARACTER_VALUE;
250     static const uint8_t CHARACTER_ARRAY_TERMINATOR;
251 
writeUint32AndAdvancePosition(uint8_t * const buffer,const uint32_t data,int * const pos)252     static AK_FORCE_INLINE void writeUint32AndAdvancePosition(uint8_t *const buffer,
253             const uint32_t data, int *const pos) {
254         buffer[(*pos)++] = (data >> 24) & 0xFF;
255         buffer[(*pos)++] = (data >> 16) & 0xFF;
256         buffer[(*pos)++] = (data >> 8) & 0xFF;
257         buffer[(*pos)++] = data & 0xFF;
258     }
259 
writeUint24AndAdvancePosition(uint8_t * const buffer,const uint32_t data,int * const pos)260     static AK_FORCE_INLINE void writeUint24AndAdvancePosition(uint8_t *const buffer,
261             const uint32_t data, int *const pos) {
262         buffer[(*pos)++] = (data >> 16) & 0xFF;
263         buffer[(*pos)++] = (data >> 8) & 0xFF;
264         buffer[(*pos)++] = data & 0xFF;
265     }
266 
writeUint16AndAdvancePosition(uint8_t * const buffer,const uint16_t data,int * const pos)267     static AK_FORCE_INLINE void writeUint16AndAdvancePosition(uint8_t *const buffer,
268             const uint16_t data, int *const pos) {
269         buffer[(*pos)++] = (data >> 8) & 0xFF;
270         buffer[(*pos)++] = data & 0xFF;
271     }
272 
writeUint8AndAdvancePosition(uint8_t * const buffer,const uint8_t data,int * const pos)273     static AK_FORCE_INLINE void writeUint8AndAdvancePosition(uint8_t *const buffer,
274             const uint8_t data, int *const pos) {
275         buffer[(*pos)++] = data & 0xFF;
276     }
277 };
278 } // namespace latinime
279 #endif /* LATINIME_BYTE_ARRAY_UTILS_H */
280