1 //===-- DataExtractor.h -----------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #ifndef LLVM_SUPPORT_DATAEXTRACTOR_H
11 #define LLVM_SUPPORT_DATAEXTRACTOR_H
12
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/DataTypes.h"
15
16 namespace llvm {
17
/// Helper type that holds a 3-byte entity exactly as it is laid out in memory.
///
/// The bytes are stored in buffer order; no endianness is implied until the
/// value is widened with getAsUint32().
struct Uint24 {
  uint8_t Bytes[3];

  /// Fill all three bytes with the same value \a U.
  ///
  /// Intentionally not explicit, so a Uint24 can be initialized from a plain
  /// byte value.
  Uint24(uint8_t U) : Bytes{U, U, U} {}

  /// Build from three individual bytes, given in storage order.
  Uint24(uint8_t U0, uint8_t U1, uint8_t U2) : Bytes{U0, U1, U2} {}

  /// Widen the stored bytes to a 32-bit integer.
  ///
  /// \param IsLittleEndian
  ///     When true, Bytes[0] is the least significant byte; otherwise
  ///     Bytes[2] is.
  ///
  /// \return
  ///     The 24-bit value in the low three bytes of a uint32_t.
  uint32_t getAsUint32(bool IsLittleEndian) const {
    const uint32_t Low = IsLittleEndian ? Bytes[0] : Bytes[2];
    const uint32_t Mid = Bytes[1];
    const uint32_t High = IsLittleEndian ? Bytes[2] : Bytes[0];
    // The three fields occupy disjoint bit ranges, so OR-ing them together
    // yields the same result as adding them.
    return Low | (Mid << 8) | (High << 16);
  }
};

/// Short alias that reads like the fixed-width integer typedefs.
using uint24_t = Uint24;
// The type must occupy exactly three bytes, with no padding.
static_assert(sizeof(uint24_t) == 3, "sizeof(uint24_t) != 3");
35
36 /// Needed by swapByteOrder().
getSwappedBytes(uint24_t C)37 inline uint24_t getSwappedBytes(uint24_t C) {
38 return uint24_t(C.Bytes[2], C.Bytes[1], C.Bytes[0]);
39 }
40
41 class DataExtractor {
42 StringRef Data;
43 uint8_t IsLittleEndian;
44 uint8_t AddressSize;
45 public:
46 /// Construct with a buffer that is owned by the caller.
47 ///
48 /// This constructor allows us to use data that is owned by the
49 /// caller. The data must stay around as long as this object is
50 /// valid.
DataExtractor(StringRef Data,bool IsLittleEndian,uint8_t AddressSize)51 DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize)
52 : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
53
54 /// Get the data pointed to by this extractor.
getData()55 StringRef getData() const { return Data; }
56 /// Get the endianness for this extractor.
isLittleEndian()57 bool isLittleEndian() const { return IsLittleEndian; }
58 /// Get the address size for this extractor.
getAddressSize()59 uint8_t getAddressSize() const { return AddressSize; }
60 /// Set the address size for this extractor.
setAddressSize(uint8_t Size)61 void setAddressSize(uint8_t Size) { AddressSize = Size; }
62
63 /// Extract a C string from \a *offset_ptr.
64 ///
65 /// Returns a pointer to a C String from the data at the offset
66 /// pointed to by \a offset_ptr. A variable length NULL terminated C
67 /// string will be extracted and the \a offset_ptr will be
68 /// updated with the offset of the byte that follows the NULL
69 /// terminator byte.
70 ///
71 /// @param[in,out] offset_ptr
72 /// A pointer to an offset within the data that will be advanced
73 /// by the appropriate number of bytes if the value is extracted
74 /// correctly. If the offset is out of bounds or there are not
75 /// enough bytes to extract this value, the offset will be left
76 /// unmodified.
77 ///
78 /// @return
79 /// A pointer to the C string value in the data. If the offset
80 /// pointed to by \a offset_ptr is out of bounds, or if the
81 /// offset plus the length of the C string is out of bounds,
82 /// NULL will be returned.
83 const char *getCStr(uint32_t *offset_ptr) const;
84
85 /// Extract a C string from \a *OffsetPtr.
86 ///
87 /// Returns a StringRef for the C String from the data at the offset
88 /// pointed to by \a OffsetPtr. A variable length NULL terminated C
89 /// string will be extracted and the \a OffsetPtr will be
90 /// updated with the offset of the byte that follows the NULL
91 /// terminator byte.
92 ///
93 /// \param[in,out] OffsetPtr
94 /// A pointer to an offset within the data that will be advanced
95 /// by the appropriate number of bytes if the value is extracted
96 /// correctly. If the offset is out of bounds or there are not
97 /// enough bytes to extract this value, the offset will be left
98 /// unmodified.
99 ///
100 /// \return
101 /// A StringRef for the C string value in the data. If the offset
102 /// pointed to by \a OffsetPtr is out of bounds, or if the
103 /// offset plus the length of the C string is out of bounds,
104 /// a default-initialized StringRef will be returned.
105 StringRef getCStrRef(uint32_t *OffsetPtr) const;
106
107 /// Extract an unsigned integer of size \a byte_size from \a
108 /// *offset_ptr.
109 ///
110 /// Extract a single unsigned integer value and update the offset
111 /// pointed to by \a offset_ptr. The size of the extracted integer
112 /// is specified by the \a byte_size argument. \a byte_size should
113 /// have a value greater than or equal to one and less than or equal
114 /// to eight since the return value is 64 bits wide. Any
115 /// \a byte_size values less than 1 or greater than 8 will result in
116 /// nothing being extracted, and zero being returned.
117 ///
118 /// @param[in,out] offset_ptr
119 /// A pointer to an offset within the data that will be advanced
120 /// by the appropriate number of bytes if the value is extracted
121 /// correctly. If the offset is out of bounds or there are not
122 /// enough bytes to extract this value, the offset will be left
123 /// unmodified.
124 ///
125 /// @param[in] byte_size
126 /// The size in byte of the integer to extract.
127 ///
128 /// @return
129 /// The unsigned integer value that was extracted, or zero on
130 /// failure.
131 uint64_t getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const;
132
133 /// Extract an signed integer of size \a byte_size from \a *offset_ptr.
134 ///
135 /// Extract a single signed integer value (sign extending if required)
136 /// and update the offset pointed to by \a offset_ptr. The size of
137 /// the extracted integer is specified by the \a byte_size argument.
138 /// \a byte_size should have a value greater than or equal to one
139 /// and less than or equal to eight since the return value is 64
140 /// bits wide. Any \a byte_size values less than 1 or greater than
141 /// 8 will result in nothing being extracted, and zero being returned.
142 ///
143 /// @param[in,out] offset_ptr
144 /// A pointer to an offset within the data that will be advanced
145 /// by the appropriate number of bytes if the value is extracted
146 /// correctly. If the offset is out of bounds or there are not
147 /// enough bytes to extract this value, the offset will be left
148 /// unmodified.
149 ///
150 /// @param[in] size
151 /// The size in bytes of the integer to extract.
152 ///
153 /// @return
154 /// The sign extended signed integer value that was extracted,
155 /// or zero on failure.
156 int64_t getSigned(uint32_t *offset_ptr, uint32_t size) const;
157
158 //------------------------------------------------------------------
159 /// Extract an pointer from \a *offset_ptr.
160 ///
161 /// Extract a single pointer from the data and update the offset
162 /// pointed to by \a offset_ptr. The size of the extracted pointer
163 /// is \a getAddressSize(), so the address size has to be
164 /// set correctly prior to extracting any pointer values.
165 ///
166 /// @param[in,out] offset_ptr
167 /// A pointer to an offset within the data that will be advanced
168 /// by the appropriate number of bytes if the value is extracted
169 /// correctly. If the offset is out of bounds or there are not
170 /// enough bytes to extract this value, the offset will be left
171 /// unmodified.
172 ///
173 /// @return
174 /// The extracted pointer value as a 64 integer.
getAddress(uint32_t * offset_ptr)175 uint64_t getAddress(uint32_t *offset_ptr) const {
176 return getUnsigned(offset_ptr, AddressSize);
177 }
178
179 /// Extract a uint8_t value from \a *offset_ptr.
180 ///
181 /// Extract a single uint8_t from the binary data at the offset
182 /// pointed to by \a offset_ptr, and advance the offset on success.
183 ///
184 /// @param[in,out] offset_ptr
185 /// A pointer to an offset within the data that will be advanced
186 /// by the appropriate number of bytes if the value is extracted
187 /// correctly. If the offset is out of bounds or there are not
188 /// enough bytes to extract this value, the offset will be left
189 /// unmodified.
190 ///
191 /// @return
192 /// The extracted uint8_t value.
193 uint8_t getU8(uint32_t *offset_ptr) const;
194
195 /// Extract \a count uint8_t values from \a *offset_ptr.
196 ///
197 /// Extract \a count uint8_t values from the binary data at the
198 /// offset pointed to by \a offset_ptr, and advance the offset on
199 /// success. The extracted values are copied into \a dst.
200 ///
201 /// @param[in,out] offset_ptr
202 /// A pointer to an offset within the data that will be advanced
203 /// by the appropriate number of bytes if the value is extracted
204 /// correctly. If the offset is out of bounds or there are not
205 /// enough bytes to extract this value, the offset will be left
206 /// unmodified.
207 ///
208 /// @param[out] dst
209 /// A buffer to copy \a count uint8_t values into. \a dst must
210 /// be large enough to hold all requested data.
211 ///
212 /// @param[in] count
213 /// The number of uint8_t values to extract.
214 ///
215 /// @return
216 /// \a dst if all values were properly extracted and copied,
217 /// NULL otherise.
218 uint8_t *getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const;
219
220 //------------------------------------------------------------------
221 /// Extract a uint16_t value from \a *offset_ptr.
222 ///
223 /// Extract a single uint16_t from the binary data at the offset
224 /// pointed to by \a offset_ptr, and update the offset on success.
225 ///
226 /// @param[in,out] offset_ptr
227 /// A pointer to an offset within the data that will be advanced
228 /// by the appropriate number of bytes if the value is extracted
229 /// correctly. If the offset is out of bounds or there are not
230 /// enough bytes to extract this value, the offset will be left
231 /// unmodified.
232 ///
233 /// @return
234 /// The extracted uint16_t value.
235 //------------------------------------------------------------------
236 uint16_t getU16(uint32_t *offset_ptr) const;
237
238 /// Extract \a count uint16_t values from \a *offset_ptr.
239 ///
240 /// Extract \a count uint16_t values from the binary data at the
241 /// offset pointed to by \a offset_ptr, and advance the offset on
242 /// success. The extracted values are copied into \a dst.
243 ///
244 /// @param[in,out] offset_ptr
245 /// A pointer to an offset within the data that will be advanced
246 /// by the appropriate number of bytes if the value is extracted
247 /// correctly. If the offset is out of bounds or there are not
248 /// enough bytes to extract this value, the offset will be left
249 /// unmodified.
250 ///
251 /// @param[out] dst
252 /// A buffer to copy \a count uint16_t values into. \a dst must
253 /// be large enough to hold all requested data.
254 ///
255 /// @param[in] count
256 /// The number of uint16_t values to extract.
257 ///
258 /// @return
259 /// \a dst if all values were properly extracted and copied,
260 /// NULL otherise.
261 uint16_t *getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const;
262
263 /// Extract a 24-bit unsigned value from \a *offset_ptr and return it
264 /// in a uint32_t.
265 ///
266 /// Extract 3 bytes from the binary data at the offset pointed to by
267 /// \a offset_ptr, construct a uint32_t from them and update the offset
268 /// on success.
269 ///
270 /// @param[in,out] offset_ptr
271 /// A pointer to an offset within the data that will be advanced
272 /// by the 3 bytes if the value is extracted correctly. If the offset
273 /// is out of bounds or there are not enough bytes to extract this value,
274 /// the offset will be left unmodified.
275 ///
276 /// @return
277 /// The extracted 24-bit value represented in a uint32_t.
278 uint32_t getU24(uint32_t *offset_ptr) const;
279
280 /// Extract a uint32_t value from \a *offset_ptr.
281 ///
282 /// Extract a single uint32_t from the binary data at the offset
283 /// pointed to by \a offset_ptr, and update the offset on success.
284 ///
285 /// @param[in,out] offset_ptr
286 /// A pointer to an offset within the data that will be advanced
287 /// by the appropriate number of bytes if the value is extracted
288 /// correctly. If the offset is out of bounds or there are not
289 /// enough bytes to extract this value, the offset will be left
290 /// unmodified.
291 ///
292 /// @return
293 /// The extracted uint32_t value.
294 uint32_t getU32(uint32_t *offset_ptr) const;
295
296 /// Extract \a count uint32_t values from \a *offset_ptr.
297 ///
298 /// Extract \a count uint32_t values from the binary data at the
299 /// offset pointed to by \a offset_ptr, and advance the offset on
300 /// success. The extracted values are copied into \a dst.
301 ///
302 /// @param[in,out] offset_ptr
303 /// A pointer to an offset within the data that will be advanced
304 /// by the appropriate number of bytes if the value is extracted
305 /// correctly. If the offset is out of bounds or there are not
306 /// enough bytes to extract this value, the offset will be left
307 /// unmodified.
308 ///
309 /// @param[out] dst
310 /// A buffer to copy \a count uint32_t values into. \a dst must
311 /// be large enough to hold all requested data.
312 ///
313 /// @param[in] count
314 /// The number of uint32_t values to extract.
315 ///
316 /// @return
317 /// \a dst if all values were properly extracted and copied,
318 /// NULL otherise.
319 uint32_t *getU32(uint32_t *offset_ptr, uint32_t *dst, uint32_t count) const;
320
321 /// Extract a uint64_t value from \a *offset_ptr.
322 ///
323 /// Extract a single uint64_t from the binary data at the offset
324 /// pointed to by \a offset_ptr, and update the offset on success.
325 ///
326 /// @param[in,out] offset_ptr
327 /// A pointer to an offset within the data that will be advanced
328 /// by the appropriate number of bytes if the value is extracted
329 /// correctly. If the offset is out of bounds or there are not
330 /// enough bytes to extract this value, the offset will be left
331 /// unmodified.
332 ///
333 /// @return
334 /// The extracted uint64_t value.
335 uint64_t getU64(uint32_t *offset_ptr) const;
336
337 /// Extract \a count uint64_t values from \a *offset_ptr.
338 ///
339 /// Extract \a count uint64_t values from the binary data at the
340 /// offset pointed to by \a offset_ptr, and advance the offset on
341 /// success. The extracted values are copied into \a dst.
342 ///
343 /// @param[in,out] offset_ptr
344 /// A pointer to an offset within the data that will be advanced
345 /// by the appropriate number of bytes if the value is extracted
346 /// correctly. If the offset is out of bounds or there are not
347 /// enough bytes to extract this value, the offset will be left
348 /// unmodified.
349 ///
350 /// @param[out] dst
351 /// A buffer to copy \a count uint64_t values into. \a dst must
352 /// be large enough to hold all requested data.
353 ///
354 /// @param[in] count
355 /// The number of uint64_t values to extract.
356 ///
357 /// @return
358 /// \a dst if all values were properly extracted and copied,
359 /// NULL otherise.
360 uint64_t *getU64(uint32_t *offset_ptr, uint64_t *dst, uint32_t count) const;
361
362 /// Extract a signed LEB128 value from \a *offset_ptr.
363 ///
364 /// Extracts an signed LEB128 number from this object's data
365 /// starting at the offset pointed to by \a offset_ptr. The offset
366 /// pointed to by \a offset_ptr will be updated with the offset of
367 /// the byte following the last extracted byte.
368 ///
369 /// @param[in,out] offset_ptr
370 /// A pointer to an offset within the data that will be advanced
371 /// by the appropriate number of bytes if the value is extracted
372 /// correctly. If the offset is out of bounds or there are not
373 /// enough bytes to extract this value, the offset will be left
374 /// unmodified.
375 ///
376 /// @return
377 /// The extracted signed integer value.
378 int64_t getSLEB128(uint32_t *offset_ptr) const;
379
380 /// Extract a unsigned LEB128 value from \a *offset_ptr.
381 ///
382 /// Extracts an unsigned LEB128 number from this object's data
383 /// starting at the offset pointed to by \a offset_ptr. The offset
384 /// pointed to by \a offset_ptr will be updated with the offset of
385 /// the byte following the last extracted byte.
386 ///
387 /// @param[in,out] offset_ptr
388 /// A pointer to an offset within the data that will be advanced
389 /// by the appropriate number of bytes if the value is extracted
390 /// correctly. If the offset is out of bounds or there are not
391 /// enough bytes to extract this value, the offset will be left
392 /// unmodified.
393 ///
394 /// @return
395 /// The extracted unsigned integer value.
396 uint64_t getULEB128(uint32_t *offset_ptr) const;
397
398 /// Test the validity of \a offset.
399 ///
400 /// @return
401 /// \b true if \a offset is a valid offset into the data in this
402 /// object, \b false otherwise.
isValidOffset(uint32_t offset)403 bool isValidOffset(uint32_t offset) const { return Data.size() > offset; }
404
405 /// Test the availability of \a length bytes of data from \a offset.
406 ///
407 /// @return
408 /// \b true if \a offset is a valid offset and there are \a
409 /// length bytes available at that offset, \b false otherwise.
isValidOffsetForDataOfSize(uint32_t offset,uint32_t length)410 bool isValidOffsetForDataOfSize(uint32_t offset, uint32_t length) const {
411 return offset + length >= offset && isValidOffset(offset + length - 1);
412 }
413
414 /// Test the availability of enough bytes of data for a pointer from
415 /// \a offset. The size of a pointer is \a getAddressSize().
416 ///
417 /// @return
418 /// \b true if \a offset is a valid offset and there are enough
419 /// bytes for a pointer available at that offset, \b false
420 /// otherwise.
isValidOffsetForAddress(uint32_t offset)421 bool isValidOffsetForAddress(uint32_t offset) const {
422 return isValidOffsetForDataOfSize(offset, AddressSize);
423 }
424 };
425
426 } // namespace llvm
427
428 #endif
429