• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- DataExtractor.h -----------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_SUPPORT_DATAEXTRACTOR_H
11 #define LLVM_SUPPORT_DATAEXTRACTOR_H
12 
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/DataTypes.h"
15 
16 namespace llvm {
17 
/// Helper type holding exactly three raw bytes, used when extracting 24-bit
/// quantities.
struct Uint24 {
  /// The three bytes, kept in the order they were supplied.
  uint8_t Bytes[3];

  /// Fill all three bytes with the same value \p U.
  Uint24(uint8_t U) : Bytes{U, U, U} {}

  /// Store \p U0, \p U1 and \p U2 as bytes 0, 1 and 2 respectively.
  Uint24(uint8_t U0, uint8_t U1, uint8_t U2) : Bytes{U0, U1, U2} {}

  /// Assemble the three bytes into a 32-bit value.
  ///
  /// \param IsLittleEndian
  ///     When true, byte 0 is the least significant byte and byte 2 the most
  ///     significant one; when false the roles of bytes 0 and 2 are swapped.
  ///
  /// \return
  ///     The 24-bit value, zero-extended to 32 bits.
  uint32_t getAsUint32(bool IsLittleEndian) const {
    // The middle byte always lands in bits 8..15; only the outer two bytes
    // trade places depending on the byte order.
    const uint8_t Lowest = IsLittleEndian ? Bytes[0] : Bytes[2];
    const uint8_t Highest = IsLittleEndian ? Bytes[2] : Bytes[0];
    return Lowest + (Bytes[1] << 8) + (Highest << 16);
  }
};

/// Spelling of Uint24 that lines up with the fixed-width integer names.
using uint24_t = Uint24;
// The type must occupy exactly three bytes, with no padding.
static_assert(sizeof(uint24_t) == 3, "sizeof(uint24_t) != 3");
35 
36 /// Needed by swapByteOrder().
getSwappedBytes(uint24_t C)37 inline uint24_t getSwappedBytes(uint24_t C) {
38   return uint24_t(C.Bytes[2], C.Bytes[1], C.Bytes[0]);
39 }
40 
41 class DataExtractor {
42   StringRef Data;
43   uint8_t IsLittleEndian;
44   uint8_t AddressSize;
45 public:
46   /// Construct with a buffer that is owned by the caller.
47   ///
48   /// This constructor allows us to use data that is owned by the
49   /// caller. The data must stay around as long as this object is
50   /// valid.
DataExtractor(StringRef Data,bool IsLittleEndian,uint8_t AddressSize)51   DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize)
52     : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
53 
54   /// Get the data pointed to by this extractor.
getData()55   StringRef getData() const { return Data; }
56   /// Get the endianness for this extractor.
isLittleEndian()57   bool isLittleEndian() const { return IsLittleEndian; }
58   /// Get the address size for this extractor.
getAddressSize()59   uint8_t getAddressSize() const { return AddressSize; }
60   /// Set the address size for this extractor.
setAddressSize(uint8_t Size)61   void setAddressSize(uint8_t Size) { AddressSize = Size; }
62 
63   /// Extract a C string from \a *offset_ptr.
64   ///
65   /// Returns a pointer to a C String from the data at the offset
66   /// pointed to by \a offset_ptr. A variable length NULL terminated C
67   /// string will be extracted and the \a offset_ptr will be
68   /// updated with the offset of the byte that follows the NULL
69   /// terminator byte.
70   ///
71   /// @param[in,out] offset_ptr
72   ///     A pointer to an offset within the data that will be advanced
73   ///     by the appropriate number of bytes if the value is extracted
74   ///     correctly. If the offset is out of bounds or there are not
75   ///     enough bytes to extract this value, the offset will be left
76   ///     unmodified.
77   ///
78   /// @return
79   ///     A pointer to the C string value in the data. If the offset
80   ///     pointed to by \a offset_ptr is out of bounds, or if the
81   ///     offset plus the length of the C string is out of bounds,
82   ///     NULL will be returned.
83   const char *getCStr(uint32_t *offset_ptr) const;
84 
85   /// Extract a C string from \a *OffsetPtr.
86   ///
87   /// Returns a StringRef for the C String from the data at the offset
88   /// pointed to by \a OffsetPtr. A variable length NULL terminated C
89   /// string will be extracted and the \a OffsetPtr will be
90   /// updated with the offset of the byte that follows the NULL
91   /// terminator byte.
92   ///
93   /// \param[in,out] OffsetPtr
94   ///     A pointer to an offset within the data that will be advanced
95   ///     by the appropriate number of bytes if the value is extracted
96   ///     correctly. If the offset is out of bounds or there are not
97   ///     enough bytes to extract this value, the offset will be left
98   ///     unmodified.
99   ///
100   /// \return
101   ///     A StringRef for the C string value in the data. If the offset
102   ///     pointed to by \a OffsetPtr is out of bounds, or if the
103   ///     offset plus the length of the C string is out of bounds,
104   ///     a default-initialized StringRef will be returned.
105   StringRef getCStrRef(uint32_t *OffsetPtr) const;
106 
107   /// Extract an unsigned integer of size \a byte_size from \a
108   /// *offset_ptr.
109   ///
110   /// Extract a single unsigned integer value and update the offset
111   /// pointed to by \a offset_ptr. The size of the extracted integer
112   /// is specified by the \a byte_size argument. \a byte_size should
113   /// have a value greater than or equal to one and less than or equal
114   /// to eight since the return value is 64 bits wide. Any
115   /// \a byte_size values less than 1 or greater than 8 will result in
116   /// nothing being extracted, and zero being returned.
117   ///
118   /// @param[in,out] offset_ptr
119   ///     A pointer to an offset within the data that will be advanced
120   ///     by the appropriate number of bytes if the value is extracted
121   ///     correctly. If the offset is out of bounds or there are not
122   ///     enough bytes to extract this value, the offset will be left
123   ///     unmodified.
124   ///
125   /// @param[in] byte_size
126   ///     The size in byte of the integer to extract.
127   ///
128   /// @return
129   ///     The unsigned integer value that was extracted, or zero on
130   ///     failure.
131   uint64_t getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const;
132 
133   /// Extract an signed integer of size \a byte_size from \a *offset_ptr.
134   ///
135   /// Extract a single signed integer value (sign extending if required)
136   /// and update the offset pointed to by \a offset_ptr. The size of
137   /// the extracted integer is specified by the \a byte_size argument.
138   /// \a byte_size should have a value greater than or equal to one
139   /// and less than or equal to eight since the return value is 64
140   /// bits wide. Any \a byte_size values less than 1 or greater than
141   /// 8 will result in nothing being extracted, and zero being returned.
142   ///
143   /// @param[in,out] offset_ptr
144   ///     A pointer to an offset within the data that will be advanced
145   ///     by the appropriate number of bytes if the value is extracted
146   ///     correctly. If the offset is out of bounds or there are not
147   ///     enough bytes to extract this value, the offset will be left
148   ///     unmodified.
149   ///
150   /// @param[in] size
151   ///     The size in bytes of the integer to extract.
152   ///
153   /// @return
154   ///     The sign extended signed integer value that was extracted,
155   ///     or zero on failure.
156   int64_t getSigned(uint32_t *offset_ptr, uint32_t size) const;
157 
158   //------------------------------------------------------------------
159   /// Extract an pointer from \a *offset_ptr.
160   ///
161   /// Extract a single pointer from the data and update the offset
162   /// pointed to by \a offset_ptr. The size of the extracted pointer
163   /// is \a getAddressSize(), so the address size has to be
164   /// set correctly prior to extracting any pointer values.
165   ///
166   /// @param[in,out] offset_ptr
167   ///     A pointer to an offset within the data that will be advanced
168   ///     by the appropriate number of bytes if the value is extracted
169   ///     correctly. If the offset is out of bounds or there are not
170   ///     enough bytes to extract this value, the offset will be left
171   ///     unmodified.
172   ///
173   /// @return
174   ///     The extracted pointer value as a 64 integer.
getAddress(uint32_t * offset_ptr)175   uint64_t getAddress(uint32_t *offset_ptr) const {
176     return getUnsigned(offset_ptr, AddressSize);
177   }
178 
179   /// Extract a uint8_t value from \a *offset_ptr.
180   ///
181   /// Extract a single uint8_t from the binary data at the offset
182   /// pointed to by \a offset_ptr, and advance the offset on success.
183   ///
184   /// @param[in,out] offset_ptr
185   ///     A pointer to an offset within the data that will be advanced
186   ///     by the appropriate number of bytes if the value is extracted
187   ///     correctly. If the offset is out of bounds or there are not
188   ///     enough bytes to extract this value, the offset will be left
189   ///     unmodified.
190   ///
191   /// @return
192   ///     The extracted uint8_t value.
193   uint8_t getU8(uint32_t *offset_ptr) const;
194 
195   /// Extract \a count uint8_t values from \a *offset_ptr.
196   ///
197   /// Extract \a count uint8_t values from the binary data at the
198   /// offset pointed to by \a offset_ptr, and advance the offset on
199   /// success. The extracted values are copied into \a dst.
200   ///
201   /// @param[in,out] offset_ptr
202   ///     A pointer to an offset within the data that will be advanced
203   ///     by the appropriate number of bytes if the value is extracted
204   ///     correctly. If the offset is out of bounds or there are not
205   ///     enough bytes to extract this value, the offset will be left
206   ///     unmodified.
207   ///
208   /// @param[out] dst
209   ///     A buffer to copy \a count uint8_t values into. \a dst must
210   ///     be large enough to hold all requested data.
211   ///
212   /// @param[in] count
213   ///     The number of uint8_t values to extract.
214   ///
215   /// @return
216   ///     \a dst if all values were properly extracted and copied,
217   ///     NULL otherise.
218   uint8_t *getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const;
219 
220   //------------------------------------------------------------------
221   /// Extract a uint16_t value from \a *offset_ptr.
222   ///
223   /// Extract a single uint16_t from the binary data at the offset
224   /// pointed to by \a offset_ptr, and update the offset on success.
225   ///
226   /// @param[in,out] offset_ptr
227   ///     A pointer to an offset within the data that will be advanced
228   ///     by the appropriate number of bytes if the value is extracted
229   ///     correctly. If the offset is out of bounds or there are not
230   ///     enough bytes to extract this value, the offset will be left
231   ///     unmodified.
232   ///
233   /// @return
234   ///     The extracted uint16_t value.
235   //------------------------------------------------------------------
236   uint16_t getU16(uint32_t *offset_ptr) const;
237 
238   /// Extract \a count uint16_t values from \a *offset_ptr.
239   ///
240   /// Extract \a count uint16_t values from the binary data at the
241   /// offset pointed to by \a offset_ptr, and advance the offset on
242   /// success. The extracted values are copied into \a dst.
243   ///
244   /// @param[in,out] offset_ptr
245   ///     A pointer to an offset within the data that will be advanced
246   ///     by the appropriate number of bytes if the value is extracted
247   ///     correctly. If the offset is out of bounds or there are not
248   ///     enough bytes to extract this value, the offset will be left
249   ///     unmodified.
250   ///
251   /// @param[out] dst
252   ///     A buffer to copy \a count uint16_t values into. \a dst must
253   ///     be large enough to hold all requested data.
254   ///
255   /// @param[in] count
256   ///     The number of uint16_t values to extract.
257   ///
258   /// @return
259   ///     \a dst if all values were properly extracted and copied,
260   ///     NULL otherise.
261   uint16_t *getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const;
262 
263   /// Extract a 24-bit unsigned value from \a *offset_ptr and return it
264   /// in a uint32_t.
265   ///
266   /// Extract 3 bytes from the binary data at the offset pointed to by
267   /// \a offset_ptr, construct a uint32_t from them and update the offset
268   /// on success.
269   ///
270   /// @param[in,out] offset_ptr
271   ///     A pointer to an offset within the data that will be advanced
272   ///     by the 3 bytes if the value is extracted correctly. If the offset
273   ///     is out of bounds or there are not enough bytes to extract this value,
274   ///     the offset will be left unmodified.
275   ///
276   /// @return
277   ///     The extracted 24-bit value represented in a uint32_t.
278   uint32_t getU24(uint32_t *offset_ptr) const;
279 
280   /// Extract a uint32_t value from \a *offset_ptr.
281   ///
282   /// Extract a single uint32_t from the binary data at the offset
283   /// pointed to by \a offset_ptr, and update the offset on success.
284   ///
285   /// @param[in,out] offset_ptr
286   ///     A pointer to an offset within the data that will be advanced
287   ///     by the appropriate number of bytes if the value is extracted
288   ///     correctly. If the offset is out of bounds or there are not
289   ///     enough bytes to extract this value, the offset will be left
290   ///     unmodified.
291   ///
292   /// @return
293   ///     The extracted uint32_t value.
294   uint32_t getU32(uint32_t *offset_ptr) const;
295 
296   /// Extract \a count uint32_t values from \a *offset_ptr.
297   ///
298   /// Extract \a count uint32_t values from the binary data at the
299   /// offset pointed to by \a offset_ptr, and advance the offset on
300   /// success. The extracted values are copied into \a dst.
301   ///
302   /// @param[in,out] offset_ptr
303   ///     A pointer to an offset within the data that will be advanced
304   ///     by the appropriate number of bytes if the value is extracted
305   ///     correctly. If the offset is out of bounds or there are not
306   ///     enough bytes to extract this value, the offset will be left
307   ///     unmodified.
308   ///
309   /// @param[out] dst
310   ///     A buffer to copy \a count uint32_t values into. \a dst must
311   ///     be large enough to hold all requested data.
312   ///
313   /// @param[in] count
314   ///     The number of uint32_t values to extract.
315   ///
316   /// @return
317   ///     \a dst if all values were properly extracted and copied,
318   ///     NULL otherise.
319   uint32_t *getU32(uint32_t *offset_ptr, uint32_t *dst, uint32_t count) const;
320 
321   /// Extract a uint64_t value from \a *offset_ptr.
322   ///
323   /// Extract a single uint64_t from the binary data at the offset
324   /// pointed to by \a offset_ptr, and update the offset on success.
325   ///
326   /// @param[in,out] offset_ptr
327   ///     A pointer to an offset within the data that will be advanced
328   ///     by the appropriate number of bytes if the value is extracted
329   ///     correctly. If the offset is out of bounds or there are not
330   ///     enough bytes to extract this value, the offset will be left
331   ///     unmodified.
332   ///
333   /// @return
334   ///     The extracted uint64_t value.
335   uint64_t getU64(uint32_t *offset_ptr) const;
336 
337   /// Extract \a count uint64_t values from \a *offset_ptr.
338   ///
339   /// Extract \a count uint64_t values from the binary data at the
340   /// offset pointed to by \a offset_ptr, and advance the offset on
341   /// success. The extracted values are copied into \a dst.
342   ///
343   /// @param[in,out] offset_ptr
344   ///     A pointer to an offset within the data that will be advanced
345   ///     by the appropriate number of bytes if the value is extracted
346   ///     correctly. If the offset is out of bounds or there are not
347   ///     enough bytes to extract this value, the offset will be left
348   ///     unmodified.
349   ///
350   /// @param[out] dst
351   ///     A buffer to copy \a count uint64_t values into. \a dst must
352   ///     be large enough to hold all requested data.
353   ///
354   /// @param[in] count
355   ///     The number of uint64_t values to extract.
356   ///
357   /// @return
358   ///     \a dst if all values were properly extracted and copied,
359   ///     NULL otherise.
360   uint64_t *getU64(uint32_t *offset_ptr, uint64_t *dst, uint32_t count) const;
361 
362   /// Extract a signed LEB128 value from \a *offset_ptr.
363   ///
364   /// Extracts an signed LEB128 number from this object's data
365   /// starting at the offset pointed to by \a offset_ptr. The offset
366   /// pointed to by \a offset_ptr will be updated with the offset of
367   /// the byte following the last extracted byte.
368   ///
369   /// @param[in,out] offset_ptr
370   ///     A pointer to an offset within the data that will be advanced
371   ///     by the appropriate number of bytes if the value is extracted
372   ///     correctly. If the offset is out of bounds or there are not
373   ///     enough bytes to extract this value, the offset will be left
374   ///     unmodified.
375   ///
376   /// @return
377   ///     The extracted signed integer value.
378   int64_t getSLEB128(uint32_t *offset_ptr) const;
379 
380   /// Extract a unsigned LEB128 value from \a *offset_ptr.
381   ///
382   /// Extracts an unsigned LEB128 number from this object's data
383   /// starting at the offset pointed to by \a offset_ptr. The offset
384   /// pointed to by \a offset_ptr will be updated with the offset of
385   /// the byte following the last extracted byte.
386   ///
387   /// @param[in,out] offset_ptr
388   ///     A pointer to an offset within the data that will be advanced
389   ///     by the appropriate number of bytes if the value is extracted
390   ///     correctly. If the offset is out of bounds or there are not
391   ///     enough bytes to extract this value, the offset will be left
392   ///     unmodified.
393   ///
394   /// @return
395   ///     The extracted unsigned integer value.
396   uint64_t getULEB128(uint32_t *offset_ptr) const;
397 
398   /// Test the validity of \a offset.
399   ///
400   /// @return
401   ///     \b true if \a offset is a valid offset into the data in this
402   ///     object, \b false otherwise.
isValidOffset(uint32_t offset)403   bool isValidOffset(uint32_t offset) const { return Data.size() > offset; }
404 
405   /// Test the availability of \a length bytes of data from \a offset.
406   ///
407   /// @return
408   ///     \b true if \a offset is a valid offset and there are \a
409   ///     length bytes available at that offset, \b false otherwise.
isValidOffsetForDataOfSize(uint32_t offset,uint32_t length)410   bool isValidOffsetForDataOfSize(uint32_t offset, uint32_t length) const {
411     return offset + length >= offset && isValidOffset(offset + length - 1);
412   }
413 
414   /// Test the availability of enough bytes of data for a pointer from
415   /// \a offset. The size of a pointer is \a getAddressSize().
416   ///
417   /// @return
418   ///     \b true if \a offset is a valid offset and there are enough
419   ///     bytes for a pointer available at that offset, \b false
420   ///     otherwise.
isValidOffsetForAddress(uint32_t offset)421   bool isValidOffsetForAddress(uint32_t offset) const {
422     return isValidOffsetForDataOfSize(offset, AddressSize);
423   }
424 };
425 
426 } // namespace llvm
427 
428 #endif
429