• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *          Copyright Andrey Semashev 2007 - 2015.
3  * Distributed under the Boost Software License, Version 1.0.
4  *    (See accompanying file LICENSE_1_0.txt or copy at
5  *          http://www.boost.org/LICENSE_1_0.txt)
6  */
7 /*!
8  * \file   dump_ssse3.cpp
9  * \author Andrey Semashev
10  * \date   05.05.2013
11  *
12  * \brief  This file is part of the Boost.Log library implementation; see the library documentation
13  *         at http://www.boost.org/doc/libs/release/libs/log/doc/html/index.html.
14  */
15 
16 // NOTE: You should generally avoid including headers as much as possible here, because this file
17 //       is compiled with special compiler options, and any included header may result in generation of
18 //       unintended code with these options and violation of ODR.
19 #include <boost/log/detail/config.hpp>
20 #include <ostream>
21 #include <tmmintrin.h>
22 #include <boost/cstdint.hpp>
23 #include <boost/log/detail/header.hpp>
24 
25 #if defined(__x86_64) || defined(__x86_64__) || \
26     defined(__amd64__) || defined(__amd64) || \
27     defined(_M_X64)
28 #define BOOST_LOG_AUX_X86_64
29 #endif
30 
31 namespace boost {
32 
33 BOOST_LOG_OPEN_NAMESPACE
34 
35 namespace aux {
36 
37 extern const char g_hex_char_table[2][16];
38 
39 template< typename CharT >
40 extern void dump_data_generic(const void* data, std::size_t size, std::basic_ostream< CharT >& strm);
41 
42 BOOST_LOG_ANONYMOUS_NAMESPACE {
43 
//! Sizing of the conversion loop in dump_data_ssse3
enum
{
    //! Number of 16-byte input packs converted before the temporary buffer is written to the stream
    packs_per_stride = 32,
    //! Number of input bytes covered by one stride
    stride = 16 * packs_per_stride
};
49 
50 union xmm_constant
51 {
52     uint8_t as_bytes[16];
53     __m128i as_mm;
54 
55     BOOST_FORCEINLINE operator __m128i () const { return as_mm; }
56 };
57 
58 static const xmm_constant mm_shuffle_pattern1 = {{ 0x80, 0, 1, 0x80, 2, 3, 0x80, 4, 5, 0x80, 6, 7, 0x80, 8, 9, 0x80 }};
59 static const xmm_constant mm_shuffle_pattern2 = {{ 0, 1, 0x80, 2, 3, 0x80, 4, 5, 0x80, 6, 7, 0x80, 8, 9, 0x80, 10 }};
60 static const xmm_constant mm_shuffle_pattern3 = {{ 5, 0x80, 6, 7, 0x80, 8, 9, 0x80, 10, 11, 0x80, 12, 13, 0x80, 14, 15 }};
61 
62 #if defined(BOOST_LOG_AUX_X86_64)
63 
64 // x86-64 architecture has more registers which we can utilize to pass constants
65 #define BOOST_LOG_AUX_MM_CONSTANT_ARGS_DECL __m128i mm_15, __m128i mm_9, __m128i mm_char_0, __m128i mm_char_space,
66 #define BOOST_LOG_AUX_MM_CONSTANT_ARGS mm_15, mm_9, mm_char_0, mm_char_space,
67 #define BOOST_LOG_AUX_MM_CONSTANTS \
68     const __m128i mm_15 = _mm_set1_epi32(0x0F0F0F0F);\
69     const __m128i mm_9 = _mm_set1_epi32(0x09090909);\
70     const __m128i mm_char_0 = _mm_set1_epi32(0x30303030);\
71     const __m128i mm_char_space = _mm_set1_epi32(0x20202020);
72 
73 #else
74 
75 // MSVC in 32-bit mode is not able to pass all constants to dump_pack, and is also not able to align them on the stack, so we have to fetch them from global constants
76 static const xmm_constant mm_15 = {{ 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F }};
77 static const xmm_constant mm_9 = {{ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09 }};
78 static const xmm_constant mm_char_0 = {{ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 }};
79 static const xmm_constant mm_char_space = {{ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }};
80 #define BOOST_LOG_AUX_MM_CONSTANT_ARGS_DECL
81 #define BOOST_LOG_AUX_MM_CONSTANT_ARGS
82 #define BOOST_LOG_AUX_MM_CONSTANTS
83 
84 #endif
85 
86 //! Dumps a pack of input data into a string of 8 bit ASCII characters
87 static BOOST_FORCEINLINE void dump_pack
88 (
89     BOOST_LOG_AUX_MM_CONSTANT_ARGS_DECL
90     __m128i mm_char_10_to_a, __m128i mm_input,
91     __m128i& mm_output1, __m128i& mm_output2, __m128i& mm_output3
92 )
93 {
94     // Split half-bytes
95     __m128i mm_input_hi = _mm_and_si128(_mm_srli_epi16(mm_input, 4), mm_15);
96     __m128i mm_input_lo = _mm_and_si128(mm_input, mm_15);
97 
98     // Stringize each of the halves
99     __m128i mm_addend_hi = _mm_cmpgt_epi8(mm_input_hi, mm_9);
100     __m128i mm_addend_lo = _mm_cmpgt_epi8(mm_input_lo, mm_9);
101     mm_addend_hi = _mm_and_si128(mm_char_10_to_a, mm_addend_hi);
102     mm_addend_lo = _mm_and_si128(mm_char_10_to_a, mm_addend_lo);
103 
104     mm_input_hi = _mm_add_epi8(mm_input_hi, mm_char_0);
105     mm_input_lo = _mm_add_epi8(mm_input_lo, mm_char_0);
106 
107     mm_input_hi = _mm_add_epi8(mm_input_hi, mm_addend_hi);
108     mm_input_lo = _mm_add_epi8(mm_input_lo, mm_addend_lo);
109 
110     // Join them back together
111     __m128i mm_1 = _mm_unpacklo_epi8(mm_input_hi, mm_input_lo);
112     __m128i mm_2 = _mm_unpackhi_epi8(mm_input_hi, mm_input_lo);
113 
114     // Insert spaces between stringized bytes:
115     // |0123456789abcdef|0123456789abcdef|
116     // | 01 23 45 67 89 |ab cd ef 01 23 4|5 67 89 ab cd ef|
117     mm_output1 = _mm_shuffle_epi8(mm_1, mm_shuffle_pattern1.as_mm);
118     mm_output2 = _mm_shuffle_epi8(_mm_alignr_epi8(mm_2, mm_1, 10), mm_shuffle_pattern2.as_mm);
119     mm_output3 = _mm_shuffle_epi8(mm_2, mm_shuffle_pattern3.as_mm);
120 
121     mm_output1 = _mm_max_epu8(mm_output1, mm_char_space);
122     mm_output2 = _mm_max_epu8(mm_output2, mm_char_space);
123     mm_output3 = _mm_max_epu8(mm_output3, mm_char_space);
124 }
125 
126 template< typename CharT >
127 BOOST_FORCEINLINE void store_characters(__m128i mm_chars, CharT* buf)
128 {
129     switch (sizeof(CharT))
130     {
131     case 1:
132         _mm_store_si128(reinterpret_cast< __m128i* >(buf), mm_chars);
133         break;
134 
135     case 2:
136         {
137             __m128i mm_0 = _mm_setzero_si128();
138             _mm_store_si128(reinterpret_cast< __m128i* >(buf), _mm_unpacklo_epi8(mm_chars, mm_0));
139             _mm_store_si128(reinterpret_cast< __m128i* >(buf) + 1, _mm_unpackhi_epi8(mm_chars, mm_0));
140         }
141         break;
142 
143     case 4:
144         {
145             __m128i mm_0 = _mm_setzero_si128();
146             __m128i mm = _mm_unpacklo_epi8(mm_chars, mm_0);
147             _mm_store_si128(reinterpret_cast< __m128i* >(buf), _mm_unpacklo_epi16(mm, mm_0));
148             _mm_store_si128(reinterpret_cast< __m128i* >(buf) + 1, _mm_unpackhi_epi16(mm, mm_0));
149             mm = _mm_unpackhi_epi8(mm_chars, mm_0);
150             _mm_store_si128(reinterpret_cast< __m128i* >(buf) + 2, _mm_unpacklo_epi16(mm, mm_0));
151             _mm_store_si128(reinterpret_cast< __m128i* >(buf) + 3, _mm_unpackhi_epi16(mm, mm_0));
152         }
153         break;
154     }
155 }
156 
157 template< typename CharT >
158 BOOST_FORCEINLINE void dump_data_ssse3(const void* data, std::size_t size, std::basic_ostream< CharT >& strm)
159 {
160     typedef CharT char_type;
161 
162     char_type buf_storage[stride * 3u + 16u];
163     // Align the temporary buffer at 16 bytes
164     char_type* const buf = reinterpret_cast< char_type* >((uint8_t*)buf_storage + (16u - (((uintptr_t)(char_type*)buf_storage) & 15u)));
165     char_type* buf_begin = buf + 1u; // skip the first space of the first chunk
166     char_type* buf_end = buf + stride * 3u;
167 
168     __m128i mm_char_10_to_a;
169     if (strm.flags() & std::ios_base::uppercase)
170         mm_char_10_to_a = _mm_set1_epi32(0x07070707); // '9' is 0x39 and 'A' is 0x41 in ASCII, so we have to add 0x07 to 0x3A to get uppercase letters
171     else
172         mm_char_10_to_a = _mm_set1_epi32(0x27272727); // ...and 'a' is 0x61, which means we have to add 0x27 to 0x3A to get lowercase letters
173 
174     // First, check the input alignment
175     const uint8_t* p = static_cast< const uint8_t* >(data);
176     const std::size_t prealign_size = ((16u - ((uintptr_t)p & 15u)) & 15u);
177     if (BOOST_UNLIKELY(prealign_size > 0))
178     {
179         __m128i mm_input = _mm_lddqu_si128(reinterpret_cast< const __m128i* >(p));
180         BOOST_LOG_AUX_MM_CONSTANTS
181 
182         __m128i mm_output1, mm_output2, mm_output3;
183         dump_pack(BOOST_LOG_AUX_MM_CONSTANT_ARGS mm_char_10_to_a, mm_input, mm_output1, mm_output2, mm_output3);
184 
185         store_characters(mm_output1, buf);
186         store_characters(mm_output2, buf + 16u);
187         store_characters(mm_output3, buf + 32u);
188 
189         strm.write(buf_begin, prealign_size * 3u - 1u);
190 
191         buf_begin = buf;
192         size -= prealign_size;
193         p += prealign_size;
194     }
195 
196     const std::size_t stride_count = size / stride;
197     std::size_t tail_size = size % stride;
198     for (std::size_t i = 0; i < stride_count; ++i)
199     {
200         char_type* b = buf;
201         BOOST_LOG_AUX_MM_CONSTANTS
202 
203         for (unsigned int j = 0; j < packs_per_stride; ++j, b += 3u * 16u, p += 16u)
204         {
205             __m128i mm_input = _mm_load_si128(reinterpret_cast< const __m128i* >(p));
206             __m128i mm_output1, mm_output2, mm_output3;
207             dump_pack(BOOST_LOG_AUX_MM_CONSTANT_ARGS mm_char_10_to_a, mm_input, mm_output1, mm_output2, mm_output3);
208 
209             store_characters(mm_output1, b);
210             store_characters(mm_output2, b + 16u);
211             store_characters(mm_output3, b + 32u);
212         }
213 
214         strm.write(buf_begin, buf_end - buf_begin);
215         buf_begin = buf;
216     }
217 
218     if (BOOST_UNLIKELY(tail_size > 0))
219     {
220         char_type* b = buf;
221         while (tail_size >= 16u)
222         {
223             __m128i mm_input = _mm_load_si128(reinterpret_cast< const __m128i* >(p));
224             BOOST_LOG_AUX_MM_CONSTANTS
225 
226             __m128i mm_output1, mm_output2, mm_output3;
227             dump_pack(BOOST_LOG_AUX_MM_CONSTANT_ARGS mm_char_10_to_a, mm_input, mm_output1, mm_output2, mm_output3);
228 
229             store_characters(mm_output1, b);
230             store_characters(mm_output2, b + 16u);
231             store_characters(mm_output3, b + 32u);
232 
233             b += 3u * 16u;
234             p += 16u;
235             tail_size -= 16u;
236         }
237 
238         const char* const char_table = g_hex_char_table[(strm.flags() & std::ios_base::uppercase) != 0];
239         for (unsigned int i = 0; i < tail_size; ++i, ++p, b += 3u)
240         {
241             uint32_t n = *p;
242             b[0] = static_cast< char_type >(' ');
243             b[1] = static_cast< char_type >(char_table[n >> 4]);
244             b[2] = static_cast< char_type >(char_table[n & 0x0F]);
245         }
246 
247         strm.write(buf_begin, b - buf_begin);
248     }
249 }
250 
251 } // namespace
252 
253 void dump_data_char_ssse3(const void* data, std::size_t size, std::basic_ostream< char >& strm)
254 {
255     if (size >= 16)
256     {
257         dump_data_ssse3(data, size, strm);
258     }
259     else
260     {
261         dump_data_generic(data, size, strm);
262     }
263 }
264 
dump_data_wchar_ssse3(const void * data,std::size_t size,std::basic_ostream<wchar_t> & strm)265 void dump_data_wchar_ssse3(const void* data, std::size_t size, std::basic_ostream< wchar_t >& strm)
266 {
267     if (size >= 16)
268     {
269         dump_data_ssse3(data, size, strm);
270     }
271     else
272     {
273         dump_data_generic(data, size, strm);
274     }
275 }
276 
277 #if !defined(BOOST_NO_CXX11_CHAR16_T)
dump_data_char16_ssse3(const void * data,std::size_t size,std::basic_ostream<char16_t> & strm)278 void dump_data_char16_ssse3(const void* data, std::size_t size, std::basic_ostream< char16_t >& strm)
279 {
280     if (size >= 16)
281     {
282         dump_data_ssse3(data, size, strm);
283     }
284     else
285     {
286         dump_data_generic(data, size, strm);
287     }
288 }
289 #endif
290 
291 #if !defined(BOOST_NO_CXX11_CHAR32_T)
dump_data_char32_ssse3(const void * data,std::size_t size,std::basic_ostream<char32_t> & strm)292 void dump_data_char32_ssse3(const void* data, std::size_t size, std::basic_ostream< char32_t >& strm)
293 {
294     if (size >= 16)
295     {
296         dump_data_ssse3(data, size, strm);
297     }
298     else
299     {
300         dump_data_generic(data, size, strm);
301     }
302 }
303 #endif
304 
305 } // namespace aux
306 
307 BOOST_LOG_CLOSE_NAMESPACE // namespace log
308 
309 } // namespace boost
310 
311 #include <boost/log/detail/footer.hpp>
312