/* Bra.c -- Branch converters for RISC code
2023-04-02 : Igor Pavlov : Public domain */

#include "Precomp.h"

#include "Bra.h"
#include "CpuArch.h"
#include "RotateDefs.h"

/*
  Program-counter tracking helpers used by the converters below.

  Every converter receives (p) = start of buffer and (pc) = address that
  corresponds to the first byte of the buffer. Inside a converter,
  BR_PC_INIT is executed once, and afterwards BR_PC_GET evaluates to
    (initial pc) + (current p - start of buffer)
  plus any constant adjustment that the converter applied to (pc).

  Optimized variant (pointer size is 4 or 8 bytes): the low 32 bits of the
  pointer value itself are folded into (pc), so BR_PC_GET is a single add.
  Fallback variant: uses the distance to (lim), so the converter must have
  set (lim) to the end of the processed range before BR_PC_GET is used.
*/
#if defined(MY_CPU_SIZEOF_POINTER) \
    && ( MY_CPU_SIZEOF_POINTER == 4 \
      || MY_CPU_SIZEOF_POINTER == 8)
  #define BR_CONV_USE_OPT_PC_PTR
#endif

#ifdef BR_CONV_USE_OPT_PC_PTR
#define BR_PC_INIT  pc -= (UInt32)(SizeT)p;
#define BR_PC_GET   (pc + (UInt32)(SizeT)p)
#else
#define BR_PC_INIT  pc += (UInt32)size;
#define BR_PC_GET   (pc - (UInt32)(SizeT)(lim - p))
// #define BR_PC_INIT
// #define BR_PC_GET   (pc + (UInt32)(SizeT)(p - data))
#endif

/*
  Applies the conversion to value (v): adds (c) when encoding, subtracts (c)
  when decoding. It relies on a variable named (encoding) being in scope.
  The expansion already ends with ';', so call sites use it without one.
*/
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;

// name of the internal (static) worker function for converter (name)
#define Z7_BRANCH_CONV(name) z7_BranchConv_ ## name

/*
  Z7_BRANCH_FUNC_MAIN(name) : header of the internal worker function
      Byte *z7_BranchConv_<name>(Byte *p, SizeT size, UInt32 pc, int encoding)
    It is static and force-inlined, so each public wrapper below gets its own
    copy, where (encoding) is a compile-time constant.
*/
#define Z7_BRANCH_FUNC_MAIN(name) \
static \
Z7_FORCE_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding)

/*
  Z7_BRANCH_FUNC_IMP(name, m, encoding) : defines the non-inlined public
    function m(name)(data, size, pc) that forwards to the worker function
    with the given constant (encoding) value.
*/
#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \
Z7_NO_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
  { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); }

/*
  Z7_BRANCH_FUNCS_IMP(name) : instantiates the public functions for (name):
    the decoder (encoding = 0) always, and the encoder (encoding = 1)
    unless Z7_EXTRACT_ONLY is defined.
*/
#ifdef Z7_EXTRACT_ONLY
#define Z7_BRANCH_FUNCS_IMP(name) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0)
#else
#define Z7_BRANCH_FUNCS_IMP(name) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC, 1)
#endif

/*
  Loop-shape helpers for the ARM64 converter.
  clang:  single loop, BR_NEXT_ITERATION is "continue".
  others: the scan loop is wrapped into an external for (;;), and
          BR_NEXT_ITERATION leaves the inner loop with "break", which
          restarts the inner loop from the external one.
*/
#if defined(__clang__)
#define BR_EXTERNAL_FOR
#define BR_NEXT_ITERATION  continue;
#else
#define BR_EXTERNAL_FOR    for (;;)
#define BR_NEXT_ITERATION  break;
#endif

/*
  Z7_UNLIKELY(x) : branch hint; it expands to a parenthesized expression,
  so it is used as:  if Z7_UNLIKELY(cond) ...
  GCC is not good for __builtin_expect() here; the GCC version threshold
  (1000) presumably is a deliberately unreachable value that keeps the
  hint disabled for GCC.
*/
#if (defined(__clang__) && (__clang_major__ >= 8)) \
  || (defined(__GNUC__) && (__GNUC__ >= 1000))
  /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
  // #define Z7_unlikely [[unlikely]]
  // #define Z7_LIKELY(x)   (__builtin_expect((x), 1))
  #define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
  // #define Z7_likely [[likely]]
#else
  // #define Z7_LIKELY(x)   (x)
  #define Z7_UNLIKELY(x) (x)
  // #define Z7_likely
#endif


Z7_BRANCH_FUNC_MAIN(ARM64)75 Z7_BRANCH_FUNC_MAIN(ARM64)
76 {
77   // Byte *p = data;
78   const Byte *lim;
79   const UInt32 flag = (UInt32)1 << (24 - 4);
80   const UInt32 mask = ((UInt32)1 << 24) - (flag << 1);
81   size &= ~(SizeT)3;
82   // if (size == 0) return p;
83   lim = p + size;
84   BR_PC_INIT
85   pc -= 4;  // because (p) will point to next instruction
86 
87   BR_EXTERNAL_FOR
88   {
89     // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
90     for (;;)
91     {
92       UInt32 v;
93       if Z7_UNLIKELY(p == lim)
94         return p;
95       v = GetUi32a(p);
96       p += 4;
97       if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0)
98       {
99         UInt32 c = BR_PC_GET >> 2;
100         BR_CONVERT_VAL(v, c)
101         v &= 0x03ffffff;
102         v |= 0x94000000;
103         SetUi32a(p - 4, v)
104         BR_NEXT_ITERATION
105       }
106       // v = rotlFixed(v, 8);  v += (flag << 8) - 0x90;  if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0)
107       v -= 0x90000000;  if Z7_UNLIKELY((v & 0x9f000000) == 0)
108       {
109         UInt32 z, c;
110         // v = rotrFixed(v, 8);
111         v += flag; if Z7_UNLIKELY(v & mask) continue;
112         z = (v & 0xffffffe0) | (v >> 26);
113         c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7;
114         BR_CONVERT_VAL(z, c)
115         v &= 0x1f;
116         v |= 0x90000000;
117         v |= z << 26;
118         v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag);
119         SetUi32a(p - 4, v)
120       }
121     }
122   }
123 }
124 Z7_BRANCH_FUNCS_IMP(ARM64)
125 
126 
Z7_BRANCH_FUNC_MAIN(ARM)127 Z7_BRANCH_FUNC_MAIN(ARM)
128 {
129   // Byte *p = data;
130   const Byte *lim;
131   size &= ~(SizeT)3;
132   lim = p + size;
133   BR_PC_INIT
134   /* in ARM: branch offset is relative to the +2 instructions from current instruction.
135      (p) will point to next instruction */
136   pc += 8 - 4;
137 
138   for (;;)
139   {
140     for (;;)
141     {
142       if Z7_UNLIKELY(p >= lim) { return p; }  p += 4;  if Z7_UNLIKELY(p[-1] == 0xeb) break;
143       if Z7_UNLIKELY(p >= lim) { return p; }  p += 4;  if Z7_UNLIKELY(p[-1] == 0xeb) break;
144     }
145     {
146       UInt32 v = GetUi32a(p - 4);
147       UInt32 c = BR_PC_GET >> 2;
148       BR_CONVERT_VAL(v, c)
149       v &= 0x00ffffff;
150       v |= 0xeb000000;
151       SetUi32a(p - 4, v)
152     }
153   }
154 }
155 Z7_BRANCH_FUNCS_IMP(ARM)
156 
157 
Z7_BRANCH_FUNC_MAIN(PPC)158 Z7_BRANCH_FUNC_MAIN(PPC)
159 {
160   // Byte *p = data;
161   const Byte *lim;
162   size &= ~(SizeT)3;
163   lim = p + size;
164   BR_PC_INIT
165   pc -= 4;  // because (p) will point to next instruction
166 
167   for (;;)
168   {
169     UInt32 v;
170     for (;;)
171     {
172       if Z7_UNLIKELY(p == lim)
173         return p;
174       // v = GetBe32a(p);
175       v = *(UInt32 *)(void *)p;
176       p += 4;
177       // if ((v & 0xfc000003) == 0x48000001) break;
178       // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break;
179       if Z7_UNLIKELY(
180           ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001))
181               & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break;
182     }
183     {
184       v = Z7_CONV_NATIVE_TO_BE_32(v);
185       {
186         UInt32 c = BR_PC_GET;
187         BR_CONVERT_VAL(v, c)
188       }
189       v &= 0x03ffffff;
190       v |= 0x48000000;
191       SetBe32a(p - 4, v)
192     }
193   }
194 }
195 Z7_BRANCH_FUNCS_IMP(PPC)
196 
197 
198 #ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
199 #define BR_SPARC_USE_ROTATE
200 #endif
201 
Z7_BRANCH_FUNC_MAIN(SPARC)202 Z7_BRANCH_FUNC_MAIN(SPARC)
203 {
204   // Byte *p = data;
205   const Byte *lim;
206   const UInt32 flag = (UInt32)1 << 22;
207   size &= ~(SizeT)3;
208   lim = p + size;
209   BR_PC_INIT
210   pc -= 4;  // because (p) will point to next instruction
211   for (;;)
212   {
213     UInt32 v;
214     for (;;)
215     {
216       if Z7_UNLIKELY(p == lim)
217         return p;
218       /* // the code without GetBe32a():
219       { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; }
220       */
221       v = GetBe32a(p);
222       p += 4;
223     #ifdef BR_SPARC_USE_ROTATE
224       v = rotlFixed(v, 2);
225       v += (flag << 2) - 1;
226       if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0)
227     #else
228       v += (UInt32)5 << 29;
229       v ^= (UInt32)7 << 29;
230       v += flag;
231       if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0)
232     #endif
233         break;
234     }
235     {
236       // UInt32 v = GetBe32a(p - 4);
237     #ifndef BR_SPARC_USE_ROTATE
238       v <<= 2;
239     #endif
240       {
241         UInt32 c = BR_PC_GET;
242         BR_CONVERT_VAL(v, c)
243       }
244       v &= (flag << 3) - 1;
245     #ifdef BR_SPARC_USE_ROTATE
246       v -= (flag << 2) - 1;
247       v = rotrFixed(v, 2);
248     #else
249       v -= (flag << 2);
250       v >>= 2;
251       v |= (UInt32)1 << 30;
252     #endif
253       SetBe32a(p - 4, v)
254     }
255   }
256 }
257 Z7_BRANCH_FUNCS_IMP(SPARC)
258 
259 
Z7_BRANCH_FUNC_MAIN(ARMT)260 Z7_BRANCH_FUNC_MAIN(ARMT)
261 {
262   // Byte *p = data;
263   Byte *lim;
264   size &= ~(SizeT)1;
265   // if (size == 0) return p;
266   if (size <= 2) return p;
267   size -= 2;
268   lim = p + size;
269   BR_PC_INIT
270   /* in ARM: branch offset is relative to the +2 instructions from current instruction.
271      (p) will point to the +2 instructions from current instruction */
272   // pc += 4 - 4;
273   // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1;
274   // #define ARMT_TAIL_PROC { goto armt_tail; }
275   #define ARMT_TAIL_PROC { return p; }
276 
277   do
278   {
279     /* in MSVC 32-bit x86 compilers:
280        UInt32 version : it loads value from memory with movzx
281        Byte   version : it loads value to 8-bit register (AL/CL)
282        movzx version is slightly faster in some cpus
283     */
284     unsigned b1;
285     // Byte / unsigned
286     b1 = p[1];
287     // optimized version to reduce one (p >= lim) check:
288     // unsigned a1 = p[1];  b1 = p[3];  p += 2;  if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8)
289     for (;;)
290     {
291       unsigned b3; // Byte / UInt32
292       /* (Byte)(b3) normalization can use low byte computations in MSVC.
293          It gives smaller code, and no loss of speed in some compilers/cpus.
294          But new MSVC 32-bit x86 compilers use more slow load
295          from memory to low byte register in that case.
296          So we try to use full 32-bit computations for faster code.
297       */
298       // if (p >= lim) { ARMT_TAIL_PROC }  b3 = b1 + 8;  b1 = p[3];  p += 2;  if ((b3 & b1) >= 0xf8) break;
299       if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC }  b3 = p[3];  p += 2;  if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break;
300       if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC }  b1 = p[3];  p += 2;  if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break;
301     }
302     {
303       /* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation.
304          But gcc/clang for arm64 can use bfi instruction for full code here */
305       UInt32 v =
306           ((UInt32)GetUi16a(p - 2) << 11) |
307           ((UInt32)GetUi16a(p) & 0x7FF);
308       /*
309       UInt32 v =
310             ((UInt32)p[1 - 2] << 19)
311           + (((UInt32)p[1] & 0x7) << 8)
312           + (((UInt32)p[-2] << 11))
313           + (p[0]);
314       */
315       p += 2;
316       {
317         UInt32 c = BR_PC_GET >> 1;
318         BR_CONVERT_VAL(v, c)
319       }
320       SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000))
321       SetUi16a(p - 2, (UInt16)(v | 0xf800))
322       /*
323       p[-4] = (Byte)(v >> 11);
324       p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7));
325       p[-2] = (Byte)v;
326       p[-1] = (Byte)(0xf8 | (v >> 8));
327       */
328     }
329   }
330   while (p < lim);
331   return p;
332   // armt_tail:
333   // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; }  return lim;
334   // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2));
335   // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
336   // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
337 }
338 Z7_BRANCH_FUNCS_IMP(ARMT)
339 
340 
341 // #define BR_IA64_NO_INLINE
342 
Z7_BRANCH_FUNC_MAIN(IA64)343 Z7_BRANCH_FUNC_MAIN(IA64)
344 {
345   // Byte *p = data;
346   const Byte *lim;
347   size &= ~(SizeT)15;
348   lim = p + size;
349   pc -= 1 << 4;
350   pc >>= 4 - 1;
351   // pc -= 1 << 1;
352 
353   for (;;)
354   {
355     unsigned m;
356     for (;;)
357     {
358       if Z7_UNLIKELY(p == lim)
359         return p;
360       m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e));
361       p += 16;
362       pc += 1 << 1;
363       if (m &= 3)
364         break;
365     }
366     {
367       p += (ptrdiff_t)m * 5 - 20; // negative value is expected here.
368       do
369       {
370         const UInt32 t =
371           #if defined(MY_CPU_X86_OR_AMD64)
372             // we use 32-bit load here to reduce code size on x86:
373             GetUi32(p);
374           #else
375             GetUi16(p);
376           #endif
377         UInt32 z = GetUi32(p + 1) >> m;
378         p += 5;
379         if (((t >> m) & (0x70 << 1)) == 0
380             && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0)
381         {
382           UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z;
383           z ^= v;
384         #ifdef BR_IA64_NO_INLINE
385           v |= (v & ((UInt32)1 << (23 + 1))) >> 3;
386           {
387             UInt32 c = pc;
388             BR_CONVERT_VAL(v, c)
389           }
390           v &= (0x1fffff << 1) | 1;
391         #else
392           {
393             if (encoding)
394             {
395               // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits
396               pc &= (0x1fffff << 1) | 1;
397               v += pc;
398             }
399             else
400             {
401               // pc |= 0xc00000 << 1; // we need to set at least 2 bits
402               pc |= ~(UInt32)((0x1fffff << 1) | 1);
403               v -= pc;
404             }
405           }
406           v &= ~(UInt32)(0x600000 << 1);
407         #endif
408           v += (0x700000 << 1);
409           v &= (0x8fffff << 1) | 1;
410           z |= v;
411           z <<= m;
412           SetUi32(p + 1 - 5, z)
413         }
414         m++;
415       }
416       while (m &= 3); // while (m < 4);
417     }
418   }
419 }
420 Z7_BRANCH_FUNCS_IMP(IA64)
421