/* -*- mode: C; c-basic-offset: 3; -*- */

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2012-2017  Florian Krohm

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include <stdio.h>    // fprintf
#include <assert.h>   // assert
#if defined(__APPLE__)
#include <machine/endian.h>
#define __BYTE_ORDER    BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#elif defined(__sun)
#define __LITTLE_ENDIAN 1234
#define __BIG_ENDIAN    4321
#  if defined(_LITTLE_ENDIAN)
#  define __BYTE_ORDER    __LITTLE_ENDIAN
#  else
#  define __BYTE_ORDER    __BIG_ENDIAN
#  endif
#else
#include <endian.h>
#endif
#include <inttypes.h>
#include "vbits.h"
#include "vtest.h"


/* Return the bits of V if they fit into 64 bits. If V has fewer than
   64 bits, the bit pattern is zero-extended to the left. */
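/* For example, a 16-bit vbits pattern 0xabcd is returned as the 64-bit
   value 0x000000000000abcd. */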
static uint64_t
get_bits64(vbits_t v)
{
   switch (v.num_bits) {
   case 1:  return v.bits.u32;
   case 8:  return v.bits.u8;
   case 16: return v.bits.u16;
   case 32: return v.bits.u32;
   case 64: return v.bits.u64;
   case 128:
   case 256:
      /* fall through */
   default:
      panic(__func__);
   }
}

void
print_vbits(FILE *fp, vbits_t v)
{
   switch (v.num_bits) {
   case 1:  fprintf(fp, "%08x", v.bits.u32); break;
   case 8:  fprintf(fp, "%02x", v.bits.u8);  break;
   case 16: fprintf(fp, "%04x", v.bits.u16); break;
   case 32: fprintf(fp, "%08x", v.bits.u32); break;
   case 64: fprintf(fp, "%016"PRIx64, v.bits.u64); break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         fprintf(fp, "%016"PRIx64, v.bits.u128[1]);
         fprintf(fp, "%016"PRIx64, v.bits.u128[0]);
      } else {
         fprintf(fp, "%016"PRIx64, v.bits.u128[0]);
         fprintf(fp, "%016"PRIx64, v.bits.u128[1]);
      }
      break;
   case 256:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         fprintf(fp, "%016"PRIx64, v.bits.u256[3]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[2]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[1]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[0]);
      } else {
         fprintf(fp, "%016"PRIx64, v.bits.u256[0]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[1]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[2]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[3]);
      }
      break;
   default:
      panic(__func__);
   }
}


/* Return a value where all bits are set to undefined. */
vbits_t
undefined_vbits(unsigned num_bits)
{
   vbits_t new = { .num_bits = num_bits };

   switch (num_bits) {
   case   1: new.bits.u32 = 0x01;   break;
   case   8: new.bits.u8  = 0xff;   break;
   case  16: new.bits.u16 = 0xffff; break;
   case  32: new.bits.u32 = ~0;     break;
   case  64: new.bits.u64 = ~0ull;  break;
   case 128: new.bits.u128[0] = ~0ull;
             new.bits.u128[1] = ~0ull;
             break;
   case 256: new.bits.u256[0] = ~0ull;
             new.bits.u256[1] = ~0ull;
             new.bits.u256[2] = ~0ull;
             new.bits.u256[3] = ~0ull;
             break;
   default:
      panic(__func__);
   }
   return new;
}

/* The following routine, undefined_vbits_BxE(), returns a 128-bit
 * vector with E elements, each B bits wide (the "bits" and "elements"
 * parameters). If any bit in an element is undefined, then all bits in
 * that element are undefined in the result.
 */
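/* For example, with bits = 16 and elements = 8, an input whose low 64 bits
 * are 0x0000010000000001 (vbits set in elements 0 and 2) produces
 * 0x0000ffff0000ffff in the low 64 bits of the result.
 */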
vbits_t
undefined_vbits_BxE(unsigned int bits, unsigned int elements, vbits_t v)
{
   vbits_t new = { .num_bits = v.num_bits };
   uint64_t mask = ~0ull >> (64 - bits);
   int i, j;

   assert((elements % 2) == 0);
   assert(bits <= 64);

   for (i = 0; i < 2; i++) {
      new.bits.u128[i] = 0ull;

      for (j = 0; j < elements/2; j++) {
         if ((v.bits.u128[i] & (mask << (j*bits))) != 0)
            new.bits.u128[i] |= (mask << (j*bits));
      }
   }
   return new;
}

/* The routine undefined_vbits_BxE_rotate() returns a 128-bit vector
 * with E elements, each B bits wide. The vbits of each element of v are
 * rotated left by the amount given in the corresponding element of val.
 * The rotate-amount field is assumed to be at most 8 bits wide; a
 * negative amount rotates right.
 */
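/* For example, with bits = 8, an element value of 0x01 rotated by 3
 * becomes 0x08; rotated by -3 (shift field 0xfd) it becomes 0x20.
 */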
vbits_t
undefined_vbits_BxE_rotate(unsigned int bits, unsigned int elements,
                           vbits_t v, value_t val)
{
   vbits_t new = { .num_bits = v.num_bits };
   uint64_t mask = ~0ull >> (64 - bits);
   uint64_t const shift_mask = 0xFF;
   uint64_t element;
   int i, j;
   signed char shift;

   assert((elements % 2) == 0);
   assert(bits <= 64);

   for (i = 0; i < 2; i++) {
      new.bits.u128[i] = 0ull;

      for (j = 0; j < elements/2; j++) {
         element = (v.bits.u128[i] >> (j*bits)) & mask;
         shift = (int)((val.u128[i] >> (j*bits)) & shift_mask);

         if (shift < 0) {
            /* right shift */
            new.bits.u128[i] = element >> -shift;

            /* OR in the bits shifted out into the top of the element */
            new.bits.u128[i] |= element << (bits + shift);
         } else {
            /* left shift */
            /* upper bits from shift */
            new.bits.u128[i] = element << shift;

            /* OR in the bits shifted out into the bottom of the element */
            new.bits.u128[i] |= element >> (bits - shift);
         }
      }
   }
   return new;
}

/* Only the even-numbered elements of the input are used by the Iop. */
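/* For example, with bits = 32 and elements = 4, the computed mask keeps
 * only the low 32 bits of each 64-bit half, i.e. elements 0 and 2.
 */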
vbits_t
undefined_vbits_128_even_element(unsigned int bits, unsigned int elements,
                                 vbits_t v)
{
   int i;
   uint64_t mask;
   unsigned int const element_width = 128/elements;
   vbits_t new = { .num_bits = v.num_bits };

   assert((elements % 2) == 0);
   assert(bits <= 64);

   /* Create a 128-bit mask in which the bits of the even-numbered
    * elements are all ones.
    */
   mask = ~0ull >> (64 - bits);

   for (i = 2; i < elements/2; i = i+2) {
      mask |= mask << (i * element_width);
   }

   new.bits.u128[0] = mask & v.bits.u128[0];
   new.bits.u128[1] = mask & v.bits.u128[1];

   return new;
}

/* Concatenate bit i from each byte j. Place the concatenated 8-bit value
 * into byte i of the result. Do this for all i from 0 to 7 and j from 0
 * to 7 of each 64-bit element.
 */
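/* In effect each 64-bit element is treated as an 8x8 bit matrix that is
 * transposed: the vbit at (byte j, bit i) moves to (byte i, bit j). For
 * example, a set vbit at bit position 21 (byte 2, bit 5) moves to bit
 * position 42 (byte 5, bit 2).
 */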
vbits_t
undefined_vbits_64x2_transpose(vbits_t v)
{
   vbits_t new = { .num_bits = v.num_bits };
   unsigned int bit, byte, element;
   uint64_t value, new_value, select_bit;

   for (element = 0; element < 2; element++) {
      value = v.bits.u128[element];
      new_value = 0;
      for (byte = 0; byte < 8; byte++) {
         for (bit = 0; bit < 8; bit++) {
            select_bit = 1ULL & (value >> (bit + 8*byte));
            new_value |= select_bit << (bit*8 + byte);
         }
      }
      new.bits.u128[element] = new_value;
   }
   return new;
}

/* The routine takes a 256-bit vector value stored across the two 128-bit
 * source operands src1 and src2. The size of each element in the input is
 * src_num_bits. The elements are narrowed to result_num_bits and packed
 * into the result. If saturate is true, then all the result bits are
 * set to 1 if the source element cannot be represented in result_num_bits.
 */
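/* For example, narrowing 64-bit source elements to 32-bit result elements
 * with saturate set: per the code below, a vbit in the lower half of a
 * source element yields a result element of 0x0000ffff, while a vbit in
 * the upper half yields 0xffff0000 (see the POWER note in the body).
 */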
vbits_t
undefined_vbits_Narrow256_AtoB(unsigned int src_num_bits,
                               unsigned int result_num_bits,
                               vbits_t src1_v, value_t src1_value,
                               vbits_t src2_v, value_t src2_value,
                               bool saturate)
{

   vbits_t new = { .num_bits = src1_v.num_bits };
   unsigned int i;
   uint64_t vbits, new_value;
   uint64_t const src_mask = ~0x0ULL >> (64 - src_num_bits);
   uint64_t const result_mask = ~0x0ULL >> (64 - result_num_bits);
   unsigned int num_elements_per_64_bits = src_num_bits/64;
   unsigned int shift;

   /*
    * NOTE: POWER PPC
    * The saturated result is 0xFFFF if the vbit is in one of the lower
    * 32 bits of the source. The saturated result is 0xFFFF0000 if the
    * vbit is in the upper 32 bits of the source. Not sure what
    * the saturated result is in general for a B-bit result.
    *
    * ONLY TESTED FOR 64 bit input, 32 bit result
    */
   uint64_t const saturated_result = 0xFFFFULL;

   /* Source elements are split between the two source operands */

   assert(src_num_bits <= 64);
   assert(result_num_bits < 64);
   assert(result_num_bits < src_num_bits);

   /* Narrow the elements from src1 to the upper 64 bits of the result.
    * Do each of the 64-bit values that make up a u128.
    */
   new_value = 0;
   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src1_v.bits.u128[0] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i;
      if (vbits) {
         if (saturate) {
            /* Value will not fit in B bits, saturate the result as needed. */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (shift + result_num_bits/2);
            else
               new_value |= saturated_result << shift;
         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }

   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src1_v.bits.u128[1] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i + (num_elements_per_64_bits
                                     * result_num_bits);
      if (vbits) {
         if (saturate) {
            /* Value will not fit in result_num_bits, saturate the result
             * as needed.
             */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (shift + result_num_bits/2);
            else
               new_value |= saturated_result << shift;
         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }
   if (__BYTE_ORDER == __LITTLE_ENDIAN)
      new.bits.u128[1] = new_value;
   else
      /* Big endian: swap the upper and lower 32 bits of new_value */
      new.bits.u128[0] = (new_value << 32) | (new_value >> 32);

   new_value = 0;
   /* Narrow the elements from src2 to the lower 64 bits of the result.
    * Do each of the 64-bit values that make up a u128.
    */
   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src2_v.bits.u128[0] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i;
      if (vbits) {
         if (saturate) {
            /* Value will not fit in the result, saturate as needed. */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (shift + result_num_bits/2);
            else
               new_value |= saturated_result << shift;
         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }

   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src2_v.bits.u128[1] >> (i * src_num_bits);
      vbits &= src_mask;

      if (vbits) {
         if (saturate) {
            /* Value will not fit in result_num_bits, saturate the result
             * as needed.
             */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (result_num_bits * i
                                                 + result_num_bits/2
                                                 + (num_elements_per_64_bits
                                                    * result_num_bits));
            else
               new_value |= saturated_result << (result_num_bits * i
                                                 + (num_elements_per_64_bits
                                                    * result_num_bits));

         } else {
            new_value |= (vbits & result_mask) << (result_num_bits * i
                                                   + (num_elements_per_64_bits
                                                      * result_num_bits));
         }
      }
   }
   if (__BYTE_ORDER == __LITTLE_ENDIAN)
      new.bits.u128[0] = new_value;
   else
      /* Big endian: swap the upper and lower 32 bits of new_value */
      new.bits.u128[1] = (new_value << 32) | (new_value >> 32);

   return new;
}

/* Return a value where all bits are set to defined. */
vbits_t
defined_vbits(unsigned num_bits)
{
   vbits_t new = { .num_bits = num_bits };

   switch (num_bits) {
   case   1: new.bits.u32 = 0x0; break;
   case   8: new.bits.u8  = 0x0; break;
   case  16: new.bits.u16 = 0x0; break;
   case  32: new.bits.u32 = 0x0; break;
   case  64: new.bits.u64 = 0x0; break;
   case 128: new.bits.u128[0] = 0x0;
             new.bits.u128[1] = 0x0;
             break;
   case 256: new.bits.u256[0] = 0x0;
             new.bits.u256[1] = 0x0;
             new.bits.u256[2] = 0x0;
             new.bits.u256[3] = 0x0;
             break;
   default:
      panic(__func__);
   }
   return new;
}


/* Return 1 if equal. */
int
equal_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   switch (v1.num_bits) {
   case 1:   return v1.bits.u32 == v2.bits.u32;
   case 8:   return v1.bits.u8  == v2.bits.u8;
   case 16:  return v1.bits.u16 == v2.bits.u16;
   case 32:  return v1.bits.u32 == v2.bits.u32;
   case 64:  return v1.bits.u64 == v2.bits.u64;
   case 128: return v1.bits.u128[0] == v2.bits.u128[0] &&
                    v1.bits.u128[1] == v2.bits.u128[1];
   case 256: return v1.bits.u256[0] == v2.bits.u256[0] &&
                    v1.bits.u256[1] == v2.bits.u256[1] &&
                    v1.bits.u256[2] == v2.bits.u256[2] &&
                    v1.bits.u256[3] == v2.bits.u256[3];
   default:
      panic(__func__);
   }
}


/* Truncate the bit pattern in V to NUM_BITS bits. */
vbits_t
truncate_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits <= v.num_bits);

   if (num_bits == v.num_bits) return v;

   vbits_t new = { .num_bits = num_bits };

   if (num_bits <= 64) {
      uint64_t bits;

      if (v.num_bits <= 64)
         bits = get_bits64(v);
      else if (v.num_bits == 128)
         if (__BYTE_ORDER == __LITTLE_ENDIAN)
            bits = v.bits.u128[0];
         else
            bits = v.bits.u128[1];
      else if (v.num_bits == 256)
         if (__BYTE_ORDER == __LITTLE_ENDIAN)
            bits = v.bits.u256[0];
         else
            bits = v.bits.u256[3];
      else
         panic(__func__);

      switch (num_bits) {
      case 1:  new.bits.u32 = bits & 0x01;   break;
      case 8:  new.bits.u8  = bits & 0xff;   break;
      case 16: new.bits.u16 = bits & 0xffff; break;
      case 32: new.bits.u32 = bits & ~0u;    break;
      case 64: new.bits.u64 = bits & ~0ll;   break;
      default:
         panic(__func__);
      }
      return new;
   }

   if (num_bits == 128) {
      assert(v.num_bits == 256);
      /* From 256 bits to 128 */
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u128[0] = v.bits.u256[0];
         new.bits.u128[1] = v.bits.u256[1];
      } else {
         new.bits.u128[0] = v.bits.u256[2];
         new.bits.u128[1] = v.bits.u256[3];
      }
      return new;
   }

   /* Cannot truncate to 256 bits from something larger */
   panic(__func__);
}


/* Helper function to compute left_vbits */
static uint64_t
left64(uint64_t x)
{
   // left(x) = x | -x
   return x | (~x + 1);
}
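/* For example, left64(0x30) == 0xfffffffffffffff0: every bit at or above
   the lowest set bit becomes 1. */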


vbits_t
left_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits >= v.num_bits);

   vbits_t new = { .num_bits = num_bits };

   if (v.num_bits <= 64) {
      uint64_t bits = left64(get_bits64(v));

      switch (num_bits) {
      case 8:  new.bits.u8  = bits & 0xff;   break;
      case 16: new.bits.u16 = bits & 0xffff; break;
      case 32: new.bits.u32 = bits & ~0u;    break;
      case 64: new.bits.u64 = bits & ~0ll;   break;
      case 128:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u128[0] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u128[1] = ~0ull;
            } else {
               new.bits.u128[1] = 0;
            }
         } else {
            new.bits.u128[1] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u128[0] = ~0ull;
            } else {
               new.bits.u128[0] = 0;
            }
         }
         break;
      case 256:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u256[0] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u256[1] = ~0ull;
               new.bits.u256[2] = ~0ull;
               new.bits.u256[3] = ~0ull;
            } else {
               new.bits.u256[1] = 0;
               new.bits.u256[2] = 0;
               new.bits.u256[3] = 0;
            }
         } else {
            new.bits.u256[3] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u256[0] = ~0ull;
               new.bits.u256[1] = ~0ull;
               new.bits.u256[2] = ~0ull;
            } else {
               new.bits.u256[0] = 0;
               new.bits.u256[1] = 0;
               new.bits.u256[2] = 0;
            }
         }
         break;
      default:
         panic(__func__);
      }
      return new;
   }

   if (v.num_bits == 128) {
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         if (v.bits.u128[1] != 0) {
            new.bits.u128[0] = v.bits.u128[0];
            new.bits.u128[1] = left64(v.bits.u128[1]);
         } else {
            new.bits.u128[0] = left64(v.bits.u128[0]);
            if (new.bits.u128[0] & (1ull << 63)) {  // MSB is set
               new.bits.u128[1] = ~0ull;
            } else {
               new.bits.u128[1] = 0;
            }
         }
      } else {
         if (v.bits.u128[0] != 0) {
            new.bits.u128[0] = left64(v.bits.u128[0]);
            new.bits.u128[1] = v.bits.u128[1];
         } else {
            new.bits.u128[1] = left64(v.bits.u128[1]);
            if (new.bits.u128[1] & (1ull << 63)) {  // MSB is set
               new.bits.u128[0] = ~0ull;
            } else {
               new.bits.u128[0] = 0;
            }
         }
      }
      if (num_bits == 128) return new;

      assert(num_bits == 256);

      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         uint64_t b1 = new.bits.u128[1];
         uint64_t b0 = new.bits.u128[0];

         new.bits.u256[0] = b0;
         new.bits.u256[1] = b1;

         if (new.bits.u256[1] & (1ull << 63)) {  // MSB is set
            new.bits.u256[2] = ~0ull;
            new.bits.u256[3] = ~0ull;
         } else {
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         }
      } else {
         uint64_t b1 = new.bits.u128[0];
         uint64_t b0 = new.bits.u128[1];

         new.bits.u256[2] = b0;
         new.bits.u256[3] = b1;

         if (new.bits.u256[2] & (1ull << 63)) {  // MSB is set
            new.bits.u256[0] = ~0ull;
            new.bits.u256[1] = ~0ull;
         } else {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
         }
      }
      return new;
   }

   panic(__func__);
}


vbits_t
or_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   vbits_t new = { .num_bits = v1.num_bits };

   switch (v1.num_bits) {
   case 8:   new.bits.u8  = v1.bits.u8  | v2.bits.u8;  break;
   case 16:  new.bits.u16 = v1.bits.u16 | v2.bits.u16; break;
   case 32:  new.bits.u32 = v1.bits.u32 | v2.bits.u32; break;
   case 64:  new.bits.u64 = v1.bits.u64 | v2.bits.u64; break;
   case 128: new.bits.u128[0] = v1.bits.u128[0] | v2.bits.u128[0];
             new.bits.u128[1] = v1.bits.u128[1] | v2.bits.u128[1];
             break;
   case 256: new.bits.u256[0] = v1.bits.u256[0] | v2.bits.u256[0];
             new.bits.u256[1] = v1.bits.u256[1] | v2.bits.u256[1];
             new.bits.u256[2] = v1.bits.u256[2] | v2.bits.u256[2];
             new.bits.u256[3] = v1.bits.u256[3] | v2.bits.u256[3];
             break;
   default:
      panic(__func__);
   }

   return new;
}


vbits_t
and_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   vbits_t new = { .num_bits = v1.num_bits };

   switch (v1.num_bits) {
   case 8:   new.bits.u8  = v1.bits.u8  & v2.bits.u8;  break;
   case 16:  new.bits.u16 = v1.bits.u16 & v2.bits.u16; break;
   case 32:  new.bits.u32 = v1.bits.u32 & v2.bits.u32; break;
   case 64:  new.bits.u64 = v1.bits.u64 & v2.bits.u64; break;
   case 128: new.bits.u128[0] = v1.bits.u128[0] & v2.bits.u128[0];
             new.bits.u128[1] = v1.bits.u128[1] & v2.bits.u128[1];
             break;
   case 256: new.bits.u256[0] = v1.bits.u256[0] & v2.bits.u256[0];
             new.bits.u256[1] = v1.bits.u256[1] & v2.bits.u256[1];
             new.bits.u256[2] = v1.bits.u256[2] & v2.bits.u256[2];
             new.bits.u256[3] = v1.bits.u256[3] & v2.bits.u256[3];
             break;
   default:
      panic(__func__);
   }

   return new;
}


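/* Concatenate the vbits of V1 and V2; V1 supplies the upper half of the
   result and V2 the lower half. */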
vbits_t
concat_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   vbits_t new = { .num_bits = v1.num_bits * 2 };

   switch (v1.num_bits) {
   case 8:  new.bits.u16 = v1.bits.u8;
            new.bits.u16 = (new.bits.u16 << 8)  | v2.bits.u8;  break;
   case 16: new.bits.u32 = v1.bits.u16;
            new.bits.u32 = (new.bits.u32 << 16) | v2.bits.u16; break;
   case 32: new.bits.u64 = v1.bits.u32;
            new.bits.u64 = (new.bits.u64 << 32) | v2.bits.u32; break;
   case 64:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u128[0] = v2.bits.u64;
         new.bits.u128[1] = v1.bits.u64;
      } else {
         new.bits.u128[0] = v1.bits.u64;
         new.bits.u128[1] = v2.bits.u64;
      }
      break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u256[0] = v2.bits.u128[0];
         new.bits.u256[1] = v2.bits.u128[1];
         new.bits.u256[2] = v1.bits.u128[0];
         new.bits.u256[3] = v1.bits.u128[1];
      } else {
         new.bits.u256[0] = v1.bits.u128[0];
         new.bits.u256[1] = v1.bits.u128[1];
         new.bits.u256[2] = v2.bits.u128[0];
         new.bits.u256[3] = v2.bits.u128[1];
      }
      break;
   case 256: /* Fall through */
   default:
      panic(__func__);
   }

   return new;
}


vbits_t
upper_vbits(vbits_t v)
{
   vbits_t new = { .num_bits = v.num_bits / 2 };

   switch (v.num_bits) {
   case 16: new.bits.u8  = v.bits.u16 >> 8;  break;
   case 32: new.bits.u16 = v.bits.u32 >> 16; break;
   case 64: new.bits.u32 = v.bits.u64 >> 32; break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN)
         new.bits.u64 = v.bits.u128[1];
      else
         new.bits.u64 = v.bits.u128[0];
      break;
   case 256:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u128[0] = v.bits.u256[2];
         new.bits.u128[1] = v.bits.u256[3];
      } else {
         new.bits.u128[0] = v.bits.u256[0];
         new.bits.u128[1] = v.bits.u256[1];
      }
      break;
   case 8:
   default:
      panic(__func__);
   }

   return new;
}


vbits_t
zextend_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits >= v.num_bits);

   if (num_bits == v.num_bits) return v;

   vbits_t new = { .num_bits = num_bits };

   if (v.num_bits <= 64) {
      uint64_t bits = get_bits64(v);

      switch (num_bits) {
      case 8:  new.bits.u8  = bits; break;
      case 16: new.bits.u16 = bits; break;
      case 32: new.bits.u32 = bits; break;
      case 64: new.bits.u64 = bits; break;
      case 128:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u128[0] = bits;
            new.bits.u128[1] = 0;
         } else {
            new.bits.u128[0] = 0;
            new.bits.u128[1] = bits;
         }
         break;
      case 256:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u256[0] = bits;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = bits;
         }
         break;
      default:
         panic(__func__);
      }
      return new;
   }

   if (v.num_bits == 128) {
      assert(num_bits == 256);

      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u256[0] = v.bits.u128[0];
         new.bits.u256[1] = v.bits.u128[1];
         new.bits.u256[2] = 0;
         new.bits.u256[3] = 0;
      } else {
         new.bits.u256[0] = 0;
         new.bits.u256[1] = 0;
         new.bits.u256[2] = v.bits.u128[1];
         new.bits.u256[3] = v.bits.u128[0];
      }
      return new;
   }

   /* Cannot zero-extend a 256-bit value to something larger */
   panic(__func__);
}


vbits_t
sextend_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits >= v.num_bits);

   int sextend = 0;

   switch (v.num_bits) {
   case 8:   if (v.bits.u8  == 0x80)              sextend = 1; break;
   case 16:  if (v.bits.u16 == 0x8000)            sextend = 1; break;
   case 32:  if (v.bits.u32 == 0x80000000)        sextend = 1; break;
   case 64:  if (v.bits.u64 == (1ull << 63))      sextend = 1; break;
   case 128: if (v.bits.u128[1] == (1ull << 63))  sextend = 1; break;
   case 256: if (v.bits.u256[3] == (1ull << 63))  sextend = 1; break;

   default:
      panic(__func__);
   }

   return sextend ? left_vbits(v, num_bits) : zextend_vbits(v, num_bits);
}


vbits_t
onehot_vbits(unsigned bitno, unsigned num_bits)
{
   assert(bitno < num_bits);

   vbits_t new = { .num_bits = num_bits };

   switch (num_bits) {
   case 1:  new.bits.u32 = 1    << bitno; break;
   case 8:  new.bits.u8  = 1    << bitno; break;
   case 16: new.bits.u16 = 1    << bitno; break;
   case 32: new.bits.u32 = 1u   << bitno; break;
   case 64: new.bits.u64 = 1ull << bitno; break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         if (bitno < 64) {
            new.bits.u128[0] = 1ull << bitno;
            new.bits.u128[1] = 0;
         } else {
            new.bits.u128[0] = 0;
            new.bits.u128[1] = 1ull << (bitno - 64);
         }
      } else {
         if (bitno < 64) {
            new.bits.u128[0] = 0;
            new.bits.u128[1] = 1ull << bitno;
         } else {
            new.bits.u128[0] = 1ull << (bitno - 64);
            new.bits.u128[1] = 0;
         }
      }
      break;
   case 256:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         if (bitno < 64) {
            new.bits.u256[0] = 1ull << bitno;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else if (bitno < 128) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 1ull << (bitno - 64);
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else if (bitno < 192) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 1ull << (bitno - 128);
            new.bits.u256[3] = 0;
         } else {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 1ull << (bitno - 192);
         }
      } else {
         if (bitno < 64) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 1ull << bitno;
         } else if (bitno < 128) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 1ull << (bitno - 64);
            new.bits.u256[3] = 0;
         } else if (bitno < 192) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 1ull << (bitno - 128);
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else {
            new.bits.u256[0] = 1ull << (bitno - 192);
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         }
      }
      break;
   default:
      panic(__func__);
   }
   return new;
}


int
completely_defined_vbits(vbits_t v)
{
   return equal_vbits(v, defined_vbits(v.num_bits));
}


vbits_t
shl_vbits(vbits_t v, unsigned shift_amount)
{
   assert(shift_amount < v.num_bits);

   vbits_t new = v;

   switch (v.num_bits) {
   case 8:  new.bits.u8  <<= shift_amount; break;
   case 16: new.bits.u16 <<= shift_amount; break;
   case 32: new.bits.u32 <<= shift_amount; break;
   case 64: new.bits.u64 <<= shift_amount; break;
   case 128: /* fall through */
   case 256: /* fall through */
   default:
      panic(__func__);
   }

   return new;
}


vbits_t
shr_vbits(vbits_t v, unsigned shift_amount)
{
   assert(shift_amount < v.num_bits);

   vbits_t new = v;

   switch (v.num_bits) {
   case 8:  new.bits.u8  >>= shift_amount; break;
   case 16: new.bits.u16 >>= shift_amount; break;
   case 32: new.bits.u32 >>= shift_amount; break;
   case 64: new.bits.u64 >>= shift_amount; break;
   case 128: /* fall through */
   case 256: /* fall through */
   default:
      panic(__func__);
   }

   return new;
}


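/* Arithmetic shift right. The vbits are shifted right; if the most
   significant (sign) vbit of the input is undefined, left_vbits is
   applied to the shifted value so that the replicated sign-bit
   positions are marked undefined as well. */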
vbits_t
sar_vbits(vbits_t v, unsigned shift_amount)
{
   assert(shift_amount < v.num_bits);

   vbits_t new = v;
   int msb;

   switch (v.num_bits) {
   case 8:
      new.bits.u8 >>= shift_amount;
      msb = (v.bits.u8 & 0x80) != 0;
      break;
   case 16:
      new.bits.u16 >>= shift_amount;
      msb = (v.bits.u16 & 0x8000) != 0;
      break;
   case 32:
      new.bits.u32 >>= shift_amount;
      msb = (v.bits.u32 & (1u << 31)) != 0;
      break;
   case 64:
      new.bits.u64 >>= shift_amount;
      msb = (v.bits.u64 & (1ull << 63)) != 0;
      break;
   case 128: /* fall through */
   case 256: /* fall through */
   default:
      panic(__func__);
   }

   if (msb)
      new = left_vbits(new, new.num_bits);
   return new;
}

/* Return a value for the POWER Iop_CmpORD class iops */
vbits_t
cmpord_vbits(unsigned v1_num_bits, unsigned v2_num_bits)
{
   vbits_t new = { .num_bits = v1_num_bits };

   /* Size of values being compared must be the same */
   assert(v1_num_bits == v2_num_bits);

   /* The comparison only produces a 32-bit or 64-bit value, in which
    * bits 3, 2 and 1 indicate less than, greater than and equal.
    */
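   /* Hence the returned vbits value is 0xE (binary 1110): the three flag
    * bits are marked undefined, while bit 0, which CmpORD never sets,
    * stays defined. */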
   switch (v1_num_bits) {
   case 32:
      new.bits.u32 = 0xE;
      break;

   case 64:
      new.bits.u64 = 0xE;
      break;

   default:
      panic(__func__);
   }

   return new;
}