1 // Copyright 2020 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #if !V8_ENABLE_WEBASSEMBLY 6 #error This header should only be included if WebAssembly is enabled. 7 #endif // !V8_ENABLE_WEBASSEMBLY 8 9 #ifndef V8_WASM_SIMD_SHUFFLE_H_ 10 #define V8_WASM_SIMD_SHUFFLE_H_ 11 12 #include "src/base/macros.h" 13 #include "src/common/globals.h" 14 15 namespace v8 { 16 namespace internal { 17 namespace wasm { 18 19 class V8_EXPORT_PRIVATE SimdShuffle { 20 public: 21 // Converts a shuffle into canonical form, meaning that the first lane index 22 // is in the range [0 .. 15]. Set |inputs_equal| true if this is an explicit 23 // swizzle. Returns canonicalized |shuffle|, |needs_swap|, and |is_swizzle|. 24 // If |needs_swap| is true, inputs must be swapped. If |is_swizzle| is true, 25 // the second input can be ignored. 26 static void CanonicalizeShuffle(bool inputs_equal, uint8_t* shuffle, 27 bool* needs_swap, bool* is_swizzle); 28 29 // Tries to match an 8x16 byte shuffle to the identity shuffle, which is 30 // [0 1 ... 15]. This should be called after canonicalizing the shuffle, so 31 // the second identity shuffle, [16 17 .. 31] is converted to the first one. 32 static bool TryMatchIdentity(const uint8_t* shuffle); 33 34 // Tries to match a byte shuffle to a scalar splat operation. Returns the 35 // index of the lane if successful. 36 template <int LANES> TryMatchSplat(const uint8_t * shuffle,int * index)37 static bool TryMatchSplat(const uint8_t* shuffle, int* index) { 38 const int kBytesPerLane = kSimd128Size / LANES; 39 // Get the first lane's worth of bytes and check that indices start at a 40 // lane boundary and are consecutive. 41 uint8_t lane0[kBytesPerLane]; 42 lane0[0] = shuffle[0]; 43 if (lane0[0] % kBytesPerLane != 0) return false; 44 for (int i = 1; i < kBytesPerLane; ++i) { 45 lane0[i] = shuffle[i]; 46 if (lane0[i] != lane0[0] + i) return false; 47 } 48 // Now check that the other lanes are identical to lane0. 49 for (int i = 1; i < LANES; ++i) { 50 for (int j = 0; j < kBytesPerLane; ++j) { 51 if (lane0[j] != shuffle[i * kBytesPerLane + j]) return false; 52 } 53 } 54 *index = lane0[0] / kBytesPerLane; 55 return true; 56 } 57 58 // Tries to match a 32x4 rotate, only makes sense if the inputs are equal 59 // (is_swizzle). A rotation is a shuffle like [1, 2, 3, 0]. This will always 60 // match a Concat, but can have better codegen. 61 static bool TryMatch32x4Rotate(const uint8_t* shuffle, uint8_t* shuffle32x4, 62 bool is_swizzle); 63 64 // Tries to match an 8x16 byte shuffle to an equivalent 32x4 shuffle. If 65 // successful, it writes the 32x4 shuffle word indices. E.g. 66 // [0 1 2 3 8 9 10 11 4 5 6 7 12 13 14 15] == [0 2 1 3] 67 static bool TryMatch32x4Shuffle(const uint8_t* shuffle, uint8_t* shuffle32x4); 68 69 // Tries to match an 8x16 byte shuffle to an equivalent 16x8 shuffle. If 70 // successful, it writes the 16x8 shuffle word indices. E.g. 71 // [0 1 8 9 2 3 10 11 4 5 12 13 6 7 14 15] == [0 4 1 5 2 6 3 7] 72 static bool TryMatch16x8Shuffle(const uint8_t* shuffle, uint8_t* shuffle16x8); 73 74 // Tries to match a byte shuffle to a concatenate operation, formed by taking 75 // 16 bytes from the 32 byte concatenation of the inputs. If successful, it 76 // writes the byte offset. E.g. [4 5 6 7 .. 16 17 18 19] concatenates both 77 // source vectors with offset 4. The shuffle should be canonicalized. 78 static bool TryMatchConcat(const uint8_t* shuffle, uint8_t* offset); 79 80 // Tries to match a byte shuffle to a blend operation, which is a shuffle 81 // where no lanes change position. E.g. [0 9 2 11 .. 14 31] interleaves the 82 // even lanes of the first source with the odd lanes of the second. The 83 // shuffle should be canonicalized. 84 static bool TryMatchBlend(const uint8_t* shuffle); 85 86 // Packs a 4 lane shuffle into a single imm8 suitable for use by pshufd, 87 // pshuflw, and pshufhw. 88 static uint8_t PackShuffle4(uint8_t* shuffle); 89 // Gets an 8 bit lane mask suitable for 16x8 pblendw. 90 static uint8_t PackBlend8(const uint8_t* shuffle16x8); 91 // Gets an 8 bit lane mask suitable for 32x4 pblendw. 92 static uint8_t PackBlend4(const uint8_t* shuffle32x4); 93 // Packs 4 bytes of shuffle into a 32 bit immediate. 94 static int32_t Pack4Lanes(const uint8_t* shuffle); 95 // Packs 16 bytes of shuffle into an array of 4 uint32_t. 96 static void Pack16Lanes(uint32_t* dst, const uint8_t* shuffle); 97 }; 98 99 class V8_EXPORT_PRIVATE SimdSwizzle { 100 public: 101 // Checks if all the immediates are in range (< kSimd128Size), and if they are 102 // not, the top bit is set. 103 static bool AllInRangeOrTopBitSet(std::array<uint8_t, kSimd128Size> shuffle); 104 }; 105 106 } // namespace wasm 107 } // namespace internal 108 } // namespace v8 109 110 #endif // V8_WASM_SIMD_SHUFFLE_H_ 111