//===- X86InstrVecCompiler.td - Vector Compiler Patterns ---*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the various vector pseudo instructions used by the
// compiler, as well as Pat patterns used during instruction selection.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  No op bitconverts
//===----------------------------------------------------------------------===//

// Bitcasts between 128-bit vector types. Each pattern rewrites the bitconvert
// to its own VR128 source operand, merely retyped to the destination type, so
// no instruction is needed for the conversion.

// Destination type: v2i64.
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;

// Destination type: v4i32.
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;

// Destination type: v8i16.
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
// 128-bit no-op bitcasts, destination type: v16i8.
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;

// 128-bit no-op bitcasts, destination type: v4f32.
def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;

// 128-bit no-op bitcasts, destination type: v2f64.
def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;

// Bitcasts between 256-bit vector types. Each pattern rewrites the bitconvert
// to its own VR256 source operand, merely retyped to the destination type, so
// no instruction is needed for the conversion.

// Destination type: v4i64.
def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;

// Destination type: v8i32.
def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;

// Destination type: v16i16.
def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;

// Destination type: v32i8.
def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;

// Destination type: v8f32.
def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;

// Destination type: v4f64.
def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;

// Bitcasts between 512-bit vector types. Each pattern rewrites the bitconvert
// to its own VR512 source operand, merely retyped to the destination type, so
// no instruction is needed for the conversion.

// Destination type: v8f64.
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;

// Destination type: v16f32.
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;

// Destination type: v8i64.
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;

// Destination type: v16i32.
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;

// Destination type: v32i16.
def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;

// Destination type: v64i8.
def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;


//===----------------------------------------------------------------------===//
//  Non-instruction patterns
//===----------------------------------------------------------------------===//

// Extracting element 0 of a v4f32/v2f64 vector is selected as a
// COPY_TO_REGCLASS of the VR128 source into the scalar FR32/FR64 class.
def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;

// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
          (COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
          (COPY_TO_REGCLASS FR64:$src, VR128)>;


//===----------------------------------------------------------------------===//
//  Subvector tricks
//===----------------------------------------------------------------------===//

// Lowers extract_subvector / insert_subvector at index 0 to subregister
// operations.
//   subRC  - register class of the narrow (sub)vector operand.
//   subVT  - value type of the narrow (sub)vector.
//   RC     - register class of the wide vector operand.
//   VT     - value type of the wide vector.
//   subIdx - subregister index selecting the narrow part of the wide register.
multiclass subvector_subreg_lowering<RegisterClass subRC,
                                     ValueType subVT,
                                     RegisterClass RC,
                                     ValueType VT,
                                     SubRegIndex subIdx> {
  // Extracting the subvector at index 0 becomes an EXTRACT_SUBREG.
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (EXTRACT_SUBREG RC:$src, subIdx))>;

  // Inserting at index 0 of an undef vector becomes an INSERT_SUBREG into an
  // IMPLICIT_DEF.
  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (INSERT_SUBREG (IMPLICIT_DEF), subRC:$src, subIdx))>;
}

// A 128-bit subvector extract from the first 256-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 256-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR128, v4i32, VR256, v8i32,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32, VR256, v8f32,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64, VR256, v4i64,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8,  sub_xmm>;

// A 128-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR128, v4i32, VR512, v16i32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32, VR512, v16f32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64, VR512, v8i64,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8,  sub_xmm>;

// A 256-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 256-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR256, v8i32,  VR512, v16i32, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v8f32,  VR512, v16f32, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4i64,  VR512, v8i64,  sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4f64,  VR512, v8f64,  sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v32i8,  VR512, v64i8,  sub_ymm>;


// Lowers a store of the index-0 subvector of a wider vector to a plain vector
// store of the corresponding subregister.
//   AlignedStr   - infix of the instruction used for aligned stores; the
//                  instruction looked up is "VMOV" # AlignedStr # "mr".
//   UnalignedStr - same, for ordinary (not known aligned) stores.
//   RC           - register class of the wide source vector.
//   DstTy        - value type of the stored subvector.
//   SrcTy        - value type of the wide source vector.
//   SubIdx       - subregister index selecting the stored part.
multiclass subvector_store_lowering<string AlignedStr,
                                    string UnalignedStr,
                                    RegisterClass RC,
                                    ValueType DstTy,
                                    ValueType SrcTy,
                                    SubRegIndex SubIdx> {
  def : Pat<(alignedstore (DstTy (extract_subvector
                                  (SrcTy RC:$src), (iPTR 0))), addr:$dst),
            (!cast<Instruction>("VMOV"#AlignedStr#"mr") addr:$dst,
             (DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;

  def : Pat<(store (DstTy (extract_subvector
                           (SrcTy RC:$src), (iPTR 0))), addr:$dst),
            (!cast<Instruction>("VMOV"#UnalignedStr#"mr") addr:$dst,
             (DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
}

// Stores of the lower 128 bits of a 256-bit vector, using the
// VMOV{APD,UPD,APS,UPS,DQA,DQU}mr instructions when VLX is not available.
let Predicates = [HasAVX, NoVLX] in {
  defm : subvector_store_lowering<"APD", "UPD", VR256X, v2f64, v4f64, sub_xmm>;
  defm : subvector_store_lowering<"APS", "UPS", VR256X, v4f32, v8f32, sub_xmm>;
  defm : subvector_store_lowering<"DQA", "DQU", VR256X, v2i64, v4i64, sub_xmm>;
  defm : subvector_store_lowering<"DQA", "DQU", VR256X, v4i32, v8i32, sub_xmm>;
  defm : subvector_store_lowering<"DQA", "DQU", VR256X, v8i16, v16i16, sub_xmm>;
  defm : subvector_store_lowering<"DQA", "DQU", VR256X, v16i8, v32i8, sub_xmm>;
}

let Predicates = [HasVLX] in {
  // Special patterns for storing subvector extracts of the lower 128 bits of
  // a 256-bit vector. It's cheaper to just use a plain aligned/unaligned
  // vector move store than an extract-to-memory instruction.
  defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR256X, v2f64, v4f64,
                                  sub_xmm>;
  defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR256X, v4f32, v8f32,
                                  sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v2i64,
                                  v4i64, sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v4i32,
                                  v8i32, sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v8i16,
                                  v16i16, sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v16i8,
                                  v32i8, sub_xmm>;

  // Special patterns for storing subvector extracts of the lower 128 bits of
  // a 512-bit vector. It's cheaper to just use a plain aligned/unaligned
  // vector move store than an extract-to-memory instruction.
  defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR512, v2f64, v8f64,
                                  sub_xmm>;
  defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR512, v4f32, v16f32,
                                  sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v2i64,
                                  v8i64, sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v4i32,
                                  v16i32, sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v8i16,
                                  v32i16, sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v16i8,
                                  v64i8, sub_xmm>;

  // Special patterns for storing subvector extracts of the lower 256 bits of
  // a 512-bit vector. It's cheaper to just use a plain aligned/unaligned
  // vector move store than an extract-to-memory instruction.
  defm : subvector_store_lowering<"APDZ256", "UPDZ256", VR512, v4f64, v8f64,
                                  sub_ymm>;
  defm : subvector_store_lowering<"APSZ256", "UPSZ256", VR512, v8f32, v16f32,
                                  sub_ymm>;
  defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v4i64,
                                  v8i64, sub_ymm>;
  defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v8i32,
                                  v16i32, sub_ymm>;
  defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v16i16,
                                  v32i16, sub_ymm>;
  defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v32i8,
                                  v64i8, sub_ymm>;
}

// If we're inserting into an all zeros vector, just use a plain move which
// will zero the upper bits. A post-isel hook will take care of removing
// any moves that we can prove are unnecessary.
//   MoveStr - infix of the register-to-register move; the instruction looked
//             up is "VMOV" # MoveStr # "rr".
//   RC      - register class of the inserted subvector.
//   DstTy   - value type of the resulting wide vector.
//   SrcTy   - value type of the inserted subvector.
//   ZeroTy  - value type of the all-zeros vector before it is bitconverted.
//   SubIdx  - subregister index at which the move result is placed.
multiclass subvec_zero_lowering<string MoveStr,
                                RegisterClass RC,
                                ValueType DstTy,
                                ValueType SrcTy,
                                ValueType ZeroTy,
                                SubRegIndex SubIdx> {
  def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
                                     (SrcTy RC:$src), (iPTR 0))),
            (SUBREG_TO_REG (i64 0),
             (SrcTy (!cast<Instruction>("VMOV"#MoveStr#"rr") RC:$src)),
             SubIdx)>;
}

// 128-bit subvector into a zeroed 256-bit vector, without VLX.
let Predicates = [HasAVX, NoVLX] in {
  defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, sub_xmm>;
}

let Predicates = [HasVLX] in {
  // 128-bit subvector into a zeroed 256-bit vector.
  defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32, sub_xmm>;

  // 128-bit subvector into a zeroed 512-bit vector.
  defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32, sub_xmm>;

  // 256-bit subvector into a zeroed 512-bit vector.
  defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32, sub_ymm>;
}

let Predicates = [HasAVX512, NoVLX] in {
  // 128-bit subvector into a zeroed 512-bit vector, without VLX.
  defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, sub_xmm>;

  // 256-bit subvector into a zeroed 512-bit vector, without VLX.
  defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32, sub_ymm>;
}

// Pattern leaf matching a mask operand of type vt in register class RC, but
// only when isMaskZeroExtended(N) returns true for the matched node.
class maskzeroupper<ValueType vt, RegisterClass RC> :
  PatLeaf<(vt RC:$src), [{
    return isMaskZeroExtended(N);
  }]>;

def maskzeroupperv1i1  : maskzeroupper<v1i1,  VK1>;
def maskzeroupperv2i1  : maskzeroupper<v2i1,  VK2>;
def maskzeroupperv4i1  : maskzeroupper<v4i1,  VK4>;
def maskzeroupperv8i1  : maskzeroupper<v8i1,  VK8>;
def maskzeroupperv16i1 : maskzeroupper<v16i1, VK16>;
def maskzeroupperv32i1 : maskzeroupper<v32i1, VK32>;

// The patterns determine if we can depend on the upper bits of a mask register
// being zeroed by the previous operation so that we can skip explicit
// zeroing. When the leaf predicate holds, the insert into an all-zeros mask is
// just a COPY_TO_REGCLASS into the wider mask register class.
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK32)>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv8i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK32)>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv16i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK32)>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv8i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv16i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv32i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK32:$src, VK64)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK16)>;
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv8i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK16)>;
}

let Predicates = [HasDQI] in {
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK8)>;
}

let Predicates = [HasVLX, HasDQI] in {
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK8)>;
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK8)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK16)>;
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK16)>;
}

let Predicates = [HasBWI, HasVLX] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK32)>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK32)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK64)>;
}

// If the bits are not zero we have to fall back to explicitly zeroing by
// using shifts.
// v1i1/v2i1/v4i1 into a zeroed v16i1: copy the source into VK16, then shift
// left and back right by (16 - source width) using the word-sized mask shifts.
let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRWri
              (KSHIFTLWri (COPY_TO_REGCLASS VK1:$mask, VK16), (i8 15)),
              (i8 15))>;

  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRWri
              (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16), (i8 14)),
              (i8 14))>;

  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRWri
              (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16), (i8 12)),
              (i8 12))>;
}

// v8i1 into a zeroed v16i1 without DQI: same shift pair, by 16 - 8 = 8.
let Predicates = [HasAVX512, NoDQI] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (KSHIFTRWri
              (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16), (i8 8)),
              (i8 8))>;
}

let Predicates = [HasDQI] in {
  // With DQI, a v8i1 source is widened through KMOVBkk instead of a shift
  // pair (presumably relying on the byte move clearing the upper bits).
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>;

  // v1i1/v2i1/v4i1 into a zeroed v8i1: byte-sized shift pair by
  // (8 - source width).
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRBri
              (KSHIFTLBri (COPY_TO_REGCLASS VK1:$mask, VK8), (i8 7)),
              (i8 7))>;
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRBri
              (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8), (i8 6)),
              (i8 6))>;
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRBri
              (KSHIFTLBri (COPY_TO_REGCLASS VK4:$mask, VK8), (i8 4)),
              (i8 4))>;
}

// v16i1/v32i1 into a zeroed v32i1/v64i1: widened through a word- or
// dword-sized mask move (KMOVWkk / KMOVDkk) followed by a register-class copy.
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v16i1 VK16:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK32)>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v16i1 VK16:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v32i1 VK32:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVDkk VK32:$mask), VK64)>;
}

// v8i1 into a zeroed v32i1/v64i1 without DQI: shift pair by
// (destination width - 8).
let Predicates = [HasBWI, NoDQI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (KSHIFTRDri
              (KSHIFTLDri (COPY_TO_REGCLASS VK8:$mask, VK32), (i8 24)),
              (i8 24))>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (KSHIFTRQri
              (KSHIFTLQri (COPY_TO_REGCLASS VK8:$mask, VK64), (i8 56)),
              (i8 56))>;
}

// v8i1 into a zeroed v32i1/v64i1 with DQI: widened through KMOVBkk.
let Predicates = [HasBWI, HasDQI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK32)>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK64)>;
}

// v1i1/v2i1/v4i1 into a zeroed v32i1/v64i1: dword/qword-sized shift pair by
// (destination width - source width).
let Predicates = [HasBWI, HasVLX] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRDri
              (KSHIFTLDri (COPY_TO_REGCLASS VK1:$mask, VK32), (i8 31)),
              (i8 31))>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRDri
              (KSHIFTLDri (COPY_TO_REGCLASS VK2:$mask, VK32), (i8 30)),
              (i8 30))>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRDri
              (KSHIFTLDri (COPY_TO_REGCLASS VK4:$mask, VK32), (i8 28)),
              (i8 28))>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRQri
              (KSHIFTLQri (COPY_TO_REGCLASS VK1:$mask, VK64), (i8 63)),
              (i8 63))>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRQri
              (KSHIFTLQri (COPY_TO_REGCLASS VK2:$mask, VK64), (i8 62)),
              (i8 62))>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRQri
              (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64), (i8 60)),
              (i8 60))>;
}