1//===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10//----------------------------------- 11// Vector Specific 12//----------------------------------- 13 14// 15// All vector instructions derive from NVPTXVecInst 16// 17 18class NVPTXVecInst<dag outs, dag ins, string asmstr, list<dag> pattern, 19 NVPTXInst sInst=NOP> 20 : NVPTXInst<outs, ins, asmstr, pattern> { 21 NVPTXInst scalarInst=sInst; 22} 23 24let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in { 25// Extract v2i16 26def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), 27 (ins V2I16Regs:$src, i8imm:$c), 28 "mov.u16 \t$dst, $src${c:vecelem};", 29 [(set Int16Regs:$dst, (extractelt 30 (v2i16 V2I16Regs:$src), imm:$c))], 31 IMOV16rr>; 32 33// Extract v4i16 34def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), 35 (ins V4I16Regs:$src, i8imm:$c), 36 "mov.u16 \t$dst, $src${c:vecelem};", 37 [(set Int16Regs:$dst, (extractelt 38 (v4i16 V4I16Regs:$src), imm:$c))], 39 IMOV16rr>; 40 41// Extract v2i8 42def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), 43 (ins V2I8Regs:$src, i8imm:$c), 44 "mov.u16 \t$dst, $src${c:vecelem};", 45 [(set Int8Regs:$dst, (extractelt 46 (v2i8 V2I8Regs:$src), imm:$c))], 47 IMOV8rr>; 48 49// Extract v4i8 50def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), 51 (ins V4I8Regs:$src, i8imm:$c), 52 "mov.u16 \t$dst, $src${c:vecelem};", 53 [(set Int8Regs:$dst, (extractelt 54 (v4i8 V4I8Regs:$src), imm:$c))], 55 IMOV8rr>; 56 57// Extract v2i32 58def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), 59 (ins V2I32Regs:$src, i8imm:$c), 60 "mov.u32 \t$dst, $src${c:vecelem};", 61 [(set Int32Regs:$dst, (extractelt 62 (v2i32 V2I32Regs:$src), imm:$c))], 63 IMOV32rr>; 64 65// Extract v2f32 66def V2f32Extract : 
NVPTXVecInst<(outs Float32Regs:$dst), 67 (ins V2F32Regs:$src, i8imm:$c), 68 "mov.f32 \t$dst, $src${c:vecelem};", 69 [(set Float32Regs:$dst, (extractelt 70 (v2f32 V2F32Regs:$src), imm:$c))], 71 FMOV32rr>; 72 73// Extract v2i64 74def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst), 75 (ins V2I64Regs:$src, i8imm:$c), 76 "mov.u64 \t$dst, $src${c:vecelem};", 77 [(set Int64Regs:$dst, (extractelt 78 (v2i64 V2I64Regs:$src), imm:$c))], 79 IMOV64rr>; 80 81// Extract v2f64 82def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst), 83 (ins V2F64Regs:$src, i8imm:$c), 84 "mov.f64 \t$dst, $src${c:vecelem};", 85 [(set Float64Regs:$dst, (extractelt 86 (v2f64 V2F64Regs:$src), imm:$c))], 87 FMOV64rr>; 88 89// Extract v4i32 90def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), 91 (ins V4I32Regs:$src, i8imm:$c), 92 "mov.u32 \t$dst, $src${c:vecelem};", 93 [(set Int32Regs:$dst, (extractelt 94 (v4i32 V4I32Regs:$src), imm:$c))], 95 IMOV32rr>; 96 97// Extract v4f32 98def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst), 99 (ins V4F32Regs:$src, i8imm:$c), 100 "mov.f32 \t$dst, $src${c:vecelem};", 101 [(set Float32Regs:$dst, (extractelt 102 (v4f32 V4F32Regs:$src), imm:$c))], 103 FMOV32rr>; 104} 105 106let isAsCheapAsAMove=1, VecInstType=isVecInsert.Value in { 107// Insert v2i8 108def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst), 109 (ins V2I8Regs:$src, Int8Regs:$val, i8imm:$c), 110 "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};" 111 "\n\tmov.u16 \t$dst${c:vecelem}, $val;", 112 [(set V2I8Regs:$dst, 113 (insertelt V2I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>; 114 115// Insert v4i8 116def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst), 117 (ins V4I8Regs:$src, Int8Regs:$val, i8imm:$c), 118 "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};" 119 "\n\tmov.u16 \t$dst${c:vecelem}, $val;", 120 [(set V4I8Regs:$dst, 121 (insertelt V4I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>; 122 123// Insert v2i16 124def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst), 125 (ins V2I16Regs:$src, 
Int16Regs:$val, i8imm:$c), 126 "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};" 127 "\n\tmov.u16 \t$dst${c:vecelem}, $val;", 128 [(set V2I16Regs:$dst, 129 (insertelt V2I16Regs:$src, Int16Regs:$val, imm:$c))], 130 IMOV16rr>; 131 132// Insert v4i16 133def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst), 134 (ins V4I16Regs:$src, Int16Regs:$val, i8imm:$c), 135 "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};" 136 "\n\tmov.u16 \t$dst${c:vecelem}, $val;", 137 [(set V4I16Regs:$dst, 138 (insertelt V4I16Regs:$src, Int16Regs:$val, imm:$c))], 139 IMOV16rr>; 140 141// Insert v2i32 142def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst), 143 (ins V2I32Regs:$src, Int32Regs:$val, i8imm:$c), 144 "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};" 145 "\n\tmov.u32 \t$dst${c:vecelem}, $val;", 146 [(set V2I32Regs:$dst, 147 (insertelt V2I32Regs:$src, Int32Regs:$val, imm:$c))], 148 IMOV32rr>; 149 150// Insert v2f32 151def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst), 152 (ins V2F32Regs:$src, Float32Regs:$val, i8imm:$c), 153 "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};" 154 "\n\tmov.f32 \t$dst${c:vecelem}, $val;", 155 [(set V2F32Regs:$dst, 156 (insertelt V2F32Regs:$src, Float32Regs:$val, imm:$c))], 157 FMOV32rr>; 158 159// Insert v2i64 160def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst), 161 (ins V2I64Regs:$src, Int64Regs:$val, i8imm:$c), 162 "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};" 163 "\n\tmov.u64 \t$dst${c:vecelem}, $val;", 164 [(set V2I64Regs:$dst, 165 (insertelt V2I64Regs:$src, Int64Regs:$val, imm:$c))], 166 IMOV64rr>; 167 168// Insert v2f64 169def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst), 170 (ins V2F64Regs:$src, Float64Regs:$val, i8imm:$c), 171 "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};" 172 "\n\tmov.f64 \t$dst${c:vecelem}, $val;", 173 [(set V2F64Regs:$dst, 174 (insertelt V2F64Regs:$src, Float64Regs:$val, imm:$c))], 175 FMOV64rr>; 176 177// Insert v4i32 178def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst), 179 (ins V4I32Regs:$src, Int32Regs:$val, i8imm:$c), 
180 "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};" 181 "\n\tmov.u32 \t$dst${c:vecelem}, $val;", 182 [(set V4I32Regs:$dst, 183 (insertelt V4I32Regs:$src, Int32Regs:$val, imm:$c))], 184 IMOV32rr>; 185 186// Insert v4f32 187def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst), 188 (ins V4F32Regs:$src, Float32Regs:$val, i8imm:$c), 189 "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};" 190 "\n\tmov.f32 \t$dst${c:vecelem}, $val;", 191 [(set V4F32Regs:$dst, 192 (insertelt V4F32Regs:$src, Float32Regs:$val, imm:$c))], 193 FMOV32rr>; 194} 195 196class BinOpAsmString<string c> { 197 string s = c; 198} 199 200class V4AsmStr<string opcode> : BinOpAsmString< 201 !strconcat(!strconcat(!strconcat(!strconcat( 202 !strconcat(!strconcat(!strconcat( 203 opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"), 204 opcode), " \t${dst}_1, ${a}_1, ${b}_1;\n\t"), 205 opcode), " \t${dst}_2, ${a}_2, ${b}_2;\n\t"), 206 opcode), " \t${dst}_3, ${a}_3, ${b}_3;")>; 207 208class V2AsmStr<string opcode> : BinOpAsmString< 209 !strconcat(!strconcat(!strconcat( 210 opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"), 211 opcode), " \t${dst}_1, ${a}_1, ${b}_1;")>; 212 213class V4MADStr<string opcode> : BinOpAsmString< 214 !strconcat(!strconcat(!strconcat(!strconcat( 215 !strconcat(!strconcat(!strconcat( 216 opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"), 217 opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t"), 218 opcode), " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t"), 219 opcode), " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>; 220 221class V2MADStr<string opcode> : BinOpAsmString< 222 !strconcat(!strconcat(!strconcat( 223 opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"), 224 opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>; 225 226class V4UnaryStr<string opcode> : BinOpAsmString< 227 !strconcat(!strconcat(!strconcat(!strconcat( 228 !strconcat(!strconcat(!strconcat( 229 opcode, " \t${dst}_0, ${a}_0;\n\t"), 230 opcode), " \t${dst}_1, ${a}_1;\n\t"), 231 opcode), " \t${dst}_2, ${a}_2;\n\t"), 232 opcode), " \t${dst}_3, 
${a}_3;")>; 233 234class V2UnaryStr<string opcode> : BinOpAsmString< 235 !strconcat(!strconcat(!strconcat( 236 opcode, " \t${dst}_0, ${a}_0;\n\t"), 237 opcode), " \t${dst}_1, ${a}_1;")>; 238 239class VecBinaryOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass, 240 NVPTXInst sInst=NOP> : 241 NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a, regclass:$b), 242 asmstr.s, 243 [(set regclass:$dst, (OpNode regclass:$a, regclass:$b))], 244 sInst>; 245 246class VecShiftOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass1, 247 NVPTXRegClass regclass2, NVPTXInst sInst=NOP> : 248 NVPTXVecInst<(outs regclass1:$dst), (ins regclass1:$a, regclass2:$b), 249 asmstr.s, 250 [(set regclass1:$dst, (OpNode regclass1:$a, regclass2:$b))], 251 sInst>; 252 253class VecUnaryOp<BinOpAsmString asmstr, PatFrag OpNode, NVPTXRegClass regclass, 254 NVPTXInst sInst=NOP> : 255 NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a), 256 asmstr.s, 257 [(set regclass:$dst, (OpNode regclass:$a))], sInst>; 258 259multiclass IntBinVOp<string asmstr, SDNode OpNode, 260 NVPTXInst i64op=NOP, NVPTXInst i32op=NOP, NVPTXInst 261 i16op=NOP, NVPTXInst i8op=NOP> { 262 def V2I64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "64")>, OpNode, V2I64Regs, 263 i64op>; 264 def V4I32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "32")>, OpNode, V4I32Regs, 265 i32op>; 266 def V2I32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "32")>, OpNode, V2I32Regs, 267 i32op>; 268 def V4I16 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I16Regs, 269 i16op>; 270 def V2I16 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I16Regs, 271 i16op>; 272 def V4I8 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I8Regs, 273 i8op>; 274 def V2I8 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I8Regs, 275 i8op>; 276} 277 278multiclass FloatBinVOp<string asmstr, SDNode OpNode, 279 NVPTXInst f64=NOP, NVPTXInst f32=NOP, 280 NVPTXInst f32_ftz=NOP> { 281 def V2F64 : 
VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f64")>, OpNode, 282 V2F64Regs, f64>; 283 def V4F32_ftz : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode, 284 V4F32Regs, f32_ftz>, Requires<[doF32FTZ]>; 285 def V2F32_ftz : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode, 286 V2F32Regs, f32_ftz>, Requires<[doF32FTZ]>; 287 def V4F32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "f32")>, OpNode, 288 V4F32Regs, f32>; 289 def V2F32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f32")>, OpNode, 290 V2F32Regs, f32>; 291} 292 293multiclass IntUnaryVOp<string asmstr, PatFrag OpNode, 294 NVPTXInst i64op=NOP, NVPTXInst i32op=NOP, 295 NVPTXInst i16op=NOP, NVPTXInst i8op=NOP> { 296 def V2I64 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "64")>, OpNode, 297 V2I64Regs, i64op>; 298 def V4I32 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "32")>, OpNode, 299 V4I32Regs, i32op>; 300 def V2I32 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "32")>, OpNode, 301 V2I32Regs, i32op>; 302 def V4I16 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode, 303 V4I16Regs, i16op>; 304 def V2I16 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode, 305 V2I16Regs, i16op>; 306 def V4I8 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode, 307 V4I8Regs, i8op>; 308 def V2I8 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode, 309 V2I8Regs, i8op>; 310} 311 312 313// Integer Arithmetic 314let VecInstType=isVecOther.Value in { 315defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>; 316defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>; 317 318def AddCCV4I32 : VecBinaryOp<V4AsmStr<"add.cc.s32">, addc, V4I32Regs, 319 ADDCCi32rr>; 320def AddCCV2I32 : VecBinaryOp<V2AsmStr<"add.cc.s32">, addc, V2I32Regs, 321 ADDCCi32rr>; 322def SubCCV4I32 : VecBinaryOp<V4AsmStr<"sub.cc.s32">, subc, V4I32Regs, 323 SUBCCi32rr>; 324def SubCCV2I32 : VecBinaryOp<V2AsmStr<"sub.cc.s32">, subc, V2I32Regs, 325 SUBCCi32rr>; 326def AddCCCV4I32 : 
VecBinaryOp<V4AsmStr<"addc.cc.s32">, adde, V4I32Regs, 327 ADDCCCi32rr>; 328def AddCCCV2I32 : VecBinaryOp<V2AsmStr<"addc.cc.s32">, adde, V2I32Regs, 329 ADDCCCi32rr>; 330def SubCCCV4I32 : VecBinaryOp<V4AsmStr<"subc.cc.s32">, sube, V4I32Regs, 331 SUBCCCi32rr>; 332def SubCCCV2I32 : VecBinaryOp<V2AsmStr<"subc.cc.s32">, sube, V2I32Regs, 333 SUBCCCi32rr>; 334 335def ShiftLV2I64 : VecShiftOp<V2AsmStr<"shl.b64">, shl, V2I64Regs, V2I32Regs, 336 SHLi64rr>; 337def ShiftLV2I32 : VecShiftOp<V2AsmStr<"shl.b32">, shl, V2I32Regs, V2I32Regs, 338 SHLi32rr>; 339def ShiftLV4I32 : VecShiftOp<V4AsmStr<"shl.b32">, shl, V4I32Regs, V4I32Regs, 340 SHLi32rr>; 341def ShiftLV2I16 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I16Regs, V2I32Regs, 342 SHLi16rr>; 343def ShiftLV4I16 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I16Regs, V4I32Regs, 344 SHLi16rr>; 345def ShiftLV2I8 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I8Regs, V2I32Regs, 346 SHLi8rr>; 347def ShiftLV4I8 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I8Regs, V4I32Regs, 348 SHLi8rr>; 349} 350 351// cvt to v*i32, helpers for shift 352class CVTtoVeci32<NVPTXRegClass inclass, NVPTXRegClass outclass, string asmstr, 353 NVPTXInst sInst=NOP> : 354 NVPTXVecInst<(outs outclass:$d), (ins inclass:$s), asmstr, [], sInst>; 355 356class VecCVTStrHelper<string op, string dest, string src> { 357 string s=!strconcat(op, !strconcat("\t", 358 !strconcat(dest, !strconcat(", ", !strconcat(src, ";"))))); 359} 360 361class Vec2CVTStr<string op> { 362 string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s, 363 !strconcat("\n\t", VecCVTStrHelper<op, "${d}_1", "${s}_1">.s)); 364} 365 366class Vec4CVTStr<string op> { 367 string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s, 368 !strconcat("\n\t", 369 !strconcat(VecCVTStrHelper<op, "${d}_1", "${s}_1">.s, 370 !strconcat("\n\t", 371 !strconcat(VecCVTStrHelper<op, "${d}_2", "${s}_2">.s, 372 !strconcat("\n\t", VecCVTStrHelper<op, "${d}_3", "${s}_3">.s)))))); 373} 374 375let VecInstType=isVecOther.Value in { 376def 
CVTv2i8tov2i32 : CVTtoVeci32<V2I8Regs, V2I32Regs, 377 Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>; 378def CVTv2i16tov2i32 : CVTtoVeci32<V2I16Regs, V2I32Regs, 379 Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>; 380def CVTv4i8tov4i32 : CVTtoVeci32<V4I8Regs, V4I32Regs, 381 Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>; 382def CVTv4i16tov4i32 : CVTtoVeci32<V4I16Regs, V4I32Regs, 383 Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>; 384def CVTv2i64tov2i32 : CVTtoVeci32<V2I64Regs, V2I32Regs, 385 Vec2CVTStr<"cvt.u32.u64">.s, TRUNC_64to32>; 386} 387 388def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2), 389 (ShiftLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>; 390def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2), 391 (ShiftLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; 392def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2), 393 (ShiftLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; 394 395def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2), 396 (ShiftLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; 397def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2), 398 (ShiftLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>; 399 400let VecInstType=isVecOther.Value in { 401def ShiftRAV2I64 : VecShiftOp<V2AsmStr<"shr.s64">, sra, V2I64Regs, V2I32Regs, 402 SRAi64rr>; 403def ShiftRAV2I32 : VecShiftOp<V2AsmStr<"shr.s32">, sra, V2I32Regs, V2I32Regs, 404 SRAi32rr>; 405def ShiftRAV4I32 : VecShiftOp<V4AsmStr<"shr.s32">, sra, V4I32Regs, V4I32Regs, 406 SRAi32rr>; 407def ShiftRAV2I16 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I16Regs, V2I32Regs, 408 SRAi16rr>; 409def ShiftRAV4I16 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I16Regs, V4I32Regs, 410 SRAi16rr>; 411def ShiftRAV2I8 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I8Regs, V2I32Regs, 412 SRAi8rr>; 413def ShiftRAV4I8 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I8Regs, V4I32Regs, 414 SRAi8rr>; 415 416def ShiftRLV2I64 : VecShiftOp<V2AsmStr<"shr.u64">, srl, V2I64Regs, V2I32Regs, 417 SRLi64rr>; 418def 
ShiftRLV2I32 : VecShiftOp<V2AsmStr<"shr.u32">, srl, V2I32Regs, V2I32Regs, 419 SRLi32rr>; 420def ShiftRLV4I32 : VecShiftOp<V4AsmStr<"shr.u32">, srl, V4I32Regs, V4I32Regs, 421 SRLi32rr>; 422def ShiftRLV2I16 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I16Regs, V2I32Regs, 423 SRLi16rr>; 424def ShiftRLV4I16 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I16Regs, V4I32Regs, 425 SRLi16rr>; 426def ShiftRLV2I8 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I8Regs, V2I32Regs, 427 SRLi8rr>; 428def ShiftRLV4I8 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I8Regs, V4I32Regs, 429 SRLi8rr>; 430 431defm VMult : IntBinVOp<"mul.lo.s", mul, MULTi64rr, MULTi32rr, MULTi16rr, 432 MULTi8rr>; 433defm VMultHS : IntBinVOp<"mul.hi.s", mulhs, MULTHSi64rr, MULTHSi32rr, 434 MULTHSi16rr, 435 MULTHSi8rr>; 436defm VMultHU : IntBinVOp<"mul.hi.u", mulhu, MULTHUi64rr, MULTHUi32rr, 437 MULTHUi16rr, 438 MULTHUi8rr>; 439defm VSDiv : IntBinVOp<"div.s", sdiv, SDIVi64rr, SDIVi32rr, SDIVi16rr, 440 SDIVi8rr>; 441defm VUDiv : IntBinVOp<"div.u", udiv, UDIVi64rr, UDIVi32rr, UDIVi16rr, 442 UDIVi8rr>; 443defm VSRem : IntBinVOp<"rem.s", srem, SREMi64rr, SREMi32rr, SREMi16rr, 444 SREMi8rr>; 445defm VURem : IntBinVOp<"rem.u", urem, UREMi64rr, UREMi32rr, UREMi16rr, 446 UREMi8rr>; 447} 448 449def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2), 450 (ShiftRAV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>; 451def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2), 452 (ShiftRAV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; 453def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2), 454 (ShiftRAV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; 455 456def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2), 457 (ShiftRAV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; 458def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2), 459 (ShiftRAV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>; 460 461def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2), 462 (ShiftRLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>; 463def : 
Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2), 464 (ShiftRLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; 465def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2), 466 (ShiftRLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; 467 468def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2), 469 (ShiftRLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; 470def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2), 471 (ShiftRLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>; 472 473multiclass VMAD<string asmstr, NVPTXRegClass regclassv4, 474 NVPTXRegClass regclassv2, 475 SDNode an=add, SDNode mn=mul, NVPTXInst sop=NOP, 476 Predicate Pred> { 477 def V4 : NVPTXVecInst<(outs regclassv4:$dst), 478 (ins regclassv4:$a, regclassv4:$b, regclassv4:$c), 479 V4MADStr<asmstr>.s, 480 [(set regclassv4:$dst, 481 (an (mn regclassv4:$a, regclassv4:$b), regclassv4:$c))], 482 sop>, 483 Requires<[Pred]>; 484 def V2 : NVPTXVecInst<(outs regclassv2:$dst), 485 (ins regclassv2:$a, regclassv2:$b, regclassv2:$c), 486 V2MADStr<asmstr>.s, 487 [(set regclassv2:$dst, 488 (an (mn regclassv2:$a, regclassv2:$b), regclassv2:$c))], 489 sop>, 490 Requires<[Pred]>; 491} 492 493multiclass VMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP, 494 Predicate Pred> { 495 def V2 : NVPTXVecInst<(outs regclass:$dst), 496 (ins regclass:$a, regclass:$b, regclass:$c), 497 V2MADStr<asmstr>.s, 498 [(set regclass:$dst, (add 499 (mul regclass:$a, regclass:$b), regclass:$c))], sop>, 500 Requires<[Pred]>; 501} 502multiclass VFMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP, 503 Predicate Pred> { 504 def V2 : NVPTXVecInst<(outs regclass:$dst), 505 (ins regclass:$a, regclass:$b, regclass:$c), 506 V2MADStr<asmstr>.s, 507 [(set regclass:$dst, (fadd 508 (fmul regclass:$a, regclass:$b), regclass:$c))], sop>, 509 Requires<[Pred]>; 510} 511 512let VecInstType=isVecOther.Value in { 513defm I8MAD : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs, add, mul, MAD8rrr, true>; 514defm I16MAD : 
VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs, add, mul, MAD16rrr, 515 true>; 516defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs, add, mul, MAD32rrr, 517 true>; 518defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>; 519 520defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>; 521 522defm VAddf : FloatBinVOp<"add.", fadd, FADDf64rr, FADDf32rr, FADDf32rr_ftz>; 523defm VSubf : FloatBinVOp<"sub.", fsub, FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>; 524defm VMulf : FloatBinVOp<"mul.", fmul, FMULf64rr, FMULf32rr, FMULf32rr_ftz>; 525 526defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul, 527 FMAD32_ftzrrr, doFMADF32_ftz>; 528defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul, 529 FMA32_ftzrrr, doFMAF32_ftz>; 530defm F32MAD : VMAD<"mad.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMAD32rrr, 531 doFMADF32>; 532defm F32FMA : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMA32rrr, 533 doFMAF32>; 534defm F64FMA : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>; 535} 536 537let VecInstType=isVecOther.Value in { 538def V4F32Div_prec_ftz : VecBinaryOp<V4AsmStr<"div.rn.ftz.f32">, fdiv, V4F32Regs, 539 FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>; 540def V2F32Div_prec_ftz : VecBinaryOp<V2AsmStr<"div.rn.ftz.f32">, fdiv, V2F32Regs, 541 FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>; 542def V4F32Div_prec : VecBinaryOp<V4AsmStr<"div.rn.f32">, fdiv, V4F32Regs, 543 FDIV32rr_prec>, Requires<[reqPTX20]>; 544def V2F32Div_prec : VecBinaryOp<V2AsmStr<"div.rn.f32">, fdiv, V2F32Regs, 545 FDIV32rr_prec>, Requires<[reqPTX20]>; 546def V2F32Div_ftz : VecBinaryOp<V2AsmStr<"div.full.ftz.f32">, fdiv, V2F32Regs, 547 FDIV32rr_ftz>, Requires<[doF32FTZ]>; 548def V4F32Div_ftz : VecBinaryOp<V4AsmStr<"div.full.ftz.f32">, fdiv, V4F32Regs, 549 FDIV32rr_ftz>, Requires<[doF32FTZ]>; 550def V2F32Div : VecBinaryOp<V2AsmStr<"div.full.f32">, fdiv, V2F32Regs, FDIV32rr>; 551def V4F32Div : VecBinaryOp<V4AsmStr<"div.full.f32">, fdiv, 
V4F32Regs, FDIV32rr>; 552def V2F64Div : VecBinaryOp<V2AsmStr<"div.rn.f64">, fdiv, V2F64Regs, FDIV64rr>; 553} 554 555def fnegpat : PatFrag<(ops node:$in), (fneg node:$in)>; 556 557let VecInstType=isVecOther.Value in { 558def VNegv2f32_ftz : VecUnaryOp<V2UnaryStr<"neg.ftz.f32">, fnegpat, V2F32Regs, 559 FNEGf32_ftz>, Requires<[doF32FTZ]>; 560def VNegv4f32_ftz : VecUnaryOp<V4UnaryStr<"neg.ftz.f32">, fnegpat, V4F32Regs, 561 FNEGf32_ftz>, Requires<[doF32FTZ]>; 562def VNegv2f32 : VecUnaryOp<V2UnaryStr<"neg.f32">, fnegpat, V2F32Regs, FNEGf32>; 563def VNegv4f32 : VecUnaryOp<V4UnaryStr<"neg.f32">, fnegpat, V4F32Regs, FNEGf32>; 564def VNegv2f64 : VecUnaryOp<V2UnaryStr<"neg.f64">, fnegpat, V2F64Regs, FNEGf64>; 565 566// Logical Arithmetic 567defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>; 568defm VOr : IntBinVOp<"or.b", or, ORb64rr, ORb32rr, ORb16rr, ORb8rr>; 569defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>; 570 571defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>; 572} 573 574 575multiclass V2FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> { 576 def : Pat<(fsub V2F32Regs:$a, (fmul V2F32Regs:$b, V2F32Regs:$c)), 577 (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c, V2F32Regs:$a)>, 578 Requires<[Pred]>; 579 580 def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b), V2F32Regs:$c), 581 (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>, 582 Requires<[Pred]>; 583} 584 585defm V2FMAF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32FMA_ftzV2, doFMAF32AGG_ftz>; 586defm V2FMADF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32MAD_ftzV2, doFMADF32_ftz>; 587defm V2FMAF32ext : V2FPCONTRACT32_SUB_PAT<F32FMAV2, doFMAF32AGG>; 588defm V2FMADF32ext : V2FPCONTRACT32_SUB_PAT<F32MADV2, doFMADF32>; 589 590multiclass V4FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> { 591 def : Pat<(fsub V4F32Regs:$a, (fmul V4F32Regs:$b, V4F32Regs:$c)), 592 (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c, V4F32Regs:$a)>, 593 Requires<[Pred]>; 594 595 def : 
Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b), V4F32Regs:$c), 596 (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>, 597 Requires<[Pred]>; 598} 599 600defm V4FMAF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32FMA_ftzV4, doFMAF32AGG_ftz>; 601defm V4FMADF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32MAD_ftzV4, doFMADF32_ftz>; 602defm V4FMAF32ext : V4FPCONTRACT32_SUB_PAT<F32FMAV4, doFMAF32AGG>; 603defm V4FMADF32ext : V4FPCONTRACT32_SUB_PAT<F32MADV4, doFMADF32>; 604 605multiclass V2FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> { 606 def : Pat<(fsub V2F64Regs:$a, (fmul V2F64Regs:$b, V2F64Regs:$c)), 607 (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>, 608 Requires<[Pred]>; 609 610 def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b), V2F64Regs:$c), 611 (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>, 612 Requires<[Pred]>; 613} 614 615defm V2FMAF64ext : V2FPCONTRACT64_SUB_PAT<F64FMAV2, doFMAF64AGG>; 616 617class VecModStr<string vecsize, string elem, string extra, string l=""> 618{ 619 string t1 = !strconcat("${c", elem); 620 string t2 = !strconcat(t1, ":vecv"); 621 string t3 = !strconcat(t2, vecsize); 622 string t4 = !strconcat(t3, extra); 623 string t5 = !strconcat(t4, l); 624 string s = !strconcat(t5, "}"); 625} 626class ShuffleOneLine<string vecsize, string elem, string type> 627{ 628 string t1 = VecModStr<vecsize, elem, "comm", "1">.s; 629 string t2 = !strconcat(t1, "mov."); 630 string t3 = !strconcat(t2, type); 631 string t4 = !strconcat(t3, " \t${dst}_"); 632 string t5 = !strconcat(t4, elem); 633 string t6 = !strconcat(t5, ", $src1"); 634 string t7 = !strconcat(t6, VecModStr<vecsize, elem, "pos">.s); 635 string t8 = !strconcat(t7, ";\n\t"); 636 string t9 = !strconcat(t8, VecModStr<vecsize, elem, "comm", "2">.s); 637 string t10 = !strconcat(t9, "mov."); 638 string t11 = !strconcat(t10, type); 639 string t12 = !strconcat(t11, " \t${dst}_"); 640 string t13 = !strconcat(t12, elem); 641 string t14 = !strconcat(t13, ", $src2"); 642 string t15 = 
!strconcat(t14, VecModStr<vecsize, elem, "pos">.s); 643 string s = !strconcat(t15, ";"); 644} 645class ShuffleAsmStr2<string type> 646{ 647 string t1 = ShuffleOneLine<"2", "0", type>.s; 648 string t2 = !strconcat(t1, "\n\t"); 649 string s = !strconcat(t2, ShuffleOneLine<"2", "1", type>.s); 650} 651class ShuffleAsmStr4<string type> 652{ 653 string t1 = ShuffleOneLine<"4", "0", type>.s; 654 string t2 = !strconcat(t1, "\n\t"); 655 string t3 = !strconcat(t2, ShuffleOneLine<"4", "1", type>.s); 656 string t4 = !strconcat(t3, "\n\t"); 657 string t5 = !strconcat(t4, ShuffleOneLine<"4", "2", type>.s); 658 string t6 = !strconcat(t5, "\n\t"); 659 string s = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s); 660} 661 662let hasSideEffects=0, VecInstType=isVecShuffle.Value in { 663def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst), 664 (ins V4F32Regs:$src1, V4F32Regs:$src2, 665 i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), 666 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", 667 ShuffleAsmStr4<"f32">.s), 668 [], FMOV32rr>; 669 670def VecShuffle_v4i32 : NVPTXVecInst<(outs V4I32Regs:$dst), 671 (ins V4I32Regs:$src1, V4I32Regs:$src2, 672 i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), 673 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", 674 ShuffleAsmStr4<"u32">.s), 675 [], IMOV32rr>; 676 677def VecShuffle_v4i16 : NVPTXVecInst<(outs V4I16Regs:$dst), 678 (ins V4I16Regs:$src1, V4I16Regs:$src2, 679 i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), 680 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", 681 ShuffleAsmStr4<"u16">.s), 682 [], IMOV16rr>; 683 684def VecShuffle_v4i8 : NVPTXVecInst<(outs V4I8Regs:$dst), 685 (ins V4I8Regs:$src1, V4I8Regs:$src2, 686 i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), 687 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", 688 ShuffleAsmStr4<"u16">.s), 689 [], IMOV8rr>; 690 691def VecShuffle_v2f32 : NVPTXVecInst<(outs V2F32Regs:$dst), 692 (ins V2F32Regs:$src1, V2F32Regs:$src2, 693 i8imm:$c0, 
i8imm:$c1), 694 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", 695 ShuffleAsmStr2<"f32">.s), 696 [], FMOV32rr>; 697 698def VecShuffle_v2i32 : NVPTXVecInst<(outs V2I32Regs:$dst), 699 (ins V2I32Regs:$src1, V2I32Regs:$src2, 700 i8imm:$c0, i8imm:$c1), 701 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", 702 ShuffleAsmStr2<"u32">.s), 703 [], IMOV32rr>; 704 705def VecShuffle_v2i8 : NVPTXVecInst<(outs V2I8Regs:$dst), 706 (ins V2I8Regs:$src1, V2I8Regs:$src2, 707 i8imm:$c0, i8imm:$c1), 708 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", 709 ShuffleAsmStr2<"u16">.s), 710 [], IMOV8rr>; 711 712def VecShuffle_v2i16 : NVPTXVecInst<(outs V2I16Regs:$dst), 713 (ins V2I16Regs:$src1, V2I16Regs:$src2, 714 i8imm:$c0, i8imm:$c1), 715 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", 716 ShuffleAsmStr2<"u16">.s), 717 [], IMOV16rr>; 718 719def VecShuffle_v2f64 : NVPTXVecInst<(outs V2F64Regs:$dst), 720 (ins V2F64Regs:$src1, V2F64Regs:$src2, 721 i8imm:$c0, i8imm:$c1), 722 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", 723 ShuffleAsmStr2<"f64">.s), 724 [], FMOV64rr>; 725 726def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst), 727 (ins V2I64Regs:$src1, V2I64Regs:$src2, 728 i8imm:$c0, i8imm:$c1), 729 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", 730 ShuffleAsmStr2<"u64">.s), 731 [], IMOV64rr>; 732} 733 734def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{ 735 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 736 return CurDAG->getTargetConstant(SVOp->getMaskElt(0), SDLoc(N), MVT::i32); 737}]>; 738def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{ 739 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 740 return CurDAG->getTargetConstant(SVOp->getMaskElt(1), SDLoc(N), MVT::i32); 741}]>; 742def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{ 743 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 744 return CurDAG->getTargetConstant(SVOp->getMaskElt(2), SDLoc(N), MVT::i32); 745}]>; 746def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{ 
747 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 748 return CurDAG->getTargetConstant(SVOp->getMaskElt(3), SDLoc(N), MVT::i32); 749}]>; 750 751// The spurious call is here to silence a compiler warning about N being 752// unused. 753def vec_shuf : PatFrag<(ops node:$lhs, node:$rhs), 754 (vector_shuffle node:$lhs, node:$rhs), 755 [{ N->getGluedNode(); return true; }]>; 756 757def : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)), 758 (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2, 759 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; 760 761def : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)), 762 (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2, 763 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), 764 (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; 765 766def : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)), 767 (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2, 768 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; 769 770def : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)), 771 (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2, 772 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; 773 774def : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)), 775 (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2, 776 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), 777 (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; 778 779def : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)), 780 (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2, 781 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; 782 783def : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)), 784 (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2, 785 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), 786 (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; 787 788def : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)), 789 (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2, 790 (ShuffleMask0 
node:$op), (ShuffleMask1 node:$op))>; 791 792def : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)), 793 (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2, 794 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), 795 (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; 796 797def : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)), 798 (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2, 799 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; 800 801class Build_Vector2<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass, 802 NVPTXInst si> 803 : NVPTXVecInst<(outs vclass:$dst), 804 (ins sclass:$a1, sclass:$a2), 805 !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"), 806 [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))], 807 si>; 808class Build_Vector4<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass, 809 NVPTXInst si> 810 : NVPTXVecInst<(outs vclass:$dst), 811 (ins sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4), 812 !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"), 813 [(set vclass:$dst, 814 (build_vector sclass:$a1, sclass:$a2, 815 sclass:$a3, sclass:$a4))], si>; 816 817let isAsCheapAsAMove=1, VecInstType=isVecBuild.Value in { 818def Build_Vector2_f32 : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs, 819 FMOV32rr>; 820def Build_Vector2_f64 : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs, 821 FMOV64rr>; 822 823def Build_Vector2_i32 : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs, 824 IMOV32rr>; 825def Build_Vector2_i64 : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs, 826 IMOV64rr>; 827def Build_Vector2_i16 : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs, 828 IMOV16rr>; 829def Build_Vector2_i8 : Build_Vector2<"mov.v2.u16", V2I8Regs, Int8Regs, 830 IMOV8rr>; 831 832def Build_Vector4_f32 : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs, 833 FMOV32rr>; 834 835def Build_Vector4_i32 : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs, 836 IMOV32rr>; 837def Build_Vector4_i16 : Build_Vector4<"mov.v4.u16", 
V4I16Regs, Int16Regs,
                                      IMOV16rr>;
def Build_Vector4_i8 : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs,
                                     IMOV8rr>;
}

// Whole-vector register move.  The pattern list is empty, so these records
// are not matched by any selection pattern defined here.
//   asmstr - opcode text
//   vclass - register class of both source and destination
//   sop    - instruction recorded as NVPTXVecInst::scalarInst (default NOP)
class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
  : NVPTXVecInst<(outs vclass:$dst), (ins vclass:$src),
                 !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
                 [], sop>;

let isAsCheapAsAMove=1, hasSideEffects=0, IsSimpleMove=1,
    VecInstType=isVecOther.Value in {
def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;

def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>;
def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>;

def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>;
def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>;

def V4i8Mov : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>;
def V2i8Mov : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>;

def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>;
def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>;
}

// Subvector extraction.
// extract_subvec wraps ISD::EXTRACT_SUBVECTOR: one result, two operands, the
// second operand constrained to the pointer type.
def extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR",
                            SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>;

// A 2-element half of a 4-element vector is produced by extracting the two
// elements individually and rebuilding a 2-element vector.  Start index 0
// takes elements 0 and 1; start index 2 takes elements 2 and 3.
def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)),
          (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 0),
                             (V4f32Extract V4F32Regs:$src, 1))>;
def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)),
          (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 2),
                             (V4f32Extract V4F32Regs:$src, 3))>;
def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)),
          (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 0),
                             (V4i32Extract V4I32Regs:$src, 1))>;
def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)),
          (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 2),
                             (V4i32Extract V4I32Regs:$src, 3))>;
def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)),
          (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 0),
                             (V4i16Extract V4I16Regs:$src,
1))>;
def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)),
          (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 2),
                             (V4i16Extract V4I16Regs:$src, 3))>;
def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)),
          (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 0),
                            (V4i8Extract V4I8Regs:$src, 1))>;
def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)),
          (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 2),
                            (V4i8Extract V4I8Regs:$src, 3))>;

// Select instructions

// Assembles the text of one selp line in field `s`:
//   selp.<type> \t${dst}_<pos>, ${src1}_<pos>, ${src2}_<pos>, $p;
// Fields t1..t7 hold the intermediate concatenations.
class Select_OneLine<string type, string pos> {
  string t1 = !strconcat("selp.", type);
  string t2 = !strconcat(t1, " \t${dst}_");
  string t3 = !strconcat(t2, pos);
  string t4 = !strconcat(t3, ", ${src1}_");
  string t5 = !strconcat(t4, pos);
  string t6 = !strconcat(t5, ", ${src2}_");
  string t7 = !strconcat(t6, pos);
  string s = !strconcat(t7, ", $p;");
}

// Two selp lines (positions "0" and "1") joined by "\n\t"; result in `s`.
class Select_Str2<string type> {
  string t1 = Select_OneLine<type, "0">.s;
  string t2 = !strconcat(t1, "\n\t");
  string s = !strconcat(t2, Select_OneLine<type, "1">.s);
}

// Four selp lines (positions "0" through "3") joined by "\n\t"; result in `s`.
class Select_Str4<string type> {
  string t1 = Select_OneLine<type, "0">.s;
  string t2 = !strconcat(t1, "\n\t");
  string t3 = !strconcat(t2, Select_OneLine<type, "1">.s);
  string t4 = !strconcat(t3, "\n\t");
  string t5 = !strconcat(t4, Select_OneLine<type, "2">.s);
  string t6 = !strconcat(t5, "\n\t");
  string s = !strconcat(t6, Select_OneLine<type, "3">.s);
}

// Vector select on a single Int1Regs predicate $p.
//   vclass - register class of the result and of both value operands
//   asmstr - complete asm text (a Select_Str2/Select_Str4 string)
//   sop    - instruction recorded as NVPTXVecInst::scalarInst
class Vec_Select<NVPTXRegClass vclass, string asmstr, NVPTXInst sop>
  : NVPTXVecInst<(outs vclass:$dst),
                 (ins vclass:$src1, vclass:$src2, Int1Regs:$p),
                 asmstr,
                 [(set vclass:$dst, (select Int1Regs:$p, vclass:$src1,
                                            vclass:$src2))],
                 sop>;

let VecInstType=isVecOther.Value in {
def V2I64_Select : Vec_Select<V2I64Regs, Select_Str2<"b64">.s, SELECTi64rr>;
def V4I32_Select : Vec_Select<V4I32Regs, Select_Str4<"b32">.s, SELECTi32rr>;
def V2I32_Select : Vec_Select<V2I32Regs, Select_Str2<"b32">.s,
SELECTi32rr>;
def V4I16_Select : Vec_Select<V4I16Regs, Select_Str4<"b16">.s, SELECTi16rr>;
def V2I16_Select : Vec_Select<V2I16Regs, Select_Str2<"b16">.s, SELECTi16rr>;
// The i8 selects use the "b16" type suffix in their asm text.
def V4I8_Select : Vec_Select<V4I8Regs, Select_Str4<"b16">.s, SELECTi8rr>;
def V2I8_Select : Vec_Select<V2I8Regs, Select_Str2<"b16">.s, SELECTi8rr>;

def V2F64_Select : Vec_Select<V2F64Regs, Select_Str2<"f64">.s, SELECTf64rr>;
def V4F32_Select : Vec_Select<V4F32Regs, Select_Str4<"f32">.s, SELECTf32rr>;
def V2F32_Select : Vec_Select<V2F32Regs, Select_Str2<"f32">.s, SELECTf32rr>;
}

// Comparison instructions

// setcc convenience fragments: each one is setcc with a fixed condition code.

// SETO* condition codes, plus SETO and SETUO.
def vsetoeq : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETOEQ)>;
def vsetogt : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETOGT)>;
def vsetoge : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETOGE)>;
def vsetolt : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETOLT)>;
def vsetole : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETOLE)>;
def vsetone : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETONE)>;
def vseto : PatFrag<(ops node:$lhs, node:$rhs),
                    (setcc node:$lhs, node:$rhs, SETO)>;
def vsetuo : PatFrag<(ops node:$lhs, node:$rhs),
                     (setcc node:$lhs, node:$rhs, SETUO)>;

// SETU* condition codes.
def vsetueq : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETUEQ)>;
def vsetugt : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETUGT)>;
def vsetuge : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETUGE)>;
def vsetult : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETULT)>;
def vsetule : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETULE)>;
def vsetune : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETUNE)>;
def
vseteq : PatFrag<(ops node:$lhs, node:$rhs),
                     (setcc node:$lhs, node:$rhs, SETEQ)>;
def vsetgt : PatFrag<(ops node:$lhs, node:$rhs),
                     (setcc node:$lhs, node:$rhs, SETGT)>;
def vsetge : PatFrag<(ops node:$lhs, node:$rhs),
                     (setcc node:$lhs, node:$rhs, SETGE)>;
def vsetlt : PatFrag<(ops node:$lhs, node:$rhs),
                     (setcc node:$lhs, node:$rhs, SETLT)>;
def vsetle : PatFrag<(ops node:$lhs, node:$rhs),
                     (setcc node:$lhs, node:$rhs, SETLE)>;
def vsetne : PatFrag<(ops node:$lhs, node:$rhs),
                     (setcc node:$lhs, node:$rhs, SETNE)>;

// Vector comparison.  The asm string is the literal "Unsupported".
//   op        - setcc fragment to match
//   outrclass - register class of the result vector
//   inrclass  - register class of both compared operands
//   sop       - instruction recorded as NVPTXVecInst::scalarInst
class Vec_Compare<PatFrag op, NVPTXRegClass outrclass, NVPTXRegClass inrclass,
                  NVPTXInst sop>
  : NVPTXVecInst<(outs outrclass:$dst),
                 (ins inrclass:$a, inrclass:$b),
                 "Unsupported",
                 [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))],
                 sop>;

// Instantiates Vec_Compare for every integer vector type; result and operand
// register classes are identical.  inst8/16/32/64 is the scalarInst used for
// the corresponding element width.
multiclass Vec_Compare_All<PatFrag op,
                           NVPTXInst inst8,
                           NVPTXInst inst16,
                           NVPTXInst inst32,
                           NVPTXInst inst64>
{
  def V2I8 : Vec_Compare<op, V2I8Regs, V2I8Regs, inst8>;
  def V4I8 : Vec_Compare<op, V4I8Regs, V4I8Regs, inst8>;
  def V2I16 : Vec_Compare<op, V2I16Regs, V2I16Regs, inst16>;
  def V4I16 : Vec_Compare<op, V4I16Regs, V4I16Regs, inst16>;
  def V2I32 : Vec_Compare<op, V2I32Regs, V2I32Regs, inst32>;
  def V4I32 : Vec_Compare<op, V4I32Regs, V4I32Regs, inst32>;
  def V2I64 : Vec_Compare<op, V2I64Regs, V2I64Regs, inst64>;
}

// Integer vector comparisons.
let VecInstType=isVecOther.Value in {
  defm VecSGT : Vec_Compare_All<vsetgt, ISetSGTi8rr_toi8, ISetSGTi16rr_toi16,
                                ISetSGTi32rr_toi32, ISetSGTi64rr_toi64>;
  defm VecUGT : Vec_Compare_All<vsetugt, ISetUGTi8rr_toi8, ISetUGTi16rr_toi16,
                                ISetUGTi32rr_toi32, ISetUGTi64rr_toi64>;
  defm VecSLT : Vec_Compare_All<vsetlt, ISetSLTi8rr_toi8, ISetSLTi16rr_toi16,
                                ISetSLTi32rr_toi32, ISetSLTi64rr_toi64>;
  defm VecULT : Vec_Compare_All<vsetult, ISetULTi8rr_toi8, ISetULTi16rr_toi16,
                                ISetULTi32rr_toi32, ISetULTi64rr_toi64>;
  defm VecSGE :
Vec_Compare_All<vsetge, ISetSGEi8rr_toi8, ISetSGEi16rr_toi16,
                                ISetSGEi32rr_toi32, ISetSGEi64rr_toi64>;
  defm VecUGE : Vec_Compare_All<vsetuge, ISetUGEi8rr_toi8, ISetUGEi16rr_toi16,
                                ISetUGEi32rr_toi32, ISetUGEi64rr_toi64>;
  defm VecSLE : Vec_Compare_All<vsetle, ISetSLEi8rr_toi8, ISetSLEi16rr_toi16,
                                ISetSLEi32rr_toi32, ISetSLEi64rr_toi64>;
  defm VecULE : Vec_Compare_All<vsetule, ISetULEi8rr_toi8, ISetULEi16rr_toi16,
                                ISetULEi32rr_toi32, ISetULEi64rr_toi64>;
  defm VecSEQ : Vec_Compare_All<vseteq, ISetSEQi8rr_toi8, ISetSEQi16rr_toi16,
                                ISetSEQi32rr_toi32, ISetSEQi64rr_toi64>;
  defm VecUEQ : Vec_Compare_All<vsetueq, ISetUEQi8rr_toi8, ISetUEQi16rr_toi16,
                                ISetUEQi32rr_toi32, ISetUEQi64rr_toi64>;
  defm VecSNE : Vec_Compare_All<vsetne, ISetSNEi8rr_toi8, ISetSNEi16rr_toi16,
                                ISetSNEi32rr_toi32, ISetSNEi64rr_toi64>;
  defm VecUNE : Vec_Compare_All<vsetune, ISetUNEi8rr_toi8, ISetUNEi16rr_toi16,
                                ISetUNEi32rr_toi32, ISetUNEi64rr_toi64>;
}

// Instantiates Vec_Compare for the floating-point vector types.  The result
// lives in the integer vector class with the same element count and width
// (V2F32 -> V2I32, V4F32 -> V4I32, V2F64 -> V2I64).
multiclass FVec_Compare_All<PatFrag op,
                            NVPTXInst instf32,
                            NVPTXInst instf64>
{
  def V2F32 : Vec_Compare<op, V2I32Regs, V2F32Regs, instf32>;
  def V4F32 : Vec_Compare<op, V4I32Regs, V4F32Regs, instf32>;
  def V2F64 : Vec_Compare<op, V2I64Regs, V2F64Regs, instf64>;
}

// Floating-point vector comparisons.
let VecInstType=isVecOther.Value in {
  defm FVecGT : FVec_Compare_All<vsetogt, FSetGTf32rr_toi32,
                                 FSetGTf64rr_toi64>;
  defm FVecLT : FVec_Compare_All<vsetolt, FSetLTf32rr_toi32,
                                 FSetLTf64rr_toi64>;
  defm FVecGE : FVec_Compare_All<vsetoge, FSetGEf32rr_toi32,
                                 FSetGEf64rr_toi64>;
  defm FVecLE : FVec_Compare_All<vsetole, FSetLEf32rr_toi32,
                                 FSetLEf64rr_toi64>;
  defm FVecEQ : FVec_Compare_All<vsetoeq, FSetEQf32rr_toi32,
                                 FSetEQf64rr_toi64>;
  defm FVecNE : FVec_Compare_All<vsetone, FSetNEf32rr_toi32,
                                 FSetNEf64rr_toi64>;

  defm FVecUGT : FVec_Compare_All<vsetugt, FSetUGTf32rr_toi32,
                                  FSetUGTf64rr_toi64>;
  defm
FVecULT : FVec_Compare_All<vsetult, FSetULTf32rr_toi32,
                                  FSetULTf64rr_toi64>;
  defm FVecUGE : FVec_Compare_All<vsetuge, FSetUGEf32rr_toi32,
                                  FSetUGEf64rr_toi64>;
  defm FVecULE : FVec_Compare_All<vsetule, FSetULEf32rr_toi32,
                                  FSetULEf64rr_toi64>;
  defm FVecUEQ : FVec_Compare_All<vsetueq, FSetUEQf32rr_toi32,
                                  FSetUEQf64rr_toi64>;
  defm FVecUNE : FVec_Compare_All<vsetune, FSetUNEf32rr_toi32,
                                  FSetUNEf64rr_toi64>;

  defm FVecNUM : FVec_Compare_All<vseto, FSetNUMf32rr_toi32,
                                  FSetNUMf64rr_toi64>;
  defm FVecNAN : FVec_Compare_All<vsetuo, FSetNANf32rr_toi32,
                                  FSetNANf64rr_toi64>;
}

// Scalarized parameter / return-value accesses.  These records take the
// individual scalar registers of a vector and have no selection pattern.
//   regclass - register class of every scalar register operand
//   opstr    - suffix appended to "ld.param" / "st.param" (e.g. ".v4.b32")

// ld.param of four scalars from [retval0+$b].  Operand $a is declared but is
// not referenced by the asm string.
class LoadParamScalar4Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4),
                (ins i32imm:$a, i32imm:$b),
                !strconcat(!strconcat("ld.param", opstr),
                           "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"), []>;

// ld.param of two scalars from [retval0+$b].  Operand $a is declared but is
// not referenced by the asm string.
class LoadParamScalar2Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs regclass:$d1, regclass:$d2),
                (ins i32imm:$a, i32imm:$b),
                !strconcat(!strconcat("ld.param", opstr),
                           "\t{{$d1, $d2}}, [retval0+$b];"), []>;


// st.param of four scalars to [param$a+$b].
class StoreParamScalar4Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs),
                (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
                     i32imm:$a, i32imm:$b),
                !strconcat(!strconcat("st.param", opstr),
                           "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"), []>;

// st.param of two scalars to [param$a+$b].
class StoreParamScalar2Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs),
                (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b),
                !strconcat(!strconcat("st.param", opstr),
                           "\t[param$a+$b], {{$s1, $s2}};"), []>;

// st.param of four scalars to [func_retval+$a].
class StoreRetvalScalar4Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs),
                (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
                     i32imm:$a),
                !strconcat(!strconcat("st.param", opstr),
                           "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"), []>;

// st.param of two scalars to [func_retval+$a].
class StoreRetvalScalar2Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs),
                (ins regclass:$s1, regclass:$s2, i32imm:$a),
                !strconcat(!strconcat("st.param", opstr),
                           "\t[func_retval+$a], {{$s1, $s2}};"), []>;

def LoadParamScalar4I32 : LoadParamScalar4Inst<Int32Regs, ".v4.b32">;
def LoadParamScalar4I16 : LoadParamScalar4Inst<Int16Regs, ".v4.b16">;
def LoadParamScalar4I8 : LoadParamScalar4Inst<Int8Regs, ".v4.b8">;

// The register class of each 2-element load matches its element width, in
// line with the 4-element loads above and the 2-element store records below.
// (The I64, I16 and I8 loads previously all named Int32Regs.)
def LoadParamScalar2I64 : LoadParamScalar2Inst<Int64Regs, ".v2.b64">;
def LoadParamScalar2I32 : LoadParamScalar2Inst<Int32Regs, ".v2.b32">;
def LoadParamScalar2I16 : LoadParamScalar2Inst<Int16Regs, ".v2.b16">;
def LoadParamScalar2I8 : LoadParamScalar2Inst<Int8Regs, ".v2.b8">;

def LoadParamScalar4F32 : LoadParamScalar4Inst<Float32Regs, ".v4.f32">;
def LoadParamScalar2F32 : LoadParamScalar2Inst<Float32Regs, ".v2.f32">;
def LoadParamScalar2F64 : LoadParamScalar2Inst<Float64Regs, ".v2.f64">;

def StoreParamScalar4I32 : StoreParamScalar4Inst<Int32Regs, ".v4.b32">;
def StoreParamScalar4I16 : StoreParamScalar4Inst<Int16Regs, ".v4.b16">;
def StoreParamScalar4I8 : StoreParamScalar4Inst<Int8Regs, ".v4.b8">;

def StoreParamScalar2I64 : StoreParamScalar2Inst<Int64Regs, ".v2.b64">;
def StoreParamScalar2I32 : StoreParamScalar2Inst<Int32Regs, ".v2.b32">;
def StoreParamScalar2I16 : StoreParamScalar2Inst<Int16Regs, ".v2.b16">;
def StoreParamScalar2I8 : StoreParamScalar2Inst<Int8Regs, ".v2.b8">;

def StoreParamScalar4F32 : StoreParamScalar4Inst<Float32Regs, ".v4.f32">;
def StoreParamScalar2F32 : StoreParamScalar2Inst<Float32Regs, ".v2.f32">;
def StoreParamScalar2F64 : StoreParamScalar2Inst<Float64Regs, ".v2.f64">;

def StoreRetvalScalar4I32 : StoreRetvalScalar4Inst<Int32Regs, ".v4.b32">;
def StoreRetvalScalar4I16 : StoreRetvalScalar4Inst<Int16Regs, ".v4.b16">;
def StoreRetvalScalar4I8 :
StoreRetvalScalar4Inst<Int8Regs, ".v4.b8">;

def StoreRetvalScalar2I64 : StoreRetvalScalar2Inst<Int64Regs, ".v2.b64">;
def StoreRetvalScalar2I32 : StoreRetvalScalar2Inst<Int32Regs, ".v2.b32">;
def StoreRetvalScalar2I16 : StoreRetvalScalar2Inst<Int16Regs, ".v2.b16">;
def StoreRetvalScalar2I8 : StoreRetvalScalar2Inst<Int8Regs, ".v2.b8">;

def StoreRetvalScalar4F32 : StoreRetvalScalar4Inst<Float32Regs, ".v4.f32">;
def StoreRetvalScalar2F32 : StoreRetvalScalar2Inst<Float32Regs, ".v2.f32">;
def StoreRetvalScalar2F64 : StoreRetvalScalar2Inst<Float64Regs, ".v2.f64">;

// Vector-register forms of the parameter / return-value accesses.  Each one
// carries a selection pattern and records `sop` as NVPTXVecInst::scalarInst.
// The asm strings are fixed descriptive text; the `opstr` parameter is
// accepted but not used in them.

// Matches LoadParam with two i32 immediates and defines a vector register.
class LoadParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>:
      NVPTXVecInst<(outs regclass:$dst), (ins i32imm:$a, i32imm:$b),
                   "loadparam : $dst <- [$a, $b]",
                   [(set regclass:$dst, (LoadParam (i32 imm:$a), (i32 imm:$b)))],
                   sop>;

// Matches StoreParam with two i32 immediates and a vector register value.
class StoreParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
  : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
                 "storeparam : [$a, $b] <- $val",
                 [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)], sop>;

// Matches StoreRetval with one i32 immediate and a vector register value.
class StoreRetvalVecInst<NVPTXRegClass regclass, string opstr,
                         NVPTXInst sop=NOP>
  : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a),
                 "storeretval : retval[$a] <- $val",
                 [(StoreRetval (i32 imm:$a), regclass:$val)], sop>;

let VecInstType=isVecLD.Value in {
def LoadParamV4I32 : LoadParamVecInst<V4I32Regs, ".v4.b32",
                                      LoadParamScalar4I32>;
def LoadParamV4I16 : LoadParamVecInst<V4I16Regs, ".v4.b16",
                                      LoadParamScalar4I16>;
def LoadParamV4I8 : LoadParamVecInst<V4I8Regs, ".v4.b8",
                                     LoadParamScalar4I8>;

def LoadParamV2I64 : LoadParamVecInst<V2I64Regs, ".v2.b64",
                                      LoadParamScalar2I64>;
def LoadParamV2I32 : LoadParamVecInst<V2I32Regs, ".v2.b32",
                                      LoadParamScalar2I32>;
def LoadParamV2I16 : LoadParamVecInst<V2I16Regs, ".v2.b16",
                                      LoadParamScalar2I16>;
def
LoadParamV2I8 : LoadParamVecInst<V2I8Regs, ".v2.b8",
                                     LoadParamScalar2I8>;

def LoadParamV4F32 : LoadParamVecInst<V4F32Regs, ".v4.f32",
                                      LoadParamScalar4F32>;
def LoadParamV2F32 : LoadParamVecInst<V2F32Regs, ".v2.f32",
                                      LoadParamScalar2F32>;
def LoadParamV2F64 : LoadParamVecInst<V2F64Regs, ".v2.f64",
                                      LoadParamScalar2F64>;
}

// Vector parameter stores and return-value stores, each paired with its
// scalarized record.
let VecInstType=isVecST.Value in {
def StoreParamV4I32 : StoreParamVecInst<V4I32Regs, ".v4.b32",
                                        StoreParamScalar4I32>;
def StoreParamV4I16 : StoreParamVecInst<V4I16Regs, ".v4.b16",
                                        StoreParamScalar4I16>;
def StoreParamV4I8 : StoreParamVecInst<V4I8Regs, ".v4.b8",
                                       StoreParamScalar4I8>;

def StoreParamV2I64 : StoreParamVecInst<V2I64Regs, ".v2.b64",
                                        StoreParamScalar2I64>;
def StoreParamV2I32 : StoreParamVecInst<V2I32Regs, ".v2.b32",
                                        StoreParamScalar2I32>;
def StoreParamV2I16 : StoreParamVecInst<V2I16Regs, ".v2.b16",
                                        StoreParamScalar2I16>;
def StoreParamV2I8 : StoreParamVecInst<V2I8Regs, ".v2.b8",
                                       StoreParamScalar2I8>;

def StoreParamV4F32 : StoreParamVecInst<V4F32Regs, ".v4.f32",
                                        StoreParamScalar4F32>;
def StoreParamV2F32 : StoreParamVecInst<V2F32Regs, ".v2.f32",
                                        StoreParamScalar2F32>;
def StoreParamV2F64 : StoreParamVecInst<V2F64Regs, ".v2.f64",
                                        StoreParamScalar2F64>;

def StoreRetvalV4I32 : StoreRetvalVecInst<V4I32Regs, ".v4.b32",
                                          StoreRetvalScalar4I32>;
def StoreRetvalV4I16 : StoreRetvalVecInst<V4I16Regs, ".v4.b16",
                                          StoreRetvalScalar4I16>;
def StoreRetvalV4I8 : StoreRetvalVecInst<V4I8Regs, ".v4.b8",
                                         StoreRetvalScalar4I8>;

def StoreRetvalV2I64 : StoreRetvalVecInst<V2I64Regs, ".v2.b64",
                                          StoreRetvalScalar2I64>;
def StoreRetvalV2I32 : StoreRetvalVecInst<V2I32Regs, ".v2.b32",
                                          StoreRetvalScalar2I32>;
def StoreRetvalV2I16 : StoreRetvalVecInst<V2I16Regs, ".v2.b16",
                                          StoreRetvalScalar2I16>;
def StoreRetvalV2I8 : StoreRetvalVecInst<V2I8Regs, ".v2.b8",
                                         StoreRetvalScalar2I8>;

def StoreRetvalV4F32 : StoreRetvalVecInst<V4F32Regs, ".v4.f32",
                                          StoreRetvalScalar4F32>;
def StoreRetvalV2F32 : StoreRetvalVecInst<V2F32Regs, ".v2.f32",
                                          StoreRetvalScalar2F32>;
def StoreRetvalV2F64 : StoreRetvalVecInst<V2F64Regs, ".v2.f64",
                                          StoreRetvalScalar2F64>;

}


// Int vector to int scalar bit convert.
// Every element is extracted and the scalars are passed to the matching
// V*to* instruction.

// v4i8 -> i32
def : Pat<(i32 (bitconvert V4I8Regs:$s)),
          (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
                     (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3))>;
// v4i16 -> i64
def : Pat<(i64 (bitconvert V4I16Regs:$s)),
          (V4I16toI64 (V4i16Extract V4I16Regs:$s,0),
                      (V4i16Extract V4I16Regs:$s,1),
                      (V4i16Extract V4I16Regs:$s,2),
                      (V4i16Extract V4I16Regs:$s,3))>;
// v2i8 -> i16
def : Pat<(i16 (bitconvert V2I8Regs:$s)),
          (V2I8toI16 (V2i8Extract V2I8Regs:$s,0), (V2i8Extract V2I8Regs:$s,1))>;
// v2i16 -> i32
def : Pat<(i32 (bitconvert V2I16Regs:$s)),
          (V2I16toI32 (V2i16Extract V2I16Regs:$s,0),
                      (V2i16Extract V2I16Regs:$s,1))>;
// v2i32 -> i64
def : Pat<(i64 (bitconvert V2I32Regs:$s)),
          (V2I32toI64 (V2i32Extract V2I32Regs:$s,0),
                      (V2i32Extract V2I32Regs:$s,1))>;

// Int scalar to int vector bit convert.
// The asm string of each record is the literal "Error!".
let VecInstType=isVecDest.Value in {
// i32 -> v4i8
def VecI32toV4I8 : NVPTXVecInst<(outs V4I8Regs:$d), (ins Int32Regs:$s),
                                "Error!",
                                [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))],
                                I32toV4I8>;
// i64 -> v4i16
def VecI64toV4I16 : NVPTXVecInst<(outs V4I16Regs:$d), (ins Int64Regs:$s),
                                 "Error!",
                                 [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))],
                                 I64toV4I16>;
// i16 -> v2i8
def VecI16toV2I8 : NVPTXVecInst<(outs V2I8Regs:$d), (ins Int16Regs:$s),
                                "Error!",
                                [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))],
                                I16toV2I8>;
// i32 -> v2i16
def VecI32toV2I16 : NVPTXVecInst<(outs V2I16Regs:$d), (ins
Int32Regs:$s),
                                 "Error!",
                                 [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))],
                                 I32toV2I16>;
// i64 -> v2i32
def VecI64toV2I32 : NVPTXVecInst<(outs V2I32Regs:$d), (ins Int64Regs:$s),
                                 "Error!",
                                 [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))],
                                 I64toV2I32>;
}

// Int vector to int vector bit convert.
// Where total widths allow, the source is packed into one scalar and then
// split with a VecI*toV* instruction; the 128-bit cases are handled one
// 64-bit half at a time.

// v4i8 -> v2i16
def : Pat<(v2i16 (bitconvert V4I8Regs:$s)),
          (VecI32toV2I16
            (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
                       (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>;
// v4i16 -> v2i32
def : Pat<(v2i32 (bitconvert V4I16Regs:$s)),
          (VecI64toV2I32
            (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1),
                        (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>;
// v2i16 -> v4i8
def : Pat<(v4i8 (bitconvert V2I16Regs:$s)),
          (VecI32toV4I8
            (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>;
// v2i32 -> v4i16
def : Pat<(v4i16 (bitconvert V2I32Regs:$s)),
          (VecI64toV4I16
            (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>;
// v2i64 -> v4i32
def : Pat<(v4i32 (bitconvert V2I64Regs:$s)),
          (Build_Vector4_i32
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>;
// v4i32 -> v2i64
def : Pat<(v2i64 (bitconvert V4I32Regs:$s)),
          (Build_Vector2_i64
            (V2I32toI64 (V4i32Extract V4I32Regs:$s,0), (V4i32Extract V4I32Regs:$s,1)),
            (V2I32toI64 (V4i32Extract V4I32Regs:$s,2), (V4i32Extract V4I32Regs:$s,3)))>;

// Fp scalar to fp vector convert
// f64 -> v2f32
let VecInstType=isVecDest.Value in {
def VecF64toV2F32 : NVPTXVecInst<(outs V2F32Regs:$d), (ins Float64Regs:$s),
                                 "Error!",
[(set V2F32Regs:$d, (bitconvert Float64Regs:$s))],
                                 F64toV2F32>;
}

// Fp vector to fp scalar convert
// v2f32 -> f64
def : Pat<(f64 (bitconvert V2F32Regs:$s)),
          (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1))>;

// Fp scalar to int vector convert.
// The scalar is first moved to the integer domain with BITCONVERT_*_F2I and
// then split with the matching VecI*toV* instruction.

// f32 -> v4i8
def : Pat<(v4i8 (bitconvert Float32Regs:$s)),
          (VecI32toV4I8 (BITCONVERT_32_F2I Float32Regs:$s))>;
// f32 -> v2i16
def : Pat<(v2i16 (bitconvert Float32Regs:$s)),
          (VecI32toV2I16 (BITCONVERT_32_F2I Float32Regs:$s))>;
// f64 -> v4i16
def : Pat<(v4i16 (bitconvert Float64Regs:$s)),
          (VecI64toV4I16 (BITCONVERT_64_F2I Float64Regs:$s))>;
// f64 -> v2i32
def : Pat<(v2i32 (bitconvert Float64Regs:$s)),
          (VecI64toV2I32 (BITCONVERT_64_F2I Float64Regs:$s))>;

// Int vector to fp scalar convert.
// The elements are packed into one integer scalar, which is then passed
// through BITCONVERT_*_I2F.

// v4i8 -> f32
def : Pat<(f32 (bitconvert V4I8Regs:$s)),
          (BITCONVERT_32_I2F
            (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
                       (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>;
// v4i16 -> f64
def : Pat<(f64 (bitconvert V4I16Regs:$s)),
          (BITCONVERT_64_I2F
            (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1),
                        (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>;
// v2i16 -> f32
def : Pat<(f32 (bitconvert V2I16Regs:$s)),
          (BITCONVERT_32_I2F
            (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>;
// v2i32 -> f64
def : Pat<(f64 (bitconvert V2I32Regs:$s)),
          (BITCONVERT_64_I2F
            (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>;

// Int scalar to fp vector convert
// i64 -> v2f32
def : Pat<(v2f32 (bitconvert Int64Regs:$s)),
          (VecF64toV2F32 (BITCONVERT_64_I2F Int64Regs:$s))>;

// Fp vector to int scalar convert
// v2f32 -> i64
def : Pat<(i64 (bitconvert V2F32Regs:$s)),
          (BITCONVERT_64_F2I
(V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1)))>;

// Int vector to fp vector convert.
// The result is rebuilt with Build_Vector*_f* (or VecF64toV2F32) from
// per-element BITCONVERT_*_I2F results.

// v2i64 -> v4f32
def : Pat<(v4f32 (bitconvert V2I64Regs:$s)),
          (Build_Vector4_f32
            (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
                                 (V2i64Extract V2I64Regs:$s, 0)), 0)),
            (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
                                 (V2i64Extract V2I64Regs:$s, 0)), 1)),
            (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
                                 (V2i64Extract V2I64Regs:$s, 1)), 0)),
            (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
                                 (V2i64Extract V2I64Regs:$s, 1)), 1)))>;
// v2i64 -> v2f64
def : Pat<(v2f64 (bitconvert V2I64Regs:$s)),
          (Build_Vector2_f64
            (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,0)),
            (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,1)))>;
// v2i32 -> v2f32
def : Pat<(v2f32 (bitconvert V2I32Regs:$s)),
          (Build_Vector2_f32
            (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,0)),
            (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,1)))>;
// v4i32 -> v2f64
def : Pat<(v2f64 (bitconvert V4I32Regs:$s)),
          (Build_Vector2_f64
            (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,0),
                                           (V4i32Extract V4I32Regs:$s,1))),
            (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,2),
                                           (V4i32Extract V4I32Regs:$s,3))))>;
// v4i32 -> v4f32
def : Pat<(v4f32 (bitconvert V4I32Regs:$s)),
          (Build_Vector4_f32
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,0)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,1)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,2)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,3)))>;
// v4i16 -> v2f32
def : Pat<(v2f32 (bitconvert V4I16Regs:$s)),
          (VecF64toV2F32 (BITCONVERT_64_I2F
            (V4I16toI64 (V4i16Extract V4I16Regs:$s,0),
                        (V4i16Extract V4I16Regs:$s,1),
                        (V4i16Extract V4I16Regs:$s,2),
                        (V4i16Extract V4I16Regs:$s,3))))>;

// Fp vector to int vector convert
// v2i64 <- v4f32
def :
Pat<(v2i64 (bitconvert V4F32Regs:$s)),
          (Build_Vector2_i64
            (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,0),
                                           (V4f32Extract V4F32Regs:$s,1))),
            (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,2),
                                           (V4f32Extract V4F32Regs:$s,3))))>;
// v2i64 <- v2f64 (element-wise 64-bit F2I)
def : Pat<(v2i64 (bitconvert V2F64Regs:$s)),
          (Build_Vector2_i64
            (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,0)),
            (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,1)))>;
// v2i32 <- v2f32 (element-wise 32-bit F2I)
def : Pat<(v2i32 (bitconvert V2F32Regs:$s)),
          (Build_Vector2_i32
            (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,0)),
            (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,1)))>;
// v4i32 <- v2f64 (each f64 is split into a v2f32 first)
def : Pat<(v4i32 (bitconvert V2F64Regs:$s)),
          (Build_Vector4_i32
            (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
                                 (V2f64Extract V2F64Regs:$s, 0)), 0)),
            (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
                                 (V2f64Extract V2F64Regs:$s, 0)), 1)),
            (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
                                 (V2f64Extract V2F64Regs:$s, 1)), 0)),
            (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
                                 (V2f64Extract V2F64Regs:$s, 1)), 1)))>;
// v4i32 <- v4f32 (element-wise 32-bit F2I)
def : Pat<(v4i32 (bitconvert V4F32Regs:$s)),
          (Build_Vector4_i32
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,0)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,1)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,2)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,3)))>;
// v4i16 <- v2f32 (packed to f64, converted to i64, then split)
def : Pat<(v4i16 (bitconvert V2F32Regs:$s)),
          (VecI64toV4I16 (BITCONVERT_64_F2I
            (V2F32toF64 (V2f32Extract V2F32Regs:$s,0),
                        (V2f32Extract V2F32Regs:$s,1))))>;