Home
last modified time | relevance | path

Searched refs:insertps (Results 1 – 25 of 102) sorted by relevance

12345

/external/llvm/test/CodeGen/X86/
Davx.ll28 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
38 %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
42 ;; Use a non-zero CountS for insertps
52 %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
67 %3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192)
77 ; CHECK: insertps $48
85 %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
94 ; CHECK: insertps $48
102 %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
114 ; CHECK: insertps $48
[all …]
Dsse41.ll144 ; X32-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,xmm0[3]
149 ; X64-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,xmm0[3]
151 …%tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 21) nounwi…
155 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
157 ; When optimizing for speed, prefer blendps over insertps even if it means we have to
174 ; When optimizing for size, generate an insertps if there's a load fold opportunity.
176 ; generate an insertps for X32 but not for X64!
180 ; X32-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
192 ; is always just a blendps because blendps is never more expensive than insertps.
267 ; This used to compile to insertps $0 + insertps $16. insertps $0 is always
[all …]
Dinsertps-from-constantpool.ll4 ; Test for case where insertps folds the load of an insertion element from a constant pool.
9 ; X32-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
14 ; X64-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
16 …%1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> <float 0.0, float 1.0, …
20 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
Dinsertps-combine.ll9 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[2]
27 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
45 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
63 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm0[0]
80 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
99 ; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,zero
107 %res1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %res0, <4 x float> %a1, i8 21)
125 %res1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %res0, i8 21)
135 ; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
159 ; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
[all …]
Dinsertps-O0-bug.ll12 ; of an insertps is considered to be profitable.
24 ; always considered unprofitable at -O0. This would leave the insertps mask
33 ; This test checks that the vector load in input to the insertps is not
40 ; CHECK: insertps $64, [[REG]],
44 %1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %0, i32 64)
52 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
Dinsertps-unfold-load-bug.ll4 ; Test for case where insertps was folding the load of the insertion element, but a later optimizat…
14 ; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
22 ; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
Dvec_set-3.ll7 ; CHECK-NEXT: insertps {{.*#+}} xmm0 = zero,mem[0],zero,zero
30 ; CHECK-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
/external/swiftshader/third_party/llvm-7.0/llvm/test/CodeGen/X86/
Davx.ll28 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
38 %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
42 ;; Use a non-zero CountS for insertps
52 %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
67 %3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192)
77 ; CHECK: insertps $48
85 %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
94 ; CHECK: insertps $48
102 %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
114 ; CHECK: insertps $48
[all …]
Dinsertps-from-constantpool.ll4 ; Test for case where insertps folds the load of an insertion element from a constant pool.
9 ; X32-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
14 ; X64-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
16 …%1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> <float 0.0, float 1.0, …
20 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
Dinsertps-O0-bug.ll12 ; of an insertps is considered to be profitable.
24 ; always considered unprofitable at -O0. This would leave the insertps mask
33 ; This test checks that the vector load in input to the insertps is not
40 ; CHECK: insertps $64, [[REG]],
44 %1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %0, i32 64)
52 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
Dinsertps-combine.ll9 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[2]
27 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
45 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
63 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm0[0]
80 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
99 ; SSE-NEXT: insertps {{.*#+}} xmm1 = zero,xmm0[0],xmm1[2],zero
115 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero
130 ; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,zero
138 %res1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %res0, <4 x float> %a1, i8 21)
156 %res1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %res0, i8 21)
[all …]
Dvec_set-3.ll8 ; X86-NEXT: insertps {{.*#+}} xmm0 = zero,mem[0],zero,zero
13 ; X64-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
42 ; X86-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
47 ; X64-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
Dinsertps-unfold-load-bug.ll4 ; Test for case where insertps was folding the load of the insertion element, but a later optimizat…
14 ; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
22 ; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
Dsse41.ll320 ; SSE-NEXT: insertps $21, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x21,0xc1,0x15]
335 …%tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 21) nounwi…
339 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
341 ; When optimizing for speed, prefer blendps over insertps even if it means we have to
383 ; When optimizing for size, generate an insertps if there's a load fold opportunity.
385 ; generate an insertps for X86 but not for X64!
433 ; is always just a blendps because blendps is never more expensive than insertps.
509 ; This used to compile to insertps $0 + insertps $16. insertps $0 is always
520 ; SSE-NEXT: insertps $16, %xmm3, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x21,0xc3,0x10]
564 ; X86-SSE-NEXT: insertps $48, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x21,0xc1,0x30]
[all …]
/external/llvm/test/Transforms/InstCombine/
Dx86-insertps.ll3 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
8 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
12 ; CHECK-NEXT: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
19 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
26 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
36 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
40 ; CHECK-NEXT: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
47 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
58 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
69 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
[all …]
/external/swiftshader/third_party/llvm-7.0/llvm/test/Transforms/InstCombine/X86/
Dx86-insertps.ll4 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
10 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x floa…
13 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
24 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
32 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
41 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x floa…
44 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
56 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
68 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
80 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
[all …]
/external/swiftshader/third_party/LLVM/test/CodeGen/X86/
Dsse41.ll154 …%tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 1) nounwin…
157 ; X32: insertps $1, %xmm1, %xmm0
160 ; X64: insertps $1, %xmm1, %xmm0
163 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
169 ; X32: insertps $0, 4(%esp), %xmm0
172 ; X64: insertps $0, %xmm1, %xmm0
180 ; X32: insertps $0, %xmm1, %xmm0
183 ; X64: insertps $0, %xmm1, %xmm0
227 ; This used to compile to insertps $0 + insertps $16. insertps $0 is always
241 ; X32-NOT: insertps $0
[all …]
/external/swiftshader/third_party/llvm-7.0/llvm/test/tools/llvm-mca/X86/Haswell/
Dresources-sse41.s25 insertps $1, %xmm0, %xmm2 label
26 insertps $1, (%rax), %xmm2 label
172 # CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
173 # CHECK-NEXT: 2 7 1.00 * insertps $1, (%rax), %xmm2
287 # CHECK-NEXT: - - - - - - - 1.00 - - insertps $1, %x…
288 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - insertps $1, (%…
/external/swiftshader/third_party/llvm-7.0/llvm/test/tools/llvm-mca/X86/Generic/
Dresources-sse41.s25 insertps $1, %xmm0, %xmm2 label
26 insertps $1, (%rax), %xmm2 label
172 # CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
173 # CHECK-NEXT: 2 7 1.00 * insertps $1, (%rax), %xmm2
285 # CHECK-NEXT: - - - - - 1.00 - - insertps $1, %xmm0, %xmm2
286 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 insertps $1, (%rax), %xmm2
/external/swiftshader/third_party/llvm-7.0/llvm/test/tools/llvm-mca/X86/SLM/
Dresources-sse41.s25 insertps $1, %xmm0, %xmm2 label
26 insertps $1, (%rax), %xmm2 label
172 # CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
173 # CHECK-NEXT: 1 4 1.00 * insertps $1, (%rax), %xmm2
285 # CHECK-NEXT: - - - 1.00 - - - - insertps $1, %xmm0, %xmm2
286 # CHECK-NEXT: - - - 1.00 - - - 1.00 insertps $1, (%rax), %xmm2
/external/swiftshader/third_party/llvm-7.0/llvm/test/tools/llvm-mca/X86/BtVer2/
Dresources-sse41.s25 insertps $1, %xmm0, %xmm2 label
26 insertps $1, (%rax), %xmm2 label
172 # CHECK-NEXT: 1 1 0.50 insertps $1, %xmm0, %xmm2
173 # CHECK-NEXT: 1 6 1.00 * insertps $1, (%rax), %xmm2
291 …0 0.50 0.50 0.50 - - - - - - - insertps $1, %xmm0, %xmm2
292 … 0.50 0.50 0.50 1.00 - - - - - - insertps $1, (%rax), %xmm2
/external/swiftshader/third_party/llvm-7.0/llvm/test/tools/llvm-mca/X86/SkylakeServer/
Dresources-sse41.s25 insertps $1, %xmm0, %xmm2 label
26 insertps $1, (%rax), %xmm2 label
172 # CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
173 # CHECK-NEXT: 2 7 1.00 * insertps $1, (%rax), %xmm2
287 # CHECK-NEXT: - - - - - - - 1.00 - - insertps $1, %x…
288 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - insertps $1, (%…
/external/swiftshader/third_party/llvm-7.0/llvm/test/tools/llvm-mca/X86/Znver1/
Dresources-sse41.s25 insertps $1, %xmm0, %xmm2 label
26 insertps $1, (%rax), %xmm2 label
172 # CHECK-NEXT: 1 1 0.50 insertps $1, %xmm0, %xmm2
173 # CHECK-NEXT: 1 8 0.50 * insertps $1, (%rax), %xmm2
289 … - - - - - - 0.50 0.50 - - insertps $1, %xmm0, %xmm2
290 … - - - - - - 0.50 0.50 - - insertps $1, (%rax), %xmm2
/external/swiftshader/third_party/llvm-7.0/llvm/test/tools/llvm-mca/X86/SandyBridge/
Dresources-sse41.s25 insertps $1, %xmm0, %xmm2 label
26 insertps $1, (%rax), %xmm2 label
172 # CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
173 # CHECK-NEXT: 2 7 1.00 * insertps $1, (%rax), %xmm2
285 # CHECK-NEXT: - - - - - 1.00 - - insertps $1, %xmm0, %xmm2
286 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 insertps $1, (%rax), %xmm2
/external/swiftshader/third_party/llvm-7.0/llvm/test/tools/llvm-mca/X86/SkylakeClient/
Dresources-sse41.s25 insertps $1, %xmm0, %xmm2 label
26 insertps $1, (%rax), %xmm2 label
172 # CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
173 # CHECK-NEXT: 2 7 1.00 * insertps $1, (%rax), %xmm2
287 # CHECK-NEXT: - - - - - - - 1.00 - - insertps $1, %x…
288 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - insertps $1, (%…

12345