Home
last modified time | relevance | path

Searched refs:insertps (Results 1 – 25 of 56) sorted by relevance

123

/external/valgrind/none/tests/amd64/
Dsse4-64.stdout.exp-older-glibc2005 r insertps $0 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664…
2006 m insertps $0 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664…
2007 r insertps $1 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664…
2008 m insertps $1 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664…
2009 r insertps $2 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660…
2010 m insertps $2 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660…
2011 r insertps $3 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660…
2012 m insertps $3 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660…
2013 r insertps $4 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29000000004…
2014 m insertps $4 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29000000004…
[all …]
Dsse4-64.stdout.exp2005 r insertps $0 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664…
2006 m insertps $0 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664…
2007 r insertps $1 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664…
2008 m insertps $1 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664…
2009 r insertps $2 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660…
2010 m insertps $2 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660…
2011 r insertps $3 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660…
2012 m insertps $3 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660…
2013 r insertps $4 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29000000004…
2014 m insertps $4 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29000000004…
[all …]
/external/llvm/test/CodeGen/X86/
Davx.ll28 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
38 %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
42 ;; Use a non-zero CountS for insertps
52 %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
67 %3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192)
77 ; CHECK: insertps $48
85 %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
94 ; CHECK: insertps $48
102 %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
114 ; CHECK: insertps $48
[all …]
Dsse41.ll144 ; X32-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,xmm0[3]
149 ; X64-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,xmm0[3]
151 …%tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 21) nounwi…
155 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
157 ; When optimizing for speed, prefer blendps over insertps even if it means we have to
174 ; When optimizing for size, generate an insertps if there's a load fold opportunity.
176 ; generate an insertps for X32 but not for X64!
180 ; X32-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
192 ; is always just a blendps because blendps is never more expensive than insertps.
267 ; This used to compile to insertps $0 + insertps $16. insertps $0 is always
[all …]
Dinsertps-from-constantpool.ll4 ; Test for case where insertps folds the load of an insertion element from a constant pool.
9 ; X32-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
14 ; X64-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
16 …%1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> <float 0.0, float 1.0, …
20 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
Dinsertps-O0-bug.ll12 ; of an insertps is considered to be profitable.
24 ; always considered unprofitable at -O0. This would leave the insertps mask
33 ; This test checks that the vector load in input to the insertps is not
40 ; CHECK: insertps $64, [[REG]],
44 %1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %0, i32 64)
52 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
Dinsertps-combine.ll9 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[2]
27 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
45 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
63 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm0[0]
80 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
99 ; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,zero
107 %res1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %res0, <4 x float> %a1, i8 21)
125 %res1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %res0, i8 21)
135 ; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
159 ; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
[all …]
Dinsertps-unfold-load-bug.ll4 ; Test for case where insertps was folding the load of the insertion element, but a later optimizat…
14 ; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
22 ; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
Dvec_set-3.ll7 ; CHECK-NEXT: insertps {{.*#+}} xmm0 = zero,mem[0],zero,zero
30 ; CHECK-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
Dmerge-consecutive-loads-128.ll219 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
232 ; X32-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
260 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
273 ; X32-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
710 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
711 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
712 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
727 ; X32-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
728 ; X32-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
729 ; X32-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
Dfold-load-vec.ll9 ; CHECK: insertps
Dsse41-intrinsics-x86-upgrade.ll54 ; CHECK-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3]
56 …%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 17) ; <<4 x…
59 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
Dsse41-intrinsics-x86.ll78 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3]
85 …%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17) ; <<4 x …
88 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
Dsse41-intrinsics-fast-isel.ll581 ; X32-NEXT: insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1],zero,xmm0[3]
586 ; X64-NEXT: insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1],zero,xmm0[3]
588 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 4)
591 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
Dvector-shuffle-128-v4.ll675 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
713 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
748 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
823 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
1067 ; SSE41-NEXT: insertps {{.*#+}} xmm1 = zero,zero,zero,xmm1[0]
1111 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2]
1156 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
/external/llvm/test/Transforms/InstCombine/
Dx86-insertps.ll3 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
8 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
12 ; CHECK-NEXT: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
19 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
26 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
36 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
40 ; CHECK-NEXT: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
47 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
58 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
69 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
[all …]
/external/swiftshader/third_party/LLVM/test/CodeGen/X86/
Dsse41.ll154 …%tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 1) nounwin…
157 ; X32: insertps $1, %xmm1, %xmm0
160 ; X64: insertps $1, %xmm1, %xmm0
163 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
169 ; X32: insertps $0, 4(%esp), %xmm0
172 ; X64: insertps $0, %xmm1, %xmm0
180 ; X32: insertps $0, %xmm1, %xmm0
183 ; X64: insertps $0, %xmm1, %xmm0
227 ; This used to compile to insertps $0 + insertps $16. insertps $0 is always
241 ; X32-NOT: insertps $0
[all …]
/external/swiftshader/third_party/LLVM/lib/Target/X86/
DREADME-SSE.txt608 In sse4 mode, we could use insertps to make both better.
610 Here's another testcase that could use insertps [mem]:
621 insertps $0x10, x2(%rip), %xmm0
622 insertps $0x10, x3(%rip), %xmm1
911 insertps $0, %xmm2, %xmm2
912 insertps $16, %xmm3, %xmm2
913 insertps $0, %xmm0, %xmm3
914 insertps $16, %xmm1, %xmm3
922 The insertps's of $0 are pointless complex copies.
/external/swiftshader/third_party/subzero/tests_lit/llvm2ice_tests/
Dvector-ops.ll29 ; SSE41: insertps {{.*}},{{.*}},0x0
82 ; SSE41: insertps {{.*}},{{.*}},0x10
Dundef.ll204 ; CHECK: {{movss|insertps}} {{.*}},[[REG]]
/external/llvm/lib/Target/X86/
DREADME-SSE.txt542 In sse4 mode, we could use insertps to make both better.
544 Here's another testcase that could use insertps [mem]:
555 insertps $0x10, x2(%rip), %xmm0
556 insertps $0x10, x3(%rip), %xmm1
/external/swiftshader/third_party/subzero/src/
DIceAssemblerX86Base.h496 void insertps(Type Ty, XmmRegister dst, XmmRegister src,
498 void insertps(Type Ty, XmmRegister dst, const Address &src,
/external/swiftshader/third_party/LLVM/test/MC/Disassembler/X86/
Dsimple-tests.txt129 # CHECK: insertps $129, %xmm2, %xmm1
Dx86-32.txt160 # CHECK: insertps $129, %xmm2, %xmm1
/external/elfutils/libcpu/
DChangeLog79 * defs/i386: Add dppd, dpps, insertps, movntdqa, mpsadbw, packusdw,

123