/external/libvpx/libvpx/vpx_dsp/x86/ |
D | highbd_subpel_variance_impl_sse2.asm | 329 ; slightly faster because of pmullw latency. It would also cut our rodata 331 pmullw m1, filter_y_a 332 pmullw m5, filter_y_b 334 pmullw m0, filter_y_a 335 pmullw m4, filter_y_b 359 pmullw m1, filter_y_a 360 pmullw m5, filter_y_b 362 pmullw m0, filter_y_a 363 pmullw m4, filter_y_b 562 pmullw m1, filter_y_a [all …]
|
D | subpel_variance_sse2.asm | 396 ; slightly faster because of pmullw latency. It would also cut our rodata 398 pmullw m2, filter_y_a 399 pmullw m3, filter_y_b 401 pmullw m0, filter_y_a 402 pmullw m4, filter_y_b 440 pmullw m0, filter_y_a 441 pmullw m1, m2, filter_y_b 444 pmullw m2, filter_y_a 445 pmullw m4, filter_y_b 839 pmullw m2, filter_x_a [all …]
|
D | variance_impl_mmx.asm | 455 pmullw mm1, [rax] ; 458 pmullw mm3, [rax+8] ; 479 pmullw mm1, [rax] ; 482 pmullw mm3, [rax+8] ; 491 pmullw mm3, [rdx] ; 493 pmullw mm1, [rdx+8] ; 596 pmullw mm1, [rax] ; 598 pmullw mm2, [rax] ; 602 pmullw mm3, [rax+8] ; 604 pmullw mm4, [rax+8] ; [all …]
|
D | vpx_subpixel_bilinear_sse2.asm | 39 pmullw xmm0, xmm4 ;multiply the filter factors 87 pmullw xmm0, xmm6 88 pmullw xmm1, xmm7 110 pmullw xmm0, xmm6 111 pmullw xmm1, xmm7 112 pmullw xmm2, xmm6 113 pmullw xmm3, xmm7
|
D | vpx_subpixel_8t_sse2.asm | 61 pmullw xmm0, k0k1 ;multiply the filter factors 62 pmullw xmm6, k6k7 63 pmullw xmm2, k2k3 64 pmullw xmm5, k5k4 152 pmullw xmm0, k0 153 pmullw xmm1, k1 154 pmullw xmm6, k6 155 pmullw xmm7, k7 156 pmullw xmm2, k2 157 pmullw xmm5, k5 [all …]
|
D | quantize_avx_x86_64.asm | 141 pmullw m8, m3 ; dqc[i] = qc[i] * q 143 pmullw m13, m3 ; dqc[i] = qc[i] * q 323 pmullw m8, m3 ; dqc[i] = qc[i] * q 325 pmullw m13, m3 ; dqc[i] = qc[i] * q 445 pmullw m14, m3 ; dqc[i] = qc[i] * q 446 pmullw m13, m3 ; dqc[i] = qc[i] * q
|
D | quantize_ssse3_x86_64.asm | 125 pmullw m8, m3 ; dqc[i] = qc[i] * q 127 pmullw m13, m3 ; dqc[i] = qc[i] * q 228 pmullw m14, m3 ; dqc[i] = qc[i] * q 229 pmullw m13, m3 ; dqc[i] = qc[i] * q
|
/external/libvpx/libvpx/vp8/common/x86/ |
D | subpixel_sse2.asm | 77 pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 81 pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 87 pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 92 pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 98 pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 101 pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 202 pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 206 pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 212 pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 217 pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 [all …]
|
D | subpixel_mmx.asm | 59 pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers. 63 pmullw mm4, mm7 ; mm5 *= kernel 4 modifiers 69 pmullw mm5, mm2 ; mm5 *= kernel 2 modifiers 75 pmullw mm4, mm6 ; mm5 *= kernel 3 modifiers 81 pmullw mm4, [rdx+80] ; mm5 *= kernel 0 modifiers 85 pmullw mm5, [rdx] ; mm5 *= kernel 5 modifiers 158 pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers. 162 pmullw mm4, mm7 ; mm4 *= kernel 4 modifiers. 166 pmullw mm4, mm2 ; mm4 *= kernel 2 modifiers. 170 pmullw mm4, [rbx] ; mm4 *= kernel 0 modifiers. [all …]
|
D | dequantize_mmx.asm | 30 pmullw mm1, [rax+0] ; mm4 *= kernel 0 modifiers. 34 pmullw mm1, [rax+8] ; mm4 *= kernel 0 modifiers. 38 pmullw mm1, [rax+16] ; mm4 *= kernel 0 modifiers. 42 pmullw mm1, [rax+24] ; mm4 *= kernel 0 modifiers. 72 pmullw mm0, [rdx] 75 pmullw mm1, [rdx +8] 78 pmullw mm2, [rdx+16] 81 pmullw mm3, [rdx+24]
|
D | mfqe_sse2.asm | 57 pmullw xmm2, xmm0 58 pmullw xmm3, xmm0 64 pmullw xmm4, xmm1 65 pmullw xmm5, xmm1 132 pmullw xmm2, xmm0 136 pmullw xmm3, xmm1
|
D | idctllm_sse2.asm | 39 pmullw xmm4, xmm5 140 pmullw xmm0, [rdx] 141 pmullw xmm2, [rdx+16] 142 pmullw xmm1, [rdx] 143 pmullw xmm3, [rdx+16] 472 pmullw xmm0, [rdx] 473 pmullw xmm2, [rdx+16] 474 pmullw xmm1, [rdx] 475 pmullw xmm3, [rdx+16]
|
D | postproc_mmx.asm | 100 pmullw mm1, mm1 ; 125 pmullw mm2, mm2 134 pmullw mm1, mm1 151 pmullw mm1, mm1
|
D | postproc_sse2.asm | 319 pmullw xmm1, xmm1 ; 344 pmullw xmm2, xmm2 353 pmullw xmm1, xmm1 370 pmullw xmm1, xmm1
|
/external/llvm/test/CodeGen/X86/ |
D | vec_shift6.ll | 8 ; Check that we produce a SSE2 packed integer multiply (pmullw) instead. 15 ; CHECK: pmullw 24 ; CHECK: pmullw 55 ; into two pmullw instructions. With AVX2, the test case below would produce 63 ; SSE: pmullw 64 ; SSE-NEXT: pmullw 87 ; parts and then we convert each part into a pmullw. 94 ; SSE: pmullw 95 ; SSE-NEXT: pmullw 96 ; SSE-NEXT: pmullw [all …]
|
D | pmul.ll | 14 ; SSE2-NEXT: pmullw %xmm1, %xmm2 19 ; SSE2-NEXT: pmullw %xmm1, %xmm0 28 ; SSE41-NEXT: pmullw %xmm2, %xmm1 33 ; SSE41-NEXT: pmullw %xmm2, %xmm0 59 ; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0 133 ; SSE2-NEXT: pmullw %xmm2, %xmm3 140 ; SSE2-NEXT: pmullw %xmm1, %xmm0 149 ; SSE41-NEXT: pmullw %xmm3, %xmm2 156 ; SSE41-NEXT: pmullw %xmm1, %xmm0 182 ; SSE-NEXT: pmullw %xmm1, %xmm0
|
D | 2008-02-26-AsmDirectMemOp.ll | 15 …mm0 \0A\09movq 8($0, $3), %mm1 \0A\09pmullw %mm6, %mm0 \0A\09pmullw …
|
/external/libvpx/libvpx/vp9/common/x86/ |
D | vp9_mfqe_sse2.asm | 58 pmullw xmm2, xmm0 59 pmullw xmm3, xmm0 65 pmullw xmm4, xmm1 66 pmullw xmm5, xmm1 133 pmullw xmm2, xmm0 137 pmullw xmm3, xmm1
|
/external/mesa3d/src/mesa/x86/ |
D | read_rgba_span_x86.S | 561 pmullw %mm6, %mm0 562 pmullw %mm6, %mm2 596 pmullw %mm6, %mm0 597 pmullw %mm6, %mm2 634 pmullw %mm6, %mm0 635 pmullw %mm6, %mm2 665 pmullw %mm6, %mm0
|
/external/libvpx/libvpx/vp8/encoder/x86/ |
D | quantize_mmx.asm | 66 pmullw mm3, mm2 106 pmullw mm7, mm6 147 pmullw mm7, mm6 188 pmullw mm7, mm6
|
/external/libvpx/libvpx/vp9/encoder/x86/ |
D | vp9_quantize_ssse3_x86_64.asm | 78 pmullw m8, m3 ; r4[i] = r3[i] * q 80 pmullw m13, m3 ; r4[i] = r3[i] * q 132 pmullw m14, m3 ; r4[i] = r3[i] * q 133 pmullw m13, m3 ; r4[i] = r3[i] * q
|
/external/valgrind/VEX/test/ |
D | mmxtest.c | 286 #define pmullw_m2r(var, reg) mmx_m2r(pmullw, var, reg) 287 #define pmullw_r2r(regs, regd) mmx_r2r(pmullw, regs, regd) 288 #define pmullw(vars, vard) mmx_m2m(pmullw, vars, vard) macro 555 do_test("pmullw", pmullw(ma,mb)); in main()
|
/external/libjpeg-turbo/simd/ |
D | jdsample-sse2-64.asm | 268 pmullw xmm0,[rel PW_THREE] 269 pmullw xmm4,[rel PW_THREE]
|
D | jdsample-mmx.asm | 295 pmullw mm0,[GOTOFF(ebx,PW_THREE)] 296 pmullw mm4,[GOTOFF(ebx,PW_THREE)]
|
/external/libyuv/files/source/ |
D | row_win.cc | 1574 __asm pmullw xmm3, kYToRgb \ 1602 __asm pmullw xmm3, kYToRgb \ 3037 pmullw xmm2, xmm3 // _r_b * alpha in ARGBBlendRow_SSE2() 3042 pmullw xmm1, xmm3 // _a_g * alpha in ARGBBlendRow_SSE2() 3068 pmullw xmm2, xmm3 // _r_b * alpha in ARGBBlendRow_SSE2() 3073 pmullw xmm1, xmm3 // _a_g * alpha in ARGBBlendRow_SSE2() 3099 pmullw xmm2, xmm3 // _r_b * alpha in ARGBBlendRow_SSE2() 3104 pmullw xmm1, xmm3 // _a_g * alpha in ARGBBlendRow_SSE2() 3169 pmullw xmm2, xmm3 // _r_b * alpha in ARGBBlendRow_SSSE3() 3174 pmullw xmm1, xmm3 // _a_g * alpha in ARGBBlendRow_SSSE3() [all …]
|