Lines Matching refs:__m64
110 __m64 tmp0_3l, tmp0_3h, tmp1_2l, tmp1_2h; \
111 __m64 tmp0l, tmp0h, tmp1l, tmp1h, tmp2l, tmp2h, tmp3l, tmp3h; \
112 __m64 z34l, z34h, z3l, z3h, z4l, z4h, z3, z4; \
113 __m64 out0l, out0h, out1l, out1h, out2l, out2h, out3l, out3h; \
114 __m64 out4l, out4h, out5l, out5h, out6l, out6h, out7l, out7h; \
256 __m64 col0l, col1l, col2l, col3l, col4l, col5l, col6l, col7l; \
257 __m64 quant0l, quant1l, quant2l, quant3l; \
258 __m64 quant4l, quant5l, quant6l, quant7l; \
259 __m64 z23, z2, z3, z23l, z23h; \
260 __m64 row01a, row01b, row01c, row01d, row23a, row23b, row23c, row23d; \
261 __m64 row0l, row0h, row1l, row1h, row2l, row2h, row3l, row3h; \
262 __m64 tmp0l, tmp0h, tmp1l, tmp1h, tmp2l, tmp2h, tmp3l, tmp3h; \
263 __m64 tmp10l, tmp10h, tmp11l, tmp11h, tmp12l, tmp12h, tmp13l, tmp13h; \
271 __m64 mm1, mm2; \
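
The declarations above (source lines 110-114 and 256-271) sit inside multi-line macros, hence the trailing backslashes. The l/h suffix pairs reflect a standard idiom in this code: the accurate (islow) IDCT needs 32-bit intermediates, and four widened 16-bit lanes no longer fit one 64-bit __m64, so each such quantity is kept as a low half and a high half. A scalar sketch of the idiom (illustrative names, not from the source):

#include <stdint.h>

/* Why each temporary appears as an "l"/"h" pair: widening four 16-bit
 * lanes to 32 bits doubles the storage, so the result is split into a
 * low half (lanes 0-1) and a high half (lanes 2-3). */
static void widen_mul(const int16_t a[4], const int16_t b[4],
                      int32_t lo[2], int32_t hi[2])
{
  lo[0] = (int32_t)a[0] * b[0];  lo[1] = (int32_t)a[1] * b[1];  /* tmp0l-like */
  hi[0] = (int32_t)a[2] * b[2];  hi[1] = (int32_t)a[3] * b[3];  /* tmp0h-like */
}
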
273 col0l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 0]); \
274 col1l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 1]); \
275 col2l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 2]); \
276 col3l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 3]); \
277 col4l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 4]); \
278 col5l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 5]); \
279 col6l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 6]); \
280 col7l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 7]); \
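
The eight loads at source lines 273-280 bring in four 16-bit coefficients per __m64, covering one 8x4 half of the coefficient block; they feed the all-AC-zero test that gates the shortcut visible at lines 290-314. A scalar sketch of that test, assuming DCTSIZE == 8 and coefficients in natural order:

#include <stdint.h>

/* If every AC coefficient of the four columns being processed is zero,
 * each column's 1-D IDCT collapses to its DC term and the full butterfly
 * can be skipped (sketch of the test these loads feed). */
static int ac_terms_all_zero(const int16_t *coef)
{
  for (int row = 1; row < 8; row++)       /* row 0 holds the DC terms */
    for (int col = 0; col < 4; col++)
      if (coef[8 * row + col] != 0)
        return 0;
  return 1;
}
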
290 __m64 dcval, dcvall, dcvalh, row0, row1, row2, row3; \
294 quant0l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 0]); \
307 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 0], row0); \
308 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 0 + 4], row0); \
309 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 1], row1); \
310 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 1 + 4], row1); \
311 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 2], row2); \
312 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 2 + 4], row2); \
313 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 3], row3); \
314 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 3 + 4], row3); \
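
When the shortcut taken at source line 290 applies, lines 307-314 fill the workspace directly: each row receives one column's dequantized DC value, and because a __m64 holds only four 16-bit samples, each 8-sample row takes two stores of the same replicated register. A scalar sketch (the fixed-point scaling the real code applies is omitted):

#include <stdint.h>

/* DC-only path: broadcast column c's dequantized DC across workspace
 * row c.  The real code replicates with unpack instructions and also
 * applies islow fixed-point scaling, omitted here for clarity. */
static void dc_only_rows(const int16_t *coef, const int16_t *quant,
                         int16_t *wsptr)
{
  for (int c = 0; c < 4; c++) {
    int16_t dcval = (int16_t)(coef[c] * quant[c]);
    for (int i = 0; i < 8; i++)
      wsptr[8 * c + i] = dcval;           /* two _mm_store_si64 per row */
  }
}
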
332 col0l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 0]); /* (00 10 20 30) */ \
333 col2l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 2]); /* (02 12 22 32) */ \
334 col4l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 4]); /* (04 14 24 34) */ \
335 col6l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 6]); /* (06 16 26 36) */ \
337 quant0l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 0]); \
338 quant2l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 2]); \
339 quant4l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 4]); \
340 quant6l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 6]); \
379 col1l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 1]); /* (01 11 21 31) */ \
380 col3l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 3]); /* (03 13 23 33) */ \
381 col5l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 5]); /* (05 15 25 35) */ \
382 col7l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 7]); /* (07 17 27 37) */ \
384 quant1l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 1]); \
385 quant3l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 3]); \
386 quant5l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 5]); \
387 quant7l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 7]); \
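
Source lines 332-340 and 379-387 pair each group of coefficient loads with the matching quantization-table rows: even-indexed rows feed the even part of the islow butterfly, odd-indexed rows the odd part, and dequantization happens as a per-lane 16-bit multiply right after the loads. The scalar equivalent is a plain element-wise product:

#include <stdint.h>

/* Dequantize one group of four coefficients; the MMI code performs the
 * same per-lane multiply on a whole __m64 at once (a sketch, not the
 * source's DEQUANTIZE macro verbatim). */
static void dequantize4(const int16_t coef[4], const int16_t quant[4],
                        int16_t out[4])
{
  for (int i = 0; i < 4; i++)
    out[i] = (int16_t)(coef[i] * quant[i]);
}
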
423 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 0], row0l); \
424 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 0 + 4], row0h); \
425 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 1], row1l); \
426 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 1 + 4], row1h); \
427 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 2], row2l); \
428 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 2 + 4], row2h); \
429 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 3], row3l); \
430 _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 3 + 4], row3h); \
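
Lines 423-430 write the pass-1 results to the workspace in transposed order (each stored "row" is one original column, as the DC-only path above also shows), split into low/high halves because an 8-sample row of 16-bit values spans two __m64 registers. Conceptually:

#include <stdint.h>

/* Pass 1 stores per-column results transposed so that pass 2 can read
 * the second dimension as contiguous rows (index math only; hypothetical
 * helper, not in the source). */
static void store_row_halves(int16_t *wsptr, int row,
                             const int16_t lo[4], const int16_t hi[4])
{
  for (int i = 0; i < 4; i++) {
    wsptr[8 * row + i]     = lo[i];     /* rowNl store */
    wsptr[8 * row + 4 + i] = hi[i];     /* rowNh store */
  }
}
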
434 __m64 row0l, row1l, row2l, row3l, row4l, row5l, row6l, row7l; \
435 __m64 z23, z23l, z23h; \
436 __m64 col0123a, col0123b, col0123c, col0123d; \
437 __m64 col01l, col01h, col23l, col23h, row06, row17, row24, row35; \
438 __m64 col0, col1, col2, col3; \
439 __m64 tmp0l, tmp0h, tmp1l, tmp1h, tmp2l, tmp2h, tmp3l, tmp3h; \
440 __m64 tmp10l, tmp10h, tmp11l, tmp11h, tmp12l, tmp12h, tmp13l, tmp13h; \
442 row0l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 0]); /* (00 01 02 03) */ \
443 row1l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 1]); /* (10 11 12 13) */ \
444 row2l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 2]); /* (20 21 22 23) */ \
445 row3l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 3]); /* (30 31 32 33) */ \
446 row4l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 4]); /* (40 41 42 43) */ \
447 row5l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 5]); /* (50 51 52 53) */ \
448 row6l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 6]); /* (60 61 62 63) */ \
449 row7l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 7]); /* (70 71 72 73) */ \
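
Pass 2 begins at source lines 442-449 by reloading all eight workspace rows; the digit-pair comments label the individual 16-bit lanes so the transpose steps can be followed through the register shuffles (my reading of the notation; treat it as an assumption). Each load is again a quarter-row:

#include <stdint.h>

/* One pass-2 load grabs four contiguous 16-bit workspace samples
 * (sketch; e.g. row0l = (00 01 02 03) in the source's lane notation). */
static void load_quarter_row(const int16_t *wsptr, int row, int16_t out[4])
{
  for (int i = 0; i < 4; i++)
    out[i] = wsptr[8 * row + i];
}
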
534 _mm_store_si64((__m64 *)(output_buf[ctr + 0] + output_col), col0); \
535 _mm_store_si64((__m64 *)(output_buf[ctr + 1] + output_col), col1); \
536 _mm_store_si64((__m64 *)(output_buf[ctr + 2] + output_col), col2); \
537 _mm_store_si64((__m64 *)(output_buf[ctr + 3] + output_col), col3); \
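
The stores at source lines 534-537 are the endpoint: by now the samples have been descaled, range-limited, and packed to unsigned bytes, so a single __m64 store writes a full 8-pixel row, four rows per iteration. A scalar sketch of the clamp-and-pack (the real code folds the CENTERJSAMPLE bias and clamping into saturating pack instructions):

#include <stdint.h>

/* Re-center around CENTERJSAMPLE (128) and clamp to 0..255, then emit
 * 8 bytes per row, which is what the saturating packs achieve before
 * the four stores above (a sketch, not the MMI sequence). */
static void pack_row(const int16_t samples[8], uint8_t *outrow)
{
  for (int i = 0; i < 8; i++) {
    int v = samples[i] + 128;
    outrow[i] = (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
  }
}
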
543 __m64 tmp0, tmp1, tmp2, tmp3;                        /* in jsimd_idct_islow_mmi() */
544 __m64 out0, out1, out2, out3, out4, out5, out6, out7; /* in jsimd_idct_islow_mmi() */