Lines Matching full:stride
26 static INLINE void dc_store_4x4(uint8_t *dst, ptrdiff_t stride, in dc_store_4x4() argument
29 for (i = 0; i < 4; ++i, dst += stride) { in dc_store_4x4()
34 void vpx_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_predictor_4x4_neon() argument
41 dc_store_4x4(dst, stride, dc); in vpx_dc_predictor_4x4_neon()
44 void vpx_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_left_predictor_4x4_neon() argument
49 dc_store_4x4(dst, stride, dc); in vpx_dc_left_predictor_4x4_neon()
52 void vpx_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_top_predictor_4x4_neon() argument
57 dc_store_4x4(dst, stride, dc); in vpx_dc_top_predictor_4x4_neon()
60 void vpx_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_128_predictor_4x4_neon() argument
65 dc_store_4x4(dst, stride, dc); in vpx_dc_128_predictor_4x4_neon()
75 static INLINE void dc_store_8x8(uint8_t *dst, ptrdiff_t stride, in dc_store_8x8() argument
78 for (i = 0; i < 8; ++i, dst += stride) { in dc_store_8x8()
83 void vpx_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_predictor_8x8_neon() argument
90 dc_store_8x8(dst, stride, dc); in vpx_dc_predictor_8x8_neon()
93 void vpx_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_left_predictor_8x8_neon() argument
98 dc_store_8x8(dst, stride, dc); in vpx_dc_left_predictor_8x8_neon()
101 void vpx_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_top_predictor_8x8_neon() argument
106 dc_store_8x8(dst, stride, dc); in vpx_dc_top_predictor_8x8_neon()
109 void vpx_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_128_predictor_8x8_neon() argument
114 dc_store_8x8(dst, stride, dc); in vpx_dc_128_predictor_8x8_neon()
124 static INLINE void dc_store_16x16(uint8_t *dst, ptrdiff_t stride, in dc_store_16x16() argument
127 for (i = 0; i < 16; ++i, dst += stride) { in dc_store_16x16()
132 void vpx_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_predictor_16x16_neon() argument
141 dc_store_16x16(dst, stride, dc); in vpx_dc_predictor_16x16_neon()
144 void vpx_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_left_predictor_16x16_neon() argument
150 dc_store_16x16(dst, stride, dc); in vpx_dc_left_predictor_16x16_neon()
153 void vpx_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_top_predictor_16x16_neon() argument
159 dc_store_16x16(dst, stride, dc); in vpx_dc_top_predictor_16x16_neon()
162 void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_128_predictor_16x16_neon() argument
168 dc_store_16x16(dst, stride, dc); in vpx_dc_128_predictor_16x16_neon()
181 static INLINE void dc_store_32x32(uint8_t *dst, ptrdiff_t stride, in dc_store_32x32() argument
184 for (i = 0; i < 32; ++i, dst += stride) { in dc_store_32x32()
190 void vpx_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_predictor_32x32_neon() argument
201 dc_store_32x32(dst, stride, dc); in vpx_dc_predictor_32x32_neon()
204 void vpx_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_left_predictor_32x32_neon() argument
210 dc_store_32x32(dst, stride, dc); in vpx_dc_left_predictor_32x32_neon()
213 void vpx_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_top_predictor_32x32_neon() argument
219 dc_store_32x32(dst, stride, dc); in vpx_dc_top_predictor_32x32_neon()
222 void vpx_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, in vpx_dc_128_predictor_32x32_neon() argument
228 dc_store_32x32(dst, stride, dc); in vpx_dc_128_predictor_32x32_neon()
233 void vpx_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d45_predictor_4x4_neon() argument
255 // stride=0 [ d0[0], d0[1], d0[2], d0[3] ] in vpx_d45_predictor_4x4_neon()
256 // stride=1 [ d0[1], d0[2], d0[3], d0[4] ] in vpx_d45_predictor_4x4_neon()
257 // stride=2 [ d0[2], d0[3], d0[4], d0[5] ] in vpx_d45_predictor_4x4_neon()
258 // stride=3 [ d0[3], d0[4], d0[5], above[7] ] in vpx_d45_predictor_4x4_neon()
259 store_u8_4x1(dst + 0 * stride, d0); in vpx_d45_predictor_4x4_neon()
260 store_u8_4x1(dst + 1 * stride, vext_u8(d0, d0, 1)); in vpx_d45_predictor_4x4_neon()
261 store_u8_4x1(dst + 2 * stride, vext_u8(d0, d0, 2)); in vpx_d45_predictor_4x4_neon()
262 store_u8_4x1(dst + 3 * stride, vext_u8(d0, d0, 3)); in vpx_d45_predictor_4x4_neon()
265 dst[3 * stride + 3] = a7; in vpx_d45_predictor_4x4_neon()
268 void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d45_predictor_8x8_neon() argument
290 vst1_u8(dst + 0 * stride, vext_u8(d0, a7, 1)); in vpx_d45_predictor_8x8_neon()
291 vst1_u8(dst + 1 * stride, vext_u8(d0, a7, 2)); in vpx_d45_predictor_8x8_neon()
292 vst1_u8(dst + 2 * stride, vext_u8(d0, a7, 3)); in vpx_d45_predictor_8x8_neon()
293 vst1_u8(dst + 3 * stride, vext_u8(d0, a7, 4)); in vpx_d45_predictor_8x8_neon()
294 vst1_u8(dst + 4 * stride, vext_u8(d0, a7, 5)); in vpx_d45_predictor_8x8_neon()
295 vst1_u8(dst + 5 * stride, vext_u8(d0, a7, 6)); in vpx_d45_predictor_8x8_neon()
296 vst1_u8(dst + 6 * stride, vext_u8(d0, a7, 7)); in vpx_d45_predictor_8x8_neon()
297 vst1_u8(dst + 7 * stride, a7); in vpx_d45_predictor_8x8_neon()
300 void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d45_predictor_16x16_neon() argument
322 vst1q_u8(dst + 0 * stride, vextq_u8(d0, a15, 1)); in vpx_d45_predictor_16x16_neon()
323 vst1q_u8(dst + 1 * stride, vextq_u8(d0, a15, 2)); in vpx_d45_predictor_16x16_neon()
324 vst1q_u8(dst + 2 * stride, vextq_u8(d0, a15, 3)); in vpx_d45_predictor_16x16_neon()
325 vst1q_u8(dst + 3 * stride, vextq_u8(d0, a15, 4)); in vpx_d45_predictor_16x16_neon()
326 vst1q_u8(dst + 4 * stride, vextq_u8(d0, a15, 5)); in vpx_d45_predictor_16x16_neon()
327 vst1q_u8(dst + 5 * stride, vextq_u8(d0, a15, 6)); in vpx_d45_predictor_16x16_neon()
328 vst1q_u8(dst + 6 * stride, vextq_u8(d0, a15, 7)); in vpx_d45_predictor_16x16_neon()
329 vst1q_u8(dst + 7 * stride, vextq_u8(d0, a15, 8)); in vpx_d45_predictor_16x16_neon()
330 vst1q_u8(dst + 8 * stride, vextq_u8(d0, a15, 9)); in vpx_d45_predictor_16x16_neon()
331 vst1q_u8(dst + 9 * stride, vextq_u8(d0, a15, 10)); in vpx_d45_predictor_16x16_neon()
332 vst1q_u8(dst + 10 * stride, vextq_u8(d0, a15, 11)); in vpx_d45_predictor_16x16_neon()
333 vst1q_u8(dst + 11 * stride, vextq_u8(d0, a15, 12)); in vpx_d45_predictor_16x16_neon()
334 vst1q_u8(dst + 12 * stride, vextq_u8(d0, a15, 13)); in vpx_d45_predictor_16x16_neon()
335 vst1q_u8(dst + 13 * stride, vextq_u8(d0, a15, 14)); in vpx_d45_predictor_16x16_neon()
336 vst1q_u8(dst + 14 * stride, vextq_u8(d0, a15, 15)); in vpx_d45_predictor_16x16_neon()
337 vst1q_u8(dst + 15 * stride, a15); in vpx_d45_predictor_16x16_neon()
340 void vpx_d45_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d45_predictor_32x32_neon() argument
366 vst1q_u8(dst + 0 * stride + 0, vextq_u8(d0[0], d0[1], 1)); in vpx_d45_predictor_32x32_neon()
367 vst1q_u8(dst + 0 * stride + 16, vextq_u8(d0[1], a31, 1)); in vpx_d45_predictor_32x32_neon()
368 vst1q_u8(dst + 1 * stride + 0, vextq_u8(d0[0], d0[1], 2)); in vpx_d45_predictor_32x32_neon()
369 vst1q_u8(dst + 1 * stride + 16, vextq_u8(d0[1], a31, 2)); in vpx_d45_predictor_32x32_neon()
370 vst1q_u8(dst + 2 * stride + 0, vextq_u8(d0[0], d0[1], 3)); in vpx_d45_predictor_32x32_neon()
371 vst1q_u8(dst + 2 * stride + 16, vextq_u8(d0[1], a31, 3)); in vpx_d45_predictor_32x32_neon()
372 vst1q_u8(dst + 3 * stride + 0, vextq_u8(d0[0], d0[1], 4)); in vpx_d45_predictor_32x32_neon()
373 vst1q_u8(dst + 3 * stride + 16, vextq_u8(d0[1], a31, 4)); in vpx_d45_predictor_32x32_neon()
374 vst1q_u8(dst + 4 * stride + 0, vextq_u8(d0[0], d0[1], 5)); in vpx_d45_predictor_32x32_neon()
375 vst1q_u8(dst + 4 * stride + 16, vextq_u8(d0[1], a31, 5)); in vpx_d45_predictor_32x32_neon()
376 vst1q_u8(dst + 5 * stride + 0, vextq_u8(d0[0], d0[1], 6)); in vpx_d45_predictor_32x32_neon()
377 vst1q_u8(dst + 5 * stride + 16, vextq_u8(d0[1], a31, 6)); in vpx_d45_predictor_32x32_neon()
378 vst1q_u8(dst + 6 * stride + 0, vextq_u8(d0[0], d0[1], 7)); in vpx_d45_predictor_32x32_neon()
379 vst1q_u8(dst + 6 * stride + 16, vextq_u8(d0[1], a31, 7)); in vpx_d45_predictor_32x32_neon()
380 vst1q_u8(dst + 7 * stride + 0, vextq_u8(d0[0], d0[1], 8)); in vpx_d45_predictor_32x32_neon()
381 vst1q_u8(dst + 7 * stride + 16, vextq_u8(d0[1], a31, 8)); in vpx_d45_predictor_32x32_neon()
382 vst1q_u8(dst + 8 * stride + 0, vextq_u8(d0[0], d0[1], 9)); in vpx_d45_predictor_32x32_neon()
383 vst1q_u8(dst + 8 * stride + 16, vextq_u8(d0[1], a31, 9)); in vpx_d45_predictor_32x32_neon()
384 vst1q_u8(dst + 9 * stride + 0, vextq_u8(d0[0], d0[1], 10)); in vpx_d45_predictor_32x32_neon()
385 vst1q_u8(dst + 9 * stride + 16, vextq_u8(d0[1], a31, 10)); in vpx_d45_predictor_32x32_neon()
386 vst1q_u8(dst + 10 * stride + 0, vextq_u8(d0[0], d0[1], 11)); in vpx_d45_predictor_32x32_neon()
387 vst1q_u8(dst + 10 * stride + 16, vextq_u8(d0[1], a31, 11)); in vpx_d45_predictor_32x32_neon()
388 vst1q_u8(dst + 11 * stride + 0, vextq_u8(d0[0], d0[1], 12)); in vpx_d45_predictor_32x32_neon()
389 vst1q_u8(dst + 11 * stride + 16, vextq_u8(d0[1], a31, 12)); in vpx_d45_predictor_32x32_neon()
390 vst1q_u8(dst + 12 * stride + 0, vextq_u8(d0[0], d0[1], 13)); in vpx_d45_predictor_32x32_neon()
391 vst1q_u8(dst + 12 * stride + 16, vextq_u8(d0[1], a31, 13)); in vpx_d45_predictor_32x32_neon()
392 vst1q_u8(dst + 13 * stride + 0, vextq_u8(d0[0], d0[1], 14)); in vpx_d45_predictor_32x32_neon()
393 vst1q_u8(dst + 13 * stride + 16, vextq_u8(d0[1], a31, 14)); in vpx_d45_predictor_32x32_neon()
394 vst1q_u8(dst + 14 * stride + 0, vextq_u8(d0[0], d0[1], 15)); in vpx_d45_predictor_32x32_neon()
395 vst1q_u8(dst + 14 * stride + 16, vextq_u8(d0[1], a31, 15)); in vpx_d45_predictor_32x32_neon()
396 vst1q_u8(dst + 15 * stride + 0, d0[1]); in vpx_d45_predictor_32x32_neon()
397 vst1q_u8(dst + 15 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
399 vst1q_u8(dst + 16 * stride + 0, vextq_u8(d0[1], a31, 1)); in vpx_d45_predictor_32x32_neon()
400 vst1q_u8(dst + 16 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
401 vst1q_u8(dst + 17 * stride + 0, vextq_u8(d0[1], a31, 2)); in vpx_d45_predictor_32x32_neon()
402 vst1q_u8(dst + 17 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
403 vst1q_u8(dst + 18 * stride + 0, vextq_u8(d0[1], a31, 3)); in vpx_d45_predictor_32x32_neon()
404 vst1q_u8(dst + 18 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
405 vst1q_u8(dst + 19 * stride + 0, vextq_u8(d0[1], a31, 4)); in vpx_d45_predictor_32x32_neon()
406 vst1q_u8(dst + 19 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
407 vst1q_u8(dst + 20 * stride + 0, vextq_u8(d0[1], a31, 5)); in vpx_d45_predictor_32x32_neon()
408 vst1q_u8(dst + 20 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
409 vst1q_u8(dst + 21 * stride + 0, vextq_u8(d0[1], a31, 6)); in vpx_d45_predictor_32x32_neon()
410 vst1q_u8(dst + 21 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
411 vst1q_u8(dst + 22 * stride + 0, vextq_u8(d0[1], a31, 7)); in vpx_d45_predictor_32x32_neon()
412 vst1q_u8(dst + 22 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
413 vst1q_u8(dst + 23 * stride + 0, vextq_u8(d0[1], a31, 8)); in vpx_d45_predictor_32x32_neon()
414 vst1q_u8(dst + 23 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
415 vst1q_u8(dst + 24 * stride + 0, vextq_u8(d0[1], a31, 9)); in vpx_d45_predictor_32x32_neon()
416 vst1q_u8(dst + 24 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
417 vst1q_u8(dst + 25 * stride + 0, vextq_u8(d0[1], a31, 10)); in vpx_d45_predictor_32x32_neon()
418 vst1q_u8(dst + 25 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
419 vst1q_u8(dst + 26 * stride + 0, vextq_u8(d0[1], a31, 11)); in vpx_d45_predictor_32x32_neon()
420 vst1q_u8(dst + 26 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
421 vst1q_u8(dst + 27 * stride + 0, vextq_u8(d0[1], a31, 12)); in vpx_d45_predictor_32x32_neon()
422 vst1q_u8(dst + 27 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
423 vst1q_u8(dst + 28 * stride + 0, vextq_u8(d0[1], a31, 13)); in vpx_d45_predictor_32x32_neon()
424 vst1q_u8(dst + 28 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
425 vst1q_u8(dst + 29 * stride + 0, vextq_u8(d0[1], a31, 14)); in vpx_d45_predictor_32x32_neon()
426 vst1q_u8(dst + 29 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
427 vst1q_u8(dst + 30 * stride + 0, vextq_u8(d0[1], a31, 15)); in vpx_d45_predictor_32x32_neon()
428 vst1q_u8(dst + 30 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
429 vst1q_u8(dst + 31 * stride + 0, a31); in vpx_d45_predictor_32x32_neon()
430 vst1q_u8(dst + 31 * stride + 16, a31); in vpx_d45_predictor_32x32_neon()
435 void vpx_d63_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d63_predictor_4x4_neon() argument
450 store_u8_4x1(dst + 0 * stride, d0); in vpx_d63_predictor_4x4_neon()
451 store_u8_4x1(dst + 1 * stride, d1); in vpx_d63_predictor_4x4_neon()
452 store_u8_4x1(dst + 2 * stride, d2); in vpx_d63_predictor_4x4_neon()
453 store_u8_4x1(dst + 3 * stride, d3); in vpx_d63_predictor_4x4_neon()
456 void vpx_d63_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d63_predictor_8x8_neon() argument
469 vst1_u8(dst + 0 * stride, d0); in vpx_d63_predictor_8x8_neon()
470 vst1_u8(dst + 1 * stride, d1); in vpx_d63_predictor_8x8_neon()
475 vst1_u8(dst + 2 * stride, vext_u8(d0, a7, 2)); in vpx_d63_predictor_8x8_neon()
476 vst1_u8(dst + 3 * stride, vext_u8(d1, a7, 2)); in vpx_d63_predictor_8x8_neon()
477 vst1_u8(dst + 4 * stride, vext_u8(d0, a7, 3)); in vpx_d63_predictor_8x8_neon()
478 vst1_u8(dst + 5 * stride, vext_u8(d1, a7, 3)); in vpx_d63_predictor_8x8_neon()
479 vst1_u8(dst + 6 * stride, vext_u8(d0, a7, 4)); in vpx_d63_predictor_8x8_neon()
480 vst1_u8(dst + 7 * stride, vext_u8(d1, a7, 4)); in vpx_d63_predictor_8x8_neon()
483 void vpx_d63_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d63_predictor_16x16_neon() argument
496 vst1q_u8(dst + 0 * stride, d0); in vpx_d63_predictor_16x16_neon()
497 vst1q_u8(dst + 1 * stride, d1); in vpx_d63_predictor_16x16_neon()
502 vst1q_u8(dst + 2 * stride, vextq_u8(d0, a15, 2)); in vpx_d63_predictor_16x16_neon()
503 vst1q_u8(dst + 3 * stride, vextq_u8(d1, a15, 2)); in vpx_d63_predictor_16x16_neon()
504 vst1q_u8(dst + 4 * stride, vextq_u8(d0, a15, 3)); in vpx_d63_predictor_16x16_neon()
505 vst1q_u8(dst + 5 * stride, vextq_u8(d1, a15, 3)); in vpx_d63_predictor_16x16_neon()
506 vst1q_u8(dst + 6 * stride, vextq_u8(d0, a15, 4)); in vpx_d63_predictor_16x16_neon()
507 vst1q_u8(dst + 7 * stride, vextq_u8(d1, a15, 4)); in vpx_d63_predictor_16x16_neon()
508 vst1q_u8(dst + 8 * stride, vextq_u8(d0, a15, 5)); in vpx_d63_predictor_16x16_neon()
509 vst1q_u8(dst + 9 * stride, vextq_u8(d1, a15, 5)); in vpx_d63_predictor_16x16_neon()
510 vst1q_u8(dst + 10 * stride, vextq_u8(d0, a15, 6)); in vpx_d63_predictor_16x16_neon()
511 vst1q_u8(dst + 11 * stride, vextq_u8(d1, a15, 6)); in vpx_d63_predictor_16x16_neon()
512 vst1q_u8(dst + 12 * stride, vextq_u8(d0, a15, 7)); in vpx_d63_predictor_16x16_neon()
513 vst1q_u8(dst + 13 * stride, vextq_u8(d1, a15, 7)); in vpx_d63_predictor_16x16_neon()
514 vst1q_u8(dst + 14 * stride, vextq_u8(d0, a15, 8)); in vpx_d63_predictor_16x16_neon()
515 vst1q_u8(dst + 15 * stride, vextq_u8(d1, a15, 8)); in vpx_d63_predictor_16x16_neon()
518 void vpx_d63_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d63_predictor_32x32_neon() argument
536 vst1q_u8(dst + 0 * stride + 0, d0_lo); in vpx_d63_predictor_32x32_neon()
537 vst1q_u8(dst + 0 * stride + 16, d0_hi); in vpx_d63_predictor_32x32_neon()
538 vst1q_u8(dst + 1 * stride + 0, d1_lo); in vpx_d63_predictor_32x32_neon()
539 vst1q_u8(dst + 1 * stride + 16, d1_hi); in vpx_d63_predictor_32x32_neon()
546 vst1q_u8(dst + 2 * stride + 0, vextq_u8(d0_lo, d0_hi, 2)); in vpx_d63_predictor_32x32_neon()
547 vst1q_u8(dst + 2 * stride + 16, vextq_u8(d0_hi, a31, 2)); in vpx_d63_predictor_32x32_neon()
548 vst1q_u8(dst + 3 * stride + 0, vextq_u8(d1_lo, d1_hi, 2)); in vpx_d63_predictor_32x32_neon()
549 vst1q_u8(dst + 3 * stride + 16, vextq_u8(d1_hi, a31, 2)); in vpx_d63_predictor_32x32_neon()
550 vst1q_u8(dst + 4 * stride + 0, vextq_u8(d0_lo, d0_hi, 3)); in vpx_d63_predictor_32x32_neon()
551 vst1q_u8(dst + 4 * stride + 16, vextq_u8(d0_hi, a31, 3)); in vpx_d63_predictor_32x32_neon()
552 vst1q_u8(dst + 5 * stride + 0, vextq_u8(d1_lo, d1_hi, 3)); in vpx_d63_predictor_32x32_neon()
553 vst1q_u8(dst + 5 * stride + 16, vextq_u8(d1_hi, a31, 3)); in vpx_d63_predictor_32x32_neon()
554 vst1q_u8(dst + 6 * stride + 0, vextq_u8(d0_lo, d0_hi, 4)); in vpx_d63_predictor_32x32_neon()
555 vst1q_u8(dst + 6 * stride + 16, vextq_u8(d0_hi, a31, 4)); in vpx_d63_predictor_32x32_neon()
556 vst1q_u8(dst + 7 * stride + 0, vextq_u8(d1_lo, d1_hi, 4)); in vpx_d63_predictor_32x32_neon()
557 vst1q_u8(dst + 7 * stride + 16, vextq_u8(d1_hi, a31, 4)); in vpx_d63_predictor_32x32_neon()
558 vst1q_u8(dst + 8 * stride + 0, vextq_u8(d0_lo, d0_hi, 5)); in vpx_d63_predictor_32x32_neon()
559 vst1q_u8(dst + 8 * stride + 16, vextq_u8(d0_hi, a31, 5)); in vpx_d63_predictor_32x32_neon()
560 vst1q_u8(dst + 9 * stride + 0, vextq_u8(d1_lo, d1_hi, 5)); in vpx_d63_predictor_32x32_neon()
561 vst1q_u8(dst + 9 * stride + 16, vextq_u8(d1_hi, a31, 5)); in vpx_d63_predictor_32x32_neon()
562 vst1q_u8(dst + 10 * stride + 0, vextq_u8(d0_lo, d0_hi, 6)); in vpx_d63_predictor_32x32_neon()
563 vst1q_u8(dst + 10 * stride + 16, vextq_u8(d0_hi, a31, 6)); in vpx_d63_predictor_32x32_neon()
564 vst1q_u8(dst + 11 * stride + 0, vextq_u8(d1_lo, d1_hi, 6)); in vpx_d63_predictor_32x32_neon()
565 vst1q_u8(dst + 11 * stride + 16, vextq_u8(d1_hi, a31, 6)); in vpx_d63_predictor_32x32_neon()
566 vst1q_u8(dst + 12 * stride + 0, vextq_u8(d0_lo, d0_hi, 7)); in vpx_d63_predictor_32x32_neon()
567 vst1q_u8(dst + 12 * stride + 16, vextq_u8(d0_hi, a31, 7)); in vpx_d63_predictor_32x32_neon()
568 vst1q_u8(dst + 13 * stride + 0, vextq_u8(d1_lo, d1_hi, 7)); in vpx_d63_predictor_32x32_neon()
569 vst1q_u8(dst + 13 * stride + 16, vextq_u8(d1_hi, a31, 7)); in vpx_d63_predictor_32x32_neon()
570 vst1q_u8(dst + 14 * stride + 0, vextq_u8(d0_lo, d0_hi, 8)); in vpx_d63_predictor_32x32_neon()
571 vst1q_u8(dst + 14 * stride + 16, vextq_u8(d0_hi, a31, 8)); in vpx_d63_predictor_32x32_neon()
572 vst1q_u8(dst + 15 * stride + 0, vextq_u8(d1_lo, d1_hi, 8)); in vpx_d63_predictor_32x32_neon()
573 vst1q_u8(dst + 15 * stride + 16, vextq_u8(d1_hi, a31, 8)); in vpx_d63_predictor_32x32_neon()
574 vst1q_u8(dst + 16 * stride + 0, vextq_u8(d0_lo, d0_hi, 9)); in vpx_d63_predictor_32x32_neon()
575 vst1q_u8(dst + 16 * stride + 16, vextq_u8(d0_hi, a31, 9)); in vpx_d63_predictor_32x32_neon()
576 vst1q_u8(dst + 17 * stride + 0, vextq_u8(d1_lo, d1_hi, 9)); in vpx_d63_predictor_32x32_neon()
577 vst1q_u8(dst + 17 * stride + 16, vextq_u8(d1_hi, a31, 9)); in vpx_d63_predictor_32x32_neon()
578 vst1q_u8(dst + 18 * stride + 0, vextq_u8(d0_lo, d0_hi, 10)); in vpx_d63_predictor_32x32_neon()
579 vst1q_u8(dst + 18 * stride + 16, vextq_u8(d0_hi, a31, 10)); in vpx_d63_predictor_32x32_neon()
580 vst1q_u8(dst + 19 * stride + 0, vextq_u8(d1_lo, d1_hi, 10)); in vpx_d63_predictor_32x32_neon()
581 vst1q_u8(dst + 19 * stride + 16, vextq_u8(d1_hi, a31, 10)); in vpx_d63_predictor_32x32_neon()
582 vst1q_u8(dst + 20 * stride + 0, vextq_u8(d0_lo, d0_hi, 11)); in vpx_d63_predictor_32x32_neon()
583 vst1q_u8(dst + 20 * stride + 16, vextq_u8(d0_hi, a31, 11)); in vpx_d63_predictor_32x32_neon()
584 vst1q_u8(dst + 21 * stride + 0, vextq_u8(d1_lo, d1_hi, 11)); in vpx_d63_predictor_32x32_neon()
585 vst1q_u8(dst + 21 * stride + 16, vextq_u8(d1_hi, a31, 11)); in vpx_d63_predictor_32x32_neon()
586 vst1q_u8(dst + 22 * stride + 0, vextq_u8(d0_lo, d0_hi, 12)); in vpx_d63_predictor_32x32_neon()
587 vst1q_u8(dst + 22 * stride + 16, vextq_u8(d0_hi, a31, 12)); in vpx_d63_predictor_32x32_neon()
588 vst1q_u8(dst + 23 * stride + 0, vextq_u8(d1_lo, d1_hi, 12)); in vpx_d63_predictor_32x32_neon()
589 vst1q_u8(dst + 23 * stride + 16, vextq_u8(d1_hi, a31, 12)); in vpx_d63_predictor_32x32_neon()
590 vst1q_u8(dst + 24 * stride + 0, vextq_u8(d0_lo, d0_hi, 13)); in vpx_d63_predictor_32x32_neon()
591 vst1q_u8(dst + 24 * stride + 16, vextq_u8(d0_hi, a31, 13)); in vpx_d63_predictor_32x32_neon()
592 vst1q_u8(dst + 25 * stride + 0, vextq_u8(d1_lo, d1_hi, 13)); in vpx_d63_predictor_32x32_neon()
593 vst1q_u8(dst + 25 * stride + 16, vextq_u8(d1_hi, a31, 13)); in vpx_d63_predictor_32x32_neon()
594 vst1q_u8(dst + 26 * stride + 0, vextq_u8(d0_lo, d0_hi, 14)); in vpx_d63_predictor_32x32_neon()
595 vst1q_u8(dst + 26 * stride + 16, vextq_u8(d0_hi, a31, 14)); in vpx_d63_predictor_32x32_neon()
596 vst1q_u8(dst + 27 * stride + 0, vextq_u8(d1_lo, d1_hi, 14)); in vpx_d63_predictor_32x32_neon()
597 vst1q_u8(dst + 27 * stride + 16, vextq_u8(d1_hi, a31, 14)); in vpx_d63_predictor_32x32_neon()
598 vst1q_u8(dst + 28 * stride + 0, vextq_u8(d0_lo, d0_hi, 15)); in vpx_d63_predictor_32x32_neon()
599 vst1q_u8(dst + 28 * stride + 16, vextq_u8(d0_hi, a31, 15)); in vpx_d63_predictor_32x32_neon()
600 vst1q_u8(dst + 29 * stride + 0, vextq_u8(d1_lo, d1_hi, 15)); in vpx_d63_predictor_32x32_neon()
601 vst1q_u8(dst + 29 * stride + 16, vextq_u8(d1_hi, a31, 15)); in vpx_d63_predictor_32x32_neon()
602 vst1q_u8(dst + 30 * stride + 0, d0_hi); in vpx_d63_predictor_32x32_neon()
603 vst1q_u8(dst + 30 * stride + 16, a31); in vpx_d63_predictor_32x32_neon()
604 vst1q_u8(dst + 31 * stride + 0, d1_hi); in vpx_d63_predictor_32x32_neon()
605 vst1q_u8(dst + 31 * stride + 16, a31); in vpx_d63_predictor_32x32_neon()
610 void vpx_d117_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d117_predictor_4x4_neon() argument
628 store_u8_4x1(dst + 0 * stride, d0); in vpx_d117_predictor_4x4_neon()
629 store_u8_4x1(dst + 1 * stride, d1); in vpx_d117_predictor_4x4_neon()
630 store_u8_4x1(dst + 2 * stride, d2); in vpx_d117_predictor_4x4_neon()
631 store_u8_4x1(dst + 3 * stride, d3); in vpx_d117_predictor_4x4_neon()
634 void vpx_d117_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d117_predictor_8x8_neon() argument
684 // stride=0 [ d0[0], d0[1], d0[2], d0[3], d0[4], d0[5], d0[6], d0[7] ] in vpx_d117_predictor_8x8_neon()
685 // stride=1 [ d1[0], d1[1], d1[2], d1[3], d1[4], d1[5], d1[6], d1[7] ] in vpx_d117_predictor_8x8_neon()
686 // stride=2 [ col0[7], d0[0], d0[1], d0[2], d0[3], d0[4], d0[5], d0[6] ] in vpx_d117_predictor_8x8_neon()
687 // stride=3 [ col0[6], d1[0], d1[1], d1[2], d1[3], d1[4], d1[5], d1[6] ] in vpx_d117_predictor_8x8_neon()
688 // stride=4 [ col0[5], col0[7], d0[0], d0[1], d0[2], d0[3], d0[4], d0[5] ] in vpx_d117_predictor_8x8_neon()
689 // stride=5 [ col0[4], col0[6], d1[0], d1[1], d1[2], d1[3], d1[4], d1[5] ] in vpx_d117_predictor_8x8_neon()
690 // stride=6 [ col0[3], col0[5], col0[7], d0[0], d0[1], d0[2], d0[3], d0[4] ] in vpx_d117_predictor_8x8_neon()
691 // stride=7 [ col0[2], col0[4], col0[6], d1[0], d1[1], d1[2], d1[3], d1[4] ] in vpx_d117_predictor_8x8_neon()
692 vst1_u8(dst + 0 * stride, d0); in vpx_d117_predictor_8x8_neon()
693 vst1_u8(dst + 1 * stride, d1); in vpx_d117_predictor_8x8_neon()
694 vst1_u8(dst + 2 * stride, vext_u8(col0_even, d0, 7)); in vpx_d117_predictor_8x8_neon()
695 vst1_u8(dst + 3 * stride, vext_u8(col0_odd, d1, 7)); in vpx_d117_predictor_8x8_neon()
696 vst1_u8(dst + 4 * stride, vext_u8(col0_even, d0, 6)); in vpx_d117_predictor_8x8_neon()
697 vst1_u8(dst + 5 * stride, vext_u8(col0_odd, d1, 6)); in vpx_d117_predictor_8x8_neon()
698 vst1_u8(dst + 6 * stride, vext_u8(col0_even, d0, 5)); in vpx_d117_predictor_8x8_neon()
699 vst1_u8(dst + 7 * stride, vext_u8(col0_odd, d1, 5)); in vpx_d117_predictor_8x8_neon()
702 void vpx_d117_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d117_predictor_16x16_neon() argument
733 vst1q_u8(dst + 0 * stride, d0); in vpx_d117_predictor_16x16_neon()
734 vst1q_u8(dst + 1 * stride, d1); in vpx_d117_predictor_16x16_neon()
735 vst1q_u8(dst + 2 * stride, vextq_u8(col0_even, d0, 15)); in vpx_d117_predictor_16x16_neon()
736 vst1q_u8(dst + 3 * stride, vextq_u8(col0_odd, d1, 15)); in vpx_d117_predictor_16x16_neon()
737 vst1q_u8(dst + 4 * stride, vextq_u8(col0_even, d0, 14)); in vpx_d117_predictor_16x16_neon()
738 vst1q_u8(dst + 5 * stride, vextq_u8(col0_odd, d1, 14)); in vpx_d117_predictor_16x16_neon()
739 vst1q_u8(dst + 6 * stride, vextq_u8(col0_even, d0, 13)); in vpx_d117_predictor_16x16_neon()
740 vst1q_u8(dst + 7 * stride, vextq_u8(col0_odd, d1, 13)); in vpx_d117_predictor_16x16_neon()
741 vst1q_u8(dst + 8 * stride, vextq_u8(col0_even, d0, 12)); in vpx_d117_predictor_16x16_neon()
742 vst1q_u8(dst + 9 * stride, vextq_u8(col0_odd, d1, 12)); in vpx_d117_predictor_16x16_neon()
743 vst1q_u8(dst + 10 * stride, vextq_u8(col0_even, d0, 11)); in vpx_d117_predictor_16x16_neon()
744 vst1q_u8(dst + 11 * stride, vextq_u8(col0_odd, d1, 11)); in vpx_d117_predictor_16x16_neon()
745 vst1q_u8(dst + 12 * stride, vextq_u8(col0_even, d0, 10)); in vpx_d117_predictor_16x16_neon()
746 vst1q_u8(dst + 13 * stride, vextq_u8(col0_odd, d1, 10)); in vpx_d117_predictor_16x16_neon()
747 vst1q_u8(dst + 14 * stride, vextq_u8(col0_even, d0, 9)); in vpx_d117_predictor_16x16_neon()
748 vst1q_u8(dst + 15 * stride, vextq_u8(col0_odd, d1, 9)); in vpx_d117_predictor_16x16_neon()
751 void vpx_d117_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d117_predictor_32x32_neon() argument
794 vst1q_u8(dst + 0 * stride + 0, d0_lo); in vpx_d117_predictor_32x32_neon()
795 vst1q_u8(dst + 0 * stride + 16, d0_hi); in vpx_d117_predictor_32x32_neon()
796 vst1q_u8(dst + 1 * stride + 0, d1_lo); in vpx_d117_predictor_32x32_neon()
797 vst1q_u8(dst + 1 * stride + 16, d1_hi); in vpx_d117_predictor_32x32_neon()
798 vst1q_u8(dst + 2 * stride + 0, vextq_u8(col0_even, d0_lo, 15)); in vpx_d117_predictor_32x32_neon()
799 vst1q_u8(dst + 2 * stride + 16, vextq_u8(d0_lo, d0_hi, 15)); in vpx_d117_predictor_32x32_neon()
800 vst1q_u8(dst + 3 * stride + 0, vextq_u8(col0_odd, d1_lo, 15)); in vpx_d117_predictor_32x32_neon()
801 vst1q_u8(dst + 3 * stride + 16, vextq_u8(d1_lo, d1_hi, 15)); in vpx_d117_predictor_32x32_neon()
802 vst1q_u8(dst + 4 * stride + 0, vextq_u8(col0_even, d0_lo, 14)); in vpx_d117_predictor_32x32_neon()
803 vst1q_u8(dst + 4 * stride + 16, vextq_u8(d0_lo, d0_hi, 14)); in vpx_d117_predictor_32x32_neon()
804 vst1q_u8(dst + 5 * stride + 0, vextq_u8(col0_odd, d1_lo, 14)); in vpx_d117_predictor_32x32_neon()
805 vst1q_u8(dst + 5 * stride + 16, vextq_u8(d1_lo, d1_hi, 14)); in vpx_d117_predictor_32x32_neon()
806 vst1q_u8(dst + 6 * stride + 0, vextq_u8(col0_even, d0_lo, 13)); in vpx_d117_predictor_32x32_neon()
807 vst1q_u8(dst + 6 * stride + 16, vextq_u8(d0_lo, d0_hi, 13)); in vpx_d117_predictor_32x32_neon()
808 vst1q_u8(dst + 7 * stride + 0, vextq_u8(col0_odd, d1_lo, 13)); in vpx_d117_predictor_32x32_neon()
809 vst1q_u8(dst + 7 * stride + 16, vextq_u8(d1_lo, d1_hi, 13)); in vpx_d117_predictor_32x32_neon()
810 vst1q_u8(dst + 8 * stride + 0, vextq_u8(col0_even, d0_lo, 12)); in vpx_d117_predictor_32x32_neon()
811 vst1q_u8(dst + 8 * stride + 16, vextq_u8(d0_lo, d0_hi, 12)); in vpx_d117_predictor_32x32_neon()
812 vst1q_u8(dst + 9 * stride + 0, vextq_u8(col0_odd, d1_lo, 12)); in vpx_d117_predictor_32x32_neon()
813 vst1q_u8(dst + 9 * stride + 16, vextq_u8(d1_lo, d1_hi, 12)); in vpx_d117_predictor_32x32_neon()
814 vst1q_u8(dst + 10 * stride + 0, vextq_u8(col0_even, d0_lo, 11)); in vpx_d117_predictor_32x32_neon()
815 vst1q_u8(dst + 10 * stride + 16, vextq_u8(d0_lo, d0_hi, 11)); in vpx_d117_predictor_32x32_neon()
816 vst1q_u8(dst + 11 * stride + 0, vextq_u8(col0_odd, d1_lo, 11)); in vpx_d117_predictor_32x32_neon()
817 vst1q_u8(dst + 11 * stride + 16, vextq_u8(d1_lo, d1_hi, 11)); in vpx_d117_predictor_32x32_neon()
818 vst1q_u8(dst + 12 * stride + 0, vextq_u8(col0_even, d0_lo, 10)); in vpx_d117_predictor_32x32_neon()
819 vst1q_u8(dst + 12 * stride + 16, vextq_u8(d0_lo, d0_hi, 10)); in vpx_d117_predictor_32x32_neon()
820 vst1q_u8(dst + 13 * stride + 0, vextq_u8(col0_odd, d1_lo, 10)); in vpx_d117_predictor_32x32_neon()
821 vst1q_u8(dst + 13 * stride + 16, vextq_u8(d1_lo, d1_hi, 10)); in vpx_d117_predictor_32x32_neon()
822 vst1q_u8(dst + 14 * stride + 0, vextq_u8(col0_even, d0_lo, 9)); in vpx_d117_predictor_32x32_neon()
823 vst1q_u8(dst + 14 * stride + 16, vextq_u8(d0_lo, d0_hi, 9)); in vpx_d117_predictor_32x32_neon()
824 vst1q_u8(dst + 15 * stride + 0, vextq_u8(col0_odd, d1_lo, 9)); in vpx_d117_predictor_32x32_neon()
825 vst1q_u8(dst + 15 * stride + 16, vextq_u8(d1_lo, d1_hi, 9)); in vpx_d117_predictor_32x32_neon()
826 vst1q_u8(dst + 16 * stride + 0, vextq_u8(col0_even, d0_lo, 8)); in vpx_d117_predictor_32x32_neon()
827 vst1q_u8(dst + 16 * stride + 16, vextq_u8(d0_lo, d0_hi, 8)); in vpx_d117_predictor_32x32_neon()
828 vst1q_u8(dst + 17 * stride + 0, vextq_u8(col0_odd, d1_lo, 8)); in vpx_d117_predictor_32x32_neon()
829 vst1q_u8(dst + 17 * stride + 16, vextq_u8(d1_lo, d1_hi, 8)); in vpx_d117_predictor_32x32_neon()
830 vst1q_u8(dst + 18 * stride + 0, vextq_u8(col0_even, d0_lo, 7)); in vpx_d117_predictor_32x32_neon()
831 vst1q_u8(dst + 18 * stride + 16, vextq_u8(d0_lo, d0_hi, 7)); in vpx_d117_predictor_32x32_neon()
832 vst1q_u8(dst + 19 * stride + 0, vextq_u8(col0_odd, d1_lo, 7)); in vpx_d117_predictor_32x32_neon()
833 vst1q_u8(dst + 19 * stride + 16, vextq_u8(d1_lo, d1_hi, 7)); in vpx_d117_predictor_32x32_neon()
834 vst1q_u8(dst + 20 * stride + 0, vextq_u8(col0_even, d0_lo, 6)); in vpx_d117_predictor_32x32_neon()
835 vst1q_u8(dst + 20 * stride + 16, vextq_u8(d0_lo, d0_hi, 6)); in vpx_d117_predictor_32x32_neon()
836 vst1q_u8(dst + 21 * stride + 0, vextq_u8(col0_odd, d1_lo, 6)); in vpx_d117_predictor_32x32_neon()
837 vst1q_u8(dst + 21 * stride + 16, vextq_u8(d1_lo, d1_hi, 6)); in vpx_d117_predictor_32x32_neon()
838 vst1q_u8(dst + 22 * stride + 0, vextq_u8(col0_even, d0_lo, 5)); in vpx_d117_predictor_32x32_neon()
839 vst1q_u8(dst + 22 * stride + 16, vextq_u8(d0_lo, d0_hi, 5)); in vpx_d117_predictor_32x32_neon()
840 vst1q_u8(dst + 23 * stride + 0, vextq_u8(col0_odd, d1_lo, 5)); in vpx_d117_predictor_32x32_neon()
841 vst1q_u8(dst + 23 * stride + 16, vextq_u8(d1_lo, d1_hi, 5)); in vpx_d117_predictor_32x32_neon()
842 vst1q_u8(dst + 24 * stride + 0, vextq_u8(col0_even, d0_lo, 4)); in vpx_d117_predictor_32x32_neon()
843 vst1q_u8(dst + 24 * stride + 16, vextq_u8(d0_lo, d0_hi, 4)); in vpx_d117_predictor_32x32_neon()
844 vst1q_u8(dst + 25 * stride + 0, vextq_u8(col0_odd, d1_lo, 4)); in vpx_d117_predictor_32x32_neon()
845 vst1q_u8(dst + 25 * stride + 16, vextq_u8(d1_lo, d1_hi, 4)); in vpx_d117_predictor_32x32_neon()
846 vst1q_u8(dst + 26 * stride + 0, vextq_u8(col0_even, d0_lo, 3)); in vpx_d117_predictor_32x32_neon()
847 vst1q_u8(dst + 26 * stride + 16, vextq_u8(d0_lo, d0_hi, 3)); in vpx_d117_predictor_32x32_neon()
848 vst1q_u8(dst + 27 * stride + 0, vextq_u8(col0_odd, d1_lo, 3)); in vpx_d117_predictor_32x32_neon()
849 vst1q_u8(dst + 27 * stride + 16, vextq_u8(d1_lo, d1_hi, 3)); in vpx_d117_predictor_32x32_neon()
850 vst1q_u8(dst + 28 * stride + 0, vextq_u8(col0_even, d0_lo, 2)); in vpx_d117_predictor_32x32_neon()
851 vst1q_u8(dst + 28 * stride + 16, vextq_u8(d0_lo, d0_hi, 2)); in vpx_d117_predictor_32x32_neon()
852 vst1q_u8(dst + 29 * stride + 0, vextq_u8(col0_odd, d1_lo, 2)); in vpx_d117_predictor_32x32_neon()
853 vst1q_u8(dst + 29 * stride + 16, vextq_u8(d1_lo, d1_hi, 2)); in vpx_d117_predictor_32x32_neon()
854 vst1q_u8(dst + 30 * stride + 0, vextq_u8(col0_even, d0_lo, 1)); in vpx_d117_predictor_32x32_neon()
855 vst1q_u8(dst + 30 * stride + 16, vextq_u8(d0_lo, d0_hi, 1)); in vpx_d117_predictor_32x32_neon()
856 vst1q_u8(dst + 31 * stride + 0, vextq_u8(col0_odd, d1_lo, 1)); in vpx_d117_predictor_32x32_neon()
857 vst1q_u8(dst + 31 * stride + 16, vextq_u8(d1_lo, d1_hi, 1)); in vpx_d117_predictor_32x32_neon()
862 void vpx_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d135_predictor_4x4_neon() argument
873 store_u8_4x1(dst + 0 * stride, vext_u8(avg2, avg2, 3)); in vpx_d135_predictor_4x4_neon()
874 store_u8_4x1(dst + 1 * stride, vext_u8(avg2, avg2, 2)); in vpx_d135_predictor_4x4_neon()
875 store_u8_4x1(dst + 2 * stride, vext_u8(avg2, avg2, 1)); in vpx_d135_predictor_4x4_neon()
876 store_u8_4x1(dst + 3 * stride, avg2); in vpx_d135_predictor_4x4_neon()
879 void vpx_d135_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d135_predictor_8x8_neon() argument
894 vst1_u8(dst + 0 * stride, vget_low_u8(vextq_u8(row, row, 7))); in vpx_d135_predictor_8x8_neon()
895 vst1_u8(dst + 1 * stride, vget_low_u8(vextq_u8(row, row, 6))); in vpx_d135_predictor_8x8_neon()
896 vst1_u8(dst + 2 * stride, vget_low_u8(vextq_u8(row, row, 5))); in vpx_d135_predictor_8x8_neon()
897 vst1_u8(dst + 3 * stride, vget_low_u8(vextq_u8(row, row, 4))); in vpx_d135_predictor_8x8_neon()
898 vst1_u8(dst + 4 * stride, vget_low_u8(vextq_u8(row, row, 3))); in vpx_d135_predictor_8x8_neon()
899 vst1_u8(dst + 5 * stride, vget_low_u8(vextq_u8(row, row, 2))); in vpx_d135_predictor_8x8_neon()
900 vst1_u8(dst + 6 * stride, vget_low_u8(vextq_u8(row, row, 1))); in vpx_d135_predictor_8x8_neon()
901 vst1_u8(dst + 7 * stride, vget_low_u8(row)); in vpx_d135_predictor_8x8_neon()
905 uint8_t **dst, const ptrdiff_t stride, const uint8x16_t row_0, in d135_store_16x8() argument
910 *dst += stride; in d135_store_16x8()
912 *dst += stride; in d135_store_16x8()
914 *dst += stride; in d135_store_16x8()
916 *dst += stride; in d135_store_16x8()
918 *dst += stride; in d135_store_16x8()
920 *dst += stride; in d135_store_16x8()
922 *dst += stride; in d135_store_16x8()
924 *dst += stride; in d135_store_16x8()
927 void vpx_d135_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d135_predictor_16x16_neon() argument
961 d135_store_16x8(&dst, stride, r_0, r_1, r_2, r_3, r_4, r_5, r_6, r_7); in vpx_d135_predictor_16x16_neon()
962 d135_store_16x8(&dst, stride, r_8, r_9, r_a, r_b, r_c, r_d, r_e, row_0); in vpx_d135_predictor_16x16_neon()
965 static INLINE void d135_store_32x2(uint8_t **dst, const ptrdiff_t stride, in d135_store_32x2() argument
973 dst2 += 16 * stride - 16; in d135_store_32x2()
977 *dst += stride; in d135_store_32x2()
980 void vpx_d135_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d135_predictor_32x32_neon() argument
1019 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1026 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1033 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1040 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1047 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1054 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1061 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1068 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1075 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1082 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1089 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1096 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1103 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1110 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1117 d135_store_32x2(&dst, stride, r_0, r_1, r_2); in vpx_d135_predictor_32x32_neon()
1120 d135_store_32x2(&dst, stride, row_0, row_1, row_2); in vpx_d135_predictor_32x32_neon()
1125 void vpx_d153_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d153_predictor_4x4_neon() argument
1146 store_u8_4x1(dst + 0 * stride, vext_u8(d02, d1, 7)); in vpx_d153_predictor_4x4_neon()
1147 store_u8_4x1(dst + 1 * stride, vext_u8(d02, d1, 5)); in vpx_d153_predictor_4x4_neon()
1148 store_u8_4x1(dst + 2 * stride, vext_u8(d02, d1, 3)); in vpx_d153_predictor_4x4_neon()
1149 store_u8_4x1(dst + 3 * stride, vext_u8(d02, d1, 1)); in vpx_d153_predictor_4x4_neon()
1152 void vpx_d153_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d153_predictor_8x8_neon() argument
1196 // stride=0 [ d0[0], d1[0], d1[1], d1[2], d1[3], d1[4], d1[5], d1[6] ] in vpx_d153_predictor_8x8_neon()
1197 // stride=1 [ d0[1], d2[0], d0[0], d1[0], d1[1], d1[2], d1[3], d1[4] ] in vpx_d153_predictor_8x8_neon()
1198 // stride=2 [ d0[2], d2[1], d0[1], d2[0], d0[0], d1[0], d1[1], d1[2] ] in vpx_d153_predictor_8x8_neon()
1199 // stride=3 [ d0[3], d2[2], d0[2], d2[1], d0[1], d2[0], d0[0], d1[0] ] in vpx_d153_predictor_8x8_neon()
1200 // stride=4 [ d0[4], d2[3], d0[3], d2[2], d0[2], d2[1], d0[1], d2[0] ] in vpx_d153_predictor_8x8_neon()
1201 // stride=5 [ d0[5], d2[4], d0[4], d2[3], d0[3], d2[2], d0[2], d2[1] ] in vpx_d153_predictor_8x8_neon()
1202 // stride=6 [ d0[6], d2[5], d0[5], d2[4], d0[4], d2[3], d0[3], d2[2] ] in vpx_d153_predictor_8x8_neon()
1203 // stride=7 [ d0[7], d2[6], d0[6], d2[5], d0[5], d2[4], d0[4], d2[3] ] in vpx_d153_predictor_8x8_neon()
1204 vst1_u8(dst + 0 * stride, vext_u8(d02_hi, d1, 7)); in vpx_d153_predictor_8x8_neon()
1205 vst1_u8(dst + 1 * stride, vext_u8(d02_hi, d1, 5)); in vpx_d153_predictor_8x8_neon()
1206 vst1_u8(dst + 2 * stride, vext_u8(d02_hi, d1, 3)); in vpx_d153_predictor_8x8_neon()
1207 vst1_u8(dst + 3 * stride, vext_u8(d02_hi, d1, 1)); in vpx_d153_predictor_8x8_neon()
1208 vst1_u8(dst + 4 * stride, vext_u8(d02_lo, d02_hi, 7)); in vpx_d153_predictor_8x8_neon()
1209 vst1_u8(dst + 5 * stride, vext_u8(d02_lo, d02_hi, 5)); in vpx_d153_predictor_8x8_neon()
1210 vst1_u8(dst + 6 * stride, vext_u8(d02_lo, d02_hi, 3)); in vpx_d153_predictor_8x8_neon()
1211 vst1_u8(dst + 7 * stride, vext_u8(d02_lo, d02_hi, 1)); in vpx_d153_predictor_8x8_neon()
1214 void vpx_d153_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d153_predictor_16x16_neon() argument
1244 vst1q_u8(dst + 0 * stride, vextq_u8(d02_hi, d1, 15)); in vpx_d153_predictor_16x16_neon()
1245 vst1q_u8(dst + 1 * stride, vextq_u8(d02_hi, d1, 13)); in vpx_d153_predictor_16x16_neon()
1246 vst1q_u8(dst + 2 * stride, vextq_u8(d02_hi, d1, 11)); in vpx_d153_predictor_16x16_neon()
1247 vst1q_u8(dst + 3 * stride, vextq_u8(d02_hi, d1, 9)); in vpx_d153_predictor_16x16_neon()
1248 vst1q_u8(dst + 4 * stride, vextq_u8(d02_hi, d1, 7)); in vpx_d153_predictor_16x16_neon()
1249 vst1q_u8(dst + 5 * stride, vextq_u8(d02_hi, d1, 5)); in vpx_d153_predictor_16x16_neon()
1250 vst1q_u8(dst + 6 * stride, vextq_u8(d02_hi, d1, 3)); in vpx_d153_predictor_16x16_neon()
1251 vst1q_u8(dst + 7 * stride, vextq_u8(d02_hi, d1, 1)); in vpx_d153_predictor_16x16_neon()
1252 vst1q_u8(dst + 8 * stride, vextq_u8(d02_lo, d02_hi, 15)); in vpx_d153_predictor_16x16_neon()
1253 vst1q_u8(dst + 9 * stride, vextq_u8(d02_lo, d02_hi, 13)); in vpx_d153_predictor_16x16_neon()
1254 vst1q_u8(dst + 10 * stride, vextq_u8(d02_lo, d02_hi, 11)); in vpx_d153_predictor_16x16_neon()
1255 vst1q_u8(dst + 11 * stride, vextq_u8(d02_lo, d02_hi, 9)); in vpx_d153_predictor_16x16_neon()
1256 vst1q_u8(dst + 12 * stride, vextq_u8(d02_lo, d02_hi, 7)); in vpx_d153_predictor_16x16_neon()
1257 vst1q_u8(dst + 13 * stride, vextq_u8(d02_lo, d02_hi, 5)); in vpx_d153_predictor_16x16_neon()
1258 vst1q_u8(dst + 14 * stride, vextq_u8(d02_lo, d02_hi, 3)); in vpx_d153_predictor_16x16_neon()
1259 vst1q_u8(dst + 15 * stride, vextq_u8(d02_lo, d02_hi, 1)); in vpx_d153_predictor_16x16_neon()
1262 void vpx_d153_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d153_predictor_32x32_neon() argument
1309 vst1q_u8(dst + 0 * stride + 0, vextq_u8(d02_lo.val[1], d1_lo, 15)); in vpx_d153_predictor_32x32_neon()
1310 vst1q_u8(dst + 0 * stride + 16, vextq_u8(d1_lo, d1_hi, 15)); in vpx_d153_predictor_32x32_neon()
1311 vst1q_u8(dst + 1 * stride + 0, vextq_u8(d02_lo.val[1], d1_lo, 13)); in vpx_d153_predictor_32x32_neon()
1312 vst1q_u8(dst + 1 * stride + 16, vextq_u8(d1_lo, d1_hi, 13)); in vpx_d153_predictor_32x32_neon()
1313 vst1q_u8(dst + 2 * stride + 0, vextq_u8(d02_lo.val[1], d1_lo, 11)); in vpx_d153_predictor_32x32_neon()
1314 vst1q_u8(dst + 2 * stride + 16, vextq_u8(d1_lo, d1_hi, 11)); in vpx_d153_predictor_32x32_neon()
1315 vst1q_u8(dst + 3 * stride + 0, vextq_u8(d02_lo.val[1], d1_lo, 9)); in vpx_d153_predictor_32x32_neon()
1316 vst1q_u8(dst + 3 * stride + 16, vextq_u8(d1_lo, d1_hi, 9)); in vpx_d153_predictor_32x32_neon()
1317 vst1q_u8(dst + 4 * stride + 0, vextq_u8(d02_lo.val[1], d1_lo, 7)); in vpx_d153_predictor_32x32_neon()
1318 vst1q_u8(dst + 4 * stride + 16, vextq_u8(d1_lo, d1_hi, 7)); in vpx_d153_predictor_32x32_neon()
1319 vst1q_u8(dst + 5 * stride + 0, vextq_u8(d02_lo.val[1], d1_lo, 5)); in vpx_d153_predictor_32x32_neon()
1320 vst1q_u8(dst + 5 * stride + 16, vextq_u8(d1_lo, d1_hi, 5)); in vpx_d153_predictor_32x32_neon()
1321 vst1q_u8(dst + 6 * stride + 0, vextq_u8(d02_lo.val[1], d1_lo, 3)); in vpx_d153_predictor_32x32_neon()
1322 vst1q_u8(dst + 6 * stride + 16, vextq_u8(d1_lo, d1_hi, 3)); in vpx_d153_predictor_32x32_neon()
1323 vst1q_u8(dst + 7 * stride + 0, vextq_u8(d02_lo.val[1], d1_lo, 1)); in vpx_d153_predictor_32x32_neon()
1324 vst1q_u8(dst + 7 * stride + 16, vextq_u8(d1_lo, d1_hi, 1)); in vpx_d153_predictor_32x32_neon()
1325 vst1q_u8(dst + 8 * stride + 0, vextq_u8(d02_lo.val[0], d02_lo.val[1], 15)); in vpx_d153_predictor_32x32_neon()
1326 vst1q_u8(dst + 8 * stride + 16, vextq_u8(d02_lo.val[1], d1_lo, 15)); in vpx_d153_predictor_32x32_neon()
1327 vst1q_u8(dst + 9 * stride + 0, vextq_u8(d02_lo.val[0], d02_lo.val[1], 13)); in vpx_d153_predictor_32x32_neon()
1328 vst1q_u8(dst + 9 * stride + 16, vextq_u8(d02_lo.val[1], d1_lo, 13)); in vpx_d153_predictor_32x32_neon()
1329 vst1q_u8(dst + 10 * stride + 0, vextq_u8(d02_lo.val[0], d02_lo.val[1], 11)); in vpx_d153_predictor_32x32_neon()
1330 vst1q_u8(dst + 10 * stride + 16, vextq_u8(d02_lo.val[1], d1_lo, 11)); in vpx_d153_predictor_32x32_neon()
1331 vst1q_u8(dst + 11 * stride + 0, vextq_u8(d02_lo.val[0], d02_lo.val[1], 9)); in vpx_d153_predictor_32x32_neon()
1332 vst1q_u8(dst + 11 * stride + 16, vextq_u8(d02_lo.val[1], d1_lo, 9)); in vpx_d153_predictor_32x32_neon()
1333 vst1q_u8(dst + 12 * stride + 0, vextq_u8(d02_lo.val[0], d02_lo.val[1], 7)); in vpx_d153_predictor_32x32_neon()
1334 vst1q_u8(dst + 12 * stride + 16, vextq_u8(d02_lo.val[1], d1_lo, 7)); in vpx_d153_predictor_32x32_neon()
1335 vst1q_u8(dst + 13 * stride + 0, vextq_u8(d02_lo.val[0], d02_lo.val[1], 5)); in vpx_d153_predictor_32x32_neon()
1336 vst1q_u8(dst + 13 * stride + 16, vextq_u8(d02_lo.val[1], d1_lo, 5)); in vpx_d153_predictor_32x32_neon()
1337 vst1q_u8(dst + 14 * stride + 0, vextq_u8(d02_lo.val[0], d02_lo.val[1], 3)); in vpx_d153_predictor_32x32_neon()
1338 vst1q_u8(dst + 14 * stride + 16, vextq_u8(d02_lo.val[1], d1_lo, 3)); in vpx_d153_predictor_32x32_neon()
1339 vst1q_u8(dst + 15 * stride + 0, vextq_u8(d02_lo.val[0], d02_lo.val[1], 1)); in vpx_d153_predictor_32x32_neon()
1340 vst1q_u8(dst + 15 * stride + 16, vextq_u8(d02_lo.val[1], d1_lo, 1)); in vpx_d153_predictor_32x32_neon()
1341 vst1q_u8(dst + 16 * stride + 0, vextq_u8(d02_hi.val[1], d02_lo.val[0], 15)); in vpx_d153_predictor_32x32_neon()
1342 vst1q_u8(dst + 16 * stride + 16, vextq_u8(d02_lo.val[0], d02_lo.val[1], 15)); in vpx_d153_predictor_32x32_neon()
1343 vst1q_u8(dst + 17 * stride + 0, vextq_u8(d02_hi.val[1], d02_lo.val[0], 13)); in vpx_d153_predictor_32x32_neon()
1344 vst1q_u8(dst + 17 * stride + 16, vextq_u8(d02_lo.val[0], d02_lo.val[1], 13)); in vpx_d153_predictor_32x32_neon()
1345 vst1q_u8(dst + 18 * stride + 0, vextq_u8(d02_hi.val[1], d02_lo.val[0], 11)); in vpx_d153_predictor_32x32_neon()
1346 vst1q_u8(dst + 18 * stride + 16, vextq_u8(d02_lo.val[0], d02_lo.val[1], 11)); in vpx_d153_predictor_32x32_neon()
1347 vst1q_u8(dst + 19 * stride + 0, vextq_u8(d02_hi.val[1], d02_lo.val[0], 9)); in vpx_d153_predictor_32x32_neon()
1348 vst1q_u8(dst + 19 * stride + 16, vextq_u8(d02_lo.val[0], d02_lo.val[1], 9)); in vpx_d153_predictor_32x32_neon()
1349 vst1q_u8(dst + 20 * stride + 0, vextq_u8(d02_hi.val[1], d02_lo.val[0], 7)); in vpx_d153_predictor_32x32_neon()
1350 vst1q_u8(dst + 20 * stride + 16, vextq_u8(d02_lo.val[0], d02_lo.val[1], 7)); in vpx_d153_predictor_32x32_neon()
1351 vst1q_u8(dst + 21 * stride + 0, vextq_u8(d02_hi.val[1], d02_lo.val[0], 5)); in vpx_d153_predictor_32x32_neon()
1352 vst1q_u8(dst + 21 * stride + 16, vextq_u8(d02_lo.val[0], d02_lo.val[1], 5)); in vpx_d153_predictor_32x32_neon()
1353 vst1q_u8(dst + 22 * stride + 0, vextq_u8(d02_hi.val[1], d02_lo.val[0], 3)); in vpx_d153_predictor_32x32_neon()
1354 vst1q_u8(dst + 22 * stride + 16, vextq_u8(d02_lo.val[0], d02_lo.val[1], 3)); in vpx_d153_predictor_32x32_neon()
1355 vst1q_u8(dst + 23 * stride + 0, vextq_u8(d02_hi.val[1], d02_lo.val[0], 1)); in vpx_d153_predictor_32x32_neon()
1356 vst1q_u8(dst + 23 * stride + 16, vextq_u8(d02_lo.val[0], d02_lo.val[1], 1)); in vpx_d153_predictor_32x32_neon()
1357 vst1q_u8(dst + 24 * stride + 0, vextq_u8(d02_hi.val[0], d02_hi.val[1], 15)); in vpx_d153_predictor_32x32_neon()
1358 vst1q_u8(dst + 24 * stride + 16, vextq_u8(d02_hi.val[1], d02_lo.val[0], 15)); in vpx_d153_predictor_32x32_neon()
1359 vst1q_u8(dst + 25 * stride + 0, vextq_u8(d02_hi.val[0], d02_hi.val[1], 13)); in vpx_d153_predictor_32x32_neon()
1360 vst1q_u8(dst + 25 * stride + 16, vextq_u8(d02_hi.val[1], d02_lo.val[0], 13)); in vpx_d153_predictor_32x32_neon()
1361 vst1q_u8(dst + 26 * stride + 0, vextq_u8(d02_hi.val[0], d02_hi.val[1], 11)); in vpx_d153_predictor_32x32_neon()
1362 vst1q_u8(dst + 26 * stride + 16, vextq_u8(d02_hi.val[1], d02_lo.val[0], 11)); in vpx_d153_predictor_32x32_neon()
1363 vst1q_u8(dst + 27 * stride + 0, vextq_u8(d02_hi.val[0], d02_hi.val[1], 9)); in vpx_d153_predictor_32x32_neon()
1364 vst1q_u8(dst + 27 * stride + 16, vextq_u8(d02_hi.val[1], d02_lo.val[0], 9)); in vpx_d153_predictor_32x32_neon()
1365 vst1q_u8(dst + 28 * stride + 0, vextq_u8(d02_hi.val[0], d02_hi.val[1], 7)); in vpx_d153_predictor_32x32_neon()
1366 vst1q_u8(dst + 28 * stride + 16, vextq_u8(d02_hi.val[1], d02_lo.val[0], 7)); in vpx_d153_predictor_32x32_neon()
1367 vst1q_u8(dst + 29 * stride + 0, vextq_u8(d02_hi.val[0], d02_hi.val[1], 5)); in vpx_d153_predictor_32x32_neon()
1368 vst1q_u8(dst + 29 * stride + 16, vextq_u8(d02_hi.val[1], d02_lo.val[0], 5)); in vpx_d153_predictor_32x32_neon()
1369 vst1q_u8(dst + 30 * stride + 0, vextq_u8(d02_hi.val[0], d02_hi.val[1], 3)); in vpx_d153_predictor_32x32_neon()
1370 vst1q_u8(dst + 30 * stride + 16, vextq_u8(d02_hi.val[1], d02_lo.val[0], 3)); in vpx_d153_predictor_32x32_neon()
1371 vst1q_u8(dst + 31 * stride + 0, vextq_u8(d02_hi.val[0], d02_hi.val[1], 1)); in vpx_d153_predictor_32x32_neon()
1372 vst1q_u8(dst + 31 * stride + 16, vextq_u8(d02_hi.val[1], d02_lo.val[0], 1)); in vpx_d153_predictor_32x32_neon()
1377 void vpx_d207_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d207_predictor_4x4_neon() argument
1402 // Store the high half of the vector for stride={2,3} to avoid needing in vpx_d207_predictor_4x4_neon()
1404 // stride=0 [ c0[0], c1[0], c0[1], c1[1] ] in vpx_d207_predictor_4x4_neon()
1405 // stride=1 [ c0[1], c1[1], c0[2], c1[2] ] in vpx_d207_predictor_4x4_neon()
1406 // stride=2 [ c0[2], c1[2], c0[3], c1[3] ] in vpx_d207_predictor_4x4_neon()
1407 // stride=3 [ c0[3], c1[3], left[3], left[3] ] in vpx_d207_predictor_4x4_neon()
1408 store_u8_4x1(dst + 0 * stride, d0); in vpx_d207_predictor_4x4_neon()
1409 store_u8_4x1(dst + 1 * stride, d1); in vpx_d207_predictor_4x4_neon()
1410 store_u8_4x1_high(dst + 2 * stride, d0); in vpx_d207_predictor_4x4_neon()
1411 store_u8_4x1_high(dst + 3 * stride, d1); in vpx_d207_predictor_4x4_neon()
1414 void vpx_d207_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d207_predictor_8x8_neon() argument
1433 vst1_u8(dst + 0 * stride, c01_lo); in vpx_d207_predictor_8x8_neon()
1434 vst1_u8(dst + 1 * stride, vext_u8(c01_lo, c01_hi, 2)); in vpx_d207_predictor_8x8_neon()
1435 vst1_u8(dst + 2 * stride, vext_u8(c01_lo, c01_hi, 4)); in vpx_d207_predictor_8x8_neon()
1436 vst1_u8(dst + 3 * stride, vext_u8(c01_lo, c01_hi, 6)); in vpx_d207_predictor_8x8_neon()
1437 vst1_u8(dst + 4 * stride, c01_hi); in vpx_d207_predictor_8x8_neon()
1438 vst1_u8(dst + 5 * stride, vext_u8(c01_hi, l7, 2)); in vpx_d207_predictor_8x8_neon()
1439 vst1_u8(dst + 6 * stride, vext_u8(c01_hi, l7, 4)); in vpx_d207_predictor_8x8_neon()
1440 vst1_u8(dst + 7 * stride, vext_u8(c01_hi, l7, 6)); in vpx_d207_predictor_8x8_neon()
1443 void vpx_d207_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d207_predictor_16x16_neon() argument
1460 vst1q_u8(dst + 0 * stride, c01_lo); in vpx_d207_predictor_16x16_neon()
1461 vst1q_u8(dst + 1 * stride, vextq_u8(c01_lo, c01_hi, 2)); in vpx_d207_predictor_16x16_neon()
1462 vst1q_u8(dst + 2 * stride, vextq_u8(c01_lo, c01_hi, 4)); in vpx_d207_predictor_16x16_neon()
1463 vst1q_u8(dst + 3 * stride, vextq_u8(c01_lo, c01_hi, 6)); in vpx_d207_predictor_16x16_neon()
1464 vst1q_u8(dst + 4 * stride, vextq_u8(c01_lo, c01_hi, 8)); in vpx_d207_predictor_16x16_neon()
1465 vst1q_u8(dst + 5 * stride, vextq_u8(c01_lo, c01_hi, 10)); in vpx_d207_predictor_16x16_neon()
1466 vst1q_u8(dst + 6 * stride, vextq_u8(c01_lo, c01_hi, 12)); in vpx_d207_predictor_16x16_neon()
1467 vst1q_u8(dst + 7 * stride, vextq_u8(c01_lo, c01_hi, 14)); in vpx_d207_predictor_16x16_neon()
1468 vst1q_u8(dst + 8 * stride, c01_hi); in vpx_d207_predictor_16x16_neon()
1469 vst1q_u8(dst + 9 * stride, vextq_u8(c01_hi, l15, 2)); in vpx_d207_predictor_16x16_neon()
1470 vst1q_u8(dst + 10 * stride, vextq_u8(c01_hi, l15, 4)); in vpx_d207_predictor_16x16_neon()
1471 vst1q_u8(dst + 11 * stride, vextq_u8(c01_hi, l15, 6)); in vpx_d207_predictor_16x16_neon()
1472 vst1q_u8(dst + 12 * stride, vextq_u8(c01_hi, l15, 8)); in vpx_d207_predictor_16x16_neon()
1473 vst1q_u8(dst + 13 * stride, vextq_u8(c01_hi, l15, 10)); in vpx_d207_predictor_16x16_neon()
1474 vst1q_u8(dst + 14 * stride, vextq_u8(c01_hi, l15, 12)); in vpx_d207_predictor_16x16_neon()
1475 vst1q_u8(dst + 15 * stride, vextq_u8(c01_hi, l15, 14)); in vpx_d207_predictor_16x16_neon()
1478 void vpx_d207_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, in vpx_d207_predictor_32x32_neon() argument
1503 vst1q_u8(dst + 0 * stride + 0, c01[0]); in vpx_d207_predictor_32x32_neon()
1504 vst1q_u8(dst + 0 * stride + 16, c01[1]); in vpx_d207_predictor_32x32_neon()
1505 vst1q_u8(dst + 1 * stride + 0, vextq_u8(c01[0], c01[1], 2)); in vpx_d207_predictor_32x32_neon()
1506 vst1q_u8(dst + 1 * stride + 16, vextq_u8(c01[1], c01[2], 2)); in vpx_d207_predictor_32x32_neon()
1507 vst1q_u8(dst + 2 * stride + 0, vextq_u8(c01[0], c01[1], 4)); in vpx_d207_predictor_32x32_neon()
1508 vst1q_u8(dst + 2 * stride + 16, vextq_u8(c01[1], c01[2], 4)); in vpx_d207_predictor_32x32_neon()
1509 vst1q_u8(dst + 3 * stride + 0, vextq_u8(c01[0], c01[1], 6)); in vpx_d207_predictor_32x32_neon()
1510 vst1q_u8(dst + 3 * stride + 16, vextq_u8(c01[1], c01[2], 6)); in vpx_d207_predictor_32x32_neon()
1511 vst1q_u8(dst + 4 * stride + 0, vextq_u8(c01[0], c01[1], 8)); in vpx_d207_predictor_32x32_neon()
1512 vst1q_u8(dst + 4 * stride + 16, vextq_u8(c01[1], c01[2], 8)); in vpx_d207_predictor_32x32_neon()
1513 vst1q_u8(dst + 5 * stride + 0, vextq_u8(c01[0], c01[1], 10)); in vpx_d207_predictor_32x32_neon()
1514 vst1q_u8(dst + 5 * stride + 16, vextq_u8(c01[1], c01[2], 10)); in vpx_d207_predictor_32x32_neon()
1515 vst1q_u8(dst + 6 * stride + 0, vextq_u8(c01[0], c01[1], 12)); in vpx_d207_predictor_32x32_neon()
1516 vst1q_u8(dst + 6 * stride + 16, vextq_u8(c01[1], c01[2], 12)); in vpx_d207_predictor_32x32_neon()
1517 vst1q_u8(dst + 7 * stride + 0, vextq_u8(c01[0], c01[1], 14)); in vpx_d207_predictor_32x32_neon()
1518 vst1q_u8(dst + 7 * stride + 16, vextq_u8(c01[1], c01[2], 14)); in vpx_d207_predictor_32x32_neon()
1519 vst1q_u8(dst + 8 * stride + 0, c01[1]); in vpx_d207_predictor_32x32_neon()
1520 vst1q_u8(dst + 8 * stride + 16, c01[2]); in vpx_d207_predictor_32x32_neon()
1521 vst1q_u8(dst + 9 * stride + 0, vextq_u8(c01[1], c01[2], 2)); in vpx_d207_predictor_32x32_neon()
1522 vst1q_u8(dst + 9 * stride + 16, vextq_u8(c01[2], c01[3], 2)); in vpx_d207_predictor_32x32_neon()
1523 vst1q_u8(dst + 10 * stride + 0, vextq_u8(c01[1], c01[2], 4)); in vpx_d207_predictor_32x32_neon()
1524 vst1q_u8(dst + 10 * stride + 16, vextq_u8(c01[2], c01[3], 4)); in vpx_d207_predictor_32x32_neon()
1525 vst1q_u8(dst + 11 * stride + 0, vextq_u8(c01[1], c01[2], 6)); in vpx_d207_predictor_32x32_neon()
1526 vst1q_u8(dst + 11 * stride + 16, vextq_u8(c01[2], c01[3], 6)); in vpx_d207_predictor_32x32_neon()
1527 vst1q_u8(dst + 12 * stride + 0, vextq_u8(c01[1], c01[2], 8)); in vpx_d207_predictor_32x32_neon()
1528 vst1q_u8(dst + 12 * stride + 16, vextq_u8(c01[2], c01[3], 8)); in vpx_d207_predictor_32x32_neon()
1529 vst1q_u8(dst + 13 * stride + 0, vextq_u8(c01[1], c01[2], 10)); in vpx_d207_predictor_32x32_neon()
1530 vst1q_u8(dst + 13 * stride + 16, vextq_u8(c01[2], c01[3], 10)); in vpx_d207_predictor_32x32_neon()
1531 vst1q_u8(dst + 14 * stride + 0, vextq_u8(c01[1], c01[2], 12)); in vpx_d207_predictor_32x32_neon()
1532 vst1q_u8(dst + 14 * stride + 16, vextq_u8(c01[2], c01[3], 12)); in vpx_d207_predictor_32x32_neon()
1533 vst1q_u8(dst + 15 * stride + 0, vextq_u8(c01[1], c01[2], 14)); in vpx_d207_predictor_32x32_neon()
1534 vst1q_u8(dst + 15 * stride + 16, vextq_u8(c01[2], c01[3], 14)); in vpx_d207_predictor_32x32_neon()
1535 vst1q_u8(dst + 16 * stride + 0, c01[2]); in vpx_d207_predictor_32x32_neon()
1536 vst1q_u8(dst + 16 * stride + 16, c01[3]); in vpx_d207_predictor_32x32_neon()
1537 vst1q_u8(dst + 17 * stride + 0, vextq_u8(c01[2], c01[3], 2)); in vpx_d207_predictor_32x32_neon()
1538 vst1q_u8(dst + 17 * stride + 16, vextq_u8(c01[3], l31, 2)); in vpx_d207_predictor_32x32_neon()
1539 vst1q_u8(dst + 18 * stride + 0, vextq_u8(c01[2], c01[3], 4)); in vpx_d207_predictor_32x32_neon()
1540 vst1q_u8(dst + 18 * stride + 16, vextq_u8(c01[3], l31, 4)); in vpx_d207_predictor_32x32_neon()
1541 vst1q_u8(dst + 19 * stride + 0, vextq_u8(c01[2], c01[3], 6)); in vpx_d207_predictor_32x32_neon()
1542 vst1q_u8(dst + 19 * stride + 16, vextq_u8(c01[3], l31, 6)); in vpx_d207_predictor_32x32_neon()
1543 vst1q_u8(dst + 20 * stride + 0, vextq_u8(c01[2], c01[3], 8)); in vpx_d207_predictor_32x32_neon()
1544 vst1q_u8(dst + 20 * stride + 16, vextq_u8(c01[3], l31, 8)); in vpx_d207_predictor_32x32_neon()
1545 vst1q_u8(dst + 21 * stride + 0, vextq_u8(c01[2], c01[3], 10)); in vpx_d207_predictor_32x32_neon()
1546 vst1q_u8(dst + 21 * stride + 16, vextq_u8(c01[3], l31, 10)); in vpx_d207_predictor_32x32_neon()
1547 vst1q_u8(dst + 22 * stride + 0, vextq_u8(c01[2], c01[3], 12)); in vpx_d207_predictor_32x32_neon()
1548 vst1q_u8(dst + 22 * stride + 16, vextq_u8(c01[3], l31, 12)); in vpx_d207_predictor_32x32_neon()
1549 vst1q_u8(dst + 23 * stride + 0, vextq_u8(c01[2], c01[3], 14)); in vpx_d207_predictor_32x32_neon()
1550 vst1q_u8(dst + 23 * stride + 16, vextq_u8(c01[3], l31, 14)); in vpx_d207_predictor_32x32_neon()
1551 vst1q_u8(dst + 24 * stride + 0, c01[3]); in vpx_d207_predictor_32x32_neon()
1552 vst1q_u8(dst + 24 * stride + 16, l31); in vpx_d207_predictor_32x32_neon()
1553 vst1q_u8(dst + 25 * stride + 0, vextq_u8(c01[3], l31, 2)); in vpx_d207_predictor_32x32_neon()
1554 vst1q_u8(dst + 25 * stride + 16, l31); in vpx_d207_predictor_32x32_neon()
1555 vst1q_u8(dst + 26 * stride + 0, vextq_u8(c01[3], l31, 4)); in vpx_d207_predictor_32x32_neon()
1556 vst1q_u8(dst + 26 * stride + 16, l31); in vpx_d207_predictor_32x32_neon()
1557 vst1q_u8(dst + 27 * stride + 0, vextq_u8(c01[3], l31, 6)); in vpx_d207_predictor_32x32_neon()
1558 vst1q_u8(dst + 27 * stride + 16, l31); in vpx_d207_predictor_32x32_neon()
1559 vst1q_u8(dst + 28 * stride + 0, vextq_u8(c01[3], l31, 8)); in vpx_d207_predictor_32x32_neon()
1560 vst1q_u8(dst + 28 * stride + 16, l31); in vpx_d207_predictor_32x32_neon()
1561 vst1q_u8(dst + 29 * stride + 0, vextq_u8(c01[3], l31, 10)); in vpx_d207_predictor_32x32_neon()
1562 vst1q_u8(dst + 29 * stride + 16, l31); in vpx_d207_predictor_32x32_neon()
1563 vst1q_u8(dst + 30 * stride + 0, vextq_u8(c01[3], l31, 12)); in vpx_d207_predictor_32x32_neon()
1564 vst1q_u8(dst + 30 * stride + 16, l31); in vpx_d207_predictor_32x32_neon()
1565 vst1q_u8(dst + 31 * stride + 0, vextq_u8(c01[3], l31, 14)); in vpx_d207_predictor_32x32_neon()
1566 vst1q_u8(dst + 31 * stride + 16, l31); in vpx_d207_predictor_32x32_neon()
1573 void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, in vpx_v_predictor_4x4_neon() argument
1579 for (i = 0; i < 4; i++, dst += stride) { in vpx_v_predictor_4x4_neon()
1584 void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, in vpx_v_predictor_8x8_neon() argument
1590 for (i = 0; i < 8; i++, dst += stride) { in vpx_v_predictor_8x8_neon()
1595 void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, in vpx_v_predictor_16x16_neon() argument
1601 for (i = 0; i < 16; i++, dst += stride) { in vpx_v_predictor_16x16_neon()
1606 void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, in vpx_v_predictor_32x32_neon() argument
1620 dst += stride - 16; in vpx_v_predictor_32x32_neon()
1626 void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, in vpx_h_predictor_4x4_neon() argument
1636 dst += stride; in vpx_h_predictor_4x4_neon()
1639 dst += stride; in vpx_h_predictor_4x4_neon()
1642 dst += stride; in vpx_h_predictor_4x4_neon()
1647 void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, in vpx_h_predictor_8x8_neon() argument
1655 dst += stride; in vpx_h_predictor_8x8_neon()
1658 dst += stride; in vpx_h_predictor_8x8_neon()
1661 dst += stride; in vpx_h_predictor_8x8_neon()
1664 dst += stride; in vpx_h_predictor_8x8_neon()
1667 dst += stride; in vpx_h_predictor_8x8_neon()
1670 dst += stride; in vpx_h_predictor_8x8_neon()
1673 dst += stride; in vpx_h_predictor_8x8_neon()
1678 static INLINE void h_store_16x8(uint8_t **dst, const ptrdiff_t stride, in h_store_16x8() argument
1690 *dst += stride; in h_store_16x8()
1692 *dst += stride; in h_store_16x8()
1694 *dst += stride; in h_store_16x8()
1696 *dst += stride; in h_store_16x8()
1698 *dst += stride; in h_store_16x8()
1700 *dst += stride; in h_store_16x8()
1702 *dst += stride; in h_store_16x8()
1704 *dst += stride; in h_store_16x8()
1707 void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, in vpx_h_predictor_16x16_neon() argument
1712 h_store_16x8(&dst, stride, vget_low_u8(left_u8q)); in vpx_h_predictor_16x16_neon()
1713 h_store_16x8(&dst, stride, vget_high_u8(left_u8q)); in vpx_h_predictor_16x16_neon()
1716 static INLINE void h_store_32x8(uint8_t **dst, const ptrdiff_t stride, in h_store_32x8() argument
1730 *dst += stride - 16; in h_store_32x8()
1734 *dst += stride - 16; in h_store_32x8()
1738 *dst += stride - 16; in h_store_32x8()
1742 *dst += stride - 16; in h_store_32x8()
1746 *dst += stride - 16; in h_store_32x8()
1750 *dst += stride - 16; in h_store_32x8()
1754 *dst += stride - 16; in h_store_32x8()
1758 *dst += stride - 16; in h_store_32x8()
1761 void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, in vpx_h_predictor_32x32_neon() argument
1768 h_store_32x8(&dst, stride, vget_low_u8(left_u8)); in vpx_h_predictor_32x32_neon()
1769 h_store_32x8(&dst, stride, vget_high_u8(left_u8)); in vpx_h_predictor_32x32_neon()
1779 void vpx_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, in vpx_tm_predictor_4x4_neon() argument
1797 dst += stride; in vpx_tm_predictor_4x4_neon()
1799 dst += stride; in vpx_tm_predictor_4x4_neon()
1805 dst += stride; in vpx_tm_predictor_4x4_neon()
1809 static INLINE void tm_8_kernel(uint8_t **dst, const ptrdiff_t stride, in tm_8_kernel() argument
1814 *dst += stride; in tm_8_kernel()
1817 void vpx_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, in vpx_tm_predictor_8x8_neon() argument
1831 tm_8_kernel(&dst, stride, left_dup, sub); in vpx_tm_predictor_8x8_neon()
1833 tm_8_kernel(&dst, stride, left_dup, sub); in vpx_tm_predictor_8x8_neon()
1835 tm_8_kernel(&dst, stride, left_dup, sub); in vpx_tm_predictor_8x8_neon()
1837 tm_8_kernel(&dst, stride, left_dup, sub); in vpx_tm_predictor_8x8_neon()
1841 static INLINE void tm_16_kernel(uint8_t **dst, const ptrdiff_t stride, in tm_16_kernel() argument
1851 *dst += stride - 8; in tm_16_kernel()
1854 void vpx_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, in vpx_tm_predictor_16x16_neon() argument
1872 tm_16_kernel(&dst, stride, left_dup, sub0, sub1); in vpx_tm_predictor_16x16_neon()
1874 tm_16_kernel(&dst, stride, left_dup, sub0, sub1); in vpx_tm_predictor_16x16_neon()
1876 tm_16_kernel(&dst, stride, left_dup, sub0, sub1); in vpx_tm_predictor_16x16_neon()
1878 tm_16_kernel(&dst, stride, left_dup, sub0, sub1); in vpx_tm_predictor_16x16_neon()
1881 tm_16_kernel(&dst, stride, left_dup, sub0, sub1); in vpx_tm_predictor_16x16_neon()
1883 tm_16_kernel(&dst, stride, left_dup, sub0, sub1); in vpx_tm_predictor_16x16_neon()
1885 tm_16_kernel(&dst, stride, left_dup, sub0, sub1); in vpx_tm_predictor_16x16_neon()
1887 tm_16_kernel(&dst, stride, left_dup, sub0, sub1); in vpx_tm_predictor_16x16_neon()
1891 static INLINE void tm_32_kernel(uint8_t **dst, const ptrdiff_t stride, in tm_32_kernel() argument
1907 *dst += stride - 16; in tm_32_kernel()
1910 void vpx_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, in vpx_tm_predictor_32x32_neon() argument
1932 tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3); in vpx_tm_predictor_32x32_neon()
1934 tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3); in vpx_tm_predictor_32x32_neon()
1936 tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3); in vpx_tm_predictor_32x32_neon()
1938 tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3); in vpx_tm_predictor_32x32_neon()