1 /*
2 * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12
13 #include "./vpx_config.h"
14 #include "vpx/vpx_integer.h"
15 #include "vpx_dsp/ppc/types_vsx.h"
16
subtract_block4x4(int16_t * diff,ptrdiff_t diff_stride,const uint8_t * src,ptrdiff_t src_stride,const uint8_t * pred,ptrdiff_t pred_stride)17 static VPX_FORCE_INLINE void subtract_block4x4(
18 int16_t *diff, ptrdiff_t diff_stride, const uint8_t *src,
19 ptrdiff_t src_stride, const uint8_t *pred, ptrdiff_t pred_stride) {
20 int16_t *diff1 = diff + 2 * diff_stride;
21 const uint8_t *src1 = src + 2 * src_stride;
22 const uint8_t *pred1 = pred + 2 * pred_stride;
23
24 const int16x8_t d0 = vec_vsx_ld(0, diff);
25 const int16x8_t d1 = vec_vsx_ld(0, diff + diff_stride);
26 const int16x8_t d2 = vec_vsx_ld(0, diff1);
27 const int16x8_t d3 = vec_vsx_ld(0, diff1 + diff_stride);
28
29 const uint8x16_t s0 = read4x2(src, (int)src_stride);
30 const uint8x16_t p0 = read4x2(pred, (int)pred_stride);
31 const uint8x16_t s1 = read4x2(src1, (int)src_stride);
32 const uint8x16_t p1 = read4x2(pred1, (int)pred_stride);
33
34 const int16x8_t da = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
35 const int16x8_t db = vec_sub(unpack_to_s16_h(s1), unpack_to_s16_h(p1));
36
37 vec_vsx_st(xxpermdi(da, d0, 1), 0, diff);
38 vec_vsx_st(xxpermdi(da, d1, 3), 0, diff + diff_stride);
39 vec_vsx_st(xxpermdi(db, d2, 1), 0, diff1);
40 vec_vsx_st(xxpermdi(db, d3, 3), 0, diff1 + diff_stride);
41 }
42
vpx_subtract_block_vsx(int rows,int cols,int16_t * diff,ptrdiff_t diff_stride,const uint8_t * src,ptrdiff_t src_stride,const uint8_t * pred,ptrdiff_t pred_stride)43 void vpx_subtract_block_vsx(int rows, int cols, int16_t *diff,
44 ptrdiff_t diff_stride, const uint8_t *src,
45 ptrdiff_t src_stride, const uint8_t *pred,
46 ptrdiff_t pred_stride) {
47 int r = rows, c;
48
49 switch (cols) {
50 case 64:
51 case 32:
52 do {
53 for (c = 0; c < cols; c += 32) {
54 const uint8x16_t s0 = vec_vsx_ld(0, src + c);
55 const uint8x16_t s1 = vec_vsx_ld(16, src + c);
56 const uint8x16_t p0 = vec_vsx_ld(0, pred + c);
57 const uint8x16_t p1 = vec_vsx_ld(16, pred + c);
58 const int16x8_t d0l =
59 vec_sub(unpack_to_s16_l(s0), unpack_to_s16_l(p0));
60 const int16x8_t d0h =
61 vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
62 const int16x8_t d1l =
63 vec_sub(unpack_to_s16_l(s1), unpack_to_s16_l(p1));
64 const int16x8_t d1h =
65 vec_sub(unpack_to_s16_h(s1), unpack_to_s16_h(p1));
66 vec_vsx_st(d0h, 0, diff + c);
67 vec_vsx_st(d0l, 16, diff + c);
68 vec_vsx_st(d1h, 0, diff + c + 16);
69 vec_vsx_st(d1l, 16, diff + c + 16);
70 }
71 diff += diff_stride;
72 pred += pred_stride;
73 src += src_stride;
74 } while (--r);
75 break;
76 case 16:
77 do {
78 const uint8x16_t s0 = vec_vsx_ld(0, src);
79 const uint8x16_t p0 = vec_vsx_ld(0, pred);
80 const int16x8_t d0l = vec_sub(unpack_to_s16_l(s0), unpack_to_s16_l(p0));
81 const int16x8_t d0h = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
82 vec_vsx_st(d0h, 0, diff);
83 vec_vsx_st(d0l, 16, diff);
84 diff += diff_stride;
85 pred += pred_stride;
86 src += src_stride;
87 } while (--r);
88 break;
89 case 8:
90 do {
91 const uint8x16_t s0 = vec_vsx_ld(0, src);
92 const uint8x16_t p0 = vec_vsx_ld(0, pred);
93 const int16x8_t d0h = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
94 vec_vsx_st(d0h, 0, diff);
95 diff += diff_stride;
96 pred += pred_stride;
97 src += src_stride;
98 } while (--r);
99 break;
100 case 4:
101 subtract_block4x4(diff, diff_stride, src, src_stride, pred, pred_stride);
102 if (r > 4) {
103 diff += 4 * diff_stride;
104 pred += 4 * pred_stride;
105 src += 4 * src_stride;
106
107 subtract_block4x4(diff, diff_stride,
108
109 src, src_stride,
110
111 pred, pred_stride);
112 }
113 break;
114 default: assert(0); // unreachable
115 }
116 }
117