• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 #include <assert.h>
12 
13 #include "./vpx_config.h"
14 #include "vpx/vpx_integer.h"
15 #include "vpx_dsp/ppc/types_vsx.h"
16 
subtract_block4x4(int16_t * diff,ptrdiff_t diff_stride,const uint8_t * src,ptrdiff_t src_stride,const uint8_t * pred,ptrdiff_t pred_stride)17 static VPX_FORCE_INLINE void subtract_block4x4(
18     int16_t *diff, ptrdiff_t diff_stride, const uint8_t *src,
19     ptrdiff_t src_stride, const uint8_t *pred, ptrdiff_t pred_stride) {
20   int16_t *diff1 = diff + 2 * diff_stride;
21   const uint8_t *src1 = src + 2 * src_stride;
22   const uint8_t *pred1 = pred + 2 * pred_stride;
23 
24   const int16x8_t d0 = vec_vsx_ld(0, diff);
25   const int16x8_t d1 = vec_vsx_ld(0, diff + diff_stride);
26   const int16x8_t d2 = vec_vsx_ld(0, diff1);
27   const int16x8_t d3 = vec_vsx_ld(0, diff1 + diff_stride);
28 
29   const uint8x16_t s0 = read4x2(src, (int)src_stride);
30   const uint8x16_t p0 = read4x2(pred, (int)pred_stride);
31   const uint8x16_t s1 = read4x2(src1, (int)src_stride);
32   const uint8x16_t p1 = read4x2(pred1, (int)pred_stride);
33 
34   const int16x8_t da = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
35   const int16x8_t db = vec_sub(unpack_to_s16_h(s1), unpack_to_s16_h(p1));
36 
37   vec_vsx_st(xxpermdi(da, d0, 1), 0, diff);
38   vec_vsx_st(xxpermdi(da, d1, 3), 0, diff + diff_stride);
39   vec_vsx_st(xxpermdi(db, d2, 1), 0, diff1);
40   vec_vsx_st(xxpermdi(db, d3, 3), 0, diff1 + diff_stride);
41 }
42 
vpx_subtract_block_vsx(int rows,int cols,int16_t * diff,ptrdiff_t diff_stride,const uint8_t * src,ptrdiff_t src_stride,const uint8_t * pred,ptrdiff_t pred_stride)43 void vpx_subtract_block_vsx(int rows, int cols, int16_t *diff,
44                             ptrdiff_t diff_stride, const uint8_t *src,
45                             ptrdiff_t src_stride, const uint8_t *pred,
46                             ptrdiff_t pred_stride) {
47   int r = rows, c;
48 
49   switch (cols) {
50     case 64:
51     case 32:
52       do {
53         for (c = 0; c < cols; c += 32) {
54           const uint8x16_t s0 = vec_vsx_ld(0, src + c);
55           const uint8x16_t s1 = vec_vsx_ld(16, src + c);
56           const uint8x16_t p0 = vec_vsx_ld(0, pred + c);
57           const uint8x16_t p1 = vec_vsx_ld(16, pred + c);
58           const int16x8_t d0l =
59               vec_sub(unpack_to_s16_l(s0), unpack_to_s16_l(p0));
60           const int16x8_t d0h =
61               vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
62           const int16x8_t d1l =
63               vec_sub(unpack_to_s16_l(s1), unpack_to_s16_l(p1));
64           const int16x8_t d1h =
65               vec_sub(unpack_to_s16_h(s1), unpack_to_s16_h(p1));
66           vec_vsx_st(d0h, 0, diff + c);
67           vec_vsx_st(d0l, 16, diff + c);
68           vec_vsx_st(d1h, 0, diff + c + 16);
69           vec_vsx_st(d1l, 16, diff + c + 16);
70         }
71         diff += diff_stride;
72         pred += pred_stride;
73         src += src_stride;
74       } while (--r);
75       break;
76     case 16:
77       do {
78         const uint8x16_t s0 = vec_vsx_ld(0, src);
79         const uint8x16_t p0 = vec_vsx_ld(0, pred);
80         const int16x8_t d0l = vec_sub(unpack_to_s16_l(s0), unpack_to_s16_l(p0));
81         const int16x8_t d0h = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
82         vec_vsx_st(d0h, 0, diff);
83         vec_vsx_st(d0l, 16, diff);
84         diff += diff_stride;
85         pred += pred_stride;
86         src += src_stride;
87       } while (--r);
88       break;
89     case 8:
90       do {
91         const uint8x16_t s0 = vec_vsx_ld(0, src);
92         const uint8x16_t p0 = vec_vsx_ld(0, pred);
93         const int16x8_t d0h = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
94         vec_vsx_st(d0h, 0, diff);
95         diff += diff_stride;
96         pred += pred_stride;
97         src += src_stride;
98       } while (--r);
99       break;
100     case 4:
101       subtract_block4x4(diff, diff_stride, src, src_stride, pred, pred_stride);
102       if (r > 4) {
103         diff += 4 * diff_stride;
104         pred += 4 * pred_stride;
105         src += 4 * src_stride;
106 
107         subtract_block4x4(diff, diff_stride,
108 
109                           src, src_stride,
110 
111                           pred, pred_stride);
112       }
113       break;
114     default: assert(0);  // unreachable
115   }
116 }
117