• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "libyuv/basic_types.h"
12 
13 #include "libyuv/compare_row.h"
14 #include "libyuv/row.h"
15 
16 #ifdef __cplusplus
17 namespace libyuv {
18 extern "C" {
19 #endif
20 
21 // This module is for GCC x86 and x64.
22 #if !defined(LIBYUV_DISABLE_X86) && \
23     (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
24 
SumSquareError_SSE2(const uint8 * src_a,const uint8 * src_b,int count)25 uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
26   uint32 sse;
27   asm volatile (
28     "pxor      %%xmm0,%%xmm0                   \n"
29     "pxor      %%xmm5,%%xmm5                   \n"
30     LABELALIGN
31   "1:                                          \n"
32     "movdqu    " MEMACCESS(0) ",%%xmm1         \n"
33     "lea       " MEMLEA(0x10, 0) ",%0          \n"
34     "movdqu    " MEMACCESS(1) ",%%xmm2         \n"
35     "lea       " MEMLEA(0x10, 1) ",%1          \n"
36     "movdqa    %%xmm1,%%xmm3                   \n"
37     "psubusb   %%xmm2,%%xmm1                   \n"
38     "psubusb   %%xmm3,%%xmm2                   \n"
39     "por       %%xmm2,%%xmm1                   \n"
40     "movdqa    %%xmm1,%%xmm2                   \n"
41     "punpcklbw %%xmm5,%%xmm1                   \n"
42     "punpckhbw %%xmm5,%%xmm2                   \n"
43     "pmaddwd   %%xmm1,%%xmm1                   \n"
44     "pmaddwd   %%xmm2,%%xmm2                   \n"
45     "paddd     %%xmm1,%%xmm0                   \n"
46     "paddd     %%xmm2,%%xmm0                   \n"
47     "sub       $0x10,%2                        \n"
48     "jg        1b                              \n"
49 
50     "pshufd    $0xee,%%xmm0,%%xmm1             \n"
51     "paddd     %%xmm1,%%xmm0                   \n"
52     "pshufd    $0x1,%%xmm0,%%xmm1              \n"
53     "paddd     %%xmm1,%%xmm0                   \n"
54     "movd      %%xmm0,%3                       \n"
55 
56   : "+r"(src_a),      // %0
57     "+r"(src_b),      // %1
58     "+r"(count),      // %2
59     "=g"(sse)         // %3
60   :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
61   );
62   return sse;
63 }
64 
65 static uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0};  // 33 ^ 16
66 static uvec32 kHashMul0 = {
67     0x0c3525e1,  // 33 ^ 15
68     0xa3476dc1,  // 33 ^ 14
69     0x3b4039a1,  // 33 ^ 13
70     0x4f5f0981,  // 33 ^ 12
71 };
72 static uvec32 kHashMul1 = {
73     0x30f35d61,  // 33 ^ 11
74     0x855cb541,  // 33 ^ 10
75     0x040a9121,  // 33 ^ 9
76     0x747c7101,  // 33 ^ 8
77 };
78 static uvec32 kHashMul2 = {
79     0xec41d4e1,  // 33 ^ 7
80     0x4cfa3cc1,  // 33 ^ 6
81     0x025528a1,  // 33 ^ 5
82     0x00121881,  // 33 ^ 4
83 };
84 static uvec32 kHashMul3 = {
85     0x00008c61,  // 33 ^ 3
86     0x00000441,  // 33 ^ 2
87     0x00000021,  // 33 ^ 1
88     0x00000001,  // 33 ^ 0
89 };
90 
// Djb2 hash (hash = hash * 33 + byte) over src, evaluated 16 bytes per
// iteration using SSE4.1 pmulld:
//   hash = hash * 33^16 + src[0] * 33^15 + src[1] * 33^14 + ... + src[15]
// with all arithmetic in 32 bits.
//
//   src    input bytes; read with unaligned loads (movdqu).
//   count  number of bytes to hash.
//   seed   initial hash value.
//
// NOTE(review): count is only tested after a full 16-byte step, so at least
// 16 bytes are always read. Callers presumably pass a positive multiple of
// 16 - confirm against the call sites.
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
  uint32 hash;
  asm volatile (
    // xmm0 lane 0 = running hash, xmm7 = zero for unpacking,
    // xmm6 = {33^16, 0, 0, 0}.
    "movd      %2,%%xmm0                       \n"
    "pxor      %%xmm7,%%xmm7                   \n"
    "movdqa    %4,%%xmm6                       \n"
    LABELALIGN
  "1:                                          \n"
    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"
    "lea       " MEMLEA(0x10, 0) ",%0          \n"
    // hash *= 33^16. Lanes 1..3 of xmm6 are zero, which also clears whatever
    // the previous iteration's final paddd left in lanes 1..3 of xmm0.
    "pmulld    %%xmm6,%%xmm0                   \n"
    // Bytes 0..3 widened to 32 bits, times 33^15..33^12 -> xmm3.
    "movdqa    %5,%%xmm5                       \n"
    "movdqa    %%xmm1,%%xmm2                   \n"
    "punpcklbw %%xmm7,%%xmm2                   \n"
    "movdqa    %%xmm2,%%xmm3                   \n"
    "punpcklwd %%xmm7,%%xmm3                   \n"
    "pmulld    %%xmm5,%%xmm3                   \n"
    // Bytes 4..7 times 33^11..33^8 -> xmm4.
    "movdqa    %6,%%xmm5                       \n"
    "movdqa    %%xmm2,%%xmm4                   \n"
    "punpckhwd %%xmm7,%%xmm4                   \n"
    "pmulld    %%xmm5,%%xmm4                   \n"
    // Bytes 8..11 times 33^7..33^4 -> xmm2.
    "movdqa    %7,%%xmm5                       \n"
    "punpckhbw %%xmm7,%%xmm1                   \n"
    "movdqa    %%xmm1,%%xmm2                   \n"
    "punpcklwd %%xmm7,%%xmm2                   \n"
    "pmulld    %%xmm5,%%xmm2                   \n"
    // Bytes 12..15 times 33^3..33^0 -> xmm1.
    "movdqa    %8,%%xmm5                       \n"
    "punpckhwd %%xmm7,%%xmm1                   \n"
    "pmulld    %%xmm5,%%xmm1                   \n"
    // Sum the four product vectors, then add the lanes horizontally so that
    // lane 0 of xmm1 holds the contribution of all 16 bytes.
    "paddd     %%xmm4,%%xmm3                   \n"
    "paddd     %%xmm2,%%xmm1                   \n"
    "paddd     %%xmm3,%%xmm1                   \n"
    "pshufd    $0xe,%%xmm1,%%xmm2              \n"
    "paddd     %%xmm2,%%xmm1                   \n"
    "pshufd    $0x1,%%xmm1,%%xmm2              \n"
    "paddd     %%xmm2,%%xmm1                   \n"
    // Only lane 0 of this sum is meaningful; see the pmulld note above.
    "paddd     %%xmm1,%%xmm0                   \n"
    // Loop while more than zero bytes remain.
    "sub       $0x10,%1                        \n"
    "jg        1b                              \n"
    "movd      %%xmm0,%3                       \n"
  : "+r"(src),        // %0
    "+r"(count),      // %1
    "+rm"(seed),      // %2
    "=g"(hash)        // %3
  : "m"(kHash16x33),  // %4
    "m"(kHashMul0),   // %5
    "m"(kHashMul1),   // %6
    "m"(kHashMul2),   // %7
    "m"(kHashMul3)    // %8
  : "memory", "cc"
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );
  return hash;
}
#endif  // !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
146 
147 #ifdef __cplusplus
148 }  // extern "C"
149 }  // namespace libyuv
150 #endif
151