1 /*
2 * Copyright 2012 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/basic_types.h"
12
13 #include "libyuv/compare_row.h"
14
15 #ifdef __cplusplus
16 namespace libyuv {
17 extern "C" {
18 #endif
19
20 // This module is for Mips MMI.
21 #if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
22
23 // Hakmem method for hamming distance.
HammingDistance_MMI(const uint8_t * src_a,const uint8_t * src_b,int count)24 uint32_t HammingDistance_MMI(const uint8_t* src_a,
25 const uint8_t* src_b,
26 int count) {
27 uint32_t diff = 0u;
28
29 uint64_t temp = 0, temp1 = 0, ta = 0, tb = 0;
30 uint64_t c1 = 0x5555555555555555;
31 uint64_t c2 = 0x3333333333333333;
32 uint64_t c3 = 0x0f0f0f0f0f0f0f0f;
33 uint32_t c4 = 0x01010101;
34 uint64_t s1 = 1, s2 = 2, s3 = 4;
35 __asm__ volatile(
36 "1: \n\t"
37 "ldc1 %[ta], 0(%[src_a]) \n\t"
38 "ldc1 %[tb], 0(%[src_b]) \n\t"
39 "xor %[temp], %[ta], %[tb] \n\t"
40 "psrlw %[temp1], %[temp], %[s1] \n\t" // temp1=x>>1
41 "and %[temp1], %[temp1], %[c1] \n\t" // temp1&=c1
42 "psubw %[temp1], %[temp], %[temp1] \n\t" // x-temp1
43 "and %[temp], %[temp1], %[c2] \n\t" // t = (u&c2)
44 "psrlw %[temp1], %[temp1], %[s2] \n\t" // u>>2
45 "and %[temp1], %[temp1], %[c2] \n\t" // u>>2 & c2
46 "paddw %[temp1], %[temp1], %[temp] \n\t" // t1 = t1+t
47 "psrlw %[temp], %[temp1], %[s3] \n\t" // u>>4
48 "paddw %[temp1], %[temp1], %[temp] \n\t" // u+(u>>4)
49 "and %[temp1], %[temp1], %[c3] \n\t" //&c3
50 "dmfc1 $t0, %[temp1] \n\t"
51 "dsrl32 $t0, $t0, 0 \n\t "
52 "mul $t0, $t0, %[c4] \n\t"
53 "dsrl $t0, $t0, 24 \n\t"
54 "dadd %[diff], %[diff], $t0 \n\t"
55 "dmfc1 $t0, %[temp1] \n\t"
56 "mul $t0, $t0, %[c4] \n\t"
57 "dsrl $t0, $t0, 24 \n\t"
58 "dadd %[diff], %[diff], $t0 \n\t"
59 "daddiu %[src_a], %[src_a], 8 \n\t"
60 "daddiu %[src_b], %[src_b], 8 \n\t"
61 "addiu %[count], %[count], -8 \n\t"
62 "bgtz %[count], 1b \n\t"
63 "nop \n\t"
64 : [diff] "+r"(diff), [src_a] "+r"(src_a), [src_b] "+r"(src_b),
65 [count] "+r"(count), [ta] "+f"(ta), [tb] "+f"(tb), [temp] "+f"(temp),
66 [temp1] "+f"(temp1)
67 : [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [c4] "r"(c4), [s1] "f"(s1),
68 [s2] "f"(s2), [s3] "f"(s3)
69 : "memory");
70 return diff;
71 }
72
SumSquareError_MMI(const uint8_t * src_a,const uint8_t * src_b,int count)73 uint32_t SumSquareError_MMI(const uint8_t* src_a,
74 const uint8_t* src_b,
75 int count) {
76 uint32_t sse = 0u;
77 uint32_t sse_hi = 0u, sse_lo = 0u;
78
79 uint64_t src1, src2;
80 uint64_t diff, diff_hi, diff_lo;
81 uint64_t sse_sum, sse_tmp;
82
83 const uint64_t mask = 0x0ULL;
84
85 __asm__ volatile(
86 "xor %[sse_sum], %[sse_sum], %[sse_sum] \n\t"
87
88 "1: \n\t"
89 "ldc1 %[src1], 0x00(%[src_a]) \n\t"
90 "ldc1 %[src2], 0x00(%[src_b]) \n\t"
91 "pasubub %[diff], %[src1], %[src2] \n\t"
92 "punpcklbh %[diff_lo], %[diff], %[mask] \n\t"
93 "punpckhbh %[diff_hi], %[diff], %[mask] \n\t"
94 "pmaddhw %[sse_tmp], %[diff_lo], %[diff_lo] \n\t"
95 "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
96 "pmaddhw %[sse_tmp], %[diff_hi], %[diff_hi] \n\t"
97 "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
98
99 "daddiu %[src_a], %[src_a], 0x08 \n\t"
100 "daddiu %[src_b], %[src_b], 0x08 \n\t"
101 "daddiu %[count], %[count], -0x08 \n\t"
102 "bnez %[count], 1b \n\t"
103
104 "mfc1 %[sse_lo], %[sse_sum] \n\t"
105 "mfhc1 %[sse_hi], %[sse_sum] \n\t"
106 "daddu %[sse], %[sse_hi], %[sse_lo] \n\t"
107 : [sse] "+&r"(sse), [diff] "=&f"(diff), [src1] "=&f"(src1),
108 [src2] "=&f"(src2), [diff_lo] "=&f"(diff_lo), [diff_hi] "=&f"(diff_hi),
109 [sse_sum] "=&f"(sse_sum), [sse_tmp] "=&f"(sse_tmp),
110 [sse_hi] "+&r"(sse_hi), [sse_lo] "+&r"(sse_lo)
111 : [src_a] "r"(src_a), [src_b] "r"(src_b), [count] "r"(count),
112 [mask] "f"(mask)
113 : "memory");
114
115 return sse;
116 }
117
118 #endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
119
120 #ifdef __cplusplus
121 } // extern "C"
122 } // namespace libyuv
123 #endif
124