1 /*!
2 **********************************************************************************
3 * Copyright (c) 2022 Loongson Technology Corporation Limited
4 * Contributed by Lu Wang <wanglu@loongson.cn>
5 *
6 * \copy
7 * Copyright (c) 2009-2013, Cisco Systems
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * * Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in
19 * the documentation and/or other materials provided with the
20 * distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 *
35 * \file svc_motion_estimate_lsx.c
36 *
37 * \brief Loongson optimization
38 *
39 * \date 13/10/2022 Created
40 *
41 **********************************************************************************
42 */
43
44 #include <stdint.h>
45 #include "loongson_intrinsics.h"
46
SumOf8x8SingleBlock_lsx(uint8_t * pRef,const int32_t kiRefStride)47 int32_t SumOf8x8SingleBlock_lsx (uint8_t* pRef, const int32_t kiRefStride) {
48 __m128i vec_pRef0, vec_pRef1, vec_pRef2, vec_pRef3;
49 __m128i vec_pRef4, vec_pRef5, vec_pRef6, vec_pRef7;
50
51 int32_t iSum;
52 int32_t kiRefStride_x2 = kiRefStride << 1;
53 int32_t kiRefStride_x3 = kiRefStride_x2 + kiRefStride;
54 int32_t kiRefStride_x4 = kiRefStride << 2;
55
56 vec_pRef0 = __lsx_vld(pRef, 0);
57 vec_pRef1 = __lsx_vldx(pRef, kiRefStride);
58 vec_pRef2 = __lsx_vldx(pRef, kiRefStride_x2);
59 vec_pRef3 = __lsx_vldx(pRef, kiRefStride_x3);
60 pRef += kiRefStride_x4;
61 vec_pRef4 = __lsx_vld(pRef, 0);
62 vec_pRef5 = __lsx_vldx(pRef, kiRefStride);
63 vec_pRef6 = __lsx_vldx(pRef, kiRefStride_x2);
64 vec_pRef7 = __lsx_vldx(pRef, kiRefStride_x3);
65
66 vec_pRef0 = __lsx_vilvl_d(vec_pRef1, vec_pRef0);
67 vec_pRef2 = __lsx_vilvl_d(vec_pRef3, vec_pRef2);
68 vec_pRef4 = __lsx_vilvl_d(vec_pRef5, vec_pRef4);
69 vec_pRef6 = __lsx_vilvl_d(vec_pRef7, vec_pRef6);
70
71 vec_pRef0 = __lsx_vhaddw_hu_bu(vec_pRef0, vec_pRef0);
72 vec_pRef2 = __lsx_vhaddw_hu_bu(vec_pRef2, vec_pRef2);
73 vec_pRef4 = __lsx_vhaddw_hu_bu(vec_pRef4, vec_pRef4);
74 vec_pRef6 = __lsx_vhaddw_hu_bu(vec_pRef6, vec_pRef6);
75
76 vec_pRef0 = __lsx_vadd_h(vec_pRef0, vec_pRef2);
77 vec_pRef4 = __lsx_vadd_h(vec_pRef4, vec_pRef6);
78 vec_pRef0 = __lsx_vadd_h(vec_pRef0, vec_pRef4);
79 vec_pRef1 = __lsx_vhaddw_wu_hu(vec_pRef0, vec_pRef0);
80 vec_pRef2 = __lsx_vhaddw_du_wu(vec_pRef1, vec_pRef1);
81 vec_pRef0 = __lsx_vhaddw_qu_du(vec_pRef2, vec_pRef2);
82
83 iSum = __lsx_vpickve2gr_w(vec_pRef0, 0);
84 return iSum;
85 }
86
/*!
 * \brief Compute the 8x8 block sum at every position of a frame area and
 *        count how often each sum occurs.
 *
 * For each position (x, y) with 0 <= x < kiWidth and 0 <= y < kiHeight, the
 * block starting at pRefPicture + y * kiRefStride + x is passed to
 * SumOf8x8SingleBlock_lsx(). The result is stored in
 * pFeatureOfBlock[y * kiWidth + x], and the entry of pTimesOfFeatureValue
 * indexed by that result is incremented.
 *
 * \param pRefPicture           pointer to the first position to evaluate
 * \param kiWidth               number of positions evaluated per row
 * \param kiHeight              number of rows evaluated
 * \param kiRefStride           distance in bytes between two picture rows
 * \param pFeatureOfBlock       output, kiWidth * kiHeight block sums
 * \param pTimesOfFeatureValue  occurrence counters indexed by block sum;
 *                              only incremented here, never cleared
 */
void SumOf8x8BlockOfFrame_lsx (uint8_t* pRefPicture, const int32_t kiWidth,
                               const int32_t kiHeight, const int32_t kiRefStride,
                               uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) {
  int32_t iRow, iCol;

  for (iRow = 0; iRow < kiHeight; iRow++) {
    uint8_t* pRowPixels = pRefPicture + kiRefStride * iRow;
    uint16_t* pRowFeatures = pFeatureOfBlock + kiWidth * iRow;

    for (iCol = 0; iCol < kiWidth; iCol++) {
      const int32_t iBlockSum = SumOf8x8SingleBlock_lsx (pRowPixels + iCol, kiRefStride);

      /* Store the feature value, then bump its counter. */
      pRowFeatures[iCol] = (uint16_t) iBlockSum;
      pTimesOfFeatureValue[iBlockSum]++;
    }
  }
}
105