/*
 * Copyright © 2023, VideoLAN and dav1d authors
 * Copyright © 2023, Loongson Technology Corporation Limited
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/loongarch/loongson_asm.S"

/*
static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
                       const int bx4, const int bw4, int bh4)
*/
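
/* A rough C sketch of the scalar reference routine this function implements
 * (the prototype above comes from the file; the loop body here is a
 * reconstruction for illustration, assuming refmvs_block is the 12-byte
 * dav1d type, which matches the 12-byte stores below):
 *
 *     static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
 *                            const int bx4, const int bw4, int bh4)
 *     {
 *         do {
 *             refmvs_block *const r = *rr++ + bx4;  // row pointer, offset by bx4 blocks
 *             for (int x = 0; x < bw4; x++)
 *                 r[x] = *rmv;                      // copy the same block bw4 times
 *         } while (--bh4);
 *     }
 */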
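/* Arguments: a0 = rr, a1 = rmv, a2 = bx4, a3 = bw4, a4 = bh4.
 * Strategy: replicate the 12-byte refmvs_block four times across vr1..vr3
 * (48 contiguous bytes), then dispatch on bw4 through a jump table indexed
 * by clz(bw4), so each width case can write whole vectors per row. */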
function splat_mv_lsx
    vld           vr0,      a1,       0          // load rmv: 0 1 ... 11 (12-byte block, upper 4 bytes unused)
    clz.w         t4,       a3                   // clz(bw4) = 26..31 for bw4 = 32,16,8,4,2,1
    vaddi.bu      vr1,      vr0,      0          // vr1 = vr0
    addi.w        t4,       t4,       -26        // jump table index 0..5
    vextrins.w    vr1,      vr0,      0x30       // 0 1 2 ... 11 0 1 2 3
    la.local      t5,       .SPLAT_LSX_JRTABLE
    vbsrl.v       vr2,      vr1,      4          // 4 5 6 7...11 0 1 2 3 0 0 0 0
    alsl.d        t6,       t4,       t5,     1  // t6 = t5 + index * 2
    vextrins.w    vr2,      vr0,      0x31       // 4 5 6 7...11 0 1 2 3 4 5 6 7
    ld.h          t7,       t6,       0          // signed 16-bit offset of the handler
    vbsrl.v       vr3,      vr2,      4          // 8 9 10 11 0 1 2 3 4 5 6 7 0 0 0 0
    add.d         t8,       t5,       t7         // handler address
    alsl.d        a2,       a2,       a2,     1  // a2 = bx4 * 3
    vextrins.w    vr3,      vr0,      0x32       // 8 9 10 11 0 1 2 3 4 5 6 7 8 9 10 11
    slli.w        a2,       a2,       2          // a2 = bx4 * 12 (byte offset of first block in a row)
    jirl          $r0,      t8,       0          // dispatch on bw4

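/* Each entry is a signed 16-bit offset from .SPLAT_LSX_JRTABLE to the
 * handler for one bw4 value, loaded with ld.h and added to the table base. */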
.SPLAT_LSX_JRTABLE:
    .hword .SPLAT_W32_LSX - .SPLAT_LSX_JRTABLE   // bw4 == 32
    .hword .SPLAT_W16_LSX - .SPLAT_LSX_JRTABLE   // bw4 == 16
    .hword .SPLAT_W8_LSX  - .SPLAT_LSX_JRTABLE   // bw4 == 8
    .hword .SPLAT_W4_LSX  - .SPLAT_LSX_JRTABLE   // bw4 == 4
    .hword .SPLAT_W2_LSX  - .SPLAT_LSX_JRTABLE   // bw4 == 2
    .hword .SPLAT_W1_LSX  - .SPLAT_LSX_JRTABLE   // bw4 == 1

.SPLAT_W1_LSX:
    ld.d          t3,       a0,       0          // t3 = *rr
    addi.d        a0,       a0,       8          // rr++
    addi.d        a4,       a4,       -1         // bh4--
    add.d         t3,       t3,       a2         // t3 += bx4 * 12

    fst.d         f1,       t3,       0          // one block: bytes 0..7
    fst.s         f3,       t3,       8          // bytes 8..11
    blt           zero,     a4,       .SPLAT_W1_LSX
    b             .splat_end

.SPLAT_W2_LSX:
    ld.d          t3,       a0,       0          // t3 = *rr
    addi.d        a0,       a0,       8          // rr++
    addi.d        a4,       a4,       -1         // bh4--
    add.d         t3,       t3,       a2         // t3 += bx4 * 12

    vst           vr1,      t3,       0          // two blocks: bytes 0..15
    fst.d         f2,       t3,       16         // bytes 16..23
    blt           zero,     a4,       .SPLAT_W2_LSX
    b             .splat_end

.SPLAT_W4_LSX:
    ld.d          t3,       a0,       0          // t3 = *rr
    addi.d        a0,       a0,       8          // rr++
    addi.d        a4,       a4,       -1         // bh4--
    add.d         t3,       t3,       a2         // t3 += bx4 * 12

    vst           vr1,      t3,       0          // four blocks: 48 bytes
    vst           vr2,      t3,       16
    vst           vr3,      t3,       32
    blt           zero,     a4,       .SPLAT_W4_LSX
    b             .splat_end

.SPLAT_W8_LSX:
    ld.d          t3,       a0,       0          // t3 = *rr
    addi.d        a0,       a0,       8          // rr++
    addi.d        a4,       a4,       -1         // bh4--
    add.d         t3,       t3,       a2         // t3 += bx4 * 12

    vst           vr1,      t3,       0          // eight blocks: 96 bytes
    vst           vr2,      t3,       16
    vst           vr3,      t3,       32

    vst           vr1,      t3,       48
    vst           vr2,      t3,       64
    vst           vr3,      t3,       80
    blt           zero,     a4,       .SPLAT_W8_LSX
    b             .splat_end

.SPLAT_W16_LSX:
    ld.d          t3,       a0,       0          // t3 = *rr
    addi.d        a0,       a0,       8          // rr++
    addi.d        a4,       a4,       -1         // bh4--
    add.d         t3,       t3,       a2         // t3 += bx4 * 12

.rept 2                                          // sixteen blocks: 192 bytes
    vst           vr1,      t3,       0
    vst           vr2,      t3,       16
    vst           vr3,      t3,       32

    vst           vr1,      t3,       48
    vst           vr2,      t3,       64
    vst           vr3,      t3,       80

    addi.d        t3,       t3,       96
.endr

    blt           zero,     a4,       .SPLAT_W16_LSX
    b             .splat_end

.SPLAT_W32_LSX:
    ld.d          t3,       a0,       0          // t3 = *rr
    addi.d        a0,       a0,       8          // rr++
    addi.d        a4,       a4,       -1         // bh4--
    add.d         t3,       t3,       a2         // t3 += bx4 * 12

.rept 4                                          // thirty-two blocks: 384 bytes
    vst           vr1,      t3,       0
    vst           vr2,      t3,       16
    vst           vr3,      t3,       32

    vst           vr1,      t3,       48
    vst           vr2,      t3,       64
    vst           vr3,      t3,       80

    addi.d        t3,       t3,       96
.endr

    blt           zero,     a4,       .SPLAT_W32_LSX

.splat_end:
endfunc