• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 *     http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16#include "hitls_build.h"
17#ifdef HITLS_CRYPTO_CHACHA20
18
19.text
20
/*
 * CHA256_SET_VDATA
 * Seeds the working rows of the three NEON blocks from the saved input rows.
 * For block n (n = 0, 1, 2):
 *     VREGn1 = VSIGMA, VREGn2 = VKEY01, VREGn3 = VKEY02, VREGn4 = VREG5(n+2)
 * Only VREG01..VREG04, VREG11..VREG14 and VREG21..VREG24 are written.
 * The twelve copies do not depend on each other (assuming the aliases name
 * distinct registers), so they are simply listed block by block.
 * NOTE(review): VSIGMA, VKEY01, VKEY02 and VREG52..VREG54 are defined outside
 * this section; presumably the constant row, the two key rows and one
 * counter/nonce row per block - confirm against the caller.
 */
.macro CHA256_SET_VDATA
    // NEON block 0
    mov     VREG01.16b, VSIGMA.16b
    mov     VREG02.16b, VKEY01.16b
    mov     VREG03.16b, VKEY02.16b
    mov     VREG04.16b, VREG52.16b

    // NEON block 1
    mov     VREG11.16b, VSIGMA.16b
    mov     VREG12.16b, VKEY01.16b
    mov     VREG13.16b, VKEY02.16b
    mov     VREG14.16b, VREG53.16b

    // NEON block 2
    mov     VREG21.16b, VSIGMA.16b
    mov     VREG22.16b, VKEY01.16b
    mov     VREG23.16b, VKEY02.16b
    mov     VREG24.16b, VREG54.16b
.endm
38
/*
 * CHA256_ROUND_A
 * One add/xor/rotate round over four register sets at once: one scalar set in
 * WINPUT0..WINPUT15 and three NEON sets VREGn1..VREGn4 (n = 0, 1, 2).
 * Scalar and NEON instructions alternate line by line; the instruction order
 * is significant for scheduling and must be kept as written.
 *
 * Roles:  a = WINPUT0..3   / VREGn1      b = WINPUT4..7   / VREGn2
 *         c = WINPUT8..11  / VREGn3      d = WINPUT12..15 / VREGn4
 * Scalar quarter-round k (k = 0..3) works on
 *         WINPUT(k), WINPUT(4+k), WINPUT(8+k), WINPUT(12+k).
 * Every quarter-round computes
 *         a += b; d ^= a; d = rotl(d, 16);    c += d; b ^= c; b = rotl(b, 12);
 *         a += b; d ^= a; d = rotl(d, 8);     c += d; b ^= c; b = rotl(b, 7);
 * Scalar rotl(x, r) is encoded as ror #(32 - r). NEON rotl by 16 is rev32 on
 * halfwords; the other NEON rotates are ushr (32 - r) followed by sli r.
 *
 * Line tags: "s:" scalar, "v0:/v1:/v2:" NEON block 0/1/2, t = rotate temporary.
 * Clobbers VREG41, VREG42, VREG43 (temporaries for v0, v1, v2).
 * The macro finishes by swapping the 64-bit halves of each NEON c row.
 * NOTE(review): the NEON b and d rows are not lane-rotated in this macro;
 * presumably the caller does that before the next round - confirm at call site.
 */
.macro CHA256_ROUND_A
    // scalar: a += b
    add     WINPUT0, WINPUT0, WINPUT4               // s:  a += b
    add     VREG01.4s, VREG01.4s, VREG02.4s         // v0: a += b
    add     WINPUT1, WINPUT1, WINPUT5               // s:  a += b
    add     VREG11.4s, VREG11.4s, VREG12.4s         // v1: a += b
    add     WINPUT2, WINPUT2, WINPUT6               // s:  a += b
    add     VREG21.4s, VREG21.4s, VREG22.4s         // v2: a += b
    add     WINPUT3, WINPUT3, WINPUT7               // s:  a += b
    eor     VREG04.16b, VREG04.16b, VREG01.16b      // v0: d ^= a

    // scalar: d ^= a
    eor     WINPUT12, WINPUT12, WINPUT0             // s:  d ^= a
    eor     VREG14.16b, VREG14.16b, VREG11.16b      // v1: d ^= a
    eor     WINPUT13, WINPUT13, WINPUT1             // s:  d ^= a
    eor     VREG24.16b, VREG24.16b, VREG21.16b      // v2: d ^= a
    eor     WINPUT14, WINPUT14, WINPUT2             // s:  d ^= a
    rev32   VREG04.8h, VREG04.8h                    // v0: d = rotl(d, 16)
    eor     WINPUT15, WINPUT15, WINPUT3             // s:  d ^= a
    rev32   VREG14.8h, VREG14.8h                    // v1: d = rotl(d, 16)

    // scalar: d = rotl(d, 16)
    ror     WINPUT12, WINPUT12, #16                 // s:  d = rotl(d, 16)
    rev32   VREG24.8h, VREG24.8h                    // v2: d = rotl(d, 16)
    ror     WINPUT13, WINPUT13, #16                 // s:  d = rotl(d, 16)
    add     VREG03.4s, VREG03.4s, VREG04.4s         // v0: c += d
    ror     WINPUT14, WINPUT14, #16                 // s:  d = rotl(d, 16)
    add     VREG13.4s, VREG13.4s, VREG14.4s         // v1: c += d
    ror     WINPUT15, WINPUT15, #16                 // s:  d = rotl(d, 16)
    add     VREG23.4s, VREG23.4s, VREG24.4s         // v2: c += d

    // scalar: c += d
    add     WINPUT8, WINPUT8, WINPUT12              // s:  c += d
    eor     VREG41.16b, VREG03.16b, VREG02.16b      // v0: t = c ^ b
    add     WINPUT9, WINPUT9, WINPUT13              // s:  c += d
    eor     VREG42.16b, VREG13.16b, VREG12.16b      // v1: t = c ^ b
    add     WINPUT10, WINPUT10, WINPUT14            // s:  c += d
    eor     VREG43.16b, VREG23.16b, VREG22.16b      // v2: t = c ^ b
    add     WINPUT11, WINPUT11, WINPUT15            // s:  c += d
    ushr    VREG02.4s, VREG41.4s, #20               // v0: b = t >> 20

    // scalar: b ^= c
    eor     WINPUT4, WINPUT4, WINPUT8               // s:  b ^= c
    ushr    VREG12.4s, VREG42.4s, #20               // v1: b = t >> 20
    eor     WINPUT5, WINPUT5, WINPUT9               // s:  b ^= c
    ushr    VREG22.4s, VREG43.4s, #20               // v2: b = t >> 20
    eor     WINPUT6, WINPUT6, WINPUT10              // s:  b ^= c
    sli     VREG02.4s, VREG41.4s, #12               // v0: b = rotl(t, 12)
    eor     WINPUT7, WINPUT7, WINPUT11              // s:  b ^= c
    sli     VREG12.4s, VREG42.4s, #12               // v1: b = rotl(t, 12)

    // scalar: b = rotl(b, 12)
    ror     WINPUT4, WINPUT4, #20                   // s:  b = rotl(b, 12)
    sli     VREG22.4s, VREG43.4s, #12               // v2: b = rotl(t, 12)
    ror     WINPUT5, WINPUT5, #20                   // s:  b = rotl(b, 12)
    add     VREG01.4s, VREG01.4s, VREG02.4s         // v0: a += b
    ror     WINPUT6, WINPUT6, #20                   // s:  b = rotl(b, 12)
    add     VREG11.4s, VREG11.4s, VREG12.4s         // v1: a += b
    ror     WINPUT7, WINPUT7, #20                   // s:  b = rotl(b, 12)
    add     VREG21.4s, VREG21.4s, VREG22.4s         // v2: a += b

    // scalar: a += b
    add     WINPUT0, WINPUT0, WINPUT4               // s:  a += b
    eor     VREG41.16b, VREG04.16b, VREG01.16b      // v0: t = d ^ a
    add     WINPUT1, WINPUT1, WINPUT5               // s:  a += b
    eor     VREG42.16b, VREG14.16b, VREG11.16b      // v1: t = d ^ a
    add     WINPUT2, WINPUT2, WINPUT6               // s:  a += b
    eor     VREG43.16b, VREG24.16b, VREG21.16b      // v2: t = d ^ a
    add     WINPUT3, WINPUT3, WINPUT7               // s:  a += b
    ushr    VREG04.4s, VREG41.4s, #24               // v0: d = t >> 24

    // scalar: d ^= a
    eor     WINPUT12, WINPUT12, WINPUT0             // s:  d ^= a
    ushr    VREG14.4s, VREG42.4s, #24               // v1: d = t >> 24
    eor     WINPUT13, WINPUT13, WINPUT1             // s:  d ^= a
    ushr    VREG24.4s, VREG43.4s, #24               // v2: d = t >> 24
    eor     WINPUT14, WINPUT14, WINPUT2             // s:  d ^= a
    sli     VREG04.4s, VREG41.4s, #8                // v0: d = rotl(t, 8)
    eor     WINPUT15, WINPUT15, WINPUT3             // s:  d ^= a
    sli     VREG14.4s, VREG42.4s, #8                // v1: d = rotl(t, 8)

    // scalar: d = rotl(d, 8)
    ror     WINPUT12, WINPUT12, #24                 // s:  d = rotl(d, 8)
    sli     VREG24.4s, VREG43.4s, #8                // v2: d = rotl(t, 8)
    ror     WINPUT13, WINPUT13, #24                 // s:  d = rotl(d, 8)
    add     VREG03.4s, VREG03.4s, VREG04.4s         // v0: c += d
    ror     WINPUT14, WINPUT14, #24                 // s:  d = rotl(d, 8)
    add     VREG13.4s, VREG13.4s, VREG14.4s         // v1: c += d
    ror     WINPUT15, WINPUT15, #24                 // s:  d = rotl(d, 8)
    add     VREG23.4s, VREG23.4s, VREG24.4s         // v2: c += d

    // scalar: c += d
    add     WINPUT8, WINPUT8, WINPUT12              // s:  c += d
    eor     VREG41.16b, VREG03.16b, VREG02.16b      // v0: t = c ^ b
    add     WINPUT9, WINPUT9, WINPUT13              // s:  c += d
    eor     VREG42.16b, VREG13.16b, VREG12.16b      // v1: t = c ^ b
    add     WINPUT10, WINPUT10, WINPUT14            // s:  c += d
    eor     VREG43.16b, VREG23.16b, VREG22.16b      // v2: t = c ^ b
    add     WINPUT11, WINPUT11, WINPUT15            // s:  c += d
    ushr    VREG02.4s, VREG41.4s, #25               // v0: b = t >> 25

    // scalar: b ^= c
    eor     WINPUT4, WINPUT4, WINPUT8               // s:  b ^= c
    ushr    VREG12.4s, VREG42.4s, #25               // v1: b = t >> 25
    eor     WINPUT5, WINPUT5, WINPUT9               // s:  b ^= c
    ushr    VREG22.4s, VREG43.4s, #25               // v2: b = t >> 25
    eor     WINPUT6, WINPUT6, WINPUT10              // s:  b ^= c
    sli     VREG02.4s, VREG41.4s, #7                // v0: b = rotl(t, 7)
    eor     WINPUT7, WINPUT7, WINPUT11              // s:  b ^= c
    sli     VREG12.4s, VREG42.4s, #7                // v1: b = rotl(t, 7)

    // scalar: b = rotl(b, 7); NEON: swap the 64-bit halves of each c row
    ror     WINPUT4, WINPUT4, #25                   // s:  b = rotl(b, 7)
    sli     VREG22.4s, VREG43.4s, #7                // v2: b = rotl(t, 7)
    ror     WINPUT5, WINPUT5, #25                   // s:  b = rotl(b, 7)
    ext     VREG03.16b, VREG03.16b, VREG03.16b, #8  // v0: swap 64-bit halves of c
    ror     WINPUT6, WINPUT6, #25                   // s:  b = rotl(b, 7)
    ext     VREG13.16b, VREG13.16b, VREG13.16b, #8  // v1: swap 64-bit halves of c
    ror     WINPUT7, WINPUT7, #25                   // s:  b = rotl(b, 7)
    ext     VREG23.16b, VREG23.16b, VREG23.16b, #8  // v2: swap 64-bit halves of c
.endm
148
/*
 * CHA256_ROUND_B
 * One add/xor/rotate round over four register sets at once: one scalar set in
 * WINPUT0..WINPUT15 and three NEON sets VREGn1..VREGn4 (n = 0, 1, 2).
 * Scalar and NEON instructions alternate line by line; the instruction order
 * is significant for scheduling and must be kept as written.
 *
 * Scalar side: the four quarter-rounds use the index groups
 *         (0, 5, 10, 15), (1, 6, 11, 12), (2, 7, 8, 13), (3, 4, 9, 14)
 * of WINPUT, taken as (a, b, c, d).
 * NEON side: a = VREGn1, b = VREGn2, c = VREGn3, d = VREGn4, processed row-wise.
 * Every quarter-round computes
 *         a += b; d ^= a; d = rotl(d, 16);    c += d; b ^= c; b = rotl(b, 12);
 *         a += b; d ^= a; d = rotl(d, 8);     c += d; b ^= c; b = rotl(b, 7);
 * Scalar rotl(x, r) is encoded as ror #(32 - r). NEON rotl by 16 is rev32 on
 * halfwords; the other NEON rotates are ushr (32 - r) followed by sli r.
 *
 * Line tags: "s:" scalar, "v0:/v1:/v2:" NEON block 0/1/2, t = rotate temporary.
 * Clobbers VREG41, VREG42, VREG43 (temporaries for v0, v1, v2).
 * The macro finishes by swapping the 64-bit halves of each NEON c row.
 * NOTE(review): the NEON b and d rows are not lane-rotated in this macro;
 * presumably the caller aligns and restores them around it - confirm at the
 * call site.
 */
.macro CHA256_ROUND_B
    // scalar: a += b
    add     WINPUT0, WINPUT0, WINPUT5               // s:  a += b
    add     VREG01.4s, VREG01.4s, VREG02.4s         // v0: a += b
    add     WINPUT1, WINPUT1, WINPUT6               // s:  a += b
    add     VREG11.4s, VREG11.4s, VREG12.4s         // v1: a += b
    add     WINPUT2, WINPUT2, WINPUT7               // s:  a += b
    add     VREG21.4s, VREG21.4s, VREG22.4s         // v2: a += b
    add     WINPUT3, WINPUT3, WINPUT4               // s:  a += b
    eor     VREG04.16b, VREG04.16b, VREG01.16b      // v0: d ^= a

    // scalar: d ^= a
    eor     WINPUT15, WINPUT15, WINPUT0             // s:  d ^= a
    eor     VREG14.16b, VREG14.16b, VREG11.16b      // v1: d ^= a
    eor     WINPUT12, WINPUT12, WINPUT1             // s:  d ^= a
    eor     VREG24.16b, VREG24.16b, VREG21.16b      // v2: d ^= a
    eor     WINPUT13, WINPUT13, WINPUT2             // s:  d ^= a
    rev32   VREG04.8h, VREG04.8h                    // v0: d = rotl(d, 16)
    eor     WINPUT14, WINPUT14, WINPUT3             // s:  d ^= a
    rev32   VREG14.8h, VREG14.8h                    // v1: d = rotl(d, 16)

    // scalar: d = rotl(d, 16)
    ror     WINPUT12, WINPUT12, #16                 // s:  d = rotl(d, 16)
    rev32   VREG24.8h, VREG24.8h                    // v2: d = rotl(d, 16)
    ror     WINPUT13, WINPUT13, #16                 // s:  d = rotl(d, 16)
    add     VREG03.4s, VREG03.4s, VREG04.4s         // v0: c += d
    ror     WINPUT14, WINPUT14, #16                 // s:  d = rotl(d, 16)
    add     VREG13.4s, VREG13.4s, VREG14.4s         // v1: c += d
    ror     WINPUT15, WINPUT15, #16                 // s:  d = rotl(d, 16)
    add     VREG23.4s, VREG23.4s, VREG24.4s         // v2: c += d

    // scalar: c += d
    add     WINPUT10, WINPUT10, WINPUT15            // s:  c += d
    eor     VREG41.16b, VREG03.16b, VREG02.16b      // v0: t = c ^ b
    add     WINPUT11, WINPUT11, WINPUT12            // s:  c += d
    eor     VREG42.16b, VREG13.16b, VREG12.16b      // v1: t = c ^ b
    add     WINPUT8, WINPUT8, WINPUT13              // s:  c += d
    eor     VREG43.16b, VREG23.16b, VREG22.16b      // v2: t = c ^ b
    add     WINPUT9, WINPUT9, WINPUT14              // s:  c += d
    ushr    VREG02.4s, VREG41.4s, #20               // v0: b = t >> 20

    // scalar: b ^= c
    eor     WINPUT5, WINPUT5, WINPUT10              // s:  b ^= c
    ushr    VREG12.4s, VREG42.4s, #20               // v1: b = t >> 20
    eor     WINPUT6, WINPUT6, WINPUT11              // s:  b ^= c
    ushr    VREG22.4s, VREG43.4s, #20               // v2: b = t >> 20
    eor     WINPUT7, WINPUT7, WINPUT8               // s:  b ^= c
    sli     VREG02.4s, VREG41.4s, #12               // v0: b = rotl(t, 12)
    eor     WINPUT4, WINPUT4, WINPUT9               // s:  b ^= c
    sli     VREG12.4s, VREG42.4s, #12               // v1: b = rotl(t, 12)

    // scalar: b = rotl(b, 12)
    ror     WINPUT4, WINPUT4, #20                   // s:  b = rotl(b, 12)
    sli     VREG22.4s, VREG43.4s, #12               // v2: b = rotl(t, 12)
    ror     WINPUT5, WINPUT5, #20                   // s:  b = rotl(b, 12)
    add     VREG01.4s, VREG01.4s, VREG02.4s         // v0: a += b
    ror     WINPUT6, WINPUT6, #20                   // s:  b = rotl(b, 12)
    add     VREG11.4s, VREG11.4s, VREG12.4s         // v1: a += b
    ror     WINPUT7, WINPUT7, #20                   // s:  b = rotl(b, 12)
    add     VREG21.4s, VREG21.4s, VREG22.4s         // v2: a += b

    // scalar: a += b
    add     WINPUT0, WINPUT0, WINPUT5               // s:  a += b
    eor     VREG41.16b, VREG04.16b, VREG01.16b      // v0: t = d ^ a
    add     WINPUT1, WINPUT1, WINPUT6               // s:  a += b
    eor     VREG42.16b, VREG14.16b, VREG11.16b      // v1: t = d ^ a
    add     WINPUT2, WINPUT2, WINPUT7               // s:  a += b
    eor     VREG43.16b, VREG24.16b, VREG21.16b      // v2: t = d ^ a
    add     WINPUT3, WINPUT3, WINPUT4               // s:  a += b
    ushr    VREG04.4s, VREG41.4s, #24               // v0: d = t >> 24

    // scalar: d ^= a
    eor     WINPUT15, WINPUT15, WINPUT0             // s:  d ^= a
    ushr    VREG14.4s, VREG42.4s, #24               // v1: d = t >> 24
    eor     WINPUT12, WINPUT12, WINPUT1             // s:  d ^= a
    ushr    VREG24.4s, VREG43.4s, #24               // v2: d = t >> 24
    eor     WINPUT13, WINPUT13, WINPUT2             // s:  d ^= a
    sli     VREG04.4s, VREG41.4s, #8                // v0: d = rotl(t, 8)
    eor     WINPUT14, WINPUT14, WINPUT3             // s:  d ^= a
    sli     VREG14.4s, VREG42.4s, #8                // v1: d = rotl(t, 8)

    // scalar: d = rotl(d, 8)
    ror     WINPUT12, WINPUT12, #24                 // s:  d = rotl(d, 8)
    sli     VREG24.4s, VREG43.4s, #8                // v2: d = rotl(t, 8)
    ror     WINPUT13, WINPUT13, #24                 // s:  d = rotl(d, 8)
    add     VREG03.4s, VREG03.4s, VREG04.4s         // v0: c += d
    ror     WINPUT14, WINPUT14, #24                 // s:  d = rotl(d, 8)
    add     VREG13.4s, VREG13.4s, VREG14.4s         // v1: c += d
    ror     WINPUT15, WINPUT15, #24                 // s:  d = rotl(d, 8)
    add     VREG23.4s, VREG23.4s, VREG24.4s         // v2: c += d

    // scalar: c += d
    add     WINPUT10, WINPUT10, WINPUT15            // s:  c += d
    eor     VREG41.16b, VREG03.16b, VREG02.16b      // v0: t = c ^ b
    add     WINPUT11, WINPUT11, WINPUT12            // s:  c += d
    eor     VREG42.16b, VREG13.16b, VREG12.16b      // v1: t = c ^ b
    add     WINPUT8, WINPUT8, WINPUT13              // s:  c += d
    eor     VREG43.16b, VREG23.16b, VREG22.16b      // v2: t = c ^ b
    add     WINPUT9, WINPUT9, WINPUT14              // s:  c += d
    ushr    VREG02.4s, VREG41.4s, #25               // v0: b = t >> 25

    // scalar: b ^= c
    eor     WINPUT5, WINPUT5, WINPUT10              // s:  b ^= c
    ushr    VREG12.4s, VREG42.4s, #25               // v1: b = t >> 25
    eor     WINPUT6, WINPUT6, WINPUT11              // s:  b ^= c
    ushr    VREG22.4s, VREG43.4s, #25               // v2: b = t >> 25
    eor     WINPUT7, WINPUT7, WINPUT8               // s:  b ^= c
    sli     VREG02.4s, VREG41.4s, #7                // v0: b = rotl(t, 7)
    eor     WINPUT4, WINPUT4, WINPUT9               // s:  b ^= c
    sli     VREG12.4s, VREG42.4s, #7                // v1: b = rotl(t, 7)

    // scalar: b = rotl(b, 7); NEON: swap the 64-bit halves of each c row
    ror     WINPUT4, WINPUT4, #25                   // s:  b = rotl(b, 7)
    sli     VREG22.4s, VREG43.4s, #7                // v2: b = rotl(t, 7)
    ror     WINPUT5, WINPUT5, #25                   // s:  b = rotl(b, 7)
    ext     VREG03.16b, VREG03.16b, VREG03.16b, #8  // v0: swap 64-bit halves of c
    ror     WINPUT6, WINPUT6, #25                   // s:  b = rotl(b, 7)
    ext     VREG13.16b, VREG13.16b, VREG13.16b, #8  // v1: swap 64-bit halves of c
    ror     WINPUT7, WINPUT7, #25                   // s:  b = rotl(b, 7)
    ext     VREG23.16b, VREG23.16b, VREG23.16b, #8  // v2: swap 64-bit halves of c
.endm
258
/*
 * CHA256_ROUND_END
 * Feed-forward for the three NEON blocks: each working row gets its saved
 * input row added back, lane by lane as 32-bit words.
 * For block n (n = 0, 1, 2):
 *     VREGn1 += VSIGMA, VREGn2 += VKEY01, VREGn3 += VKEY02, VREGn4 += VREG5(n+2)
 * These are the same source rows that CHA256_SET_VDATA copies into the
 * working rows. The twelve adds do not depend on each other (assuming the
 * aliases name distinct registers), so they are listed block by block.
 * The scalar WINPUT registers are not touched here.
 */
.macro CHA256_ROUND_END
    // NEON block 0
    add     VREG01.4s, VREG01.4s, VSIGMA.4s
    add     VREG02.4s, VREG02.4s, VKEY01.4s
    add     VREG03.4s, VREG03.4s, VKEY02.4s
    add     VREG04.4s, VREG04.4s, VREG52.4s

    // NEON block 1
    add     VREG11.4s, VREG11.4s, VSIGMA.4s
    add     VREG12.4s, VREG12.4s, VKEY01.4s
    add     VREG13.4s, VREG13.4s, VKEY02.4s
    add     VREG14.4s, VREG14.4s, VREG53.4s

    // NEON block 2
    add     VREG21.4s, VREG21.4s, VSIGMA.4s
    add     VREG22.4s, VREG22.4s, VKEY01.4s
    add     VREG23.4s, VREG23.4s, VKEY02.4s
    add     VREG24.4s, VREG24.4s, VREG54.4s
.endm
276
/*
 * CHA256_WRITE_BACK
 * XORs four 64-byte blocks and stores them, 256 bytes in total, through REGOUT.
 *   - Scalar block: XINPUT(2k) ^= XINPUT(2k+1) for k = 0..7, then the eight
 *     even registers are written with four stp (64 bytes).
 *     NOTE(review): nothing in this macro loads the odd XINPUT registers;
 *     presumably the caller has already loaded the first 64 input bytes into
 *     them and packed the scalar state into the even ones - confirm.
 *   - NEON blocks 0..2: VREGn1..VREGn4 are XORed with 64 bytes fetched from
 *     REGINC and written with st1.
 * REGINC is post-incremented by 192 bytes (three ld1), REGOUT by 256 bytes.
 * Each ld1 is issued before the previously loaded block is stored, and the
 * scalar and NEON work is interleaved; keep the instruction order as is.
 * Clobbers VREG41..VREG44. VREG01..VREG04 are reused as the load buffer for
 * block 2 once block 0 has been stored.
 */
.macro CHA256_WRITE_BACK
    // Fetch the 64 bytes that pair with NEON block 0.
    ld1     {VREG41.16b, VREG42.16b, VREG43.16b, VREG44.16b}, [REGINC], #64

    // Scalar block, interleaved with the XOR of NEON block 0.
    eor     XINPUT0, XINPUT0, XINPUT1
    eor     XINPUT2, XINPUT2, XINPUT3
    eor     XINPUT4, XINPUT4, XINPUT5
    eor     XINPUT6, XINPUT6, XINPUT7
    eor     XINPUT8, XINPUT8, XINPUT9
    stp     XINPUT0, XINPUT2, [REGOUT], #16         // scalar bytes 0..15
    eor     VREG01.16b, VREG01.16b, VREG41.16b
    stp     XINPUT4, XINPUT6, [REGOUT], #16         // scalar bytes 16..31
    eor     XINPUT10, XINPUT10, XINPUT11
    eor     VREG02.16b, VREG02.16b, VREG42.16b
    eor     XINPUT12, XINPUT12, XINPUT13
    eor     VREG03.16b, VREG03.16b, VREG43.16b
    eor     XINPUT14, XINPUT14, XINPUT15
    stp     XINPUT8, XINPUT10, [REGOUT], #16        // scalar bytes 32..47
    eor     VREG04.16b, VREG04.16b, VREG44.16b

    // Fetch the 64 bytes for NEON block 1, then finish the scalar block.
    ld1     {VREG41.16b, VREG42.16b, VREG43.16b, VREG44.16b}, [REGINC], #64
    stp     XINPUT12, XINPUT14, [REGOUT], #16       // scalar bytes 48..63

    eor     VREG11.16b, VREG11.16b, VREG41.16b
    eor     VREG12.16b, VREG12.16b, VREG42.16b

    // Store NEON block 0.
    st1     {VREG01.16b, VREG02.16b, VREG03.16b, VREG04.16b}, [REGOUT], #64

    eor     VREG13.16b, VREG13.16b, VREG43.16b
    eor     VREG14.16b, VREG14.16b, VREG44.16b

    // VREG01..VREG04 are free again: fetch the 64 bytes for NEON block 2 into
    // them, then store NEON block 1.
    ld1     {VREG01.16b, VREG02.16b, VREG03.16b, VREG04.16b}, [REGINC], #64
    st1     {VREG11.16b, VREG12.16b, VREG13.16b, VREG14.16b}, [REGOUT], #64

    // XOR and store NEON block 2.
    eor     VREG21.16b, VREG21.16b, VREG01.16b
    eor     VREG22.16b, VREG22.16b, VREG02.16b
    eor     VREG23.16b, VREG23.16b, VREG03.16b
    eor     VREG24.16b, VREG24.16b, VREG04.16b
    st1     {VREG21.16b, VREG22.16b, VREG23.16b, VREG24.16b}, [REGOUT], #64
.endm
315
/*
 * CHA256_WRITE_BACKB src1, src2, src3, src4
 * XORs one 64-byte block held in four vector registers with 64 bytes read
 * from REGINC and writes the result through REGOUT.
 *
 * src1..src4: vector operands including their arrangement suffix. They are
 *             used directly as eor operands against .16b registers and as the
 *             st1 register list, so they need to be four consecutively
 *             numbered registers written as .16b. They are overwritten with
 *             the XOR result.
 * Side effects: REGINC and REGOUT are each post-incremented by 64 bytes;
 *               VREG41..VREG44 are clobbered as the load buffer.
 */
.macro CHA256_WRITE_BACKB src1, src2, src3, src4
    // Fetch 64 bytes from the read pointer.
    ld1     {VREG41.16b, VREG42.16b, VREG43.16b, VREG44.16b}, [REGINC], #64

    // Combine row by row.
    eor     \src1, \src1, VREG41.16b
    eor     \src2, \src2, VREG42.16b
    eor     \src3, \src3, VREG43.16b
    eor     \src4, \src4, VREG44.16b

    // Emit 64 bytes through the write pointer.
    st1     {\src1, \src2, \src3, \src4}, [REGOUT], #64
.endm
324
325#endif
326