/*
 * Buffer submit code for multi buffer SHA512 algorithm
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 *     Megha Dey <megha.dey@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/linkage.h>
#include <asm/frame.h>
#include "sha512_mb_mgr_datastruct.S"

.extern sha512_x4_avx2

/*
 * Register aliases used by sha512_mb_mgr_submit_avx2 (AT&T syntax).
 *
 * Several names below expand to the same physical register.  In the
 * routine in this file, names that share a register are used in
 * different phases (for example "job" while the job is being placed in
 * a lane and "len2" while the minimum length is computed), so writing
 * one alias destroys the value held under the others.  Some aliases are
 * not referenced by the code visible in this file.
 *
 * Only block comments are used next to the defines: text following a
 * macro body on a #define line would become part of the expansion.
 */

/* Incoming arguments: first and second integer argument registers. */
#define arg1		%rdi
#define arg2		%rsi

/* %rdx */
#define idx		%rdx
#define last_len	%rdx

/* %rcx */
#define size_offset	%rcx
#define tmp2		%rcx

/* Common definitions */
#define state		arg1
#define job		arg2
#define len2		arg2
#define p2		arg2

/* %r11 */
#define p		%r11
#define start_offset	%r11

/* %rbx: saved and restored by the routine around its use. */
#define unused_lanes	%rbx

/* %rax: also carries the return value (job_rax). */
#define job_rax		%rax
#define len		%rax

/* %r12: saved and restored by the routine around its use. */
#define lane		%r12
#define tmp3		%r12
#define lens3		%r12

/* %r8 */
#define extra_blocks	%r8
#define lens0		%r8

/* %r9 */
#define tmp		%r9
#define lens1		%r9

/* %r10 */
#define lane_data	%r10
#define lens2		%r10

/* 32-bit view of %rax, used for the dword job length. */
#define DWORD_len	%eax

/*
 * JOB *sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job)
 *
 * arg 1 : %rdi : state
 * arg 2 : %rsi : job
 *
 * Places "job" into the lane whose number is the low byte of
 * state->unused_lanes, then either returns NULL or runs the lanes and
 * returns a finished job.
 *
 * Submit phase:
 *   - the low byte of unused_lanes is taken as the lane number and the
 *     remaining bytes are shifted down and stored back;
 *   - the job status becomes STS_BEING_PROCESSED and the job pointer is
 *     recorded in the lane data;
 *   - the 32-bit job length is written to the upper dword of the 64-bit
 *     lens entry of that lane (the lower dword is left untouched here);
 *   - the 64 bytes at _result_digest(job) are copied into _args_digest,
 *     one 64-bit word per row, rows being 32 bytes apart;
 *   - the job buffer pointer is stored in _args_data_ptr for the lane.
 *
 * After that, a remaining unused_lanes value other than 0xFF makes the
 * routine return NULL.
 * NOTE(review): 0xFF presumably is the end-of-list marker, i.e. every
 * lane is now occupied -- confirm against the manager init code.
 *
 * Processing phase (unused_lanes == 0xFF):
 *   - the unsigned minimum of the four 64-bit lens entries is found;
 *   - idx = minimum & 0xF selects the lane to complete;
 *   - len2 = minimum & ~0xFF; when it is nonzero it is subtracted from
 *     all four lens entries and sha512_x4_avx2 is called with
 *     arg1 = state and arg2 = len2 >> 32;
 *   - lane idx is released: its job pointer is cleared, the job status
 *     becomes STS_COMPLETED, idx is pushed back as the low byte of
 *     unused_lanes, the upper dword of its lens entry is set to
 *     0xFFFFFFFF, and its digest words are copied back to
 *     _result_digest of the completed job.
 *
 * Returns (%rax): pointer to the completed job, or NULL.
 *
 * Registers written here: %rax, %rdx, %rsi, %r8-%r11, %xmm0-%xmm3 and
 * flags; %rbx and %r12 are pushed on entry and popped on exit.  The code
 * after the call relies on sha512_x4_avx2 leaving %rdi (state) and %rdx
 * (idx) unchanged; whatever else that routine clobbers is not visible
 * from this file.
 */
ENTRY(sha512_mb_mgr_submit_avx2)
	FRAME_BEGIN
	push	%rbx
	push	%r12

	/* Pop the next lane number: it is the low byte (%bl) of unused_lanes. */
	mov	_unused_lanes(state), unused_lanes
	movzb	%bl, lane
	shr	$8, unused_lanes
	imul	$_LANE_DATA_size, lane, lane_data
	movl	$STS_BEING_PROCESSED, _status(job)
	lea	_ldata(state, lane_data), lane_data
	mov	unused_lanes, _unused_lanes(state)
	movl	_len(job), DWORD_len

	mov	job, _job_in_lane(lane_data)
	/* The length lives in the upper dword of the 64-bit lens entry. */
	movl	DWORD_len, _lens+4(state, lane, 8)

	# Load digest words from result_digest
	vmovdqu	_result_digest+0*16(job), %xmm0
	vmovdqu	_result_digest+1*16(job), %xmm1
	vmovdqu	_result_digest+2*16(job), %xmm2
	vmovdqu	_result_digest+3*16(job), %xmm3

	/*
	 * Scatter the eight 64-bit digest words into _args_digest: word n of
	 * this lane goes to offset n*32 + lane*8.
	 */
	vmovq	%xmm0, _args_digest+0*32(state, lane, 8)
	vpextrq	$1, %xmm0, _args_digest+1*32(state, lane, 8)
	vmovq	%xmm1, _args_digest+2*32(state, lane, 8)
	vpextrq	$1, %xmm1, _args_digest+3*32(state, lane, 8)
	vmovq	%xmm2, _args_digest+4*32(state, lane, 8)
	vpextrq	$1, %xmm2, _args_digest+5*32(state, lane, 8)
	vmovq	%xmm3, _args_digest+6*32(state, lane, 8)
	vpextrq	$1, %xmm3, _args_digest+7*32(state, lane, 8)

	mov	_buffer(job), p
	mov	p, _args_data_ptr(state, lane, 8)

	/* Anything other than 0xFF left in unused_lanes: return NULL. */
	cmp	$0xFF, unused_lanes
	jne	return_null

start_loop:

	# Find min length
	/* Unsigned minimum (cmovb) over the four 64-bit lens entries. */
	mov	_lens+0*8(state), lens0
	mov	lens0, idx
	mov	_lens+1*8(state), lens1
	cmp	idx, lens1
	cmovb	lens1, idx
	mov	_lens+2*8(state), lens2
	cmp	idx, lens2
	cmovb	lens2, idx
	mov	_lens+3*8(state), lens3
	cmp	idx, lens3
	cmovb	lens3, idx
	/* Split the minimum: low 4 bits select the lane, the rest is length. */
	mov	idx, len2
	and	$0xF, idx
	and	$~0xFF, len2
	jz	len_is_0

	/* Subtract the common length from every lane and store it back. */
	sub	len2, lens0
	sub	len2, lens1
	sub	len2, lens2
	sub	len2, lens3
	/* Bring the length down from the upper dword for the call. */
	shr	$32, len2
	mov	lens0, _lens+0*8(state)
	mov	lens1, _lens+1*8(state)
	mov	lens2, _lens+2*8(state)
	mov	lens3, _lens+3*8(state)

	# "state" and "args" are the same address, arg1
	# len is arg2
	call	sha512_x4_avx2
	# state and idx are intact

len_is_0:

	# process completed job "idx"
	imul	$_LANE_DATA_size, idx, lane_data
	lea	_ldata(state, lane_data), lane_data

	mov	_job_in_lane(lane_data), job_rax
	mov	_unused_lanes(state), unused_lanes
	movq	$0, _job_in_lane(lane_data)
	movl	$STS_COMPLETED, _status(job_rax)
	/* Push idx back as the low byte of unused_lanes. */
	shl	$8, unused_lanes
	or	idx, unused_lanes
	mov	unused_lanes, _unused_lanes(state)

	/* Released lane gets 0xFFFFFFFF in the length half of its lens entry. */
	movl	$0xFFFFFFFF, _lens+4(state, idx, 8)

	/* Gather the eight digest words of lane idx back into four xmm pairs. */
	vmovq	_args_digest+0*32(state, idx, 8), %xmm0
	vpinsrq	$1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
	vmovq	_args_digest+2*32(state, idx, 8), %xmm1
	vpinsrq	$1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
	vmovq	_args_digest+4*32(state, idx, 8), %xmm2
	vpinsrq	$1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
	vmovq	_args_digest+6*32(state, idx, 8), %xmm3
	vpinsrq	$1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3

	vmovdqu	%xmm0, _result_digest+0*16(job_rax)
	vmovdqu	%xmm1, _result_digest+1*16(job_rax)
	vmovdqu	%xmm2, _result_digest+2*16(job_rax)
	vmovdqu	%xmm3, _result_digest+3*16(job_rax)

return:
	pop	%r12
	pop	%rbx
	FRAME_END
	ret

return_null:
	/* No job completed on this call: return NULL. */
	xor	job_rax, job_rax
	jmp	return
ENDPROC(sha512_mb_mgr_submit_avx2)

/* UNUSED?
.section	.rodata.cst16, "aM", @progbits, 16
.align 16
H0:     .int  0x6a09e667
H1:     .int  0xbb67ae85
H2:     .int  0x3c6ef372
H3:     .int  0xa54ff53a
H4:     .int  0x510e527f
H5:     .int  0x9b05688c
H6:     .int  0x1f83d9ab
H7:     .int  0x5be0cd19
*/
