• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Buffer submit code for multi buffer SHA1 algorithm
3 *
4 * This file is provided under a dual BSD/GPLv2 license.  When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 *  Copyright(c) 2014 Intel Corporation.
10 *
11 *  This program is free software; you can redistribute it and/or modify
12 *  it under the terms of version 2 of the GNU General Public License as
13 *  published by the Free Software Foundation.
14 *
15 *  This program is distributed in the hope that it will be useful, but
16 *  WITHOUT ANY WARRANTY; without even the implied warranty of
17 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 *  General Public License for more details.
19 *
20 *  Contact Information:
21 *      James Guilford <james.guilford@intel.com>
22 *	Tim Chen <tim.c.chen@linux.intel.com>
23 *
24 *  BSD LICENSE
25 *
26 *  Copyright(c) 2014 Intel Corporation.
27 *
28 *  Redistribution and use in source and binary forms, with or without
29 *  modification, are permitted provided that the following conditions
30 *  are met:
31 *
32 *    * Redistributions of source code must retain the above copyright
33 *      notice, this list of conditions and the following disclaimer.
34 *    * Redistributions in binary form must reproduce the above copyright
35 *      notice, this list of conditions and the following disclaimer in
36 *      the documentation and/or other materials provided with the
37 *      distribution.
38 *    * Neither the name of Intel Corporation nor the names of its
39 *      contributors may be used to endorse or promote products derived
40 *      from this software without specific prior written permission.
41 *
42 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46 *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53 */
54
55#include <linux/linkage.h>
56#include "sha1_mb_mgr_datastruct.S"
57
58
/* The only external routine called from this file is sha1_x8_avx2. */
.extern sha1_x8_avx2

/*
 * Register aliases (GNU as, AT&T syntax).
 *
 * Several aliases deliberately name the same physical register
 * (arg2/job/len2/p2 = rsi, idx/last_len = r8, p/start_offset = r11,
 * job_rax/len = rax, lane/tmp3 = rbp, size_offset/tmp2 = rcx), so two
 * aliases of one register must never be live at the same time.
 */

# LINUX register definitions
arg1    = %rdi
arg2    = %rsi
size_offset	= %rcx
tmp2		= %rcx
extra_blocks	= %rdx

# Common definitions
#define state   arg1
#define job     %rsi
#define len2    arg2
#define p2      arg2

# idx must be a register not clobbered by sha1_x8_avx2
idx		= %r8
DWORD_idx	= %r8d
last_len	= %r8

p               = %r11
start_offset    = %r11

# rbx and rbp are saved and restored by the function prologue/epilogue
unused_lanes    = %rbx
BYTE_unused_lanes = %bl

job_rax         = %rax
len             = %rax
DWORD_len	= %eax

lane            = %rbp
tmp3            = %rbp

tmp             = %r9
DWORD_tmp	= %r9d

lane_data       = %r10

/*
 * STACK_SPACE needs to be an odd multiple of 8.
 * The first 8*8 bytes hold the register save slots used by the prologue
 * ((%rsp) and 8*2(%rsp) .. 8*7(%rsp)).
 * NOTE(review): nothing in this file touches the remaining 16*10 + 8
 * bytes - confirm whether that space is still required.
 */
STACK_SPACE     = 8*8 + 16*10 + 8
99
/*
 * JOB *sha1_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
 *
 * In:   rdi = state
 *       rsi = job
 * Out:  rax = pointer to the job whose lane was just completed, or NULL
 *             when the submitted job was only queued.
 *
 * Steps:
 *   1. Pop a lane index from the low nibble of _unused_lanes and store the
 *      job pointer, its length, its digest words and its data pointer in
 *      that lane.  The job status becomes STS_BEING_PROCESSED.
 *   2. Unless the remaining _unused_lanes value is 0xF, return NULL.
 *   3. Otherwise pick the lane with the smallest _lens entry, subtract that
 *      length from every lane, and (when the length is non-zero) call
 *      sha1_x8_avx2 with rdi = state and rsi = that length.
 *   4. Release the chosen lane, copy its digest back into the job, mark the
 *      job STS_COMPLETED and return it.
 *
 * Each _lens entry is (len << 4) | lane_index, so the low nibble of the
 * minimum identifies the lane it belongs to.
 *
 * rbx, rbp and r12-r15 are saved in a 32-byte aligned stack frame and
 * restored on exit; rax, rsi, r8-r11 and xmm0-xmm3 are written here.
 */
ENTRY(sha1_mb_mgr_submit_avx2)

	# Carve out the frame and align it to 32 bytes.  The caller rsp is
	# parked in r10 until it has been written to the frame.
	mov	%rsp, %r10
	sub     $STACK_SPACE, %rsp
	and	$~31, %rsp

	mov     %rbx, (%rsp)
	mov	%r10, 8*2(%rsp)	# save old rsp
	mov     %rbp, 8*3(%rsp)
	# r12-r15 are not used by this function itself.
	# NOTE(review): presumably saved because sha1_x8_avx2 may clobber
	# them - confirm against that routine.
	mov	%r12, 8*4(%rsp)
	mov	%r13, 8*5(%rsp)
	mov	%r14, 8*6(%rsp)
	mov	%r15, 8*7(%rsp)

	# Pop a lane index from the low nibble of _unused_lanes
	mov     _unused_lanes(state), unused_lanes
	mov	unused_lanes, lane
	and	$0xF, lane
	shr     $4, unused_lanes
	imul    $_LANE_DATA_size, lane, lane_data
	movl    $STS_BEING_PROCESSED, _status(job)
	lea     _ldata(state, lane_data), lane_data	# address of this lane in _ldata
	mov     unused_lanes, _unused_lanes(state)
	movl    _len(job),  DWORD_len

	mov	job, _job_in_lane(lane_data)

	# Tag the length with the lane index: _lens[lane] = (len << 4) | lane
	shl	$4, len
	or	lane, len

	movl    DWORD_len,  _lens(state , lane, 4)

	# Load digest words from result_digest and scatter them into the
	# per-lane digest area: word w of a lane lives at
	# _args_digest + w*32 + lane*4
	vmovdqu	_result_digest(job), %xmm0
	mov	_result_digest+1*16(job), DWORD_tmp
	vmovd    %xmm0, _args_digest(state, lane, 4)
	vpextrd  $1, %xmm0, _args_digest+1*32(state , lane, 4)
	vpextrd  $2, %xmm0, _args_digest+2*32(state , lane, 4)
	vpextrd  $3, %xmm0, _args_digest+3*32(state , lane, 4)
	movl    DWORD_tmp, _args_digest+4*32(state , lane, 4)

	mov     _buffer(job), p
	mov     p, _args_data_ptr(state, lane, 8)

	# Hash only when the remaining _unused_lanes value is exactly 0xF.
	# NOTE(review): 0xF looks like the end-of-list marker, meaning no
	# free lane is left - confirm against the manager init code.
	cmp     $0xF, unused_lanes
	jne     return_null

start_loop:
	# Find min length
	vmovdqa _lens(state), %xmm0
	vmovdqa _lens+1*16(state), %xmm1

	vpminud %xmm1, %xmm0, %xmm2        # xmm2 has {D,C,B,A}
	vpalignr $8, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,D,C}
	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has {x,x,E,F}
	vpalignr $4, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,x,E}
	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has min value in low dword

	# Split the minimum into lane index (low nibble) and length (rest)
	vmovd   %xmm2, DWORD_idx
	mov    idx, len2
	and    $0xF, idx
	shr    $4, len2
	jz     len_is_0			# nothing to hash for this lane

	# Subtract the minimum length from every lane while leaving the
	# lane-index nibbles untouched
	vpand   clear_low_nibble(%rip), %xmm2, %xmm2
	vpshufd $0, %xmm2, %xmm2

	vpsubd  %xmm2, %xmm0, %xmm0
	vpsubd  %xmm2, %xmm1, %xmm1

	vmovdqa %xmm0, _lens + 0*16(state)
	vmovdqa %xmm1, _lens + 1*16(state)


	# "state" and "args" are the same address, arg1
	# len is arg2
	call    sha1_x8_avx2

	# state and idx are intact

len_is_0:
	# process completed job "idx"
	imul    $_LANE_DATA_size, idx, lane_data
	lea     _ldata(state, lane_data), lane_data

	mov     _job_in_lane(lane_data), job_rax
	mov     _unused_lanes(state), unused_lanes
	movq    $0, _job_in_lane(lane_data)
	movl    $STS_COMPLETED, _status(job_rax)

	# Push the lane index back onto the _unused_lanes nibble list
	shl     $4, unused_lanes
	or      idx, unused_lanes
	mov     unused_lanes, _unused_lanes(state)

	# Largest unsigned value, so the freed lane cannot be picked as the
	# minimum ahead of a lane holding a real length
	movl	$0xFFFFFFFF, _lens(state, idx, 4)

	# Gather the five digest words of lane idx back into the job.
	# The fifth word uses the same _args_digest based offset as the
	# store performed at submit time.
	vmovd    _args_digest(state, idx, 4), %xmm0
	vpinsrd  $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
	vpinsrd  $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
	vpinsrd  $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
	movl    _args_digest+4*32(state, idx, 4), DWORD_tmp

	vmovdqu  %xmm0, _result_digest(job_rax)
	movl    DWORD_tmp, _result_digest+1*16(job_rax)

return:

	mov     (%rsp), %rbx
	mov	8*2(%rsp), %r10	# restore old rsp
	mov     8*3(%rsp), %rbp
	mov	8*4(%rsp), %r12
	mov	8*5(%rsp), %r13
	mov	8*6(%rsp), %r14
	mov	8*7(%rsp), %r15
	mov     %r10, %rsp

	ret

return_null:
	# Job was queued only: return NULL
	xor     job_rax, job_rax
	jmp     return

ENDPROC(sha1_mb_mgr_submit_avx2)
223
/*
 * Read-only mask used with vpand to clear the low nibble of the low dword,
 * i.e. to strip the lane index from a (len << 4) | lane value.
 * It is never written, so it is placed in .rodata instead of .data.
 */
.section	.rodata

.align 16
clear_low_nibble:
	.octa	0x000000000000000000000000FFFFFFF0
229