/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/fpu/api.h>
#include <asm/crypto/glue_helper.h>

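/*
 * Number of CAST5 blocks (8 bytes each) processed per call into the
 * 16-way AVX assembler routines declared below.
 */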
#define CAST5_PARALLEL_BLOCKS 16

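/*
 * AVX primitives implemented in assembler (presumably in the companion
 * cast5-avx-x86_64-asm_64.S file); each call consumes
 * CAST5_PARALLEL_BLOCKS blocks of input.
 */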
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);

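/*
 * Kernel-mode FPU (and thus AVX) use must be bracketed explicitly.
 * glue_fpu_begin() only enables the FPU when at least
 * CAST5_PARALLEL_BLOCKS blocks are pending, so short requests skip the
 * FPU save/restore overhead and take the scalar path instead.
 */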
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}

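/*
 * ECB has no chaining, so encryption and decryption share one walker:
 * consume 16 blocks per AVX call while enough data remains, then fall
 * back to the generic one-block cipher for the tail.
 */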
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     bool enc)
{
	bool fpu_enabled = false;
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	err = blkcipher_walk_virt(desc, walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

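/* Synchronous ECB entry points; these only set up the scatterlist walk. */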
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, false);
}

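/*
 * CBC encryption is inherently serial (each block depends on the
 * previous ciphertext block), so there is no 16-way path here: blocks
 * are chained one at a time with the generic cipher and the final
 * ciphertext block is written back as the new IV.
 */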
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 *iv = (u64 *)walk->iv;

	do {
		*dst = *src ^ *iv;
		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u64 *)walk->iv = *iv;
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

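/*
 * CBC decryption parallelizes: each plaintext block needs only its own
 * ciphertext block and the preceding one.  The walk therefore runs
 * backwards from the last block, so the in-place 16-way decrypt never
 * clobbers ciphertext still needed for chaining; the original last
 * ciphertext block is saved up front to become the next IV.
 */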
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

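/*
 * Handle a final partial block in CTR mode: encrypt the counter into a
 * keystream block and XOR only the remaining bytes into the output.
 */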
static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor_cpy(dst, keystream, src, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

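/*
 * CTR is a pure keystream mode, so full blocks batch trivially: the
 * 16-way assembler routine generates and applies 16 keystream blocks
 * per call from the counter in walk->iv, while the one-block fallback
 * encrypts the counter in C and advances it with be64_add_cpu().
 */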
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		if (dst != src)
			*dst = *src;

		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}

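/*
 * Algorithm registrations.  The "__"-prefixed blkcipher entries are
 * internal-only implementations that assume the FPU is available; the
 * user-visible ablkcipher entries wrap them through the ablk/cryptd
 * helpers, which defer work to a kernel thread whenever the FPU cannot
 * be used in the caller's context.
 */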
static struct crypto_alg cast5_algs[6] = { {
	.cra_name		= "__ecb-cast5-avx",
	.cra_driver_name	= "__driver-ecb-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "__cbc-cast5-avx",
	.cra_driver_name	= "__driver-cbc-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "__ctr-cast5-avx",
	.cra_driver_name	= "__driver-ctr-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
}, {
	.cra_name		= "ecb(cast5)",
	.cra_driver_name	= "ecb-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(cast5)",
	.cra_driver_name	= "cbc-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= __ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(cast5)",
	.cra_driver_name	= "ctr-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_encrypt,
			.geniv		= "chainiv",
		},
	},
} };

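/*
 * Module init: refuse to load unless the CPU and OS (via XSAVE)
 * support the SSE and YMM register state that the AVX routines need.
 */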
static int __init cast5_init(void)
{
	const char *feature_name;

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
				&feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");