1 /*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
3 *
4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
20 * USA
21 *
22 */
23
24 #include <linux/module.h>
25 #include <linux/hardirq.h>
26 #include <linux/types.h>
27 #include <linux/crypto.h>
28 #include <linux/err.h>
29 #include <crypto/ablk_helper.h>
30 #include <crypto/algapi.h>
31 #include <crypto/cast5.h>
32 #include <crypto/cryptd.h>
33 #include <crypto/ctr.h>
34 #include <asm/fpu/api.h>
35 #include <asm/crypto/glue_helper.h>
36
37 #define CAST5_PARALLEL_BLOCKS 16
38
39 asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
40 const u8 *src);
41 asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
42 const u8 *src);
43 asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
44 const u8 *src);
45 asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
46 __be64 *iv);
47
/*
 * Forward to glue_fpu_begin() with the CAST5 block size and the parallel
 * block count of this driver; the descriptor argument is passed as NULL.
 *
 * @fpu_enabled: value returned by the previous call (false on first use)
 * @nbytes:      number of bytes about to be processed
 *
 * Returns the value of glue_fpu_begin(); callers keep it and eventually hand
 * it to cast5_fpu_end().
 */
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	bool now_enabled;

	now_enabled = glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
				     NULL, fpu_enabled, nbytes);

	return now_enabled;
}
53
/*
 * Counterpart of cast5_fpu_begin(): forwards the tracked state to
 * glue_fpu_end().
 *
 * @fpu_enabled: last value returned by cast5_fpu_begin() (or false if it was
 *               never called)
 *
 * The call is a plain statement: a return statement carrying an expression
 * is not permitted by ISO C in a function whose return type is void.
 */
static inline void cast5_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}
58
/*
 * Common ECB worker for both directions.
 *
 * @desc: cipher descriptor; its tfm context holds the struct cast5_ctx
 * @walk: walk already initialised by the caller with blkcipher_walk_init()
 * @enc:  true to encrypt, false to decrypt
 *
 * Each walk step is processed in two stages: whole groups of
 * CAST5_PARALLEL_BLOCKS blocks go through the 16-way routine, whatever is
 * left goes block by block through __cast5_encrypt()/__cast5_decrypt().
 * The byte count not consumed in a step is reported to
 * blkcipher_walk_done().
 *
 * Returns the last status produced by the walk helpers.
 */
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     bool enc)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	const unsigned int batch = bsize * CAST5_PARALLEL_BLOCKS;
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	bool fpu_enabled = false;
	unsigned int nbytes;
	int err;

	err = blkcipher_walk_virt(desc, walk);
	/* NOTE(review): presumably cleared because the FPU section must not sleep. */
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk->nbytes) != 0) {
		u8 *in = walk->src.virt.addr;
		u8 *out = walk->dst.virt.addr;
		bool have_tail = true;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

		/* Stage 1: full 16-block groups. */
		if (nbytes >= batch) {
			/*
			 * fn is re-selected on every walk step because the
			 * single-block stage below overwrites it.
			 */
			if (enc)
				fn = cast5_ecb_enc_16way;
			else
				fn = cast5_ecb_dec_16way;

			do {
				fn(ctx, out, in);

				in += batch;
				out += batch;
				nbytes -= batch;
			} while (nbytes >= batch);

			/* Skip stage 2 when not even one block remains. */
			have_tail = nbytes >= bsize;
		}

		/* Stage 2: remaining blocks, one at a time. */
		if (have_tail) {
			if (enc)
				fn = __cast5_encrypt;
			else
				fn = __cast5_decrypt;

			do {
				fn(ctx, out, in);

				in += bsize;
				out += bsize;
				nbytes -= bsize;
			} while (nbytes >= bsize);
		}

		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}
111
ecb_encrypt(struct blkcipher_desc * desc,struct scatterlist * dst,struct scatterlist * src,unsigned int nbytes)112 static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
113 struct scatterlist *src, unsigned int nbytes)
114 {
115 struct blkcipher_walk walk;
116
117 blkcipher_walk_init(&walk, dst, src, nbytes);
118 return ecb_crypt(desc, &walk, true);
119 }
120
ecb_decrypt(struct blkcipher_desc * desc,struct scatterlist * dst,struct scatterlist * src,unsigned int nbytes)121 static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
122 struct scatterlist *src, unsigned int nbytes)
123 {
124 struct blkcipher_walk walk;
125
126 blkcipher_walk_init(&walk, dst, src, nbytes);
127 return ecb_crypt(desc, &walk, false);
128 }
129
__cbc_encrypt(struct blkcipher_desc * desc,struct blkcipher_walk * walk)130 static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
131 struct blkcipher_walk *walk)
132 {
133 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
134 const unsigned int bsize = CAST5_BLOCK_SIZE;
135 unsigned int nbytes = walk->nbytes;
136 u64 *src = (u64 *)walk->src.virt.addr;
137 u64 *dst = (u64 *)walk->dst.virt.addr;
138 u64 *iv = (u64 *)walk->iv;
139
140 do {
141 *dst = *src ^ *iv;
142 __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
143 iv = dst;
144
145 src += 1;
146 dst += 1;
147 nbytes -= bsize;
148 } while (nbytes >= bsize);
149
150 *(u64 *)walk->iv = *iv;
151 return nbytes;
152 }
153
cbc_encrypt(struct blkcipher_desc * desc,struct scatterlist * dst,struct scatterlist * src,unsigned int nbytes)154 static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
155 struct scatterlist *src, unsigned int nbytes)
156 {
157 struct blkcipher_walk walk;
158 int err;
159
160 blkcipher_walk_init(&walk, dst, src, nbytes);
161 err = blkcipher_walk_virt(desc, &walk);
162
163 while ((nbytes = walk.nbytes)) {
164 nbytes = __cbc_encrypt(desc, &walk);
165 err = blkcipher_walk_done(desc, &walk, nbytes);
166 }
167
168 return err;
169 }
170
__cbc_decrypt(struct blkcipher_desc * desc,struct blkcipher_walk * walk)171 static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
172 struct blkcipher_walk *walk)
173 {
174 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
175 const unsigned int bsize = CAST5_BLOCK_SIZE;
176 unsigned int nbytes = walk->nbytes;
177 u64 *src = (u64 *)walk->src.virt.addr;
178 u64 *dst = (u64 *)walk->dst.virt.addr;
179 u64 last_iv;
180
181 /* Start of the last block. */
182 src += nbytes / bsize - 1;
183 dst += nbytes / bsize - 1;
184
185 last_iv = *src;
186
187 /* Process multi-block batch */
188 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
189 do {
190 nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
191 src -= CAST5_PARALLEL_BLOCKS - 1;
192 dst -= CAST5_PARALLEL_BLOCKS - 1;
193
194 cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);
195
196 nbytes -= bsize;
197 if (nbytes < bsize)
198 goto done;
199
200 *dst ^= *(src - 1);
201 src -= 1;
202 dst -= 1;
203 } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
204 }
205
206 /* Handle leftovers */
207 for (;;) {
208 __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
209
210 nbytes -= bsize;
211 if (nbytes < bsize)
212 break;
213
214 *dst ^= *(src - 1);
215 src -= 1;
216 dst -= 1;
217 }
218
219 done:
220 *dst ^= *(u64 *)walk->iv;
221 *(u64 *)walk->iv = last_iv;
222
223 return nbytes;
224 }
225
cbc_decrypt(struct blkcipher_desc * desc,struct scatterlist * dst,struct scatterlist * src,unsigned int nbytes)226 static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
227 struct scatterlist *src, unsigned int nbytes)
228 {
229 bool fpu_enabled = false;
230 struct blkcipher_walk walk;
231 int err;
232
233 blkcipher_walk_init(&walk, dst, src, nbytes);
234 err = blkcipher_walk_virt(desc, &walk);
235 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
236
237 while ((nbytes = walk.nbytes)) {
238 fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
239 nbytes = __cbc_decrypt(desc, &walk);
240 err = blkcipher_walk_done(desc, &walk, nbytes);
241 }
242
243 cast5_fpu_end(fpu_enabled);
244 return err;
245 }
246
ctr_crypt_final(struct blkcipher_desc * desc,struct blkcipher_walk * walk)247 static void ctr_crypt_final(struct blkcipher_desc *desc,
248 struct blkcipher_walk *walk)
249 {
250 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
251 u8 *ctrblk = walk->iv;
252 u8 keystream[CAST5_BLOCK_SIZE];
253 u8 *src = walk->src.virt.addr;
254 u8 *dst = walk->dst.virt.addr;
255 unsigned int nbytes = walk->nbytes;
256
257 __cast5_encrypt(ctx, keystream, ctrblk);
258 crypto_xor_cpy(dst, keystream, src, nbytes);
259
260 crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
261 }
262
__ctr_crypt(struct blkcipher_desc * desc,struct blkcipher_walk * walk)263 static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
264 struct blkcipher_walk *walk)
265 {
266 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
267 const unsigned int bsize = CAST5_BLOCK_SIZE;
268 unsigned int nbytes = walk->nbytes;
269 u64 *src = (u64 *)walk->src.virt.addr;
270 u64 *dst = (u64 *)walk->dst.virt.addr;
271
272 /* Process multi-block batch */
273 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
274 do {
275 cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
276 (__be64 *)walk->iv);
277
278 src += CAST5_PARALLEL_BLOCKS;
279 dst += CAST5_PARALLEL_BLOCKS;
280 nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
281 } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
282
283 if (nbytes < bsize)
284 goto done;
285 }
286
287 /* Handle leftovers */
288 do {
289 u64 ctrblk;
290
291 if (dst != src)
292 *dst = *src;
293
294 ctrblk = *(u64 *)walk->iv;
295 be64_add_cpu((__be64 *)walk->iv, 1);
296
297 __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
298 *dst ^= ctrblk;
299
300 src += 1;
301 dst += 1;
302 nbytes -= bsize;
303 } while (nbytes >= bsize);
304
305 done:
306 return nbytes;
307 }
308
ctr_crypt(struct blkcipher_desc * desc,struct scatterlist * dst,struct scatterlist * src,unsigned int nbytes)309 static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
310 struct scatterlist *src, unsigned int nbytes)
311 {
312 bool fpu_enabled = false;
313 struct blkcipher_walk walk;
314 int err;
315
316 blkcipher_walk_init(&walk, dst, src, nbytes);
317 err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
318 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
319
320 while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
321 fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
322 nbytes = __ctr_crypt(desc, &walk);
323 err = blkcipher_walk_done(desc, &walk, nbytes);
324 }
325
326 cast5_fpu_end(fpu_enabled);
327
328 if (walk.nbytes) {
329 ctr_crypt_final(desc, &walk);
330 err = blkcipher_walk_done(desc, &walk, 0);
331 }
332
333 return err;
334 }
335
336
337 static struct crypto_alg cast5_algs[6] = { {
338 .cra_name = "__ecb-cast5-avx",
339 .cra_driver_name = "__driver-ecb-cast5-avx",
340 .cra_priority = 0,
341 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
342 CRYPTO_ALG_INTERNAL,
343 .cra_blocksize = CAST5_BLOCK_SIZE,
344 .cra_ctxsize = sizeof(struct cast5_ctx),
345 .cra_alignmask = 0,
346 .cra_type = &crypto_blkcipher_type,
347 .cra_module = THIS_MODULE,
348 .cra_u = {
349 .blkcipher = {
350 .min_keysize = CAST5_MIN_KEY_SIZE,
351 .max_keysize = CAST5_MAX_KEY_SIZE,
352 .setkey = cast5_setkey,
353 .encrypt = ecb_encrypt,
354 .decrypt = ecb_decrypt,
355 },
356 },
357 }, {
358 .cra_name = "__cbc-cast5-avx",
359 .cra_driver_name = "__driver-cbc-cast5-avx",
360 .cra_priority = 0,
361 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
362 CRYPTO_ALG_INTERNAL,
363 .cra_blocksize = CAST5_BLOCK_SIZE,
364 .cra_ctxsize = sizeof(struct cast5_ctx),
365 .cra_alignmask = 0,
366 .cra_type = &crypto_blkcipher_type,
367 .cra_module = THIS_MODULE,
368 .cra_u = {
369 .blkcipher = {
370 .min_keysize = CAST5_MIN_KEY_SIZE,
371 .max_keysize = CAST5_MAX_KEY_SIZE,
372 .setkey = cast5_setkey,
373 .encrypt = cbc_encrypt,
374 .decrypt = cbc_decrypt,
375 },
376 },
377 }, {
378 .cra_name = "__ctr-cast5-avx",
379 .cra_driver_name = "__driver-ctr-cast5-avx",
380 .cra_priority = 0,
381 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
382 CRYPTO_ALG_INTERNAL,
383 .cra_blocksize = 1,
384 .cra_ctxsize = sizeof(struct cast5_ctx),
385 .cra_alignmask = 0,
386 .cra_type = &crypto_blkcipher_type,
387 .cra_module = THIS_MODULE,
388 .cra_u = {
389 .blkcipher = {
390 .min_keysize = CAST5_MIN_KEY_SIZE,
391 .max_keysize = CAST5_MAX_KEY_SIZE,
392 .ivsize = CAST5_BLOCK_SIZE,
393 .setkey = cast5_setkey,
394 .encrypt = ctr_crypt,
395 .decrypt = ctr_crypt,
396 },
397 },
398 }, {
399 .cra_name = "ecb(cast5)",
400 .cra_driver_name = "ecb-cast5-avx",
401 .cra_priority = 200,
402 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
403 .cra_blocksize = CAST5_BLOCK_SIZE,
404 .cra_ctxsize = sizeof(struct async_helper_ctx),
405 .cra_alignmask = 0,
406 .cra_type = &crypto_ablkcipher_type,
407 .cra_module = THIS_MODULE,
408 .cra_init = ablk_init,
409 .cra_exit = ablk_exit,
410 .cra_u = {
411 .ablkcipher = {
412 .min_keysize = CAST5_MIN_KEY_SIZE,
413 .max_keysize = CAST5_MAX_KEY_SIZE,
414 .setkey = ablk_set_key,
415 .encrypt = ablk_encrypt,
416 .decrypt = ablk_decrypt,
417 },
418 },
419 }, {
420 .cra_name = "cbc(cast5)",
421 .cra_driver_name = "cbc-cast5-avx",
422 .cra_priority = 200,
423 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
424 .cra_blocksize = CAST5_BLOCK_SIZE,
425 .cra_ctxsize = sizeof(struct async_helper_ctx),
426 .cra_alignmask = 0,
427 .cra_type = &crypto_ablkcipher_type,
428 .cra_module = THIS_MODULE,
429 .cra_init = ablk_init,
430 .cra_exit = ablk_exit,
431 .cra_u = {
432 .ablkcipher = {
433 .min_keysize = CAST5_MIN_KEY_SIZE,
434 .max_keysize = CAST5_MAX_KEY_SIZE,
435 .ivsize = CAST5_BLOCK_SIZE,
436 .setkey = ablk_set_key,
437 .encrypt = __ablk_encrypt,
438 .decrypt = ablk_decrypt,
439 },
440 },
441 }, {
442 .cra_name = "ctr(cast5)",
443 .cra_driver_name = "ctr-cast5-avx",
444 .cra_priority = 200,
445 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
446 .cra_blocksize = 1,
447 .cra_ctxsize = sizeof(struct async_helper_ctx),
448 .cra_alignmask = 0,
449 .cra_type = &crypto_ablkcipher_type,
450 .cra_module = THIS_MODULE,
451 .cra_init = ablk_init,
452 .cra_exit = ablk_exit,
453 .cra_u = {
454 .ablkcipher = {
455 .min_keysize = CAST5_MIN_KEY_SIZE,
456 .max_keysize = CAST5_MAX_KEY_SIZE,
457 .ivsize = CAST5_BLOCK_SIZE,
458 .setkey = ablk_set_key,
459 .encrypt = ablk_encrypt,
460 .decrypt = ablk_encrypt,
461 .geniv = "chainiv",
462 },
463 },
464 } };
465
cast5_init(void)466 static int __init cast5_init(void)
467 {
468 const char *feature_name;
469
470 if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
471 &feature_name)) {
472 pr_info("CPU feature '%s' is not supported.\n", feature_name);
473 return -ENODEV;
474 }
475
476 return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
477 }
478
cast5_exit(void)479 static void __exit cast5_exit(void)
480 {
481 crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
482 }
483
484 module_init(cast5_init);
485 module_exit(cast5_exit);
486
487 MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
488 MODULE_LICENSE("GPL");
489 MODULE_ALIAS_CRYPTO("cast5");
490