/* Copyright (c) 2014, Google Inc.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */

// Adapted from the public domain, estream code by D. Bernstein.

#include <openssl/chacha.h>

#include <assert.h>
#include <string.h>

#include "../internal.h"
#include "internal.h"


// sigma contains the ChaCha constants, which happen to be an ASCII string.
static const uint8_t sigma[16] = { 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
                                   '2', '-', 'b', 'y', 't', 'e', ' ', 'k' };
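// Read as little-endian words, these bytes are the four constant state words
// 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574.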

// QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round.
#define QUARTERROUND(a, b, c, d)           \
  x[a] += x[b];                            \
  x[d] = CRYPTO_rotl_u32(x[d] ^ x[a], 16); \
  x[c] += x[d];                            \
  x[b] = CRYPTO_rotl_u32(x[b] ^ x[c], 12); \
  x[a] += x[b];                            \
  x[d] = CRYPTO_rotl_u32(x[d] ^ x[a], 8);  \
  x[c] += x[d];                            \
  x[b] = CRYPTO_rotl_u32(x[b] ^ x[c], 7);

void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32],
                      const uint8_t nonce[16]) {
  uint32_t x[16];
  OPENSSL_memcpy(x, sigma, sizeof(sigma));
  OPENSSL_memcpy(&x[4], key, 32);
  OPENSSL_memcpy(&x[12], nonce, 16);

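  // Each iteration of this loop is a ChaCha "double round": quarter rounds
  // over the four columns of the 4x4 state, then over its four diagonals.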
  for (size_t i = 0; i < 20; i += 2) {
    QUARTERROUND(0, 4, 8, 12)
    QUARTERROUND(1, 5, 9, 13)
    QUARTERROUND(2, 6, 10, 14)
    QUARTERROUND(3, 7, 11, 15)
    QUARTERROUND(0, 5, 10, 15)
    QUARTERROUND(1, 6, 11, 12)
    QUARTERROUND(2, 7, 8, 13)
    QUARTERROUND(3, 4, 9, 14)
  }

  OPENSSL_memcpy(out, &x[0], sizeof(uint32_t) * 4);
  OPENSSL_memcpy(&out[16], &x[12], sizeof(uint32_t) * 4);
}
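
// A minimal sketch of how HChaCha20 is commonly combined with ChaCha20 to
// form XChaCha20: derive a subkey from the first 16 bytes of a 24-byte nonce,
// then run ChaCha20 with the remaining 8 nonce bytes zero-padded to 12 bytes.
// The helper below is hypothetical and compiled out; it is not part of this
// module's API.
#if 0
static void xchacha20_sketch(uint8_t *out, const uint8_t *in, size_t in_len,
                             const uint8_t key[32], const uint8_t nonce[24]) {
  // Derive the subkey from the key and the first 16 bytes of the nonce.
  uint8_t subkey[32];
  CRYPTO_hchacha20(subkey, key, nonce);

  // The derived nonce is four zero bytes followed by nonce[16..24).
  uint8_t derived_nonce[12] = {0};
  OPENSSL_memcpy(derived_nonce + 4, nonce + 16, 8);
  CRYPTO_chacha_20(out, in, in_len, subkey, derived_nonce, 0);
}
#endif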

#if defined(CHACHA20_ASM)

void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
                      const uint8_t key[32], const uint8_t nonce[12],
                      uint32_t counter) {
  assert(!buffers_alias(out, in_len, in, in_len) || in == out);

  uint32_t counter_nonce[4];
  counter_nonce[0] = counter;
  counter_nonce[1] = CRYPTO_load_u32_le(nonce + 0);
  counter_nonce[2] = CRYPTO_load_u32_le(nonce + 4);
  counter_nonce[3] = CRYPTO_load_u32_le(nonce + 8);

  const uint32_t *key_ptr = (const uint32_t *)key;
#if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64)
  // The assembly expects the key to be four-byte aligned.
  uint32_t key_u32[8];
  if ((((uintptr_t)key) & 3) != 0) {
    key_u32[0] = CRYPTO_load_u32_le(key + 0);
    key_u32[1] = CRYPTO_load_u32_le(key + 4);
    key_u32[2] = CRYPTO_load_u32_le(key + 8);
    key_u32[3] = CRYPTO_load_u32_le(key + 12);
    key_u32[4] = CRYPTO_load_u32_le(key + 16);
    key_u32[5] = CRYPTO_load_u32_le(key + 20);
    key_u32[6] = CRYPTO_load_u32_le(key + 24);
    key_u32[7] = CRYPTO_load_u32_le(key + 28);

    key_ptr = key_u32;
  }
#endif

  while (in_len > 0) {
    // The assembly functions do not have defined overflow behavior. While
    // overflow is almost always a bug in the caller, we prefer our functions
    // to behave the same across platforms, so divide into multiple calls to
    // avoid this case.
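    // |counter_nonce[0]| is the 32-bit block counter. Each ChaCha block is 64
    // bytes, so this is the number of bytes remaining before the counter
    // would wrap.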
    uint64_t todo = 64 * ((UINT64_C(1) << 32) - counter_nonce[0]);
    if (todo > in_len) {
      todo = in_len;
    }

    ChaCha20_ctr32(out, in, (size_t)todo, key_ptr, counter_nonce);
    in += todo;
    out += todo;
    in_len -= todo;

    // We're either done and will next break out of the loop, or we stopped at
    // the wraparound point and the counter should continue at zero.
    counter_nonce[0] = 0;
  }
}

#else

// chacha_core performs 20 rounds of ChaCha on the input words in
// |input| and writes the 64 output bytes to |output|.
static void chacha_core(uint8_t output[64], const uint32_t input[16]) {
  uint32_t x[16];
  int i;

  OPENSSL_memcpy(x, input, sizeof(uint32_t) * 16);
  for (i = 20; i > 0; i -= 2) {
    QUARTERROUND(0, 4, 8, 12)
    QUARTERROUND(1, 5, 9, 13)
    QUARTERROUND(2, 6, 10, 14)
    QUARTERROUND(3, 7, 11, 15)
    QUARTERROUND(0, 5, 10, 15)
    QUARTERROUND(1, 6, 11, 12)
    QUARTERROUND(2, 7, 8, 13)
    QUARTERROUND(3, 4, 9, 14)
  }

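  // Feed the original input words forward into the permuted state, then
  // serialize the result in little-endian order.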
  for (i = 0; i < 16; ++i) {
    x[i] += input[i];
  }
  for (i = 0; i < 16; ++i) {
    CRYPTO_store_u32_le(output + 4 * i, x[i]);
  }
}

void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
                      const uint8_t key[32], const uint8_t nonce[12],
                      uint32_t counter) {
  assert(!buffers_alias(out, in_len, in, in_len) || in == out);

  uint32_t input[16];
  uint8_t buf[64];
  size_t todo, i;

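  // The state layout follows RFC 8439: words 0-3 hold the constants, words
  // 4-11 the key, word 12 the block counter, and words 13-15 the nonce.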
  input[0] = CRYPTO_load_u32_le(sigma + 0);
  input[1] = CRYPTO_load_u32_le(sigma + 4);
  input[2] = CRYPTO_load_u32_le(sigma + 8);
  input[3] = CRYPTO_load_u32_le(sigma + 12);

  input[4] = CRYPTO_load_u32_le(key + 0);
  input[5] = CRYPTO_load_u32_le(key + 4);
  input[6] = CRYPTO_load_u32_le(key + 8);
  input[7] = CRYPTO_load_u32_le(key + 12);

  input[8] = CRYPTO_load_u32_le(key + 16);
  input[9] = CRYPTO_load_u32_le(key + 20);
  input[10] = CRYPTO_load_u32_le(key + 24);
  input[11] = CRYPTO_load_u32_le(key + 28);

  input[12] = counter;
  input[13] = CRYPTO_load_u32_le(nonce + 0);
  input[14] = CRYPTO_load_u32_le(nonce + 4);
  input[15] = CRYPTO_load_u32_le(nonce + 8);

  while (in_len > 0) {
    todo = sizeof(buf);
    if (in_len < todo) {
      todo = in_len;
    }

    chacha_core(buf, input);
    for (i = 0; i < todo; i++) {
      out[i] = in[i] ^ buf[i];
    }

    out += todo;
    in += todo;
    in_len -= todo;

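    // Advance the 32-bit block counter for the next 64-byte block; it wraps
    // modulo 2^32.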
    input[12]++;
  }
}

#endif