• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright (c) 2014, Google Inc.
2  *
3  * Permission to use, copy, modify, and/or distribute this software for any
4  * purpose with or without fee is hereby granted, provided that the above
5  * copyright notice and this permission notice appear in all copies.
6  *
7  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10  * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14 
15 // Adapted from the public domain, estream code by D. Bernstein.
16 
17 #include <openssl/chacha.h>
18 
19 #include <assert.h>
20 #include <string.h>
21 
22 #include "../internal.h"
23 #include "internal.h"
24 
25 
// sigma contains the ChaCha constants, which happen to be an ASCII string
// ("expand 32-byte k").
static const uint8_t sigma[16] = { 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
                                   '2', '-', 'b', 'y', 't', 'e', ' ', 'k' };

// QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round.
// It operates on a uint32_t array named |x| in the enclosing scope. Each
// expansion is a sequence of complete statements ending in a semicolon,
// which is why call sites below invoke it with no trailing semicolon.
#define QUARTERROUND(a, b, c, d)           \
  x[a] += x[b];                            \
  x[d] = CRYPTO_rotl_u32(x[d] ^ x[a], 16); \
  x[c] += x[d];                            \
  x[b] = CRYPTO_rotl_u32(x[b] ^ x[c], 12); \
  x[a] += x[b];                            \
  x[d] = CRYPTO_rotl_u32(x[d] ^ x[a], 8);  \
  x[c] += x[d];                            \
  x[b] = CRYPTO_rotl_u32(x[b] ^ x[c], 7);
40 
CRYPTO_hchacha20(uint8_t out[32],const uint8_t key[32],const uint8_t nonce[16])41 void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32],
42                       const uint8_t nonce[16]) {
43   uint32_t x[16];
44   OPENSSL_memcpy(x, sigma, sizeof(sigma));
45   OPENSSL_memcpy(&x[4], key, 32);
46   OPENSSL_memcpy(&x[12], nonce, 16);
47 
48   for (size_t i = 0; i < 20; i += 2) {
49     QUARTERROUND(0, 4, 8, 12)
50     QUARTERROUND(1, 5, 9, 13)
51     QUARTERROUND(2, 6, 10, 14)
52     QUARTERROUND(3, 7, 11, 15)
53     QUARTERROUND(0, 5, 10, 15)
54     QUARTERROUND(1, 6, 11, 12)
55     QUARTERROUND(2, 7, 8, 13)
56     QUARTERROUND(3, 4, 9, 14)
57   }
58 
59   OPENSSL_memcpy(out, &x[0], sizeof(uint32_t) * 4);
60   OPENSSL_memcpy(&out[16], &x[12], sizeof(uint32_t) * 4);
61 }
62 
63 #if defined(CHACHA20_ASM)
64 
// CRYPTO_chacha_20 (assembly-backed variant): XORs |in_len| bytes at |in|
// with the ChaCha20 keystream derived from |key|, |nonce|, and the initial
// 32-bit block |counter|, writing the result to |out|. Exact in-place
// operation (in == out) is permitted; partial overlap is not (asserted).
void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
                      const uint8_t key[32], const uint8_t nonce[12],
                      uint32_t counter) {
  assert(!buffers_alias(out, in_len, in, in_len) || in == out);

  // Assemble the block the assembly consumes: the 32-bit block counter
  // followed by the three little-endian nonce words.
  uint32_t counter_nonce[4];
  counter_nonce[0] = counter;
  counter_nonce[1] = CRYPTO_load_u32_le(nonce + 0);
  counter_nonce[2] = CRYPTO_load_u32_le(nonce + 4);
  counter_nonce[3] = CRYPTO_load_u32_le(nonce + 8);

  const uint32_t *key_ptr = (const uint32_t *)key;
#if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64)
  // The assembly expects the key to be four-byte aligned.
  uint32_t key_u32[8];
  if ((((uintptr_t)key) & 3) != 0) {
    // |key| is misaligned; copy it word-by-word into an aligned buffer.
    key_u32[0] = CRYPTO_load_u32_le(key + 0);
    key_u32[1] = CRYPTO_load_u32_le(key + 4);
    key_u32[2] = CRYPTO_load_u32_le(key + 8);
    key_u32[3] = CRYPTO_load_u32_le(key + 12);
    key_u32[4] = CRYPTO_load_u32_le(key + 16);
    key_u32[5] = CRYPTO_load_u32_le(key + 20);
    key_u32[6] = CRYPTO_load_u32_le(key + 24);
    key_u32[7] = CRYPTO_load_u32_le(key + 28);

    key_ptr = key_u32;
  }
#endif

  while (in_len > 0) {
    // The assembly functions do not have defined overflow behavior. While
    // overflow is almost always a bug in the caller, we prefer our functions to
    // behave the same across platforms, so divide into multiple calls to avoid
    // this case.
    // 64 keystream bytes per block, times the number of blocks remaining
    // before the 32-bit counter would wrap.
    uint64_t todo = 64 * ((UINT64_C(1) << 32) - counter_nonce[0]);
    if (todo > in_len) {
      todo = in_len;
    }

    ChaCha20_ctr32(out, in, (size_t)todo, key_ptr, counter_nonce);
    in += todo;
    out += todo;
    in_len -= todo;

    // We're either done and will next break out of the loop, or we stopped at
    // the wraparound point and the counter should continue at zero.
    counter_nonce[0] = 0;
  }
}
114 
115 #else
116 
117 // chacha_core performs 20 rounds of ChaCha on the input words in
118 // |input| and writes the 64 output bytes to |output|.
chacha_core(uint8_t output[64],const uint32_t input[16])119 static void chacha_core(uint8_t output[64], const uint32_t input[16]) {
120   uint32_t x[16];
121   int i;
122 
123   OPENSSL_memcpy(x, input, sizeof(uint32_t) * 16);
124   for (i = 20; i > 0; i -= 2) {
125     QUARTERROUND(0, 4, 8, 12)
126     QUARTERROUND(1, 5, 9, 13)
127     QUARTERROUND(2, 6, 10, 14)
128     QUARTERROUND(3, 7, 11, 15)
129     QUARTERROUND(0, 5, 10, 15)
130     QUARTERROUND(1, 6, 11, 12)
131     QUARTERROUND(2, 7, 8, 13)
132     QUARTERROUND(3, 4, 9, 14)
133   }
134 
135   for (i = 0; i < 16; ++i) {
136     x[i] += input[i];
137   }
138   for (i = 0; i < 16; ++i) {
139     CRYPTO_store_u32_le(output + 4 * i, x[i]);
140   }
141 }
142 
CRYPTO_chacha_20(uint8_t * out,const uint8_t * in,size_t in_len,const uint8_t key[32],const uint8_t nonce[12],uint32_t counter)143 void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
144                       const uint8_t key[32], const uint8_t nonce[12],
145                       uint32_t counter) {
146   assert(!buffers_alias(out, in_len, in, in_len) || in == out);
147 
148   uint32_t input[16];
149   uint8_t buf[64];
150   size_t todo, i;
151 
152   input[0] = CRYPTO_load_u32_le(sigma + 0);
153   input[1] = CRYPTO_load_u32_le(sigma + 4);
154   input[2] = CRYPTO_load_u32_le(sigma + 8);
155   input[3] = CRYPTO_load_u32_le(sigma + 12);
156 
157   input[4] = CRYPTO_load_u32_le(key + 0);
158   input[5] = CRYPTO_load_u32_le(key + 4);
159   input[6] = CRYPTO_load_u32_le(key + 8);
160   input[7] = CRYPTO_load_u32_le(key + 12);
161 
162   input[8] = CRYPTO_load_u32_le(key + 16);
163   input[9] = CRYPTO_load_u32_le(key + 20);
164   input[10] = CRYPTO_load_u32_le(key + 24);
165   input[11] = CRYPTO_load_u32_le(key + 28);
166 
167   input[12] = counter;
168   input[13] = CRYPTO_load_u32_le(nonce + 0);
169   input[14] = CRYPTO_load_u32_le(nonce + 4);
170   input[15] = CRYPTO_load_u32_le(nonce + 8);
171 
172   while (in_len > 0) {
173     todo = sizeof(buf);
174     if (in_len < todo) {
175       todo = in_len;
176     }
177 
178     chacha_core(buf, input);
179     for (i = 0; i < todo; i++) {
180       out[i] = in[i] ^ buf[i];
181     }
182 
183     out += todo;
184     in += todo;
185     in_len -= todo;
186 
187     input[12]++;
188   }
189 }
190 
191 #endif
192