1diff -burN android-openssl-lhash2/openssl.config android-openssl/openssl.config 2--- android-openssl-lhash2/openssl.config 2013-11-05 14:38:31.187575574 -0500 3+++ android-openssl/openssl.config 2013-11-05 15:03:54.661551145 -0500 4@@ -432,6 +432,7 @@ 5 crypto/buffer/buf_err.c \ 6 crypto/buffer/buf_str.c \ 7 crypto/buffer/buffer.c \ 8+crypto/chacha/chacha_enc.c \ 9 crypto/cmac/cm_ameth.c \ 10 crypto/cmac/cm_pmeth.c \ 11 crypto/cmac/cmac.c \ 12@@ -565,6 +566,7 @@ 13 crypto/evp/e_aes.c \ 14 crypto/evp/e_aes_cbc_hmac_sha1.c \ 15 crypto/evp/e_bf.c \ 16+crypto/evp/e_chacha20poly1305.c \ 17 crypto/evp/e_des.c \ 18 crypto/evp/e_des3.c \ 19 crypto/evp/e_null.c \ 20@@ -576,6 +578,7 @@ 21 crypto/evp/e_xcbc_d.c \ 22 crypto/evp/encode.c \ 23 crypto/evp/evp_acnf.c \ 24+crypto/evp/evp_aead.c \ 25 crypto/evp/evp_cnf.c \ 26 crypto/evp/evp_enc.c \ 27 crypto/evp/evp_err.c \ 28@@ -674,6 +677,7 @@ 29 crypto/pkcs7/pk7_smime.c \ 30 crypto/pkcs7/pkcs7err.c \ 31 crypto/pqueue/pqueue.c \ 32+crypto/poly1305/poly1305.c \ 33 crypto/rand/md_rand.c \ 34 crypto/rand/rand_egd.c \ 35 crypto/rand/rand_err.c \ 36@@ -789,7 +793,10 @@ 37 crypto/aes/asm/aes-armv4.S \ 38 crypto/bn/asm/armv4-gf2m.S \ 39 crypto/bn/asm/armv4-mont.S \ 40+crypto/chacha/chacha_vec_arm.S \ 41 crypto/modes/asm/ghash-armv4.S \ 42+crypto/poly1305/poly1305_arm.c \ 43+crypto/poly1305/poly1305_arm_asm.S \ 44 crypto/sha/asm/sha1-armv4-large.S \ 45 crypto/sha/asm/sha256-armv4.S \ 46 crypto/sha/asm/sha512-armv4.S \ 47@@ -852,6 +863,7 @@ 48 crypto/bn/asm/x86_64-gf2m.S \ 49 crypto/bn/asm/x86_64-mont.S \ 50 crypto/bn/asm/x86_64-mont5.S \ 51+crypto/chacha/chacha_vec.c \ 52 crypto/md5/asm/md5-x86_64.S \ 53 crypto/modes/asm/ghash-x86_64.S \ 54 crypto/rc4/asm/rc4-md5-x86_64.S \ 55@@ -859,6 +871,7 @@ 56 crypto/sha/asm/sha1-x86_64.S \ 57 crypto/sha/asm/sha256-x86_64.S \ 58 crypto/sha/asm/sha512-x86_64.S \ 59+crypto/poly1305/poly1305_vec.c \ 60 crypto/x86_64cpuid.S \ 61 " 62 63@@ -866,7 +879,9 @@ 64 crypto/aes/aes_cbc.c \ 65 
crypto/aes/aes_core.c \ 66 crypto/bn/bn_asm.c \ 67+crypto/chacha/chacha_enc.c \ 68 crypto/mem_clr.c \ 69+crypto/poly1305/poly1305.c \ 70 crypto/rc4/rc4_enc.c \ 71 crypto/rc4/rc4_skey.c \ 72 " 73@@ -998,6 +1013,12 @@ 74 x509_hash_name_algorithm_change.patch \ 75 reduce_client_hello_size.patch \ 76 fix_lhash_iteration.patch \ 77+tls1_change_cipher_state_rewrite.patch \ 78+aead_support.patch \ 79+aead_ssl_support.patch \ 80+use_aead_for_aes_gcm.patch \ 81+chacha20poly1305.patch \ 82+neon_runtime.patch \ 83 " 84 85 OPENSSL_PATCHES_progs_SOURCES="\ 86diff -burN android-openssl-lhash2/patches/aead_ssl_support.patch android-openssl/patches/aead_ssl_support.patch 87--- android-openssl-lhash2/patches/aead_ssl_support.patch 1969-12-31 19:00:00.000000000 -0500 88+++ android-openssl/patches/aead_ssl_support.patch 2013-11-05 14:14:34.631283497 -0500 89@@ -0,0 +1,690 @@ 90+From dc8386dbb390f4b867019873cd072a5fe01ba4e9 Mon Sep 17 00:00:00 2001 91+From: Adam Langley <agl@chromium.org> 92+Date: Thu, 25 Jul 2013 17:35:23 -0400 93+Subject: [PATCH 41/50] aead_ssl_support. 94+ 95+This change allows AEADs to be used in ssl/ to implement SSL/TLS 96+ciphersuites. 
97+--- 98+ ssl/s2_clnt.c | 2 +- 99+ ssl/s2_enc.c | 2 +- 100+ ssl/s2_srvr.c | 2 +- 101+ ssl/s3_enc.c | 8 +- 102+ ssl/s3_pkt.c | 4 +- 103+ ssl/ssl.h | 15 +++- 104+ ssl/ssl3.h | 1 + 105+ ssl/ssl_ciph.c | 70 +++++++++++---- 106+ ssl/ssl_err.c | 3 + 107+ ssl/ssl_lib.c | 12 +++ 108+ ssl/ssl_locl.h | 23 ++++- 109+ ssl/ssl_txt.c | 2 +- 110+ ssl/t1_enc.c | 262 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 111+ 13 files changed, 356 insertions(+), 50 deletions(-) 112+ 113+diff --git a/ssl/s2_clnt.c b/ssl/s2_clnt.c 114+index 03b6cf9..32adaf5 100644 115+--- a/ssl/s2_clnt.c 116++++ b/ssl/s2_clnt.c 117+@@ -623,7 +623,7 @@ static int client_master_key(SSL *s) 118+ if (s->state == SSL2_ST_SEND_CLIENT_MASTER_KEY_A) 119+ { 120+ 121+- if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL,NULL)) 122++ if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL)) 123+ { 124+ ssl2_return_error(s,SSL2_PE_NO_CIPHER); 125+ SSLerr(SSL_F_CLIENT_MASTER_KEY,SSL_R_PROBLEMS_MAPPING_CIPHER_FUNCTIONS); 126+diff --git a/ssl/s2_enc.c b/ssl/s2_enc.c 127+index ff3395f..087c4a2 100644 128+--- a/ssl/s2_enc.c 129++++ b/ssl/s2_enc.c 130+@@ -68,7 +68,7 @@ int ssl2_enc_init(SSL *s, int client) 131+ const EVP_MD *md; 132+ int num; 133+ 134+- if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL,NULL)) 135++ if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL)) 136+ { 137+ ssl2_return_error(s,SSL2_PE_NO_CIPHER); 138+ SSLerr(SSL_F_SSL2_ENC_INIT,SSL_R_PROBLEMS_MAPPING_CIPHER_FUNCTIONS); 139+diff --git a/ssl/s2_srvr.c b/ssl/s2_srvr.c 140+index 9b1a6ac..9392921 100644 141+--- a/ssl/s2_srvr.c 142++++ b/ssl/s2_srvr.c 143+@@ -452,7 +452,7 @@ static int get_client_master_key(SSL *s) 144+ 145+ is_export=SSL_C_IS_EXPORT(s->session->cipher); 146+ 147+- if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL,NULL)) 148++ if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL)) 149+ { 150+ ssl2_return_error(s,SSL2_PE_NO_CIPHER); 151+ SSLerr(SSL_F_GET_CLIENT_MASTER_KEY,SSL_R_PROBLEMS_MAPPING_CIPHER_FUNCTIONS); 152+diff --git 
a/ssl/s3_enc.c b/ssl/s3_enc.c 153+index e3cd4f0..191b86b 100644 154+--- a/ssl/s3_enc.c 155++++ b/ssl/s3_enc.c 156+@@ -397,7 +397,13 @@ int ssl3_setup_key_block(SSL *s) 157+ if (s->s3->tmp.key_block_length != 0) 158+ return(1); 159+ 160+- if (!ssl_cipher_get_evp(s->session,&c,&hash,NULL,NULL,&comp)) 161++ if (!ssl_cipher_get_comp(s->session, &comp)) 162++ { 163++ SSLerr(SSL_F_SSL3_SETUP_KEY_BLOCK,SSL_R_CIPHER_OR_HASH_UNAVAILABLE); 164++ return(0); 165++ } 166++ 167++ if (!ssl_cipher_get_evp(s->session,&c,&hash,NULL,NULL)) 168+ { 169+ SSLerr(SSL_F_SSL3_SETUP_KEY_BLOCK,SSL_R_CIPHER_OR_HASH_UNAVAILABLE); 170+ return(0); 171+diff --git a/ssl/s3_pkt.c b/ssl/s3_pkt.c 172+index 33bb78a..5038f6c 100644 173+--- a/ssl/s3_pkt.c 174++++ b/ssl/s3_pkt.c 175+@@ -790,7 +790,9 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, 176+ else 177+ eivlen = 0; 178+ } 179+- else 180++ else if (s->aead_write_ctx != NULL) 181++ eivlen = s->aead_write_ctx->variable_nonce_len; 182++ else 183+ eivlen = 0; 184+ 185+ /* lets setup the record stuff. */ 186+diff --git a/ssl/ssl.h b/ssl/ssl.h 187+index 672f3eb..0644cbf 100644 188+--- a/ssl/ssl.h 189++++ b/ssl/ssl.h 190+@@ -406,7 +406,9 @@ struct ssl_cipher_st 191+ unsigned long algorithm_ssl; /* (major) protocol version */ 192+ 193+ unsigned long algo_strength; /* strength and export flags */ 194+- unsigned long algorithm2; /* Extra flags */ 195++ unsigned long algorithm2; /* Extra flags. 
See SSL2_CF_* in ssl2.h 196++ and algorithm2 section in 197++ ssl_locl.h */ 198+ int strength_bits; /* Number of bits really used */ 199+ int alg_bits; /* Number of bits for algorithm */ 200+ }; 201+@@ -748,6 +750,9 @@ int SRP_generate_client_master_secret(SSL *s,unsigned char *master_key); 202+ 203+ #endif 204+ 205++struct ssl_aead_ctx_st; 206++typedef struct ssl_aead_ctx_st SSL_AEAD_CTX; 207++ 208+ #if defined(OPENSSL_SYS_MSDOS) && !defined(OPENSSL_SYS_WIN32) 209+ #define SSL_MAX_CERT_LIST_DEFAULT 1024*30 /* 30k max cert list :-) */ 210+ #else 211+@@ -1294,6 +1299,9 @@ struct ssl_st 212+ /* These are the ones being used, the ones in SSL_SESSION are 213+ * the ones to be 'copied' into these ones */ 214+ int mac_flags; 215++ SSL_AEAD_CTX *aead_read_ctx; /* AEAD context. If non-NULL, then 216++ |enc_read_ctx| and |read_hash| are 217++ ignored. */ 218+ EVP_CIPHER_CTX *enc_read_ctx; /* cryptographic state */ 219+ EVP_MD_CTX *read_hash; /* used for mac generation */ 220+ #ifndef OPENSSL_NO_COMP 221+@@ -1302,6 +1310,9 @@ struct ssl_st 222+ char *expand; 223+ #endif 224+ 225++ SSL_AEAD_CTX *aead_write_ctx; /* AEAD context. If non-NULL, then 226++ |enc_write_ctx| and |write_hash| are 227++ ignored. 
*/ 228+ EVP_CIPHER_CTX *enc_write_ctx; /* cryptographic state */ 229+ EVP_MD_CTX *write_hash; /* used for mac generation */ 230+ #ifndef OPENSSL_NO_COMP 231+@@ -2437,8 +2448,10 @@ void ERR_load_SSL_strings(void); 232+ #define SSL_F_SSL_USE_RSAPRIVATEKEY_FILE 206 233+ #define SSL_F_SSL_VERIFY_CERT_CHAIN 207 234+ #define SSL_F_SSL_WRITE 208 235++#define SSL_F_TLS1_AEAD_CTX_INIT 339 236+ #define SSL_F_TLS1_CERT_VERIFY_MAC 286 237+ #define SSL_F_TLS1_CHANGE_CIPHER_STATE 209 238++#define SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD 340 239+ #define SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER 338 240+ #define SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT 274 241+ #define SSL_F_TLS1_ENC 210 242+diff --git a/ssl/ssl3.h b/ssl/ssl3.h 243+index a4f6d4a..6a5cdbe 100644 244+--- a/ssl/ssl3.h 245++++ b/ssl/ssl3.h 246+@@ -517,6 +517,7 @@ typedef struct ssl3_state_st 247+ unsigned char *key_block; 248+ 249+ const EVP_CIPHER *new_sym_enc; 250++ const EVP_AEAD *new_aead; 251+ const EVP_MD *new_hash; 252+ int new_mac_pkey_type; 253+ int new_mac_secret_size; 254+diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c 255+index 2966ddf..7e780cd 100644 256+--- a/ssl/ssl_ciph.c 257++++ b/ssl/ssl_ciph.c 258+@@ -484,32 +484,66 @@ static void load_builtin_compressions(void) 259+ } 260+ #endif 261+ 262++/* ssl_cipher_get_comp sets |comp| to the correct SSL_COMP for the given 263++ * session and returns 1. On error it returns 0. */ 264++int ssl_cipher_get_comp(const SSL_SESSION *s, SSL_COMP **comp) 265++ { 266++ int i; 267++ 268++ SSL_COMP ctmp; 269++#ifndef OPENSSL_NO_COMP 270++ load_builtin_compressions(); 271++#endif 272++ 273++ *comp=NULL; 274++ ctmp.id=s->compress_meth; 275++ if (ssl_comp_methods != NULL) 276++ { 277++ i=sk_SSL_COMP_find(ssl_comp_methods,&ctmp); 278++ if (i >= 0) 279++ *comp=sk_SSL_COMP_value(ssl_comp_methods,i); 280++ else 281++ *comp=NULL; 282++ } 283++ 284++ return 1; 285++ } 286++ 287++/* ssl_cipher_get_evp_aead sets |*aead| to point to the correct EVP_AEAD object 288++ * for |s->cipher|. 
It returns 1 on success and 0 on error. */ 289++int ssl_cipher_get_evp_aead(const SSL_SESSION *s, const EVP_AEAD **aead) 290++ { 291++ const SSL_CIPHER *c = s->cipher; 292++ 293++ *aead = NULL; 294++ 295++ if (c == NULL) 296++ return 0; 297++ if ((c->algorithm2 & SSL_CIPHER_ALGORITHM2_AEAD) == 0) 298++ return 0; 299++ 300++#ifndef OPENSSL_NO_AES 301++ /* There is only one AEAD for now. */ 302++ *aead = EVP_aead_aes_128_gcm(); 303++ return 1; 304++#endif 305++ 306++ return 0; 307++ } 308++ 309+ int ssl_cipher_get_evp(const SSL_SESSION *s, const EVP_CIPHER **enc, 310+- const EVP_MD **md, int *mac_pkey_type, int *mac_secret_size,SSL_COMP **comp) 311++ const EVP_MD **md, int *mac_pkey_type, int *mac_secret_size) 312+ { 313+ int i; 314+ const SSL_CIPHER *c; 315+ 316+ c=s->cipher; 317+ if (c == NULL) return(0); 318+- if (comp != NULL) 319+- { 320+- SSL_COMP ctmp; 321+-#ifndef OPENSSL_NO_COMP 322+- load_builtin_compressions(); 323+-#endif 324+ 325+- *comp=NULL; 326+- ctmp.id=s->compress_meth; 327+- if (ssl_comp_methods != NULL) 328+- { 329+- i=sk_SSL_COMP_find(ssl_comp_methods,&ctmp); 330+- if (i >= 0) 331+- *comp=sk_SSL_COMP_value(ssl_comp_methods,i); 332+- else 333+- *comp=NULL; 334+- } 335+- } 336++ /* This function doesn't deal with EVP_AEAD. See 337++ * |ssl_cipher_get_aead_evp|. 
*/ 338++ if (c->algorithm2 & SSL_CIPHER_ALGORITHM2_AEAD) 339++ return(0); 340+ 341+ if ((enc == NULL) || (md == NULL)) return(0); 342+ 343+diff --git a/ssl/ssl_err.c b/ssl/ssl_err.c 344+index 97b2a0d..ad3a7b9 100644 345+--- a/ssl/ssl_err.c 346++++ b/ssl/ssl_err.c 347+@@ -280,6 +280,9 @@ static ERR_STRING_DATA SSL_str_functs[]= 348+ {ERR_FUNC(SSL_F_SSL_VERIFY_CERT_CHAIN), "SSL_VERIFY_CERT_CHAIN"}, 349+ {ERR_FUNC(SSL_F_SSL_WRITE), "SSL_write"}, 350+ {ERR_FUNC(SSL_F_TLS1_CERT_VERIFY_MAC), "tls1_cert_verify_mac"}, 351++{ERR_FUNC(SSL_F_TLS1_AEAD_CTX_INIT), "TLS1_AEAD_CTX_INIT"}, 352++{ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE), "tls1_change_cipher_state"}, 353++{ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD), "TLS1_CHANGE_CIPHER_STATE_AEAD"}, 354+ {ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER), "TLS1_CHANGE_CIPHER_STATE_CIPHER"}, 355+ {ERR_FUNC(SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT), "TLS1_CHECK_SERVERHELLO_TLSEXT"}, 356+ {ERR_FUNC(SSL_F_TLS1_ENC), "TLS1_ENC"}, 357+diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c 358+index 3b264b6..8a0150c 100644 359+--- a/ssl/ssl_lib.c 360++++ b/ssl/ssl_lib.c 361+@@ -2881,6 +2881,18 @@ void ssl_clear_cipher_ctx(SSL *s) 362+ OPENSSL_free(s->enc_write_ctx); 363+ s->enc_write_ctx=NULL; 364+ } 365++ if (s->aead_read_ctx != NULL) 366++ { 367++ EVP_AEAD_CTX_cleanup(&s->aead_read_ctx->ctx); 368++ OPENSSL_free(s->aead_read_ctx); 369++ s->aead_read_ctx = NULL; 370++ } 371++ if (s->aead_write_ctx != NULL) 372++ { 373++ EVP_AEAD_CTX_cleanup(&s->aead_write_ctx->ctx); 374++ OPENSSL_free(s->aead_write_ctx); 375++ s->aead_write_ctx = NULL; 376++ } 377+ #ifndef OPENSSL_NO_COMP 378+ if (s->expand != NULL) 379+ { 380+diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h 381+index 3d800af..63bc28b 100644 382+--- a/ssl/ssl_locl.h 383++++ b/ssl/ssl_locl.h 384+@@ -380,6 +380,14 @@ 385+ 386+ #define TLSEXT_CHANNEL_ID_SIZE 128 387+ 388++/* SSL_CIPHER_ALGORITHM2_AEAD is a flag in SSL_CIPHER.algorithm2 which 389++ * indicates that the cipher is implemented via an EVP_AEAD. 
*/ 390++#define SSL_CIPHER_ALGORITHM2_AEAD (1<<23) 391++ 392++/* SSL_CIPHER_AEAD_FIXED_NONCE_LEN returns the number of bytes of fixed nonce 393++ * for an SSL_CIPHER* with the SSL_CIPHER_ALGORITHM2_AEAD flag. */ 394++#define SSL_CIPHER_AEAD_FIXED_NONCE_LEN(ssl_cipher) \ 395++ (((ssl_cipher->algorithm2 >> 24) & 0xf)*2) 396+ 397+ /* 398+ * Export and cipher strength information. For each cipher we have to decide 399+@@ -588,6 +596,17 @@ typedef struct ssl3_enc_method 400+ int use_context); 401+ } SSL3_ENC_METHOD; 402+ 403++/* ssl_aead_ctx_st contains information about an AEAD that is being used to 404++ * encrypt an SSL connection. */ 405++struct ssl_aead_ctx_st 406++ { 407++ EVP_AEAD_CTX ctx; 408++ /* fixed_nonce contains any bytes of the nonce that are fixed for all 409++ * records. */ 410++ unsigned char fixed_nonce[8]; 411++ unsigned char fixed_nonce_len, variable_nonce_len, tag_len; 412++ }; 413++ 414+ #ifndef OPENSSL_NO_COMP 415+ /* Used for holding the relevant compression methods loaded into SSL_CTX */ 416+ typedef struct ssl3_comp_st 417+@@ -834,8 +853,10 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_METHOD *meth, 418+ STACK_OF(SSL_CIPHER) **sorted, 419+ const char *rule_str); 420+ void ssl_update_cache(SSL *s, int mode); 421++int ssl_cipher_get_comp(const SSL_SESSION *s, SSL_COMP **comp); 422++int ssl_cipher_get_evp_aead(const SSL_SESSION *s, const EVP_AEAD **aead); 423+ int ssl_cipher_get_evp(const SSL_SESSION *s,const EVP_CIPHER **enc, 424+- const EVP_MD **md,int *mac_pkey_type,int *mac_secret_size, SSL_COMP **comp); 425++ const EVP_MD **md,int *mac_pkey_type,int *mac_secret_size); 426+ int ssl_get_handshake_digest(int i,long *mask,const EVP_MD **md); 427+ int ssl_verify_cert_chain(SSL *s,STACK_OF(X509) *sk); 428+ int ssl_undefined_function(SSL *s); 429+diff --git a/ssl/ssl_txt.c b/ssl/ssl_txt.c 430+index 6479d52..07826d5 100644 431+--- a/ssl/ssl_txt.c 432++++ b/ssl/ssl_txt.c 433+@@ -216,7 +216,7 @@ int SSL_SESSION_print(BIO *bp, const 
SSL_SESSION *x) 434+ { 435+ SSL_COMP *comp = NULL; 436+ 437+- ssl_cipher_get_evp(x,NULL,NULL,NULL,NULL,&comp); 438++ ssl_cipher_get_comp(x, &comp); 439+ if (comp == NULL) 440+ { 441+ if (BIO_printf(bp,"\n Compression: %d",x->compress_meth) <= 0) goto err; 442+diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c 443+index e1f91ba..7af1a32 100644 444+--- a/ssl/t1_enc.c 445++++ b/ssl/t1_enc.c 446+@@ -316,6 +316,66 @@ static int tls1_generate_key_block(SSL *s, unsigned char *km, 447+ return ret; 448+ } 449+ 450++/* tls1_aead_ctx_init allocates |*aead_ctx|, if needed and returns 1. It 451++ * returns 0 on malloc error. */ 452++static int tls1_aead_ctx_init(SSL_AEAD_CTX **aead_ctx) 453++ { 454++ if (*aead_ctx != NULL) 455++ EVP_AEAD_CTX_cleanup(&(*aead_ctx)->ctx); 456++ else 457++ { 458++ *aead_ctx = (SSL_AEAD_CTX*) OPENSSL_malloc(sizeof(SSL_AEAD_CTX)); 459++ if (*aead_ctx == NULL) 460++ { 461++ SSLerr(SSL_F_TLS1_AEAD_CTX_INIT, ERR_R_MALLOC_FAILURE); 462++ return 0; 463++ } 464++ } 465++ 466++ return 1; 467++ } 468++ 469++static int tls1_change_cipher_state_aead(SSL *s, char is_read, 470++ const unsigned char *key, unsigned key_len, 471++ const unsigned char *iv, unsigned iv_len) 472++ { 473++ const EVP_AEAD *aead = s->s3->tmp.new_aead; 474++ SSL_AEAD_CTX *aead_ctx; 475++ 476++ if (is_read) 477++ { 478++ if (!tls1_aead_ctx_init(&s->aead_read_ctx)) 479++ return 0; 480++ aead_ctx = s->aead_read_ctx; 481++ } 482++ else 483++ { 484++ if (!tls1_aead_ctx_init(&s->aead_write_ctx)) 485++ return 0; 486++ aead_ctx = s->aead_write_ctx; 487++ } 488++ 489++ if (!EVP_AEAD_CTX_init(&aead_ctx->ctx, aead, key, key_len, 490++ EVP_AEAD_DEFAULT_TAG_LENGTH, NULL /* engine */)) 491++ return 0; 492++ if (iv_len > sizeof(aead_ctx->fixed_nonce)) 493++ { 494++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD, ERR_R_INTERNAL_ERROR); 495++ return 0; 496++ } 497++ memcpy(aead_ctx->fixed_nonce, iv, iv_len); 498++ aead_ctx->fixed_nonce_len = iv_len; 499++ aead_ctx->variable_nonce_len = 8; /* always the case, currently. 
*/ 500++ if (aead_ctx->variable_nonce_len + aead_ctx->fixed_nonce_len != EVP_AEAD_nonce_length(aead)) 501++ { 502++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD, ERR_R_INTERNAL_ERROR); 503++ return 0; 504++ } 505++ aead_ctx->tag_len = EVP_AEAD_max_overhead(aead); 506++ 507++ return 1; 508++ } 509++ 510+ /* tls1_change_cipher_state_cipher performs the work needed to switch cipher 511+ * states when using EVP_CIPHER. The argument |is_read| is true iff this 512+ * function is being called due to reading, as opposed to writing, a 513+@@ -494,6 +554,7 @@ int tls1_change_cipher_state(SSL *s, int which) 514+ const unsigned char *client_write_key, *server_write_key, *key; 515+ const unsigned char *client_write_iv, *server_write_iv, *iv; 516+ const EVP_CIPHER *cipher = s->s3->tmp.new_sym_enc; 517++ const EVP_AEAD *aead = s->s3->tmp.new_aead; 518+ unsigned key_len, iv_len, mac_secret_len; 519+ const unsigned char *key_data; 520+ const char is_export = SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) != 0; 521+@@ -551,14 +612,22 @@ int tls1_change_cipher_state(SSL *s, int which) 522+ 523+ mac_secret_len = s->s3->tmp.new_mac_secret_size; 524+ 525+- key_len = EVP_CIPHER_key_length(cipher); 526+- if (is_export && key_len > SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher)) 527+- key_len = SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher); 528+- 529+- if (EVP_CIPHER_mode(cipher) == EVP_CIPH_GCM_MODE) 530+- iv_len = EVP_GCM_TLS_FIXED_IV_LEN; 531++ if (aead != NULL) 532++ { 533++ key_len = EVP_AEAD_key_length(aead); 534++ iv_len = SSL_CIPHER_AEAD_FIXED_NONCE_LEN(s->s3->tmp.new_cipher); 535++ } 536+ else 537+- iv_len = EVP_CIPHER_iv_length(cipher); 538++ { 539++ key_len = EVP_CIPHER_key_length(cipher); 540++ if (is_export && key_len > SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher)) 541++ key_len = SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher); 542++ 543++ if (EVP_CIPHER_mode(cipher) == EVP_CIPH_GCM_MODE) 544++ iv_len = EVP_GCM_TLS_FIXED_IV_LEN; 545++ else 546++ iv_len = EVP_CIPHER_iv_length(cipher); 
547++ } 548+ 549+ key_data = s->s3->tmp.key_block; 550+ client_write_mac_secret = key_data; key_data += mac_secret_len; 551+@@ -587,12 +656,20 @@ int tls1_change_cipher_state(SSL *s, int which) 552+ return 0; 553+ } 554+ 555+- if (!tls1_change_cipher_state_cipher(s, is_read, use_client_keys, 556+- mac_secret, mac_secret_len, 557+- key, key_len, 558+- iv, iv_len)) { 559+- return 0; 560+- } 561++ if (aead != NULL) 562++ { 563++ if (!tls1_change_cipher_state_aead(s, is_read, 564++ key, key_len, iv, iv_len)) 565++ return 0; 566++ } 567++ else 568++ { 569++ if (!tls1_change_cipher_state_cipher(s, is_read, use_client_keys, 570++ mac_secret, mac_secret_len, 571++ key, key_len, 572++ iv, iv_len)) 573++ return 0; 574++ } 575+ 576+ return 1; 577+ err: 578+@@ -603,13 +680,14 @@ err: 579+ int tls1_setup_key_block(SSL *s) 580+ { 581+ unsigned char *p1,*p2=NULL; 582+- const EVP_CIPHER *c; 583+- const EVP_MD *hash; 584++ const EVP_CIPHER *c = NULL; 585++ const EVP_MD *hash = NULL; 586++ const EVP_AEAD *aead = NULL; 587+ int num; 588+ SSL_COMP *comp; 589+ int mac_type= NID_undef,mac_secret_size=0; 590+ int ret=0; 591+- int iv_len; 592++ unsigned key_len, iv_len; 593+ 594+ #ifdef KSSL_DEBUG 595+ printf ("tls1_setup_key_block()\n"); 596+@@ -618,22 +696,36 @@ int tls1_setup_key_block(SSL *s) 597+ if (s->s3->tmp.key_block_length != 0) 598+ return(1); 599+ 600+- if (!ssl_cipher_get_evp(s->session,&c,&hash,&mac_type,&mac_secret_size,&comp)) 601++ if (!ssl_cipher_get_comp(s->session, &comp)) 602++ goto cipher_unavailable_err; 603++ 604++ if (s->session->cipher && 605++ (s->session->cipher->algorithm2 & SSL_CIPHER_ALGORITHM2_AEAD)) 606+ { 607+- SSLerr(SSL_F_TLS1_SETUP_KEY_BLOCK,SSL_R_CIPHER_OR_HASH_UNAVAILABLE); 608+- return(0); 609++ if (!ssl_cipher_get_evp_aead(s->session, &aead)) 610++ goto cipher_unavailable_err; 611++ key_len = EVP_AEAD_key_length(aead); 612++ iv_len = SSL_CIPHER_AEAD_FIXED_NONCE_LEN(s->session->cipher); 613+ } 614+- 615+- if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) 
616+- iv_len = EVP_GCM_TLS_FIXED_IV_LEN; 617+ else 618+- iv_len = EVP_CIPHER_iv_length(c); 619++ { 620++ if (!ssl_cipher_get_evp(s->session,&c,&hash,&mac_type,&mac_secret_size)) 621++ goto cipher_unavailable_err; 622++ key_len = EVP_CIPHER_key_length(c); 623+ 624++ if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) 625++ iv_len = EVP_GCM_TLS_FIXED_IV_LEN; 626++ else 627++ iv_len = EVP_CIPHER_iv_length(c); 628++ } 629++ 630++ s->s3->tmp.new_aead=aead; 631+ s->s3->tmp.new_sym_enc=c; 632+ s->s3->tmp.new_hash=hash; 633+ s->s3->tmp.new_mac_pkey_type = mac_type; 634+ s->s3->tmp.new_mac_secret_size = mac_secret_size; 635+- num=EVP_CIPHER_key_length(c)+mac_secret_size+iv_len; 636++ 637++ num=key_len+mac_secret_size+iv_len; 638+ num*=2; 639+ 640+ ssl3_cleanup_key_block(s); 641+@@ -696,6 +788,10 @@ err: 642+ OPENSSL_free(p2); 643+ } 644+ return(ret); 645++ 646++cipher_unavailable_err: 647++ SSLerr(SSL_F_TLS1_SETUP_KEY_BLOCK,SSL_R_CIPHER_OR_HASH_UNAVAILABLE); 648++ return 0; 649+ } 650+ 651+ /* tls1_enc encrypts/decrypts the record in |s->wrec| / |s->rrec|, respectively. 652+@@ -714,6 +810,124 @@ int tls1_enc(SSL *s, int send) 653+ unsigned long l; 654+ int bs,i,j,k,pad=0,ret,mac_size=0; 655+ const EVP_CIPHER *enc; 656++ const SSL_AEAD_CTX *aead; 657++ 658++ if (send) 659++ rec = &s->s3->wrec; 660++ else 661++ rec = &s->s3->rrec; 662++ 663++ if (send) 664++ aead = s->aead_write_ctx; 665++ else 666++ aead = s->aead_read_ctx; 667++ 668++ if (aead) 669++ { 670++ unsigned char ad[13], *seq, *in, *out, nonce[16]; 671++ unsigned nonce_used; 672++ ssize_t n; 673++ 674++ seq = send ? s->s3->write_sequence : s->s3->read_sequence; 675++ 676++ if (s->version == DTLS1_VERSION || s->version == DTLS1_BAD_VER) 677++ { 678++ unsigned char dtlsseq[9], *p = dtlsseq; 679++ 680++ s2n(send ? 
s->d1->w_epoch : s->d1->r_epoch, p); 681++ memcpy(p, &seq[2], 6); 682++ memcpy(ad, dtlsseq, 8); 683++ } 684++ else 685++ { 686++ memcpy(ad, seq, 8); 687++ for (i=7; i>=0; i--) /* increment */ 688++ { 689++ ++seq[i]; 690++ if (seq[i] != 0) 691++ break; 692++ } 693++ } 694++ 695++ ad[8] = rec->type; 696++ ad[9] = (unsigned char)(s->version>>8); 697++ ad[10] = (unsigned char)(s->version); 698++ 699++ if (aead->fixed_nonce_len + aead->variable_nonce_len > sizeof(nonce) || 700++ aead->variable_nonce_len > 8) 701++ return -1; /* internal error - should never happen. */ 702++ 703++ memcpy(nonce, aead->fixed_nonce, aead->fixed_nonce_len); 704++ nonce_used = aead->fixed_nonce_len; 705++ 706++ if (send) 707++ { 708++ size_t len = rec->length; 709++ in = rec->input; 710++ out = rec->data; 711++ 712++ /* When sending we use the sequence number as the 713++ * variable part of the nonce. */ 714++ if (aead->variable_nonce_len > 8) 715++ return -1; 716++ memcpy(nonce + nonce_used, ad, aead->variable_nonce_len); 717++ nonce_used += aead->variable_nonce_len; 718++ 719++ /* in do_ssl3_write, rec->input is moved forward by 720++ * variable_nonce_len in order to leave space for the 721++ * variable nonce. Thus we can copy the sequence number 722++ * bytes into place without overwriting any of the 723++ * plaintext. */ 724++ memcpy(out, ad, aead->variable_nonce_len); 725++ len -= aead->variable_nonce_len; 726++ 727++ ad[11] = len >> 8; 728++ ad[12] = len & 0xff; 729++ 730++ n = EVP_AEAD_CTX_seal(&aead->ctx, 731++ out + aead->variable_nonce_len, len + aead->tag_len, 732++ nonce, nonce_used, 733++ in + aead->variable_nonce_len, len, 734++ ad, sizeof(ad)); 735++ if (n >= 0) 736++ n += aead->variable_nonce_len; 737++ } 738++ else 739++ { 740++ /* receive */ 741++ size_t len = rec->length; 742++ 743++ if (rec->data != rec->input) 744++ return -1; /* internal error - should never happen. 
*/ 745++ out = in = rec->input; 746++ 747++ if (len < aead->variable_nonce_len) 748++ return 0; 749++ memcpy(nonce + nonce_used, in, aead->variable_nonce_len); 750++ nonce_used += aead->variable_nonce_len; 751++ 752++ in += aead->variable_nonce_len; 753++ len -= aead->variable_nonce_len; 754++ out += aead->variable_nonce_len; 755++ 756++ if (len < aead->tag_len) 757++ return 0; 758++ len -= aead->tag_len; 759++ 760++ ad[11] = len >> 8; 761++ ad[12] = len & 0xff; 762++ 763++ n = EVP_AEAD_CTX_open(&aead->ctx, out, len, nonce, nonce_used, 764++ in, len + aead->tag_len, ad, sizeof(ad)); 765++ 766++ rec->data = rec->input = out; 767++ } 768++ 769++ if (n == -1) 770++ return -1; 771++ rec->length = n; 772++ return 1; 773++ } 774+ 775+ if (send) 776+ { 777+-- 778+1.8.4.1 779+ 780diff -burN android-openssl-lhash2/patches/aead_support.patch android-openssl/patches/aead_support.patch 781--- android-openssl-lhash2/patches/aead_support.patch 1969-12-31 19:00:00.000000000 -0500 782+++ android-openssl/patches/aead_support.patch 2013-11-05 14:14:34.631283497 -0500 783@@ -0,0 +1,811 @@ 784+From 98f0c6e114f55b4451bea824b05ab29db3351f12 Mon Sep 17 00:00:00 2001 785+From: Adam Langley <agl@chromium.org> 786+Date: Thu, 25 Jul 2013 16:52:35 -0400 787+Subject: [PATCH 40/50] aead_support 788+ 789+This change adds an AEAD interface to EVP and an AES-GCM implementation 790+suitable for use in TLS. 
791+--- 792+ crypto/evp/Makefile | 4 +- 793+ crypto/evp/e_aes.c | 214 +++++++++++++++++++++++++++++++++++---- 794+ crypto/evp/evp.h | 111 ++++++++++++++++++++ 795+ crypto/evp/evp_aead.c | 192 +++++++++++++++++++++++++++++++++++ 796+ crypto/evp/evp_err.c | 8 ++ 797+ crypto/evp/evp_locl.h | 24 +++++ 798+ doc/crypto/EVP_AEAD_CTX_init.pod | 96 ++++++++++++++++++ 799+ 7 files changed, 626 insertions(+), 23 deletions(-) 800+ create mode 100644 crypto/evp/evp_aead.c 801+ create mode 100644 doc/crypto/EVP_AEAD_CTX_init.pod 802+ 803+diff --git a/crypto/evp/Makefile b/crypto/evp/Makefile 804+index 1e46ceb..b73038d 100644 805+--- a/crypto/evp/Makefile 806++++ b/crypto/evp/Makefile 807+@@ -29,7 +29,7 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_cnf.c \ 808+ c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \ 809+ evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \ 810+ e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \ 811+- e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c 812++ e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c 813+ 814+ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ 815+ e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\ 816+@@ -42,7 +42,7 @@ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ 817+ c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \ 818+ evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \ 819+ e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o evp_fips.o \ 820+- e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o 821++ e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o 822+ 823+ SRC= $(LIBSRC) 824+ 825+diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c 826+index ef44f63..e4485e4 100644 827+--- a/crypto/evp/e_aes.c 828++++ b/crypto/evp/e_aes.c 829+@@ -814,44 +814,45 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) 830+ } 831+ } 832+ 833+-static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, 834+- const unsigned char *iv, int enc) 835++static ctr128_f aes_gcm_set_key(AES_KEY 
*aes_key, GCM128_CONTEXT *gcm_ctx, 836++ const unsigned char *key, size_t key_len) 837+ { 838+- EVP_AES_GCM_CTX *gctx = ctx->cipher_data; 839+- if (!iv && !key) 840+- return 1; 841+- if (key) 842+- { do { 843+ #ifdef BSAES_CAPABLE 844+ if (BSAES_CAPABLE) 845+ { 846+- AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); 847+- CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, 848++ AES_set_encrypt_key(key,key_len*8,aes_key); 849++ CRYPTO_gcm128_init(gcm_ctx,aes_key, 850+ (block128_f)AES_encrypt); 851+- gctx->ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks; 852+- break; 853++ return (ctr128_f)bsaes_ctr32_encrypt_blocks; 854+ } 855+- else 856+ #endif 857+ #ifdef VPAES_CAPABLE 858+ if (VPAES_CAPABLE) 859+ { 860+- vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); 861+- CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, 862++ vpaes_set_encrypt_key(key,key_len*8,aes_key); 863++ CRYPTO_gcm128_init(gcm_ctx,aes_key, 864+ (block128_f)vpaes_encrypt); 865+- gctx->ctr = NULL; 866+- break; 867++ return NULL; 868+ } 869+ #endif 870+- AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); 871+- CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)AES_encrypt); 872++ AES_set_encrypt_key(key, key_len*8, aes_key); 873++ CRYPTO_gcm128_init(gcm_ctx, aes_key, (block128_f)AES_encrypt); 874+ #ifdef AES_CTR_ASM 875+- gctx->ctr = (ctr128_f)AES_ctr32_encrypt; 876++ return (ctr128_f)AES_ctr32_encrypt; 877+ #else 878+- gctx->ctr = NULL; 879++ return NULL; 880+ #endif 881+- } while (0); 882++ } 883+ 884++static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, 885++ const unsigned char *iv, int enc) 886++ { 887++ EVP_AES_GCM_CTX *gctx = ctx->cipher_data; 888++ if (!iv && !key) 889++ return 1; 890++ if (key) 891++ { 892++ gctx->ctr = aes_gcm_set_key(&gctx->ks, &gctx->gcm, key, ctx->key_len); 893+ /* If we have an iv can set it directly, otherwise use 894+ * saved IV. 
895+ */ 896+@@ -1310,5 +1311,176 @@ BLOCK_CIPHER_custom(NID_aes,128,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) 897+ BLOCK_CIPHER_custom(NID_aes,192,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) 898+ BLOCK_CIPHER_custom(NID_aes,256,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) 899+ 900++#define EVP_AEAD_AES_128_GCM_TAG_LEN 16 901++ 902++struct aead_aes_128_gcm_ctx { 903++ union { double align; AES_KEY ks; } ks; 904++ GCM128_CONTEXT gcm; 905++ ctr128_f ctr; 906++ unsigned char tag_len; 907++}; 908++ 909++static int aead_aes_128_gcm_init(EVP_AEAD_CTX *ctx, 910++ const unsigned char *key, size_t key_len, size_t tag_len) 911++ { 912++ struct aead_aes_128_gcm_ctx *gcm_ctx; 913++ 914++ if (key_len*8 != 128) 915++ { 916++ EVPerr(EVP_F_AEAD_AES_128_GCM_INIT, EVP_R_BAD_KEY_LENGTH); 917++ return 0; /* EVP_AEAD_CTX_init should catch this. */ 918++ } 919++ 920++ if (tag_len == EVP_AEAD_DEFAULT_TAG_LENGTH) 921++ tag_len = EVP_AEAD_AES_128_GCM_TAG_LEN; 922++ 923++ if (tag_len > EVP_AEAD_AES_128_GCM_TAG_LEN) 924++ { 925++ EVPerr(EVP_F_AEAD_AES_128_GCM_INIT, EVP_R_TAG_TOO_LARGE); 926++ return 0; 927++ } 928++ 929++ gcm_ctx = OPENSSL_malloc(sizeof(struct aead_aes_128_gcm_ctx)); 930++ if (gcm_ctx == NULL) 931++ return 0; 932++ 933++#ifdef AESNI_CAPABLE 934++ if (AESNI_CAPABLE) 935++ { 936++ aesni_set_encrypt_key(key, key_len * 8, &gcm_ctx->ks.ks); 937++ CRYPTO_gcm128_init(&gcm_ctx->gcm, &gcm_ctx->ks.ks, 938++ (block128_f)aesni_encrypt); 939++ gcm_ctx->ctr = (ctr128_f) aesni_ctr32_encrypt_blocks; 940++ } 941++ else 942++#endif 943++ { 944++ gcm_ctx->ctr = aes_gcm_set_key(&gcm_ctx->ks.ks, &gcm_ctx->gcm, 945++ key, key_len); 946++ } 947++ gcm_ctx->tag_len = tag_len; 948++ ctx->aead_state = gcm_ctx; 949++ 950++ return 1; 951++ } 952++ 953++static void aead_aes_128_gcm_cleanup(EVP_AEAD_CTX *ctx) 954++ { 955++ struct aead_aes_128_gcm_ctx *gcm_ctx = ctx->aead_state; 956++ OPENSSL_free(gcm_ctx); 957++ } 958++ 959++static ssize_t aead_aes_128_gcm_seal(const EVP_AEAD_CTX *ctx, 960++ 
unsigned char *out, size_t max_out_len, 961++ const unsigned char *nonce, size_t nonce_len, 962++ const unsigned char *in, size_t in_len, 963++ const unsigned char *ad, size_t ad_len) 964++ { 965++ size_t bulk = 0; 966++ const struct aead_aes_128_gcm_ctx *gcm_ctx = ctx->aead_state; 967++ GCM128_CONTEXT gcm; 968++ 969++ if (max_out_len < in_len + gcm_ctx->tag_len) 970++ { 971++ EVPerr(EVP_F_AEAD_AES_128_GCM_SEAL, EVP_R_BUFFER_TOO_SMALL); 972++ return -1; 973++ } 974++ 975++ memcpy(&gcm, &gcm_ctx->gcm, sizeof(gcm)); 976++ CRYPTO_gcm128_setiv(&gcm, nonce, nonce_len); 977++ 978++ if (ad_len > 0 && CRYPTO_gcm128_aad(&gcm, ad, ad_len)) 979++ return -1; 980++ 981++ if (gcm_ctx->ctr) 982++ { 983++ if (CRYPTO_gcm128_encrypt_ctr32(&gcm, in + bulk, out + bulk, 984++ in_len - bulk, gcm_ctx->ctr)) 985++ return -1; 986++ } 987++ else 988++ { 989++ if (CRYPTO_gcm128_encrypt(&gcm, in + bulk, out + bulk, 990++ in_len - bulk)) 991++ return -1; 992++ } 993++ 994++ CRYPTO_gcm128_tag(&gcm, out + in_len, gcm_ctx->tag_len); 995++ return in_len + gcm_ctx->tag_len; 996++ } 997++ 998++static ssize_t aead_aes_128_gcm_open(const EVP_AEAD_CTX *ctx, 999++ unsigned char *out, size_t max_out_len, 1000++ const unsigned char *nonce, size_t nonce_len, 1001++ const unsigned char *in, size_t in_len, 1002++ const unsigned char *ad, size_t ad_len) 1003++ { 1004++ size_t bulk = 0; 1005++ const struct aead_aes_128_gcm_ctx *gcm_ctx = ctx->aead_state; 1006++ unsigned char tag[EVP_AEAD_AES_128_GCM_TAG_LEN]; 1007++ size_t out_len; 1008++ GCM128_CONTEXT gcm; 1009++ 1010++ if (in_len < gcm_ctx->tag_len) 1011++ { 1012++ EVPerr(EVP_F_AEAD_AES_128_GCM_OPEN, EVP_R_BAD_DECRYPT); 1013++ return -1; 1014++ } 1015++ 1016++ out_len = in_len - gcm_ctx->tag_len; 1017++ 1018++ if (max_out_len < out_len) 1019++ { 1020++ EVPerr(EVP_F_AEAD_AES_128_GCM_OPEN, EVP_R_BUFFER_TOO_SMALL); 1021++ return -1; 1022++ } 1023++ 1024++ memcpy(&gcm, &gcm_ctx->gcm, sizeof(gcm)); 1025++ CRYPTO_gcm128_setiv(&gcm, nonce, nonce_len); 1026++ 
1027++ if (CRYPTO_gcm128_aad(&gcm, ad, ad_len)) 1028++ return -1; 1029++ 1030++ if (gcm_ctx->ctr) 1031++ { 1032++ if (CRYPTO_gcm128_decrypt_ctr32(&gcm, in + bulk, out + bulk, 1033++ in_len-bulk-gcm_ctx->tag_len, 1034++ gcm_ctx->ctr)) 1035++ return -1; 1036++ } 1037++ else 1038++ { 1039++ if (CRYPTO_gcm128_decrypt(&gcm, in + bulk, out + bulk, 1040++ in_len - bulk - gcm_ctx->tag_len)) 1041++ return -1; 1042++ } 1043++ 1044++ CRYPTO_gcm128_tag(&gcm, tag, gcm_ctx->tag_len); 1045++ if (CRYPTO_memcmp(tag, in + out_len, gcm_ctx->tag_len) != 0) 1046++ { 1047++ EVPerr(EVP_F_AEAD_AES_128_GCM_OPEN, EVP_R_BAD_DECRYPT); 1048++ return -1; 1049++ } 1050++ 1051++ return out_len; 1052++ } 1053++ 1054++static const EVP_AEAD aead_aes_128_gcm = { 1055++ 16, /* key len */ 1056++ 12, /* nonce len */ 1057++ EVP_AEAD_AES_128_GCM_TAG_LEN, /* overhead */ 1058++ EVP_AEAD_AES_128_GCM_TAG_LEN, /* max tag length */ 1059++ 1060++ aead_aes_128_gcm_init, 1061++ aead_aes_128_gcm_cleanup, 1062++ aead_aes_128_gcm_seal, 1063++ aead_aes_128_gcm_open, 1064++}; 1065++ 1066++const EVP_AEAD *EVP_aead_aes_128_gcm() 1067++ { 1068++ return &aead_aes_128_gcm; 1069++ } 1070++ 1071+ #endif 1072+ #endif 1073+diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h 1074+index 5f18d4b..bd10642 100644 1075+--- a/crypto/evp/evp.h 1076++++ b/crypto/evp/evp.h 1077+@@ -1243,6 +1243,109 @@ void EVP_PKEY_meth_set_ctrl(EVP_PKEY_METHOD *pmeth, 1078+ int (*ctrl_str)(EVP_PKEY_CTX *ctx, 1079+ const char *type, const char *value)); 1080+ 1081++/* Authenticated Encryption with Additional Data. 1082++ * 1083++ * AEAD couples confidentiality and integrity in a single primtive. AEAD 1084++ * algorithms take a key and then can seal and open individual messages. Each 1085++ * message has a unique, per-message nonce and, optionally, additional data 1086++ * which is authenticated but not included in the output. 
*/ 1087++ 1088++struct evp_aead_st; 1089++typedef struct evp_aead_st EVP_AEAD; 1090++ 1091++#ifndef OPENSSL_NO_AES 1092++/* EVP_aead_aes_128_gcm is AES-128 in Galois Counter Mode. */ 1093++const EVP_AEAD *EVP_aead_aes_128_gcm(void); 1094++#endif 1095++ 1096++/* EVP_AEAD_key_length returns the length, in bytes, of the keys used by 1097++ * |aead|. */ 1098++size_t EVP_AEAD_key_length(const EVP_AEAD *aead); 1099++ 1100++/* EVP_AEAD_nonce_length returns the length, in bytes, of the per-message nonce 1101++ * for |aead|. */ 1102++size_t EVP_AEAD_nonce_length(const EVP_AEAD *aead); 1103++ 1104++/* EVP_AEAD_max_overhead returns the maximum number of additional bytes added 1105++ * by the act of sealing data with |aead|. */ 1106++size_t EVP_AEAD_max_overhead(const EVP_AEAD *aead); 1107++ 1108++/* EVP_AEAD_max_tag_len returns the maximum tag length when using |aead|. This 1109++ * is the largest value that can be passed as |tag_len| to 1110++ * |EVP_AEAD_CTX_init|. */ 1111++size_t EVP_AEAD_max_tag_len(const EVP_AEAD *aead); 1112++ 1113++/* An EVP_AEAD_CTX represents an AEAD algorithm configured with a specific key 1114++ * and message-independent IV. */ 1115++typedef struct evp_aead_ctx_st { 1116++ const EVP_AEAD *aead; 1117++ /* aead_state is an opaque pointer to whatever state the AEAD needs to 1118++ * maintain. */ 1119++ void *aead_state; 1120++} EVP_AEAD_CTX; 1121++ 1122++#define EVP_AEAD_DEFAULT_TAG_LENGTH 0 1123++ 1124++/* EVP_AEAD_CTX_init initializes |ctx| for the given AEAD algorithm from |impl|. 1125++ * The |impl| argument may be NULL to choose the default implementation. 1126++ * Authentication tags may be truncated by passing a size as |tag_len|. A 1127++ * |tag_len| of zero indicates the default tag length and this is defined as 1128++ * EVP_AEAD_DEFAULT_TAG_LENGTH for readability. 1129++ * Returns 1 on success. Otherwise returns 0 and pushes to the error stack.
*/ 1130++int EVP_AEAD_CTX_init(EVP_AEAD_CTX *ctx, const EVP_AEAD *aead, 1131++ const unsigned char *key, size_t key_len, 1132++ size_t tag_len, ENGINE *impl); 1133++ 1134++/* EVP_AEAD_CTX_cleanup frees any data allocated by |ctx|. */ 1135++void EVP_AEAD_CTX_cleanup(EVP_AEAD_CTX *ctx); 1136++ 1137++/* EVP_AEAD_CTX_seal encrypts and authenticates |in_len| bytes from |in| and 1138++ * authenticates |ad_len| bytes from |ad| and writes the result to |out|, 1139++ * returning the number of bytes written, or -1 on error. 1140++ * 1141++ * This function may be called (with the same EVP_AEAD_CTX) concurrently with 1142++ * itself or EVP_AEAD_CTX_open. 1143++ * 1144++ * At most |max_out_len| bytes are written to |out| and, in order to ensure 1145++ * success, |max_out_len| should be |in_len| plus the result of 1146++ * EVP_AEAD_max_overhead. 1147++ * 1148++ * The length of |nonce|, |nonce_len|, must be equal to the result of 1149++ * EVP_AEAD_nonce_length for this AEAD. 1150++ * 1151++ * EVP_AEAD_CTX_seal never results in a partial output. If |max_out_len| is 1152++ * insufficient, -1 will be returned. 1153++ * 1154++ * If |in| and |out| alias then |out| must be <= |in|. */ 1155++ssize_t EVP_AEAD_CTX_seal(const EVP_AEAD_CTX *ctx, 1156++ unsigned char *out, size_t max_out_len, 1157++ const unsigned char *nonce, size_t nonce_len, 1158++ const unsigned char *in, size_t in_len, 1159++ const unsigned char *ad, size_t ad_len); 1160++ 1161++/* EVP_AEAD_CTX_open authenticates |in_len| bytes from |in| and |ad_len| bytes 1162++ * from |ad| and decrypts at most |in_len| bytes into |out|. It returns the 1163++ * number of bytes written, or -1 on error. 1164++ * 1165++ * This function may be called (with the same EVP_AEAD_CTX) concurrently with 1166++ * itself or EVP_AEAD_CTX_seal. 1167++ * 1168++ * At most |in_len| bytes are written to |out|. In order to ensure success, 1169++ * |max_out_len| should be at least |in_len|.
1170++ * 1171++ * The length of |nonce|, |nonce_len|, must be equal to the result of 1172++ * EVP_AEAD_nonce_length for this AEAD. 1173++ * 1174++ * EVP_AEAD_CTX_open never results in a partial output. If |max_out_len| is 1175++ * insufficient, -1 will be returned. 1176++ * 1177++ * If |in| and |out| alias then |out| must be <= |in|. */ 1178++ssize_t EVP_AEAD_CTX_open(const EVP_AEAD_CTX *ctx, 1179++ unsigned char *out, size_t max_out_len, 1180++ const unsigned char *nonce, size_t nonce_len, 1181++ const unsigned char *in, size_t in_len, 1182++ const unsigned char *ad, size_t ad_len); 1183++ 1184+ void EVP_add_alg_module(void); 1185+ 1186+ /* BEGIN ERROR CODES */ 1187+@@ -1254,6 +1357,11 @@ void ERR_load_EVP_strings(void); 1188+ /* Error codes for the EVP functions. */ 1189+ 1190+ /* Function codes. */ 1191++#define EVP_F_AEAD_AES_128_GCM_INIT 183 1192++#define EVP_F_AEAD_AES_128_GCM_OPEN 181 1193++#define EVP_F_AEAD_AES_128_GCM_SEAL 182 1194++#define EVP_F_AEAD_CTX_OPEN 185 1195++#define EVP_F_AEAD_CTX_SEAL 186 1196+ #define EVP_F_AESNI_INIT_KEY 165 1197+ #define EVP_F_AESNI_XTS_CIPHER 176 1198+ #define EVP_F_AES_INIT_KEY 133 1199+@@ -1268,6 +1376,7 @@ void ERR_load_EVP_strings(void); 1200+ #define EVP_F_DSA_PKEY2PKCS8 135 1201+ #define EVP_F_ECDSA_PKEY2PKCS8 129 1202+ #define EVP_F_ECKEY_PKEY2PKCS8 132 1203++#define EVP_F_EVP_AEAD_CTX_INIT 180 1204+ #define EVP_F_EVP_CIPHERINIT_EX 123 1205+ #define EVP_F_EVP_CIPHER_CTX_COPY 163 1206+ #define EVP_F_EVP_CIPHER_CTX_CTRL 124 1207+@@ -1383,10 +1492,12 @@ void ERR_load_EVP_strings(void); 1208+ #define EVP_R_NO_VERIFY_FUNCTION_CONFIGURED 105 1209+ #define EVP_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE 150 1210+ #define EVP_R_OPERATON_NOT_INITIALIZED 151 1211++#define EVP_R_OUTPUT_ALIASES_INPUT 170 1212+ #define EVP_R_PKCS8_UNKNOWN_BROKEN_TYPE 117 1213+ #define EVP_R_PRIVATE_KEY_DECODE_ERROR 145 1214+ #define EVP_R_PRIVATE_KEY_ENCODE_ERROR 146 1215+ #define EVP_R_PUBLIC_KEY_NOT_RSA 106 1216++#define EVP_R_TAG_TOO_LARGE 
171 1217+ #define EVP_R_TOO_LARGE 164 1218+ #define EVP_R_UNKNOWN_CIPHER 160 1219+ #define EVP_R_UNKNOWN_DIGEST 161 1220+diff --git a/crypto/evp/evp_aead.c b/crypto/evp/evp_aead.c 1221+new file mode 100644 1222+index 0000000..91da561 1223+--- /dev/null 1224++++ b/crypto/evp/evp_aead.c 1225+@@ -0,0 +1,192 @@ 1226++/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 1227++ * All rights reserved. 1228++ * 1229++ * This package is an SSL implementation written 1230++ * by Eric Young (eay@cryptsoft.com). 1231++ * The implementation was written so as to conform with Netscapes SSL. 1232++ * 1233++ * This library is free for commercial and non-commercial use as long as 1234++ * the following conditions are aheared to. The following conditions 1235++ * apply to all code found in this distribution, be it the RC4, RSA, 1236++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation 1237++ * included with this distribution is covered by the same copyright terms 1238++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). 1239++ * 1240++ * Copyright remains Eric Young's, and as such any Copyright notices in 1241++ * the code are not to be removed. 1242++ * If this package is used in a product, Eric Young should be given attribution 1243++ * as the author of the parts of the library used. 1244++ * This can be in the form of a textual message at program startup or 1245++ * in documentation (online or textual) provided with the package. 1246++ * 1247++ * Redistribution and use in source and binary forms, with or without 1248++ * modification, are permitted provided that the following conditions 1249++ * are met: 1250++ * 1. Redistributions of source code must retain the copyright 1251++ * notice, this list of conditions and the following disclaimer. 1252++ * 2. 
Redistributions in binary form must reproduce the above copyright 1253++ * notice, this list of conditions and the following disclaimer in the 1254++ * documentation and/or other materials provided with the distribution. 1255++ * 3. All advertising materials mentioning features or use of this software 1256++ * must display the following acknowledgement: 1257++ * "This product includes cryptographic software written by 1258++ * Eric Young (eay@cryptsoft.com)" 1259++ * The word 'cryptographic' can be left out if the rouines from the library 1260++ * being used are not cryptographic related :-). 1261++ * 4. If you include any Windows specific code (or a derivative thereof) from 1262++ * the apps directory (application code) you must include an acknowledgement: 1263++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 1264++ * 1265++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 1266++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1267++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1268++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1269++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1270++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 1271++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 1272++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 1273++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 1274++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 1275++ * SUCH DAMAGE. 1276++ * 1277++ * The licence and distribution terms for any publically available version or 1278++ * derivative of this code cannot be changed. i.e. this code cannot simply be 1279++ * copied and put under another distribution licence 1280++ * [including the GNU Public Licence.] 
1281++ */ 1282++ 1283++#include <limits.h> 1284++#include <string.h> 1285++ 1286++#include <openssl/evp.h> 1287++#include <openssl/err.h> 1288++ 1289++#include "evp_locl.h" 1290++ 1291++size_t EVP_AEAD_key_length(const EVP_AEAD *aead) 1292++ { 1293++ return aead->key_len; 1294++ } 1295++ 1296++size_t EVP_AEAD_nonce_length(const EVP_AEAD *aead) 1297++ { 1298++ return aead->nonce_len; 1299++ } 1300++ 1301++size_t EVP_AEAD_max_overhead(const EVP_AEAD *aead) 1302++ { 1303++ return aead->overhead; 1304++ } 1305++ 1306++size_t EVP_AEAD_max_tag_len(const EVP_AEAD *aead) 1307++ { 1308++ return aead->max_tag_len; 1309++ } 1310++ 1311++int EVP_AEAD_CTX_init(EVP_AEAD_CTX *ctx, const EVP_AEAD *aead, 1312++ const unsigned char *key, size_t key_len, 1313++ size_t tag_len, ENGINE *impl) 1314++ { 1315++ ctx->aead = aead; 1316++ if (key_len != aead->key_len) 1317++ { 1318++ EVPerr(EVP_F_EVP_AEAD_CTX_INIT,EVP_R_UNSUPPORTED_KEY_SIZE); 1319++ return 0; 1320++ } 1321++ return aead->init(ctx, key, key_len, tag_len); 1322++ } 1323++ 1324++void EVP_AEAD_CTX_cleanup(EVP_AEAD_CTX *ctx) 1325++ { 1326++ if (ctx->aead == NULL) 1327++ return; 1328++ ctx->aead->cleanup(ctx); 1329++ ctx->aead = NULL; 1330++ } 1331++ 1332++/* check_alias returns 0 if |out| points within the buffer determined by |in| 1333++ * and |in_len| and 1 otherwise. 1334++ * 1335++ * When processing, there's only an issue if |out| points within in[:in_len] 1336++ * and isn't equal to |in|. If that's the case then writing the output will 1337++ * stomp input that hasn't been read yet. 1338++ * 1339++ * This function checks for that case. 
*/ 1340++static int check_alias(const unsigned char *in, size_t in_len, 1341++ const unsigned char *out) 1342++ { 1343++ if (out <= in) 1344++ return 1; 1345++ if (in + in_len <= out) 1346++ return 1; 1347++ return 0; 1348++ } 1349++ 1350++ssize_t EVP_AEAD_CTX_seal(const EVP_AEAD_CTX *ctx, 1351++ unsigned char *out, size_t max_out_len, 1352++ const unsigned char *nonce, size_t nonce_len, 1353++ const unsigned char *in, size_t in_len, 1354++ const unsigned char *ad, size_t ad_len) 1355++ { 1356++ size_t possible_out_len = in_len + ctx->aead->overhead; 1357++ ssize_t r; 1358++ 1359++ if (possible_out_len < in_len /* overflow */ || 1360++ possible_out_len > SSIZE_MAX /* return value cannot be 1361++ represented */) 1362++ { 1363++ EVPerr(EVP_F_AEAD_CTX_SEAL, EVP_R_TOO_LARGE); 1364++ goto error; 1365++ } 1366++ 1367++ if (!check_alias(in, in_len, out)) 1368++ { 1369++ EVPerr(EVP_F_AEAD_CTX_SEAL, EVP_R_OUTPUT_ALIASES_INPUT); 1370++ goto error; 1371++ } 1372++ 1373++ r = ctx->aead->seal(ctx, out, max_out_len, nonce, nonce_len, 1374++ in, in_len, ad, ad_len); 1375++ if (r >= 0) 1376++ return r; 1377++ 1378++error: 1379++ /* In the event of an error, clear the output buffer so that a caller 1380++ * that doesn't check the return value doesn't send raw data. */ 1381++ memset(out, 0, max_out_len); 1382++ return -1; 1383++ } 1384++ 1385++ssize_t EVP_AEAD_CTX_open(const EVP_AEAD_CTX *ctx, 1386++ unsigned char *out, size_t max_out_len, 1387++ const unsigned char *nonce, size_t nonce_len, 1388++ const unsigned char *in, size_t in_len, 1389++ const unsigned char *ad, size_t ad_len) 1390++ { 1391++ ssize_t r; 1392++ 1393++ if (in_len > SSIZE_MAX) 1394++ { 1395++ EVPerr(EVP_F_AEAD_CTX_OPEN, EVP_R_TOO_LARGE); 1396++ goto error; /* may not be able to represent return value. 
*/ 1397++ } 1398++ 1399++ if (!check_alias(in, in_len, out)) 1400++ { 1401++ EVPerr(EVP_F_AEAD_CTX_OPEN, EVP_R_OUTPUT_ALIASES_INPUT); 1402++ goto error; 1403++ } 1404++ 1405++ r = ctx->aead->open(ctx, out, max_out_len, nonce, nonce_len, 1406++ in, in_len, ad, ad_len); 1407++ 1408++ if (r >= 0) 1409++ return r; 1410++ 1411++error: 1412++ /* In the event of an error, clear the output buffer so that a caller 1413++ * that doesn't check the return value doesn't try and process bad 1414++ * data. */ 1415++ memset(out, 0, max_out_len); 1416++ return -1; 1417++ } 1418+diff --git a/crypto/evp/evp_err.c b/crypto/evp/evp_err.c 1419+index 08eab98..c47969c 100644 1420+--- a/crypto/evp/evp_err.c 1421++++ b/crypto/evp/evp_err.c 1422+@@ -70,6 +70,11 @@ 1423+ 1424+ static ERR_STRING_DATA EVP_str_functs[]= 1425+ { 1426++{ERR_FUNC(EVP_F_AEAD_AES_128_GCM_INIT), "AEAD_AES_128_GCM_INIT"}, 1427++{ERR_FUNC(EVP_F_AEAD_AES_128_GCM_OPEN), "AEAD_AES_128_GCM_OPEN"}, 1428++{ERR_FUNC(EVP_F_AEAD_AES_128_GCM_SEAL), "AEAD_AES_128_GCM_SEAL"}, 1429++{ERR_FUNC(EVP_F_AEAD_CTX_OPEN), "AEAD_CTX_OPEN"}, 1430++{ERR_FUNC(EVP_F_AEAD_CTX_SEAL), "AEAD_CTX_SEAL"}, 1431+ {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, 1432+ {ERR_FUNC(EVP_F_AESNI_XTS_CIPHER), "AESNI_XTS_CIPHER"}, 1433+ {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"}, 1434+@@ -84,6 +89,7 @@ static ERR_STRING_DATA EVP_str_functs[]= 1435+ {ERR_FUNC(EVP_F_DSA_PKEY2PKCS8), "DSA_PKEY2PKCS8"}, 1436+ {ERR_FUNC(EVP_F_ECDSA_PKEY2PKCS8), "ECDSA_PKEY2PKCS8"}, 1437+ {ERR_FUNC(EVP_F_ECKEY_PKEY2PKCS8), "ECKEY_PKEY2PKCS8"}, 1438++{ERR_FUNC(EVP_F_EVP_AEAD_CTX_INIT), "EVP_AEAD_CTX_init"}, 1439+ {ERR_FUNC(EVP_F_EVP_CIPHERINIT_EX), "EVP_CipherInit_ex"}, 1440+ {ERR_FUNC(EVP_F_EVP_CIPHER_CTX_COPY), "EVP_CIPHER_CTX_copy"}, 1441+ {ERR_FUNC(EVP_F_EVP_CIPHER_CTX_CTRL), "EVP_CIPHER_CTX_ctrl"}, 1442+@@ -202,10 +208,12 @@ static ERR_STRING_DATA EVP_str_reasons[]= 1443+ {ERR_REASON(EVP_R_NO_VERIFY_FUNCTION_CONFIGURED),"no verify function configured"}, 1444+ 
{ERR_REASON(EVP_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE),"operation not supported for this keytype"}, 1445+ {ERR_REASON(EVP_R_OPERATON_NOT_INITIALIZED),"operaton not initialized"}, 1446++{ERR_REASON(EVP_R_OUTPUT_ALIASES_INPUT) ,"output aliases input"}, 1447+ {ERR_REASON(EVP_R_PKCS8_UNKNOWN_BROKEN_TYPE),"pkcs8 unknown broken type"}, 1448+ {ERR_REASON(EVP_R_PRIVATE_KEY_DECODE_ERROR),"private key decode error"}, 1449+ {ERR_REASON(EVP_R_PRIVATE_KEY_ENCODE_ERROR),"private key encode error"}, 1450+ {ERR_REASON(EVP_R_PUBLIC_KEY_NOT_RSA) ,"public key not rsa"}, 1451++{ERR_REASON(EVP_R_TAG_TOO_LARGE) ,"tag too large"}, 1452+ {ERR_REASON(EVP_R_TOO_LARGE) ,"too large"}, 1453+ {ERR_REASON(EVP_R_UNKNOWN_CIPHER) ,"unknown cipher"}, 1454+ {ERR_REASON(EVP_R_UNKNOWN_DIGEST) ,"unknown digest"}, 1455+diff --git a/crypto/evp/evp_locl.h b/crypto/evp/evp_locl.h 1456+index 08c0a66..c0f9fdf 100644 1457+--- a/crypto/evp/evp_locl.h 1458++++ b/crypto/evp/evp_locl.h 1459+@@ -348,6 +348,30 @@ int PKCS5_v2_PBKDF2_keyivgen(EVP_CIPHER_CTX *ctx, const char *pass, int passlen, 1460+ ASN1_TYPE *param, 1461+ const EVP_CIPHER *c, const EVP_MD *md, int en_de); 1462+ 1463++/* EVP_AEAD represents a specific AEAD algorithm. 
*/ 1464++struct evp_aead_st { 1465++ unsigned char key_len; 1466++ unsigned char nonce_len; 1467++ unsigned char overhead; 1468++ unsigned char max_tag_len; 1469++ 1470++ int (*init) (struct evp_aead_ctx_st*, const unsigned char *key, 1471++ size_t key_len, size_t tag_len); 1472++ void (*cleanup) (struct evp_aead_ctx_st*); 1473++ 1474++ ssize_t (*seal) (const struct evp_aead_ctx_st *ctx, 1475++ unsigned char *out, size_t max_out_len, 1476++ const unsigned char *nonce, size_t nonce_len, 1477++ const unsigned char *in, size_t in_len, 1478++ const unsigned char *ad, size_t ad_len); 1479++ 1480++ ssize_t (*open) (const struct evp_aead_ctx_st *ctx, 1481++ unsigned char *out, size_t max_out_len, 1482++ const unsigned char *nonce, size_t nonce_len, 1483++ const unsigned char *in, size_t in_len, 1484++ const unsigned char *ad, size_t ad_len); 1485++}; 1486++ 1487+ #ifdef OPENSSL_FIPS 1488+ 1489+ #ifdef OPENSSL_DOING_MAKEDEPEND 1490+diff --git a/doc/crypto/EVP_AEAD_CTX_init.pod b/doc/crypto/EVP_AEAD_CTX_init.pod 1491+new file mode 100644 1492+index 0000000..20e455d 1493+--- /dev/null 1494++++ b/doc/crypto/EVP_AEAD_CTX_init.pod 1495+@@ -0,0 +1,96 @@ 1496++=pod 1497++ 1498++=head1 NAME 1499++ 1500++EVP_AEAD_CTX_init, EVP_AEAD_CTX_cleanup, EVP_AEAD_CTX_seal, EVP_AEAD_CTX_open - authenticated encryption functions. 
1501++ 1502++=head1 SYNOPSIS 1503++ 1504++ #include <openssl/evp.h> 1505++ 1506++ int EVP_AEAD_CTX_init(EVP_AEAD_CTX *ctx, const EVP_AEAD *aead, 1507++ const unsigned char *key, size_t key_len, 1508++ size_t tag_len, ENGINE *impl); 1509++ void EVP_AEAD_CTX_cleanup(EVP_AEAD_CTX *ctx); 1510++ ssize_t EVP_AEAD_CTX_seal(const EVP_AEAD_CTX *ctx, 1511++ unsigned char *out, size_t max_out_len, 1512++ const unsigned char *nonce, size_t nonce_len, 1513++ const unsigned char *in, size_t in_len, 1514++ const unsigned char *ad, size_t ad_len); 1515++ ssize_t EVP_AEAD_CTX_open(const EVP_AEAD_CTX *ctx, 1516++ unsigned char *out, size_t max_out_len, 1517++ const unsigned char *nonce, size_t nonce_len, 1518++ const unsigned char *in, size_t in_len, 1519++ const unsigned char *ad, size_t ad_len); 1520++ 1521++=head1 DESCRIPTION 1522++ 1523++The EVP_AEAD_CTX_init() function initialises an B<EVP_AEAD_CTX> structure and 1524++performs any precomputation needed to use B<aead> with B<key>. The length of 1525++the key, B<key_len>, is given in bytes. 1526++ 1527++The B<tag_len> argument contains the length of the tags, in bytes, and allows 1528++for the processing of truncated authenticators. A zero value indicates that the 1529++default tag length should be used and this is defined as 1530++C<EVP_AEAD_DEFAULT_TAG_LENGTH> in order to make the code clear. Using truncated 1531++tags increases an attacker's chance of creating a valid forgery. Be aware that 1532++the attacker's chance may increase more than exponentially as would naively be 1533++expected. 1534++ 1535++When no longer needed, the initialised B<EVP_AEAD_CTX> structure must be passed 1536++to EVP_AEAD_CTX_cleanup(), which will deallocate any memory used. 1537++ 1538++With an B<EVP_AEAD_CTX> in hand, one can seal and open messages. These 1539++operations are intended to meet the standard notions of privacy and 1540++authenticity for authenticated encryption. 
For formal definitions see I<Bellare 1541++and Namprempre>, "Authenticated encryption: relations among notions and 1542++analysis of the generic composition paradigm," Lecture Notes in Computer 1543++Science B<1976> (2000), 531–545, 1544++L<http://www-cse.ucsd.edu/~mihir/papers/oem.html>. 1545++ 1546++When sealing messages, a nonce must be given. The length of the nonce is fixed 1547++by the AEAD in use and is returned by EVP_AEAD_nonce_length(). I<The nonce must 1548++be unique for all messages with the same key>. This is critically important - 1549++nonce reuse may completely undermine the security of the AEAD. Nonces may be 1550++predictable and public, so long as they are unique. Uniqueness may be achieved 1551++with a simple counter or, if long enough, may be generated randomly. The nonce 1552++must be passed into the "open" operation by the receiver so must either be 1553++implicit (e.g. a counter), or must be transmitted along with the sealed message. 1554++ 1555++The "seal" and "open" operations are atomic - an entire message must be 1556++encrypted or decrypted in a single call. Large messages may have to be split up 1557++in order to accommodate this. When doing so, be mindful of the need not to 1558++repeat nonces and the possibility that an attacker could duplicate, reorder or 1559++drop message chunks. For example, using a single key for a given (large) 1560++message and sealing chunks with nonces counting from zero would be secure as 1561++long as the number of chunks was securely transmitted. (Otherwise an attacker 1562++could truncate the message by dropping chunks from the end.) 1563++ 1564++The number of chunks could be transmitted by prefixing it to the plaintext, for 1565++example. This also assumes that no other message would ever use the same key 1566++otherwise the rule that nonces must be unique for a given key would be 1567++violated.
1568++ 1569++The "seal" and "open" operations also permit additional data to be 1570++authenticated via the B<ad> parameter. This data is not included in the 1571++ciphertext and must be identical for both the "seal" and "open" call. This 1572++permits implicit context to be authenticated but may be C<NULL> if not needed. 1573++ 1574++The "seal" and "open" operations may work inplace if the B<out> and B<in> 1575++arguments are equal. They may also be used to shift the data left inside the 1576++same buffer if B<out> is less than B<in>. However, B<out> may not point inside 1577++the input data otherwise the input may be overwritten before it has been read. 1578++This case will cause an error. 1579++ 1580++=head1 RETURN VALUES 1581++ 1582++The "seal" and "open" operations return an C<ssize_t> with value -1 on error, 1583++otherwise they return the number of output bytes written. An error will be 1584++returned if the input length is large enough that the output size exceeds the 1585++range of a C<ssize_t>. 1586++ 1587++=head1 HISTORY 1588++ 1589++These functions were first added to OpenSSL 1.0.2. 1590++ 1591++=cut 1592+-- 1593+1.8.4.1 1594+ 1595diff -burN android-openssl-lhash2/patches/chacha20poly1305.patch android-openssl/patches/chacha20poly1305.patch 1596--- android-openssl-lhash2/patches/chacha20poly1305.patch 1969-12-31 19:00:00.000000000 -0500 1597+++ android-openssl/patches/chacha20poly1305.patch 2013-11-05 15:15:28.454480948 -0500 1598@@ -0,0 +1,5740 @@ 1599+From 2688f00904e4ffd647afcff69bb8fe6df8c5902b Mon Sep 17 00:00:00 2001 1600+From: Adam Langley <agl@chromium.org> 1601+Date: Mon, 9 Sep 2013 12:13:24 -0400 1602+Subject: [PATCH 43/52] chacha20poly1305 1603+ 1604+Add support for Chacha20 + Poly1305. 
1605+--- 1606+ .gitignore | 1 + 1607+ Configure | 56 +- 1608+ Makefile.org | 6 +- 1609+ apps/speed.c | 64 +- 1610+ crypto/chacha/Makefile | 80 ++ 1611+ crypto/chacha/chacha.h | 85 ++ 1612+ crypto/chacha/chacha_enc.c | 167 +++ 1613+ crypto/chacha/chacha_vec.c | 345 +++++++ 1614+ crypto/chacha/chachatest.c | 211 ++++ 1615+ crypto/evp/Makefile | 35 +- 1616+ crypto/evp/e_chacha20poly1305.c | 261 +++++ 1617+ crypto/evp/evp.h | 8 + 1618+ crypto/evp/evp_err.c | 3 + 1619+ crypto/poly1305/Makefile | 81 ++ 1620+ crypto/poly1305/poly1305.c | 320 ++++++ 1621+ crypto/poly1305/poly1305.h | 88 ++ 1622+ crypto/poly1305/poly1305_arm.c | 335 ++++++ 1623+ crypto/poly1305/poly1305_arm_asm.s | 2009 ++++++++++++++++++++++++++++++++++++ 1624+ crypto/poly1305/poly1305_vec.c | 733 +++++++++++++ 1625+ crypto/poly1305/poly1305test.c | 166 +++ 1626+ ssl/s3_lib.c | 75 +- 1627+ ssl/s3_pkt.c | 5 +- 1628+ ssl/ssl.h | 1 + 1629+ ssl/ssl_ciph.c | 16 +- 1630+ ssl/ssl_locl.h | 10 + 1631+ ssl/t1_enc.c | 30 +- 1632+ ssl/tls1.h | 8 + 1633+ test/Makefile | 23 +- 1634+ 28 files changed, 5166 insertions(+), 56 deletions(-) 1635+ create mode 100644 crypto/chacha/Makefile 1636+ create mode 100644 crypto/chacha/chacha.h 1637+ create mode 100644 crypto/chacha/chacha_enc.c 1638+ create mode 100644 crypto/chacha/chacha_vec.c 1639+ create mode 100644 crypto/chacha/chachatest.c 1640+ create mode 100644 crypto/evp/e_chacha20poly1305.c 1641+ create mode 100644 crypto/poly1305/Makefile 1642+ create mode 100644 crypto/poly1305/poly1305.c 1643+ create mode 100644 crypto/poly1305/poly1305.h 1644+ create mode 100644 crypto/poly1305/poly1305_arm.c 1645+ create mode 100644 crypto/poly1305/poly1305_arm_asm.s 1646+ create mode 100644 crypto/poly1305/poly1305_vec.c 1647+ create mode 100644 crypto/poly1305/poly1305test.c 1648+ 1649+diff --git a/openssl/ssl/ssl_ciph.c b/openssl/ssl/ssl_ciph.c 1650+index db85b29..cebb18a 100644 1651+--- a/ssl/ssl_ciph.c 1652++++ b/ssl/ssl_ciph.c 1653+@@ -1442,7 +1442,9 @@ STACK_OF(SSL_CIPHER) 
*ssl_create_cipher_list(const SSL_METHOD *ssl_method, 1654+ ssl_cipher_apply_rule(0, SSL_kEECDH, 0, 0, 0, 0, 0, CIPHER_ADD, -1, &head, &tail); 1655+ ssl_cipher_apply_rule(0, SSL_kEECDH, 0, 0, 0, 0, 0, CIPHER_DEL, -1, &head, &tail); 1656+ 1657+- /* AES is our preferred symmetric cipher */ 1658++ /* CHACHA20 is fast and safe on all hardware and is thus our preferred 1659++ * symmetric cipher, with AES second. */ 1660++ ssl_cipher_apply_rule(0, 0, 0, SSL_CHACHA20POLY1305, 0, 0, 0, CIPHER_ADD, -1, &head, &tail); 1661+ ssl_cipher_apply_rule(0, 0, 0, SSL_AES, 0, 0, 0, CIPHER_ADD, -1, &head, &tail); 1662+ 1663+ /* Temporarily enable everything else for sorting */ 1664+diff --git a/Configure b/Configure 1665+index 9c803dc..1b95384 100755 1666+--- a/Configure 1667++++ b/Configure 1668+@@ -124,24 +124,24 @@ my $tlib="-lnsl -lsocket"; 1669+ my $bits1="THIRTY_TWO_BIT "; 1670+ my $bits2="SIXTY_FOUR_BIT "; 1671+ 1672+-my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o:ghash-x86.o:"; 1673++my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o:ghash-x86.o:::"; 1674+ 1675+ my $x86_elf_asm="$x86_asm:elf"; 1676+ 1677+-my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:"; 1678+-my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o 
rc4_skey.o:::::ghash-ia64.o::void"; 1679+-my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; 1680+-my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; 1681+-my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void"; 1682+-my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::"; 1683+-my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::"; 1684+-my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o:"; 1685+-my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void"; 1686+-my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32"; 1687+-my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64"; 1688+-my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::"; 1689+-my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::"; 1690+-my $no_asm=":::::::::::::::void"; 1691++my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o 
sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o::chacha_vec.o:poly1305_vec.o"; 1692++my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::::void"; 1693++my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::::void"; 1694++my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::::void"; 1695++my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::::void"; 1696++my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::::"; 1697++my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::::"; 1698++my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::::ghash-s390x.o:"; 1699++my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::chacha_vec.o:poly1305_arm.o poly1305_arm_asm.o:void"; 1700++my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::::32"; 1701++my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::::64"; 1702++my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::::"; 1703++my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o 
aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::::"; 1704++my $no_asm=":::::::::::::::::void"; 1705+ 1706+ # As for $BSDthreads. Idea is to maintain "collective" set of flags, 1707+ # which would cover all BSD flavors. -pthread applies to them all, 1708+@@ -152,7 +152,7 @@ my $no_asm=":::::::::::::::void"; 1709+ # seems to be sufficient? 1710+ my $BSDthreads="-pthread -D_THREAD_SAFE -D_REENTRANT"; 1711+ 1712+-#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $bn_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $modes_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $multilib 1713++#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $bn_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $modes_obj : $engines_obj : $chacha_obj : $poly1305_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $multilib : 1714+ 1715+ my %table=( 1716+ # File 'TABLE' (created by 'make TABLE') contains the data from this list, 1717+@@ -647,6 +647,8 @@ my $idx_wp_obj = $idx++; 1718+ my $idx_cmll_obj = $idx++; 1719+ my $idx_modes_obj = $idx++; 1720+ my $idx_engines_obj = $idx++; 1721++my $idx_chacha_obj = $idx++; 1722++my $idx_poly1305_obj = $idx++; 1723+ my $idx_perlasm_scheme = $idx++; 1724+ my $idx_dso_scheme = $idx++; 1725+ my $idx_shared_target = $idx++; 1726+@@ -692,6 +694,8 @@ my $aes_enc="aes_core.o aes_cbc.o"; 1727+ my $bf_enc ="bf_enc.o"; 1728+ my $cast_enc="c_enc.o"; 1729+ my $rc4_enc="rc4_enc.o rc4_skey.o"; 1730++my $chacha_enc="chacha_enc.o"; 1731++my $poly1305 ="poly1305.o"; 1732+ my $rc5_enc="rc5_enc.o"; 1733+ my $md5_obj=""; 1734+ my $sha1_obj=""; 1735+@@ -1144,7 +1148,7 @@ 
$openssldir=$prefix . "/" . $openssldir if $openssldir !~ /(^\/|^[a-zA-Z]:[\\\/] 1736+ 1737+ print "IsMK1MF=$IsMK1MF\n"; 1738+ 1739+-my @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); 1740++my @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); 1741+ my $cc = $fields[$idx_cc]; 1742+ # Allow environment CC to override compiler... 1743+ if($ENV{CC}) { 1744+@@ -1181,6 +1185,8 @@ my $ranlib = $ENV{'RANLIB'} || $fields[$idx_ranlib]; 1745+ my $ar = $ENV{'AR'} || "ar"; 1746+ my $arflags = $fields[$idx_arflags]; 1747+ my $multilib = $fields[$idx_multilib]; 1748++my $chacha_obj = $fields[$idx_chacha_obj]; 1749++my $poly1305_obj = $fields[$idx_poly1305_obj]; 1750+ 1751+ # if $prefix/lib$multilib is not an existing directory, then 1752+ # assume that it's not searched by linker automatically, in 1753+@@ -1477,6 +1483,8 @@ $des_obj=$des_enc unless ($des_obj =~ /\.o$/); 1754+ $bf_obj=$bf_enc unless ($bf_obj =~ /\.o$/); 1755+ $cast_obj=$cast_enc unless ($cast_obj =~ /\.o$/); 1756+ $rc4_obj=$rc4_enc unless ($rc4_obj =~ /\.o$/); 1757++$chacha_obj=$chacha_enc unless ($chacha_obj =~ /\.o$/); 1758++$poly1305_obj=$poly1305 unless ($poly1305_obj =~ /\.o$/); 1759+ $rc5_obj=$rc5_enc unless ($rc5_obj =~ /\.o$/); 1760+ if ($sha1_obj =~ /\.o$/) 1761+ { 1762+@@ -1637,6 +1645,8 @@ while (<IN>) 1763+ s/^BF_ENC=.*$/BF_ENC= $bf_obj/; 1764+ s/^CAST_ENC=.*$/CAST_ENC= $cast_obj/; 1765+ s/^RC4_ENC=.*$/RC4_ENC= $rc4_obj/; 1766++ s/^CHACHA_ENC=.*$/CHACHA_ENC= $chacha_obj/; 1767++ s/^POLY1305=.*$/POLY1305= $poly1305_obj/; 1768+ s/^RC5_ENC=.*$/RC5_ENC= $rc5_obj/; 1769+ s/^MD5_ASM_OBJ=.*$/MD5_ASM_OBJ= $md5_obj/; 1770+ s/^SHA1_ASM_OBJ=.*$/SHA1_ASM_OBJ= $sha1_obj/; 1771+@@ -1698,6 +1708,8 @@ print "AES_ENC =$aes_obj\n"; 1772+ print "BF_ENC =$bf_obj\n"; 1773+ print "CAST_ENC =$cast_obj\n"; 1774+ print "RC4_ENC =$rc4_obj\n"; 1775++print "CHACHA_ENC =$chacha_obj\n"; 1776++print "POLY1305 =$poly1305_obj\n"; 1777+ print "RC5_ENC =$rc5_obj\n"; 1778+ print "MD5_OBJ_ASM =$md5_obj\n"; 1779+ 
print "SHA1_OBJ_ASM =$sha1_obj\n"; 1780+@@ -2096,11 +2108,11 @@ sub print_table_entry 1781+ 1782+ (my $cc,my $cflags,my $unistd,my $thread_cflag,my $sys_id,my $lflags, 1783+ my $bn_ops,my $cpuid_obj,my $bn_obj,my $des_obj,my $aes_obj, my $bf_obj, 1784+- my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $rmd160_obj, 1785+- my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $engines_obj, 1786++ my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $chacha_obj,my $poly1305_obj, 1787++ my $rmd160_obj, my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $engines_obj, 1788+ my $perlasm_scheme,my $dso_scheme,my $shared_target,my $shared_cflag, 1789+ my $shared_ldflag,my $shared_extension,my $ranlib,my $arflags,my $multilib)= 1790+- split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); 1791++ split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); 1792+ 1793+ print <<EOF 1794+ 1795+@@ -2121,6 +2133,8 @@ sub print_table_entry 1796+ \$sha1_obj = $sha1_obj 1797+ \$cast_obj = $cast_obj 1798+ \$rc4_obj = $rc4_obj 1799++\$chacha_obj = $chacha_obj 1800++\$poly1305_obj = $poly1305_obj 1801+ \$rmd160_obj = $rmd160_obj 1802+ \$rc5_obj = $rc5_obj 1803+ \$wp_obj = $wp_obj 1804+@@ -2150,7 +2164,7 @@ sub test_sanity 1805+ 1806+ foreach $target (sort keys %table) 1807+ { 1808+- @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); 1809++ @fields = split(/\s*:\s*/,$table{$target} . 
":" x 31 , -1); 1810+ 1811+ if ($fields[$idx_dso_scheme-1] =~ /^(beos|dl|dlfcn|win32|vms)$/) 1812+ { 1813+diff --git a/Makefile.org b/Makefile.org 1814+index 2db31ea..919466d 100644 1815+--- a/Makefile.org 1816++++ b/Makefile.org 1817+@@ -94,6 +94,8 @@ BF_ENC= bf_enc.o 1818+ CAST_ENC= c_enc.o 1819+ RC4_ENC= rc4_enc.o 1820+ RC5_ENC= rc5_enc.o 1821++CHACHA_ENC= chacha_enc.o 1822++POLY1305= poly1305.o 1823+ MD5_ASM_OBJ= 1824+ SHA1_ASM_OBJ= 1825+ RMD160_ASM_OBJ= 1826+@@ -147,7 +149,7 @@ SDIRS= \ 1827+ bn ec rsa dsa ecdsa dh ecdh dso engine \ 1828+ buffer bio stack lhash rand err \ 1829+ evp asn1 pem x509 x509v3 conf txt_db pkcs7 pkcs12 comp ocsp ui krb5 \ 1830+- cms pqueue ts jpake srp store cmac 1831++ cms pqueue ts jpake srp store cmac poly1305 chacha 1832+ # keep in mind that the above list is adjusted by ./Configure 1833+ # according to no-xxx arguments... 1834+ 1835+@@ -232,6 +234,8 @@ BUILDENV= PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)' \ 1836+ WP_ASM_OBJ='$(WP_ASM_OBJ)' \ 1837+ MODES_ASM_OBJ='$(MODES_ASM_OBJ)' \ 1838+ ENGINES_ASM_OBJ='$(ENGINES_ASM_OBJ)' \ 1839++ CHACHA_ENC='$(CHACHA_ENC)' \ 1840++ POLY1305='$(POLY1305)' \ 1841+ PERLASM_SCHEME='$(PERLASM_SCHEME)' \ 1842+ FIPSLIBDIR='${FIPSLIBDIR}' \ 1843+ FIPSDIR='${FIPSDIR}' \ 1844+diff --git a/crypto/chacha/Makefile b/crypto/chacha/Makefile 1845+new file mode 100644 1846+index 0000000..289933b 1847+--- /dev/null 1848++++ b/crypto/chacha/Makefile 1849+@@ -0,0 +1,80 @@ 1850++# 1851++# OpenSSL/crypto/chacha/Makefile 1852++# 1853++ 1854++DIR= chacha 1855++TOP= ../.. 
1856++CC= cc 1857++CPP= $(CC) -E 1858++INCLUDES= 1859++CFLAG=-g 1860++AR= ar r 1861++ 1862++CFLAGS= $(INCLUDES) $(CFLAG) 1863++ASFLAGS= $(INCLUDES) $(ASFLAG) 1864++AFLAGS= $(ASFLAGS) 1865++ 1866++CHACHA_ENC=chacha_enc.o 1867++ 1868++GENERAL=Makefile 1869++TEST=chachatest.o 1870++APPS= 1871++ 1872++LIB=$(TOP)/libcrypto.a 1873++LIBSRC= 1874++LIBOBJ=$(CHACHA_ENC) 1875++ 1876++SRC= $(LIBSRC) 1877++ 1878++EXHEADER=chacha.h 1879++HEADER= $(EXHEADER) 1880++ 1881++ALL= $(GENERAL) $(SRC) $(HEADER) 1882++ 1883++top: 1884++ (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) 1885++ 1886++all: lib 1887++ 1888++lib: $(LIBOBJ) 1889++ $(AR) $(LIB) $(LIBOBJ) 1890++ $(RANLIB) $(LIB) || echo Never mind. 1891++ @touch lib 1892++ 1893++files: 1894++ $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO 1895++ 1896++links: 1897++ @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) 1898++ @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) 1899++ @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) 1900++ 1901++install: 1902++ @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... 1903++ @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ 1904++ do \ 1905++ (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ 1906++ chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ 1907++ done; 1908++ 1909++tags: 1910++ ctags $(SRC) 1911++ 1912++tests: 1913++ 1914++lint: 1915++ lint -DLINT $(INCLUDES) $(SRC)>fluff 1916++ 1917++depend: 1918++ @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... 1919++ $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) 1920++ 1921++dclean: 1922++ $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new 1923++ mv -f Makefile.new $(MAKEFILE) 1924++ 1925++clean: 1926++ rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff 1927++ 1928++# DO NOT DELETE THIS LINE -- make depend depends on it. 
1929++ 1930+diff --git a/crypto/chacha/chacha.h b/crypto/chacha/chacha.h 1931+new file mode 100644 1932+index 0000000..d56519d 1933+--- /dev/null 1934++++ b/crypto/chacha/chacha.h 1935+@@ -0,0 +1,85 @@ 1936++/* 1937++ * Chacha stream algorithm. 1938++ * 1939++ * Created on: Jun, 2013 1940++ * Author: Elie Bursztein (elieb@google.com) 1941++ * 1942++ * Adapted from the estream code by D. Bernstein. 1943++ */ 1944++/* ==================================================================== 1945++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. 1946++ * 1947++ * Redistribution and use in source and binary forms, with or without 1948++ * modification, are permitted provided that the following conditions 1949++ * are met: 1950++ * 1951++ * 1. Redistributions of source code must retain the above copyright 1952++ * notice, this list of conditions and the following disclaimer. 1953++ * 1954++ * 2. Redistributions in binary form must reproduce the above copyright 1955++ * notice, this list of conditions and the following disclaimer in 1956++ * the documentation and/or other materials provided with the 1957++ * distribution. 1958++ * 1959++ * 3. All advertising materials mentioning features or use of this 1960++ * software must display the following acknowledgment: 1961++ * "This product includes software developed by the OpenSSL Project 1962++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 1963++ * 1964++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 1965++ * endorse or promote products derived from this software without 1966++ * prior written permission. For written permission, please contact 1967++ * licensing@OpenSSL.org. 1968++ * 1969++ * 5. Products derived from this software may not be called "OpenSSL" 1970++ * nor may "OpenSSL" appear in their names without prior written 1971++ * permission of the OpenSSL Project. 1972++ * 1973++ * 6. 
Redistributions of any form whatsoever must retain the following 1974++ * acknowledgment: 1975++ * "This product includes software developed by the OpenSSL Project 1976++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 1977++ * 1978++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 1979++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1980++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 1981++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 1982++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1983++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 1984++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 1985++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 1986++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 1987++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 1988++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 1989++ * OF THE POSSIBILITY OF SUCH DAMAGE. 1990++ * ==================================================================== 1991++ */ 1992++#ifndef HEADER_CHACHA_H 1993++#define HEADER_CHACHA_H 1994++ 1995++#include <openssl/opensslconf.h> 1996++ 1997++#if defined(OPENSSL_NO_CHACHA) 1998++#error ChaCha support is disabled. 1999++#endif 2000++ 2001++#include <stddef.h> 2002++ 2003++#ifdef __cplusplus 2004++extern "C" { 2005++#endif 2006++ 2007++/* CRYPTO_chacha_20 encrypts |in_len| bytes from |in| with the given key and 2008++ * nonce and writes the result to |out|, which may be equal to |in|. The 2009++ * initial block counter is specified by |counter|. 
*/ 2010++void CRYPTO_chacha_20(unsigned char *out, 2011++ const unsigned char *in, size_t in_len, 2012++ const unsigned char key[32], 2013++ const unsigned char nonce[8], 2014++ size_t counter); 2015++ 2016++#ifdef __cplusplus 2017++} 2018++#endif 2019++ 2020++#endif 2021+diff --git a/crypto/chacha/chacha_enc.c b/crypto/chacha/chacha_enc.c 2022+new file mode 100644 2023+index 0000000..54d1ca3 2024+--- /dev/null 2025++++ b/crypto/chacha/chacha_enc.c 2026+@@ -0,0 +1,167 @@ 2027++/* 2028++ * Chacha stream algorithm. 2029++ * 2030++ * Created on: Jun, 2013 2031++ * Author: Elie Bursztein (elieb@google.com) 2032++ * 2033++ * Adapted from the estream code by D. Bernstein. 2034++ */ 2035++/* ==================================================================== 2036++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. 2037++ * 2038++ * Redistribution and use in source and binary forms, with or without 2039++ * modification, are permitted provided that the following conditions 2040++ * are met: 2041++ * 2042++ * 1. Redistributions of source code must retain the above copyright 2043++ * notice, this list of conditions and the following disclaimer. 2044++ * 2045++ * 2. Redistributions in binary form must reproduce the above copyright 2046++ * notice, this list of conditions and the following disclaimer in 2047++ * the documentation and/or other materials provided with the 2048++ * distribution. 2049++ * 2050++ * 3. All advertising materials mentioning features or use of this 2051++ * software must display the following acknowledgment: 2052++ * "This product includes software developed by the OpenSSL Project 2053++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 2054++ * 2055++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 2056++ * endorse or promote products derived from this software without 2057++ * prior written permission. For written permission, please contact 2058++ * licensing@OpenSSL.org. 2059++ * 2060++ * 5. 
Products derived from this software may not be called "OpenSSL" 2061++ * nor may "OpenSSL" appear in their names without prior written 2062++ * permission of the OpenSSL Project. 2063++ * 2064++ * 6. Redistributions of any form whatsoever must retain the following 2065++ * acknowledgment: 2066++ * "This product includes software developed by the OpenSSL Project 2067++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 2068++ * 2069++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 2070++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2071++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 2072++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 2073++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 2074++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2075++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 2076++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2077++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 2078++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 2079++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 2080++ * OF THE POSSIBILITY OF SUCH DAMAGE. 2081++ * ==================================================================== 2082++ */ 2083++ 2084++#include <stdint.h> 2085++#include <string.h> 2086++#include <openssl/opensslconf.h> 2087++ 2088++#if !defined(OPENSSL_NO_CHACHA) 2089++ 2090++#include <openssl/chacha.h> 2091++ 2092++/* sigma contains the ChaCha constants, which happen to be an ASCII string. 
*/ 2093++static const char sigma[16] = "expand 32-byte k"; 2094++ 2095++#define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n)))) 2096++#define XOR(v, w) ((v) ^ (w)) 2097++#define PLUS(x, y) ((x) + (y)) 2098++#define PLUSONE(v) (PLUS((v), 1)) 2099++ 2100++#define U32TO8_LITTLE(p, v) \ 2101++ { (p)[0] = (v >> 0) & 0xff; (p)[1] = (v >> 8) & 0xff; \ 2102++ (p)[2] = (v >> 16) & 0xff; (p)[3] = (v >> 24) & 0xff; } 2103++#define U8TO32_LITTLE(p) \ 2104++ (((uint32_t)((p)[0]) ) | ((uint32_t)((p)[1]) << 8) | \ 2105++ ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24) ) 2106++ 2107++/* QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round. */ 2108++#define QUARTERROUND(a,b,c,d) \ 2109++ x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \ 2110++ x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \ 2111++ x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \ 2112++ x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7); 2113++ 2114++/* uint32_t comes from <stdint.h>, included above; a local typedef would be a redefinition. */ 2115++ 2116++/* chacha_core performs |num_rounds| rounds of ChaCha20 on the input words in 2117++ * |input| and writes the 64 output bytes to |output|.
*/ 2118++static void chacha_core(unsigned char output[64], const uint32_t input[16], 2119++ int num_rounds) 2120++ { 2121++ uint32_t x[16]; 2122++ int i; 2123++ 2124++ memcpy(x, input, sizeof(uint32_t) * 16); 2125++ for (i = num_rounds; i > 0; i -= 2) 2126++ { 2127++ QUARTERROUND( 0, 4, 8,12) 2128++ QUARTERROUND( 1, 5, 9,13) 2129++ QUARTERROUND( 2, 6,10,14) 2130++ QUARTERROUND( 3, 7,11,15) 2131++ QUARTERROUND( 0, 5,10,15) 2132++ QUARTERROUND( 1, 6,11,12) 2133++ QUARTERROUND( 2, 7, 8,13) 2134++ QUARTERROUND( 3, 4, 9,14) 2135++ } 2136++ 2137++ for (i = 0; i < 16; ++i) 2138++ x[i] = PLUS(x[i], input[i]); 2139++ for (i = 0; i < 16; ++i) 2140++ U32TO8_LITTLE(output + 4 * i, x[i]); 2141++ } 2142++ 2143++void CRYPTO_chacha_20(unsigned char *out, 2144++ const unsigned char *in, size_t in_len, 2145++ const unsigned char key[32], 2146++ const unsigned char nonce[8], 2147++ size_t counter) 2148++ { 2149++ uint32_t input[16]; 2150++ unsigned char buf[64]; 2151++ size_t todo, i; 2152++ 2153++ input[0] = U8TO32_LITTLE(sigma + 0); 2154++ input[1] = U8TO32_LITTLE(sigma + 4); 2155++ input[2] = U8TO32_LITTLE(sigma + 8); 2156++ input[3] = U8TO32_LITTLE(sigma + 12); 2157++ 2158++ input[4] = U8TO32_LITTLE(key + 0); 2159++ input[5] = U8TO32_LITTLE(key + 4); 2160++ input[6] = U8TO32_LITTLE(key + 8); 2161++ input[7] = U8TO32_LITTLE(key + 12); 2162++ 2163++ input[8] = U8TO32_LITTLE(key + 16); 2164++ input[9] = U8TO32_LITTLE(key + 20); 2165++ input[10] = U8TO32_LITTLE(key + 24); 2166++ input[11] = U8TO32_LITTLE(key + 28); 2167++ 2168++ input[12] = counter; 2169++ input[13] = ((uint64_t) counter) >> 32; 2170++ input[14] = U8TO32_LITTLE(nonce + 0); 2171++ input[15] = U8TO32_LITTLE(nonce + 4); 2172++ 2173++ while (in_len > 0) 2174++ { 2175++ todo = sizeof(buf); 2176++ if (in_len < todo) 2177++ todo = in_len; 2178++ 2179++ chacha_core(buf, input, 20); 2180++ for (i = 0; i < todo; i++) 2181++ out[i] = in[i] ^ buf[i]; 2182++ 2183++ out += todo; 2184++ in += todo; 2185++ in_len -= todo; 2186++ 2187++ 
input[12]++; 2188++ if (input[12] == 0) 2189++ input[13]++; 2190++ } 2191++ } 2192++ 2193++#endif /* !OPENSSL_NO_CHACHA */ 2194+diff --git a/crypto/chacha/chacha_vec.c b/crypto/chacha/chacha_vec.c 2195+new file mode 100644 2196+index 0000000..33b2238 2197+--- /dev/null 2198++++ b/crypto/chacha/chacha_vec.c 2199+@@ -0,0 +1,345 @@ 2200++/* ==================================================================== 2201++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. 2202++ * 2203++ * Redistribution and use in source and binary forms, with or without 2204++ * modification, are permitted provided that the following conditions 2205++ * are met: 2206++ * 2207++ * 1. Redistributions of source code must retain the above copyright 2208++ * notice, this list of conditions and the following disclaimer. 2209++ * 2210++ * 2. Redistributions in binary form must reproduce the above copyright 2211++ * notice, this list of conditions and the following disclaimer in 2212++ * the documentation and/or other materials provided with the 2213++ * distribution. 2214++ * 2215++ * 3. All advertising materials mentioning features or use of this 2216++ * software must display the following acknowledgment: 2217++ * "This product includes software developed by the OpenSSL Project 2218++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 2219++ * 2220++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 2221++ * endorse or promote products derived from this software without 2222++ * prior written permission. For written permission, please contact 2223++ * licensing@OpenSSL.org. 2224++ * 2225++ * 5. Products derived from this software may not be called "OpenSSL" 2226++ * nor may "OpenSSL" appear in their names without prior written 2227++ * permission of the OpenSSL Project. 2228++ * 2229++ * 6. 
Redistributions of any form whatsoever must retain the following 2230++ * acknowledgment: 2231++ * "This product includes software developed by the OpenSSL Project 2232++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 2233++ * 2234++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 2235++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2236++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 2237++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 2238++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 2239++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2240++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 2241++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2242++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 2243++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 2244++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 2245++ * OF THE POSSIBILITY OF SUCH DAMAGE. 2246++ * ==================================================================== 2247++ */ 2248++ 2249++/* This implementation is by Ted Krovetz and was submitted to SUPERCOP and 2250++ * marked as public domain. It was been altered to allow for non-aligned inputs 2251++ * and to allow the block counter to be passed in specifically. */ 2252++ 2253++#include <string.h> 2254++#include <stdint.h> 2255++#include <openssl/opensslconf.h> 2256++ 2257++#if !defined(OPENSSL_NO_CHACHA) 2258++ 2259++#include <openssl/chacha.h> 2260++ 2261++#ifndef CHACHA_RNDS 2262++#define CHACHA_RNDS 20 /* 8 (high speed), 20 (conservative), 12 (middle) */ 2263++#endif 2264++ 2265++/* Architecture-neutral way to specify 16-byte vector of ints */ 2266++typedef unsigned vec __attribute__ ((vector_size (16))); 2267++ 2268++/* This implementation is designed for Neon, SSE and AltiVec machines. 
The 2269++ * following specify how to do certain vector operations efficiently on 2270++ * each architecture, using intrinsics. 2271++ * This implementation supports parallel processing of multiple blocks, 2272++ * including potentially using general-purpose registers. 2273++ */ 2274++#if __ARM_NEON__ 2275++#include <arm_neon.h> 2276++#define GPR_TOO 1 2277++#define VBPI 2 2278++#define ONE (vec)vsetq_lane_u32(1,vdupq_n_u32(0),0) 2279++#define LOAD(m) (vec)(*((vec*)(m))) 2280++#define STORE(m,r) (*((vec*)(m))) = (r) 2281++#define ROTV1(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,1) 2282++#define ROTV2(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,2) 2283++#define ROTV3(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,3) 2284++#define ROTW16(x) (vec)vrev32q_u16((uint16x8_t)x) 2285++#if __clang__ 2286++#define ROTW7(x) (x << ((vec){ 7, 7, 7, 7})) ^ (x >> ((vec){25,25,25,25})) 2287++#define ROTW8(x) (x << ((vec){ 8, 8, 8, 8})) ^ (x >> ((vec){24,24,24,24})) 2288++#define ROTW12(x) (x << ((vec){12,12,12,12})) ^ (x >> ((vec){20,20,20,20})) 2289++#else 2290++#define ROTW7(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,7),(uint32x4_t)x,25) 2291++#define ROTW8(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,8),(uint32x4_t)x,24) 2292++#define ROTW12(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,12),(uint32x4_t)x,20) 2293++#endif 2294++#elif __SSE2__ 2295++#include <emmintrin.h> 2296++#define GPR_TOO 0 2297++#if __clang__ 2298++#define VBPI 4 2299++#else 2300++#define VBPI 3 2301++#endif 2302++#define ONE (vec)_mm_set_epi32(0,0,0,1) 2303++#define LOAD(m) (vec)_mm_loadu_si128((__m128i*)(m)) 2304++#define STORE(m,r) _mm_storeu_si128((__m128i*)(m), (__m128i) (r)) 2305++#define ROTV1(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(0,3,2,1)) 2306++#define ROTV2(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(1,0,3,2)) 2307++#define ROTV3(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(2,1,0,3)) 2308++#define ROTW7(x) (vec)(_mm_slli_epi32((__m128i)x, 7) ^ 
_mm_srli_epi32((__m128i)x,25)) 2309++#define ROTW12(x) (vec)(_mm_slli_epi32((__m128i)x,12) ^ _mm_srli_epi32((__m128i)x,20)) 2310++#if __SSSE3__ 2311++#include <tmmintrin.h> 2312++#define ROTW8(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(14,13,12,15,10,9,8,11,6,5,4,7,2,1,0,3)) 2313++#define ROTW16(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)) 2314++#else 2315++#define ROTW8(x) (vec)(_mm_slli_epi32((__m128i)x, 8) ^ _mm_srli_epi32((__m128i)x,24)) 2316++#define ROTW16(x) (vec)(_mm_slli_epi32((__m128i)x,16) ^ _mm_srli_epi32((__m128i)x,16)) 2317++#endif 2318++#else 2319++#error -- Implementation supports only machines with neon or SSE2 2320++#endif 2321++ 2322++#ifndef REVV_BE 2323++#define REVV_BE(x) (x) 2324++#endif 2325++ 2326++#ifndef REVW_BE 2327++#define REVW_BE(x) (x) 2328++#endif 2329++ 2330++#define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */ 2331++ 2332++#define DQROUND_VECTORS(a,b,c,d) \ 2333++ a += b; d ^= a; d = ROTW16(d); \ 2334++ c += d; b ^= c; b = ROTW12(b); \ 2335++ a += b; d ^= a; d = ROTW8(d); \ 2336++ c += d; b ^= c; b = ROTW7(b); \ 2337++ b = ROTV1(b); c = ROTV2(c); d = ROTV3(d); \ 2338++ a += b; d ^= a; d = ROTW16(d); \ 2339++ c += d; b ^= c; b = ROTW12(b); \ 2340++ a += b; d ^= a; d = ROTW8(d); \ 2341++ c += d; b ^= c; b = ROTW7(b); \ 2342++ b = ROTV3(b); c = ROTV2(c); d = ROTV1(d); 2343++ 2344++#define QROUND_WORDS(a,b,c,d) \ 2345++ a = a+b; d ^= a; d = d<<16 | d>>16; \ 2346++ c = c+d; b ^= c; b = b<<12 | b>>20; \ 2347++ a = a+b; d ^= a; d = d<< 8 | d>>24; \ 2348++ c = c+d; b ^= c; b = b<< 7 | b>>25; 2349++ 2350++#define WRITE_XOR(in, op, d, v0, v1, v2, v3) \ 2351++ STORE(op + d + 0, LOAD(in + d + 0) ^ REVV_BE(v0)); \ 2352++ STORE(op + d + 4, LOAD(in + d + 4) ^ REVV_BE(v1)); \ 2353++ STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \ 2354++ STORE(op + d +12, LOAD(in + d +12) ^ REVV_BE(v3)); 2355++ 2356++void CRYPTO_chacha_20( 2357++ unsigned char *out, 2358++ const unsigned char 
*in, 2359++ size_t inlen, 2360++ const unsigned char key[32], 2361++ const unsigned char nonce[8], 2362++ size_t counter) 2363++ { 2364++ unsigned iters, i, *op=(unsigned *)out, *ip=(unsigned *)in, *kp; 2365++#if defined(__ARM_NEON__) 2366++ unsigned *np; 2367++#endif 2368++ vec s0, s1, s2, s3; 2369++#if !defined(__ARM_NEON__) && !defined(__SSE2__) 2370++ __attribute__ ((aligned (16))) unsigned key[8], nonce[4]; 2371++#endif 2372++ __attribute__ ((aligned (16))) unsigned chacha_const[] = 2373++ {0x61707865,0x3320646E,0x79622D32,0x6B206574}; 2374++#if defined(__ARM_NEON__) || defined(__SSE2__) 2375++ kp = (unsigned *)key; 2376++#else 2377++ ((vec *)key)[0] = REVV_BE(((vec *)key)[0]); 2378++ ((vec *)key)[1] = REVV_BE(((vec *)key)[1]); 2379++ nonce[0] = REVW_BE(((unsigned *)nonce)[0]); 2380++ nonce[1] = REVW_BE(((unsigned *)nonce)[1]); 2381++ nonce[2] = REVW_BE(((unsigned *)nonce)[2]); 2382++ nonce[3] = REVW_BE(((unsigned *)nonce)[3]); 2383++ kp = (unsigned *)key; 2384++ np = (unsigned *)nonce; 2385++#endif 2386++#if defined(__ARM_NEON__) 2387++ np = (unsigned*) nonce; 2388++#endif 2389++ s0 = LOAD(chacha_const); 2390++ s1 = LOAD(&((vec*)kp)[0]); 2391++ s2 = LOAD(&((vec*)kp)[1]); 2392++ s3 = (vec){ 2393++ counter & 0xffffffff, 2394++#if __ARM_NEON__ 2395++ 0, /* can't right-shift 32 bits on a 32-bit system. 
*/ 2396++#else 2397++ counter >> 32, 2398++#endif 2399++ ((uint32_t*)nonce)[0], 2400++ ((uint32_t*)nonce)[1] 2401++ }; 2402++ 2403++ for (iters = 0; iters < inlen/(BPI*64); iters++) 2404++ { 2405++#if GPR_TOO 2406++ register unsigned x0, x1, x2, x3, x4, x5, x6, x7, x8, 2407++ x9, x10, x11, x12, x13, x14, x15; 2408++#endif 2409++#if VBPI > 2 2410++ vec v8,v9,v10,v11; 2411++#endif 2412++#if VBPI > 3 2413++ vec v12,v13,v14,v15; 2414++#endif 2415++ 2416++ vec v0,v1,v2,v3,v4,v5,v6,v7; 2417++ v4 = v0 = s0; v5 = v1 = s1; v6 = v2 = s2; v3 = s3; 2418++ v7 = v3 + ONE; 2419++#if VBPI > 2 2420++ v8 = v4; v9 = v5; v10 = v6; 2421++ v11 = v7 + ONE; 2422++#endif 2423++#if VBPI > 3 2424++ v12 = v8; v13 = v9; v14 = v10; 2425++ v15 = v11 + ONE; 2426++#endif 2427++#if GPR_TOO 2428++ x0 = chacha_const[0]; x1 = chacha_const[1]; 2429++ x2 = chacha_const[2]; x3 = chacha_const[3]; 2430++ x4 = kp[0]; x5 = kp[1]; x6 = kp[2]; x7 = kp[3]; 2431++ x8 = kp[4]; x9 = kp[5]; x10 = kp[6]; x11 = kp[7]; 2432++ x12 = counter+BPI*iters+(BPI-1); x13 = 0; 2433++ x14 = np[0]; x15 = np[1]; 2434++#endif 2435++ for (i = CHACHA_RNDS/2; i; i--) 2436++ { 2437++ DQROUND_VECTORS(v0,v1,v2,v3) 2438++ DQROUND_VECTORS(v4,v5,v6,v7) 2439++#if VBPI > 2 2440++ DQROUND_VECTORS(v8,v9,v10,v11) 2441++#endif 2442++#if VBPI > 3 2443++ DQROUND_VECTORS(v12,v13,v14,v15) 2444++#endif 2445++#if GPR_TOO 2446++ QROUND_WORDS( x0, x4, x8,x12) 2447++ QROUND_WORDS( x1, x5, x9,x13) 2448++ QROUND_WORDS( x2, x6,x10,x14) 2449++ QROUND_WORDS( x3, x7,x11,x15) 2450++ QROUND_WORDS( x0, x5,x10,x15) 2451++ QROUND_WORDS( x1, x6,x11,x12) 2452++ QROUND_WORDS( x2, x7, x8,x13) 2453++ QROUND_WORDS( x3, x4, x9,x14) 2454++#endif 2455++ } 2456++ 2457++ WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3) 2458++ s3 += ONE; 2459++ WRITE_XOR(ip, op, 16, v4+s0, v5+s1, v6+s2, v7+s3) 2460++ s3 += ONE; 2461++#if VBPI > 2 2462++ WRITE_XOR(ip, op, 32, v8+s0, v9+s1, v10+s2, v11+s3) 2463++ s3 += ONE; 2464++#endif 2465++#if VBPI > 3 2466++ WRITE_XOR(ip, op, 48, v12+s0, 
v13+s1, v14+s2, v15+s3) 2467++ s3 += ONE; 2468++#endif 2469++ ip += VBPI*16; 2470++ op += VBPI*16; 2471++#if GPR_TOO 2472++ op[0] = REVW_BE(REVW_BE(ip[0]) ^ (x0 + chacha_const[0])); 2473++ op[1] = REVW_BE(REVW_BE(ip[1]) ^ (x1 + chacha_const[1])); 2474++ op[2] = REVW_BE(REVW_BE(ip[2]) ^ (x2 + chacha_const[2])); 2475++ op[3] = REVW_BE(REVW_BE(ip[3]) ^ (x3 + chacha_const[3])); 2476++ op[4] = REVW_BE(REVW_BE(ip[4]) ^ (x4 + kp[0])); 2477++ op[5] = REVW_BE(REVW_BE(ip[5]) ^ (x5 + kp[1])); 2478++ op[6] = REVW_BE(REVW_BE(ip[6]) ^ (x6 + kp[2])); 2479++ op[7] = REVW_BE(REVW_BE(ip[7]) ^ (x7 + kp[3])); 2480++ op[8] = REVW_BE(REVW_BE(ip[8]) ^ (x8 + kp[4])); 2481++ op[9] = REVW_BE(REVW_BE(ip[9]) ^ (x9 + kp[5])); 2482++ op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6])); 2483++ op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7])); 2484++ op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + counter+BPI*iters+(BPI-1))); 2485++ op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13)); 2486++ op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[0])); 2487++ op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[1])); 2488++ s3 += ONE; 2489++ ip += 16; 2490++ op += 16; 2491++#endif 2492++ } 2493++ 2494++ for (iters = inlen%(BPI*64)/64; iters != 0; iters--) 2495++ { 2496++ vec v0 = s0, v1 = s1, v2 = s2, v3 = s3; 2497++ for (i = CHACHA_RNDS/2; i; i--) 2498++ { 2499++ DQROUND_VECTORS(v0,v1,v2,v3); 2500++ } 2501++ WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3) 2502++ s3 += ONE; 2503++ ip += 16; 2504++ op += 16; 2505++ } 2506++ 2507++ inlen = inlen % 64; 2508++ if (inlen) 2509++ { 2510++ __attribute__ ((aligned (16))) vec buf[4]; 2511++ vec v0,v1,v2,v3; 2512++ v0 = s0; v1 = s1; v2 = s2; v3 = s3; 2513++ for (i = CHACHA_RNDS/2; i; i--) 2514++ { 2515++ DQROUND_VECTORS(v0,v1,v2,v3); 2516++ } 2517++ 2518++ if (inlen >= 16) 2519++ { 2520++ STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0)); 2521++ if (inlen >= 32) 2522++ { 2523++ STORE(op + 4, LOAD(ip + 4) ^ REVV_BE(v1 + s1)); 2524++ if (inlen >= 48) 2525++ { 2526++ STORE(op + 8, LOAD(ip + 8) ^ 
2527++ REVV_BE(v2 + s2)); 2528++ buf[3] = REVV_BE(v3 + s3); 2529++ } 2530++ else 2531++ buf[2] = REVV_BE(v2 + s2); 2532++ } 2533++ else 2534++ buf[1] = REVV_BE(v1 + s1); 2535++ } 2536++ else 2537++ buf[0] = REVV_BE(v0 + s0); 2538++ 2539++ for (i=inlen & ~15; i<inlen; i++) 2540++ ((char *)op)[i] = ((char *)ip)[i] ^ ((char *)buf)[i]; 2541++ } 2542++ } 2543++ 2544++#endif /* !OPENSSL_NO_CHACHA */ 2545+diff --git a/crypto/chacha/chachatest.c b/crypto/chacha/chachatest.c 2546+new file mode 100644 2547+index 0000000..b2a9389 2548+--- /dev/null 2549++++ b/crypto/chacha/chachatest.c 2550+@@ -0,0 +1,211 @@ 2551++/* 2552++ * Chacha stream algorithm. 2553++ * 2554++ * Created on: Jun, 2013 2555++ * Author: Elie Bursztein (elieb@google.com) 2556++ * 2557++ * Adapted from the estream code by D. Bernstein. 2558++ */ 2559++/* ==================================================================== 2560++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. 2561++ * 2562++ * Redistribution and use in source and binary forms, with or without 2563++ * modification, are permitted provided that the following conditions 2564++ * are met: 2565++ * 2566++ * 1. Redistributions of source code must retain the above copyright 2567++ * notice, this list of conditions and the following disclaimer. 2568++ * 2569++ * 2. Redistributions in binary form must reproduce the above copyright 2570++ * notice, this list of conditions and the following disclaimer in 2571++ * the documentation and/or other materials provided with the 2572++ * distribution. 2573++ * 2574++ * 3. All advertising materials mentioning features or use of this 2575++ * software must display the following acknowledgment: 2576++ * "This product includes software developed by the OpenSSL Project 2577++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 2578++ * 2579++ * 4. 
The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 2580++ * endorse or promote products derived from this software without 2581++ * prior written permission. For written permission, please contact 2582++ * licensing@OpenSSL.org. 2583++ * 2584++ * 5. Products derived from this software may not be called "OpenSSL" 2585++ * nor may "OpenSSL" appear in their names without prior written 2586++ * permission of the OpenSSL Project. 2587++ * 2588++ * 6. Redistributions of any form whatsoever must retain the following 2589++ * acknowledgment: 2590++ * "This product includes software developed by the OpenSSL Project 2591++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 2592++ * 2593++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 2594++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2595++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 2596++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 2597++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 2598++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2599++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 2600++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2601++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 2602++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 2603++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 2604++ * OF THE POSSIBILITY OF SUCH DAMAGE. 
2605++ * ==================================================================== 2606++ */ 2607++ 2608++#include <stdio.h> 2609++#include <stdlib.h> 2610++#include <string.h> 2611++#include <stdint.h> 2612++ 2613++#include <openssl/chacha.h> 2614++ 2615++struct chacha_test { 2616++ const char *keyhex; 2617++ const char *noncehex; 2618++ const char *outhex; 2619++}; 2620++ 2621++static const struct chacha_test chacha_tests[] = { 2622++ { 2623++ "0000000000000000000000000000000000000000000000000000000000000000", 2624++ "0000000000000000", 2625++ "76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586", 2626++ }, 2627++ { 2628++ "0000000000000000000000000000000000000000000000000000000000000001", 2629++ "0000000000000000", 2630++ "4540f05a9f1fb296d7736e7b208e3c96eb4fe1834688d2604f450952ed432d41bbe2a0b6ea7566d2a5d1e7e20d42af2c53d792b1c43fea817e9ad275ae546963", 2631++ }, 2632++ { 2633++ "0000000000000000000000000000000000000000000000000000000000000000", 2634++ "0000000000000001", 2635++ "de9cba7bf3d69ef5e786dc63973f653a0b49e015adbff7134fcb7df137821031e85a050278a7084527214f73efc7fa5b5277062eb7a0433e445f41e31afab757", 2636++ }, 2637++ { 2638++ "0000000000000000000000000000000000000000000000000000000000000000", 2639++ "0100000000000000", 2640++ "ef3fdfd6c61578fbf5cf35bd3dd33b8009631634d21e42ac33960bd138e50d32111e4caf237ee53ca8ad6426194a88545ddc497a0b466e7d6bbdb0041b2f586b", 2641++ }, 2642++ { 2643++ "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", 2644++ "0001020304050607", 2645++ 
"f798a189f195e66982105ffb640bb7757f579da31602fc93ec01ac56f85ac3c134a4547b733b46413042c9440049176905d3be59ea1c53f15916155c2be8241a38008b9a26bc35941e2444177c8ade6689de95264986d95889fb60e84629c9bd9a5acb1cc118be563eb9b3a4a472f82e09a7e778492b562ef7130e88dfe031c79db9d4f7c7a899151b9a475032b63fc385245fe054e3dd5a97a5f576fe064025d3ce042c566ab2c507b138db853e3d6959660996546cc9c4a6eafdc777c040d70eaf46f76dad3979e5c5360c3317166a1c894c94a371876a94df7628fe4eaaf2ccb27d5aaae0ad7ad0f9d4b6ad3b54098746d4524d38407a6deb", 2646++ }, 2647++}; 2648++ 2649++static unsigned char hex_digit(char h) 2650++ { 2651++ if (h >= '0' && h <= '9') 2652++ return h - '0'; 2653++ else if (h >= 'a' && h <= 'f') 2654++ return h - 'a' + 10; 2655++ else if (h >= 'A' && h <= 'F') 2656++ return h - 'A' + 10; 2657++ else 2658++ abort(); 2659++ } 2660++ 2661++static void hex_decode(unsigned char *out, const char* hex) 2662++ { 2663++ size_t j = 0; 2664++ 2665++ while (*hex != 0) 2666++ { 2667++ unsigned char v = hex_digit(*hex++); 2668++ v <<= 4; 2669++ v |= hex_digit(*hex++); 2670++ out[j++] = v; 2671++ } 2672++ } 2673++ 2674++static void hexdump(unsigned char *a, size_t len) 2675++ { 2676++ size_t i; 2677++ 2678++ for (i = 0; i < len; i++) 2679++ printf("%02x", a[i]); 2680++ } 2681++ 2682++/* misalign returns a pointer that points 0 to 15 bytes into |in| such that the 2683++ * returned pointer has alignment 1 mod 16. 
*/ 2684++static void* misalign(void* in) 2685++ { 2686++ intptr_t x = (intptr_t) in; 2687++ x += (17 - (x % 16)) % 16; 2688++ return (void*) x; 2689++ } 2690++ 2691++int main() 2692++ { 2693++ static const unsigned num_tests = 2694++ sizeof(chacha_tests) / sizeof(struct chacha_test); 2695++ unsigned i; 2696++ unsigned char key_bytes[32 + 16]; 2697++ unsigned char nonce_bytes[8 + 16] = {0}; 2698++ 2699++ unsigned char *key = misalign(key_bytes); 2700++ unsigned char *nonce = misalign(nonce_bytes); 2701++ 2702++ for (i = 0; i < num_tests; i++) 2703++ { 2704++ const struct chacha_test *test = &chacha_tests[i]; 2705++ unsigned char *expected, *out_bytes, *zero_bytes, *out, *zeros; 2706++ size_t len = strlen(test->outhex); 2707++ 2708++ if (strlen(test->keyhex) != 32*2 || 2709++ strlen(test->noncehex) != 8*2 || 2710++ (len & 1) == 1) 2711++ return 1; 2712++ 2713++ len /= 2; 2714++ 2715++ hex_decode(key, test->keyhex); 2716++ hex_decode(nonce, test->noncehex); 2717++ 2718++ expected = malloc(len); 2719++ out_bytes = malloc(len+16); 2720++ zero_bytes = malloc(len+16); 2721++ /* Attempt to test unaligned inputs. */ 2722++ out = misalign(out_bytes); 2723++ zeros = misalign(zero_bytes); 2724++ memset(zeros, 0, len); 2725++ 2726++ hex_decode(expected, test->outhex); 2727++ CRYPTO_chacha_20(out, zeros, len, key, nonce, 0); 2728++ 2729++ if (memcmp(out, expected, len) != 0) 2730++ { 2731++ printf("ChaCha20 test #%d failed.\n", i); 2732++ printf("got: "); 2733++ hexdump(out, len); 2734++ printf("\nexpected: "); 2735++ hexdump(expected, len); 2736++ printf("\n"); 2737++ return 1; 2738++ } 2739++ 2740++ /* The last test has a large output. We test whether the 2741++ * counter works as expected by skipping the first 64 bytes of 2742++ * it. 
*/ 2743++ if (i == num_tests - 1) 2744++ { 2745++ CRYPTO_chacha_20(out, zeros, len - 64, key, nonce, 1); 2746++ if (memcmp(out, expected + 64, len - 64) != 0) 2747++ { 2748++ printf("ChaCha20 skip test failed.\n"); 2749++ return 1; 2750++ } 2751++ } 2752++ 2753++ free(expected); 2754++ free(zero_bytes); 2755++ free(out_bytes); 2756++ } 2757++ 2758++ 2759++ printf("PASS\n"); 2760++ return 0; 2761++ } 2762+diff --git a/crypto/evp/Makefile b/crypto/evp/Makefile 2763+index b73038d..86b0504 100644 2764+--- a/crypto/evp/Makefile 2765++++ b/crypto/evp/Makefile 2766+@@ -29,7 +29,8 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_cnf.c \ 2767+ c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \ 2768+ evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \ 2769+ e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \ 2770+- e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c 2771++ e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c \ 2772++ e_chacha20poly1305.c 2773+ 2774+ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ 2775+ e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\ 2776+@@ -42,7 +43,7 @@ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ 2777+ c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \ 2778+ evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \ 2779+ e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o evp_fips.o \ 2780+- e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o 2781++ e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o e_chacha20poly1305.o 2782+ 2783+ SRC= $(LIBSRC) 2784+ 2785+@@ -239,6 +240,21 @@ e_cast.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h 2786+ e_cast.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h 2787+ e_cast.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 2788+ e_cast.o: ../../include/openssl/symhacks.h ../cryptlib.h e_cast.c evp_locl.h 2789++e_chacha20poly1305.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h 
2790++e_chacha20poly1305.o: ../../include/openssl/chacha.h 2791++e_chacha20poly1305.o: ../../include/openssl/crypto.h 2792++e_chacha20poly1305.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h 2793++e_chacha20poly1305.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h 2794++e_chacha20poly1305.o: ../../include/openssl/obj_mac.h 2795++e_chacha20poly1305.o: ../../include/openssl/objects.h 2796++e_chacha20poly1305.o: ../../include/openssl/opensslconf.h 2797++e_chacha20poly1305.o: ../../include/openssl/opensslv.h 2798++e_chacha20poly1305.o: ../../include/openssl/ossl_typ.h 2799++e_chacha20poly1305.o: ../../include/openssl/poly1305.h 2800++e_chacha20poly1305.o: ../../include/openssl/safestack.h 2801++e_chacha20poly1305.o: ../../include/openssl/stack.h 2802++e_chacha20poly1305.o: ../../include/openssl/symhacks.h e_chacha20poly1305.c 2803++e_chacha20poly1305.o: evp_locl.h 2804+ e_des.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h 2805+ e_des.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 2806+ e_des.o: ../../include/openssl/des.h ../../include/openssl/des_old.h 2807+@@ -258,9 +274,10 @@ e_des3.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h 2808+ e_des3.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h 2809+ e_des3.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 2810+ e_des3.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rand.h 2811+-e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 2812+-e_des3.o: ../../include/openssl/symhacks.h ../../include/openssl/ui.h 2813+-e_des3.o: ../../include/openssl/ui_compat.h ../cryptlib.h e_des3.c evp_locl.h 2814++e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h 2815++e_des3.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h 2816++e_des3.o: ../../include/openssl/ui.h ../../include/openssl/ui_compat.h 2817++e_des3.o: ../cryptlib.h e_des3.c evp_locl.h 
2818+ e_idea.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h 2819+ e_idea.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 2820+ e_idea.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h 2821+@@ -356,6 +373,14 @@ evp_acnf.o: ../../include/openssl/opensslconf.h 2822+ evp_acnf.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h 2823+ evp_acnf.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 2824+ evp_acnf.o: ../../include/openssl/symhacks.h ../cryptlib.h evp_acnf.c 2825++evp_aead.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h 2826++evp_aead.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h 2827++evp_aead.o: ../../include/openssl/err.h ../../include/openssl/evp.h 2828++evp_aead.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h 2829++evp_aead.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h 2830++evp_aead.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h 2831++evp_aead.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 2832++evp_aead.o: ../../include/openssl/symhacks.h evp_aead.c 2833+ evp_cnf.o: ../../e_os.h ../../include/openssl/asn1.h 2834+ evp_cnf.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h 2835+ evp_cnf.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h 2836+diff --git a/crypto/evp/e_chacha20poly1305.c b/crypto/evp/e_chacha20poly1305.c 2837+new file mode 100644 2838+index 0000000..1c0c0fb 2839+--- /dev/null 2840++++ b/crypto/evp/e_chacha20poly1305.c 2841+@@ -0,0 +1,267 @@ 2842++/* ==================================================================== 2843++ * Copyright (c) 2013 The OpenSSL Project. All rights reserved. 2844++ * 2845++ * Redistribution and use in source and binary forms, with or without 2846++ * modification, are permitted provided that the following conditions 2847++ * are met: 2848++ * 2849++ * 1. 
Redistributions of source code must retain the above copyright 2850++ * notice, this list of conditions and the following disclaimer. 2851++ * 2852++ * 2. Redistributions in binary form must reproduce the above copyright 2853++ * notice, this list of conditions and the following disclaimer in 2854++ * the documentation and/or other materials provided with the 2855++ * distribution. 2856++ * 2857++ * 3. All advertising materials mentioning features or use of this 2858++ * software must display the following acknowledgment: 2859++ * "This product includes software developed by the OpenSSL Project 2860++ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 2861++ * 2862++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 2863++ * endorse or promote products derived from this software without 2864++ * prior written permission. For written permission, please contact 2865++ * openssl-core@openssl.org. 2866++ * 2867++ * 5. Products derived from this software may not be called "OpenSSL" 2868++ * nor may "OpenSSL" appear in their names without prior written 2869++ * permission of the OpenSSL Project. 2870++ * 2871++ * 6. Redistributions of any form whatsoever must retain the following 2872++ * acknowledgment: 2873++ * "This product includes software developed by the OpenSSL Project 2874++ * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 2875++ * 2876++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 2877++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2878++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 2879++ * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE OpenSSL PROJECT OR 2880++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 2881++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2882++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 2883++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2884++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 2885++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 2886++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 2887++ * OF THE POSSIBILITY OF SUCH DAMAGE. 2888++ * ==================================================================== 2889++ * 2890++ */ 2891++ 2892++#include <stdint.h> 2893++#include <string.h> 2894++#include <openssl/opensslconf.h> 2895++ 2896++#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305) 2897++ 2898++#include <openssl/chacha.h> 2899++#include <openssl/poly1305.h> 2900++#include <openssl/evp.h> 2901++#include <openssl/err.h> 2902++#include "evp_locl.h" 2903++ 2904++#define POLY1305_TAG_LEN 16 2905++#define CHACHA20_NONCE_LEN 8 2906++ 2907++struct aead_chacha20_poly1305_ctx 2908++ { 2909++ unsigned char key[32]; 2910++ unsigned char tag_len; 2911++ }; 2912++ 2913++static int aead_chacha20_poly1305_init(EVP_AEAD_CTX *ctx, const unsigned char *key, size_t key_len, size_t tag_len) 2914++ { 2915++ struct aead_chacha20_poly1305_ctx *c20_ctx; 2916++ 2917++ if (tag_len == 0) 2918++ tag_len = POLY1305_TAG_LEN; 2919++ 2920++ if (tag_len > POLY1305_TAG_LEN) 2921++ { 2922++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_INIT, EVP_R_TOO_LARGE); 2923++ return 0; 2924++ } 2925++ 2926++ if (key_len != sizeof(c20_ctx->key)) 2927++ return 0; /* internal error - EVP_AEAD_CTX_init should catch this. 
*/ 2928++ 2929++ c20_ctx = OPENSSL_malloc(sizeof(struct aead_chacha20_poly1305_ctx)); 2930++ if (c20_ctx == NULL) 2931++ return 0; 2932++ 2933++ memcpy(&c20_ctx->key[0], key, key_len); 2934++ c20_ctx->tag_len = tag_len; 2935++ ctx->aead_state = c20_ctx; 2936++ 2937++ return 1; 2938++ } 2939++ 2940++static void aead_chacha20_poly1305_cleanup(EVP_AEAD_CTX *ctx) 2941++ { 2942++ struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; 2943++ OPENSSL_cleanse(c20_ctx->key, sizeof(c20_ctx->key)); 2944++ OPENSSL_free(c20_ctx); 2945++ } 2946++ 2947++static void poly1305_update_with_length(poly1305_state *poly1305, 2948++ const unsigned char *data, size_t data_len) 2949++ { 2950++ size_t j = data_len; 2951++ unsigned char length_bytes[8]; 2952++ unsigned i; 2953++ 2954++ for (i = 0; i < sizeof(length_bytes); i++) 2955++ { 2956++ length_bytes[i] = j; 2957++ j >>= 8; 2958++ } 2959++ 2960++ CRYPTO_poly1305_update(poly1305, data, data_len); 2961++ CRYPTO_poly1305_update(poly1305, length_bytes, sizeof(length_bytes)); 2962++} 2963++ 2964++#if __arm__ 2965++#define ALIGNED __attribute__((aligned(16))) 2966++#else 2967++#define ALIGNED 2968++#endif 2969++ 2970++static ssize_t aead_chacha20_poly1305_seal(const EVP_AEAD_CTX *ctx, 2971++ unsigned char *out, size_t max_out_len, 2972++ const unsigned char *nonce, size_t nonce_len, 2973++ const unsigned char *in, size_t in_len, 2974++ const unsigned char *ad, size_t ad_len) 2975++ { 2976++ const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; 2977++ unsigned char poly1305_key[32] ALIGNED; 2978++ poly1305_state poly1305; 2979++ const uint64_t in_len_64 = in_len; 2980++ 2981++ /* The underlying ChaCha implementation may not overflow the block 2982++ * counter into the second counter word. Therefore we disallow 2983++ * individual operations that work on more than 2TB at a time. 
2984++ * |in_len_64| is needed because, on 32-bit platforms, size_t is only 2985++ * 32-bits and this produces a warning because it's always false. 2986++ * Casting to uint64_t inside the conditional is not sufficient to stop 2987++ * the warning. */ 2988++ if (in_len_64 >= (1ull << 32)*64-64) 2989++ { 2990++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_TOO_LARGE); 2991++ return -1; 2992++ } 2993++ 2994++ if (max_out_len < in_len + c20_ctx->tag_len) 2995++ { 2996++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_BUFFER_TOO_SMALL); 2997++ return -1; 2998++ } 2999++ 3000++ if (nonce_len != CHACHA20_NONCE_LEN) 3001++ { 3002++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_IV_TOO_LARGE); 3003++ return -1; 3004++ } 3005++ 3006++ memset(poly1305_key, 0, sizeof(poly1305_key)); 3007++ CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_ctx->key, nonce, 0); 3008++ 3009++ CRYPTO_poly1305_init(&poly1305, poly1305_key); 3010++ poly1305_update_with_length(&poly1305, ad, ad_len); 3011++ CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1); 3012++ poly1305_update_with_length(&poly1305, out, in_len); 3013++ 3014++ if (c20_ctx->tag_len != POLY1305_TAG_LEN) 3015++ { 3016++ unsigned char tag[POLY1305_TAG_LEN]; 3017++ CRYPTO_poly1305_finish(&poly1305, tag); 3018++ memcpy(out + in_len, tag, c20_ctx->tag_len); 3019++ return in_len + c20_ctx->tag_len; 3020++ } 3021++ 3022++ CRYPTO_poly1305_finish(&poly1305, out + in_len); 3023++ return in_len + POLY1305_TAG_LEN; 3024++ } 3025++ 3026++static ssize_t aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx, 3027++ unsigned char *out, size_t max_out_len, 3028++ const unsigned char *nonce, size_t nonce_len, 3029++ const unsigned char *in, size_t in_len, 3030++ const unsigned char *ad, size_t ad_len) 3031++ { 3032++ const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; 3033++ unsigned char mac[POLY1305_TAG_LEN]; 3034++ unsigned char poly1305_key[32] ALIGNED; 3035++ size_t out_len; 3036++ poly1305_state 
poly1305; 3037++ const uint64_t in_len_64 = in_len; 3038++ 3039++ if (in_len < c20_ctx->tag_len) 3040++ { 3041++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT); 3042++ return -1; 3043++ } 3044++ 3045++ /* The underlying ChaCha implementation may not overflow the block 3046++ * counter into the second counter word. Therefore we disallow 3047++ * individual operations that work on more than 2TB at a time. 3048++ * |in_len_64| is needed because, on 32-bit platforms, size_t is only 3049++ * 32-bits and this produces a warning because it's always false. 3050++ * Casting to uint64_t inside the conditional is not sufficient to stop 3051++ * the warning. */ 3052++ if (in_len_64 >= (1ull << 32)*64-64) 3053++ { 3054++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_TOO_LARGE); 3055++ return -1; 3056++ } 3057++ 3058++ if (nonce_len != CHACHA20_NONCE_LEN) 3059++ { 3060++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_IV_TOO_LARGE); 3061++ return -1; 3062++ } 3063++ 3064++ out_len = in_len - c20_ctx->tag_len; 3065++ 3066++ if (max_out_len < out_len) 3067++ { 3068++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BUFFER_TOO_SMALL); 3069++ return -1; 3070++ } 3071++ 3072++ memset(poly1305_key, 0, sizeof(poly1305_key)); 3073++ CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_ctx->key, nonce, 0); 3074++ 3075++ CRYPTO_poly1305_init(&poly1305, poly1305_key); 3076++ poly1305_update_with_length(&poly1305, ad, ad_len); 3077++ poly1305_update_with_length(&poly1305, in, out_len); 3078++ CRYPTO_poly1305_finish(&poly1305, mac); 3079++ 3080++ if (CRYPTO_memcmp(mac, in + out_len, c20_ctx->tag_len) != 0) 3081++ { 3082++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT); 3083++ return -1; 3084++ } 3085++ 3086++ CRYPTO_chacha_20(out, in, out_len, c20_ctx->key, nonce, 1); 3087++ return out_len; 3088++ } 3089++ 3090++static const EVP_AEAD aead_chacha20_poly1305 = 3091++ { 3092++ 32, /* key len */ 3093++ CHACHA20_NONCE_LEN, /* nonce len */ 3094++ 
POLY1305_TAG_LEN, /* overhead */ 3095++ POLY1305_TAG_LEN, /* max tag length */ 3096++ 3097++ aead_chacha20_poly1305_init, 3098++ aead_chacha20_poly1305_cleanup, 3099++ aead_chacha20_poly1305_seal, 3100++ aead_chacha20_poly1305_open, 3101++ }; 3102++ 3103++const EVP_AEAD *EVP_aead_chacha20_poly1305() 3104++ { 3105++ return &aead_chacha20_poly1305; 3106++ } 3107++ 3108++#endif /* !OPENSSL_NO_CHACHA && !OPENSSL_NO_POLY1305 */ 3109+diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h 3110+index bd10642..7dc1656 100644 3111+--- a/crypto/evp/evp.h 3112++++ b/crypto/evp/evp.h 3113+@@ -1258,6 +1258,11 @@ typedef struct evp_aead_st EVP_AEAD; 3114+ const EVP_AEAD *EVP_aead_aes_128_gcm(void); 3115+ #endif 3116+ 3117++#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305) 3118++/* EVP_aead_chacha20_poly1305 is ChaCha20 with a Poly1305 authenticator. */ 3119++const EVP_AEAD *EVP_aead_chacha20_poly1305(void); 3120++#endif 3121++ 3122+ /* EVP_AEAD_key_length returns the length, in bytes, of the keys used by 3123+ * |aead|. 
*/ 3124+ size_t EVP_AEAD_key_length(const EVP_AEAD *aead); 3125+@@ -1360,6 +1365,9 @@ void ERR_load_EVP_strings(void); 3126+ #define EVP_F_AEAD_AES_128_GCM_INIT 183 3127+ #define EVP_F_AEAD_AES_128_GCM_OPEN 181 3128+ #define EVP_F_AEAD_AES_128_GCM_SEAL 182 3129++#define EVP_F_AEAD_CHACHA20_POLY1305_INIT 187 3130++#define EVP_F_AEAD_CHACHA20_POLY1305_OPEN 184 3131++#define EVP_F_AEAD_CHACHA20_POLY1305_SEAL 183 3132+ #define EVP_F_AEAD_CTX_OPEN 185 3133+ #define EVP_F_AEAD_CTX_SEAL 186 3134+ #define EVP_F_AESNI_INIT_KEY 165 3135+diff --git a/crypto/evp/evp_err.c b/crypto/evp/evp_err.c 3136+index c47969c..fb747e5 100644 3137+--- a/crypto/evp/evp_err.c 3138++++ b/crypto/evp/evp_err.c 3139+@@ -73,6 +73,9 @@ static ERR_STRING_DATA EVP_str_functs[]= 3140+ {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_INIT), "AEAD_AES_128_GCM_INIT"}, 3141+ {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_OPEN), "AEAD_AES_128_GCM_OPEN"}, 3142+ {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_SEAL), "AEAD_AES_128_GCM_SEAL"}, 3143++{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_INIT), "AEAD_CHACHA20_POLY1305_INIT"}, 3144++{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_OPEN), "AEAD_CHACHA20_POLY1305_OPEN"}, 3145++{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_SEAL), "AEAD_CHACHA20_POLY1305_SEAL"}, 3146+ {ERR_FUNC(EVP_F_AEAD_CTX_OPEN), "AEAD_CTX_OPEN"}, 3147+ {ERR_FUNC(EVP_F_AEAD_CTX_SEAL), "AEAD_CTX_SEAL"}, 3148+ {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, 3149+diff --git a/crypto/poly1305/Makefile b/crypto/poly1305/Makefile 3150+new file mode 100644 3151+index 0000000..397d7cd 3152+--- /dev/null 3153++++ b/crypto/poly1305/Makefile 3154+@@ -0,0 +1,81 @@ 3155++# 3156++# OpenSSL/crypto/poly1305/Makefile 3157++# 3158++ 3159++DIR= poly1305 3160++TOP= ../.. 
3161++CC= cc 3162++CPP= $(CC) -E 3163++INCLUDES= 3164++CFLAG=-g 3165++AR= ar r 3166++ 3167++POLY1305=poly1305_vec.o 3168++ 3169++CFLAGS= $(INCLUDES) $(CFLAG) 3170++ASFLAGS= $(INCLUDES) $(ASFLAG) 3171++AFLAGS= $(ASFLAGS) 3172++ 3173++GENERAL=Makefile 3174++TEST= 3175++APPS= 3176++ 3177++LIB=$(TOP)/libcrypto.a 3178++LIBSRC=poly1305_vec.c 3179++LIBOBJ=$(POLY1305) 3180++ 3181++SRC= $(LIBSRC) 3182++ 3183++EXHEADER=poly1305.h 3184++HEADER= $(EXHEADER) 3185++ 3186++ALL= $(GENERAL) $(SRC) $(HEADER) 3187++ 3188++top: 3189++ (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) 3190++ 3191++all: lib 3192++ 3193++lib: $(LIBOBJ) 3194++ $(AR) $(LIB) $(LIBOBJ) 3195++ $(RANLIB) $(LIB) || echo Never mind. 3196++ @touch lib 3197++ 3198++files: 3199++ $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO 3200++ 3201++links: 3202++ @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) 3203++ @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) 3204++ @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) 3205++ 3206++install: 3207++ @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... 3208++ @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ 3209++ do \ 3210++ (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ 3211++ chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ 3212++ done; 3213++ 3214++tags: 3215++ ctags $(SRC) 3216++ 3217++tests: 3218++ 3219++lint: 3220++ lint -DLINT $(INCLUDES) $(SRC)>fluff 3221++ 3222++depend: 3223++ @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... 3224++ $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) 3225++ 3226++dclean: 3227++ $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new 3228++ mv -f Makefile.new $(MAKEFILE) 3229++ 3230++clean: 3231++ rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff 3232++ 3233++# DO NOT DELETE THIS LINE -- make depend depends on it. 
3234++ 3235++poly1305_vec.o: ../../include/openssl/poly1305.h poly1305_vec.c 3236+diff --git a/crypto/poly1305/poly1305.c b/crypto/poly1305/poly1305.c 3237+new file mode 100644 3238+index 0000000..2e5621d 3239+--- /dev/null 3240++++ b/crypto/poly1305/poly1305.c 3241+@@ -0,0 +1,321 @@ 3242++/* ==================================================================== 3243++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. 3244++ * 3245++ * Redistribution and use in source and binary forms, with or without 3246++ * modification, are permitted provided that the following conditions 3247++ * are met: 3248++ * 3249++ * 1. Redistributions of source code must retain the above copyright 3250++ * notice, this list of conditions and the following disclaimer. 3251++ * 3252++ * 2. Redistributions in binary form must reproduce the above copyright 3253++ * notice, this list of conditions and the following disclaimer in 3254++ * the documentation and/or other materials provided with the 3255++ * distribution. 3256++ * 3257++ * 3. All advertising materials mentioning features or use of this 3258++ * software must display the following acknowledgment: 3259++ * "This product includes software developed by the OpenSSL Project 3260++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 3261++ * 3262++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 3263++ * endorse or promote products derived from this software without 3264++ * prior written permission. For written permission, please contact 3265++ * licensing@OpenSSL.org. 3266++ * 3267++ * 5. Products derived from this software may not be called "OpenSSL" 3268++ * nor may "OpenSSL" appear in their names without prior written 3269++ * permission of the OpenSSL Project. 3270++ * 3271++ * 6. 
Redistributions of any form whatsoever must retain the following 3272++ * acknowledgment: 3273++ * "This product includes software developed by the OpenSSL Project 3274++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 3275++ * 3276++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 3277++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 3278++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 3279++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 3280++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 3281++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 3282++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 3283++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3284++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 3285++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 3286++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 3287++ * OF THE POSSIBILITY OF SUCH DAMAGE. 3288++ * ==================================================================== 3289++ */ 3290++ 3291++/* This implementation of poly1305 is by Andrew Moon 3292++ * (https://github.com/floodyberry/poly1305-donna) and released as public 3293++ * domain. */ 3294++ 3295++#include <string.h> 3296++#include <stdint.h> 3297++#include <openssl/opensslconf.h> 3298++ 3299++#if !defined(OPENSSL_NO_POLY1305) 3300++ 3301++#include <openssl/poly1305.h> 3302++#include <openssl/crypto.h> 3303++ 3304++#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) 3305++/* We can assume little-endian. 
*/ 3306++static uint32_t U8TO32_LE(const unsigned char *m) 3307++ { 3308++ uint32_t r; 3309++ memcpy(&r, m, sizeof(r)); 3310++ return r; 3311++ } 3312++ 3313++static void U32TO8_LE(unsigned char *m, uint32_t v) 3314++ { 3315++ memcpy(m, &v, sizeof(v)); 3316++ } 3317++#else 3318++static uint32_t U8TO32_LE(const unsigned char *m) 3319++ { 3320++ return (uint32_t)m[0] | 3321++ (uint32_t)m[1] << 8 | 3322++ (uint32_t)m[2] << 16 | 3323++ (uint32_t)m[3] << 24; 3324++ } 3325++ 3326++static void U32TO8_LE(unsigned char *m, uint32_t v) 3327++ { 3328++ m[0] = v; 3329++ m[1] = v >> 8; 3330++ m[2] = v >> 16; 3331++ m[3] = v >> 24; 3332++ } 3333++#endif 3334++ 3335++static uint64_t 3336++mul32x32_64(uint32_t a, uint32_t b) 3337++ { 3338++ return (uint64_t)a * b; 3339++ } 3340++ 3341++ 3342++struct poly1305_state_st 3343++ { 3344++ uint32_t r0,r1,r2,r3,r4; 3345++ uint32_t s1,s2,s3,s4; 3346++ uint32_t h0,h1,h2,h3,h4; 3347++ unsigned char buf[16]; 3348++ unsigned int buf_used; 3349++ unsigned char key[16]; 3350++ }; 3351++ 3352++/* poly1305_blocks updates |state| given some amount of input data. This 3353++ * function may only be called with a |len| that is not a multiple of 16 at the 3354++ * end of the data. Otherwise the input must be buffered into 16 byte blocks. 
3355++ * */ 3356++static void poly1305_update(struct poly1305_state_st *state, 3357++ const unsigned char *in, size_t len) 3358++ { 3359++ uint32_t t0,t1,t2,t3; 3360++ uint64_t t[5]; 3361++ uint32_t b; 3362++ uint64_t c; 3363++ size_t j; 3364++ unsigned char mp[16]; 3365++ 3366++ if (len < 16) 3367++ goto poly1305_donna_atmost15bytes; 3368++ 3369++poly1305_donna_16bytes: 3370++ t0 = U8TO32_LE(in); 3371++ t1 = U8TO32_LE(in+4); 3372++ t2 = U8TO32_LE(in+8); 3373++ t3 = U8TO32_LE(in+12); 3374++ 3375++ in += 16; 3376++ len -= 16; 3377++ 3378++ state->h0 += t0 & 0x3ffffff; 3379++ state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; 3380++ state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; 3381++ state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; 3382++ state->h4 += (t3 >> 8) | (1 << 24); 3383++ 3384++poly1305_donna_mul: 3385++ t[0] = mul32x32_64(state->h0,state->r0) + 3386++ mul32x32_64(state->h1,state->s4) + 3387++ mul32x32_64(state->h2,state->s3) + 3388++ mul32x32_64(state->h3,state->s2) + 3389++ mul32x32_64(state->h4,state->s1); 3390++ t[1] = mul32x32_64(state->h0,state->r1) + 3391++ mul32x32_64(state->h1,state->r0) + 3392++ mul32x32_64(state->h2,state->s4) + 3393++ mul32x32_64(state->h3,state->s3) + 3394++ mul32x32_64(state->h4,state->s2); 3395++ t[2] = mul32x32_64(state->h0,state->r2) + 3396++ mul32x32_64(state->h1,state->r1) + 3397++ mul32x32_64(state->h2,state->r0) + 3398++ mul32x32_64(state->h3,state->s4) + 3399++ mul32x32_64(state->h4,state->s3); 3400++ t[3] = mul32x32_64(state->h0,state->r3) + 3401++ mul32x32_64(state->h1,state->r2) + 3402++ mul32x32_64(state->h2,state->r1) + 3403++ mul32x32_64(state->h3,state->r0) + 3404++ mul32x32_64(state->h4,state->s4); 3405++ t[4] = mul32x32_64(state->h0,state->r4) + 3406++ mul32x32_64(state->h1,state->r3) + 3407++ mul32x32_64(state->h2,state->r2) + 3408++ mul32x32_64(state->h3,state->r1) + 3409++ mul32x32_64(state->h4,state->r0); 3410++ 3411++ state->h0 = (uint32_t)t[0] & 0x3ffffff; c = 
(t[0] >> 26); 3412++ t[1] += c; state->h1 = (uint32_t)t[1] & 0x3ffffff; b = (uint32_t)(t[1] >> 26); 3413++ t[2] += b; state->h2 = (uint32_t)t[2] & 0x3ffffff; b = (uint32_t)(t[2] >> 26); 3414++ t[3] += b; state->h3 = (uint32_t)t[3] & 0x3ffffff; b = (uint32_t)(t[3] >> 26); 3415++ t[4] += b; state->h4 = (uint32_t)t[4] & 0x3ffffff; b = (uint32_t)(t[4] >> 26); 3416++ state->h0 += b * 5; 3417++ 3418++ if (len >= 16) 3419++ goto poly1305_donna_16bytes; 3420++ 3421++ /* final bytes */ 3422++poly1305_donna_atmost15bytes: 3423++ if (!len) 3424++ return; 3425++ 3426++ for (j = 0; j < len; j++) 3427++ mp[j] = in[j]; 3428++ mp[j++] = 1; 3429++ for (; j < 16; j++) 3430++ mp[j] = 0; 3431++ len = 0; 3432++ 3433++ t0 = U8TO32_LE(mp+0); 3434++ t1 = U8TO32_LE(mp+4); 3435++ t2 = U8TO32_LE(mp+8); 3436++ t3 = U8TO32_LE(mp+12); 3437++ 3438++ state->h0 += t0 & 0x3ffffff; 3439++ state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; 3440++ state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; 3441++ state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; 3442++ state->h4 += (t3 >> 8); 3443++ 3444++ goto poly1305_donna_mul; 3445++ } 3446++ 3447++void CRYPTO_poly1305_init(poly1305_state *statep, const unsigned char key[32]) 3448++ { 3449++ struct poly1305_state_st *state = (struct poly1305_state_st*) statep; 3450++ uint32_t t0,t1,t2,t3; 3451++ 3452++ t0 = U8TO32_LE(key+0); 3453++ t1 = U8TO32_LE(key+4); 3454++ t2 = U8TO32_LE(key+8); 3455++ t3 = U8TO32_LE(key+12); 3456++ 3457++ /* precompute multipliers */ 3458++ state->r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6; 3459++ state->r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12; 3460++ state->r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18; 3461++ state->r3 = t2 & 0x3f03fff; t3 >>= 8; 3462++ state->r4 = t3 & 0x00fffff; 3463++ 3464++ state->s1 = state->r1 * 5; 3465++ state->s2 = state->r2 * 5; 3466++ state->s3 = state->r3 * 5; 3467++ state->s4 = state->r4 * 5; 3468++ 3469++ /* init state */ 3470++ state->h0 = 0; 3471++ 
state->h1 = 0; 3472++ state->h2 = 0; 3473++ state->h3 = 0; 3474++ state->h4 = 0; 3475++ 3476++ state->buf_used = 0; 3477++ memcpy(state->key, key + 16, sizeof(state->key)); 3478++ } 3479++ 3480++void CRYPTO_poly1305_update(poly1305_state *statep, const unsigned char *in, 3481++ size_t in_len) 3482++ { 3483++ unsigned int i; 3484++ struct poly1305_state_st *state = (struct poly1305_state_st*) statep; 3485++ 3486++ if (state->buf_used) 3487++ { 3488++ unsigned int todo = 16 - state->buf_used; 3489++ if (todo > in_len) 3490++ todo = in_len; 3491++ for (i = 0; i < todo; i++) 3492++ state->buf[state->buf_used + i] = in[i]; 3493++ state->buf_used += todo; 3494++ in_len -= todo; 3495++ in += todo; 3496++ 3497++ if (state->buf_used == 16) 3498++ { 3499++ poly1305_update(state, state->buf, 16); 3500++ state->buf_used = 0; 3501++ } 3502++ } 3503++ 3504++ if (in_len >= 16) 3505++ { 3506++ size_t todo = in_len & ~0xf; 3507++ poly1305_update(state, in, todo); 3508++ in += todo; 3509++ in_len &= 0xf; 3510++ } 3511++ 3512++ if (in_len) 3513++ { 3514++ for (i = 0; i < in_len; i++) 3515++ state->buf[i] = in[i]; 3516++ state->buf_used = in_len; 3517++ } 3518++ } 3519++ 3520++void CRYPTO_poly1305_finish(poly1305_state *statep, unsigned char mac[16]) 3521++ { 3522++ struct poly1305_state_st *state = (struct poly1305_state_st*) statep; 3523++ uint64_t f0,f1,f2,f3; 3524++ uint32_t g0,g1,g2,g3,g4; 3525++ uint32_t b, nb; 3526++ 3527++ if (state->buf_used) 3528++ poly1305_update(state, state->buf, state->buf_used); 3529++ 3530++ b = state->h0 >> 26; state->h0 = state->h0 & 0x3ffffff; 3531++ state->h1 += b; b = state->h1 >> 26; state->h1 = state->h1 & 0x3ffffff; 3532++ state->h2 += b; b = state->h2 >> 26; state->h2 = state->h2 & 0x3ffffff; 3533++ state->h3 += b; b = state->h3 >> 26; state->h3 = state->h3 & 0x3ffffff; 3534++ state->h4 += b; b = state->h4 >> 26; state->h4 = state->h4 & 0x3ffffff; 3535++ state->h0 += b * 5; 3536++ 3537++ g0 = state->h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff; 
3538++ g1 = state->h1 + b; b = g1 >> 26; g1 &= 0x3ffffff; 3539++ g2 = state->h2 + b; b = g2 >> 26; g2 &= 0x3ffffff; 3540++ g3 = state->h3 + b; b = g3 >> 26; g3 &= 0x3ffffff; 3541++ g4 = state->h4 + b - (1 << 26); 3542++ 3543++ b = (g4 >> 31) - 1; 3544++ nb = ~b; 3545++ state->h0 = (state->h0 & nb) | (g0 & b); 3546++ state->h1 = (state->h1 & nb) | (g1 & b); 3547++ state->h2 = (state->h2 & nb) | (g2 & b); 3548++ state->h3 = (state->h3 & nb) | (g3 & b); 3549++ state->h4 = (state->h4 & nb) | (g4 & b); 3550++ 3551++ f0 = ((state->h0 ) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&state->key[0]); 3552++ f1 = ((state->h1 >> 6) | (state->h2 << 20)) + (uint64_t)U8TO32_LE(&state->key[4]); 3553++ f2 = ((state->h2 >> 12) | (state->h3 << 14)) + (uint64_t)U8TO32_LE(&state->key[8]); 3554++ f3 = ((state->h3 >> 18) | (state->h4 << 8)) + (uint64_t)U8TO32_LE(&state->key[12]); 3555++ 3556++ U32TO8_LE(&mac[ 0], f0); f1 += (f0 >> 32); 3557++ U32TO8_LE(&mac[ 4], f1); f2 += (f1 >> 32); 3558++ U32TO8_LE(&mac[ 8], f2); f3 += (f2 >> 32); 3559++ U32TO8_LE(&mac[12], f3); 3560++ } 3561++ 3562++#endif /* !OPENSSL_NO_POLY1305 */ 3563+diff --git a/crypto/poly1305/poly1305.h b/crypto/poly1305/poly1305.h 3564+new file mode 100644 3565+index 0000000..28f85ed 3566+--- /dev/null 3567++++ b/crypto/poly1305/poly1305.h 3568+@@ -0,0 +1,88 @@ 3569++/* 3570++ * Poly1305 3571++ * 3572++ * Created on: Jun, 2013 3573++ * Author: Elie Bursztein (elieb@google.com) 3574++ * 3575++ * Adapted from the estream code by D. Bernstein. 3576++ */ 3577++/* ==================================================================== 3578++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. 3579++ * 3580++ * Redistribution and use in source and binary forms, with or without 3581++ * modification, are permitted provided that the following conditions 3582++ * are met: 3583++ * 3584++ * 1. Redistributions of source code must retain the above copyright 3585++ * notice, this list of conditions and the following disclaimer. 
3586++ * 3587++ * 2. Redistributions in binary form must reproduce the above copyright 3588++ * notice, this list of conditions and the following disclaimer in 3589++ * the documentation and/or other materials provided with the 3590++ * distribution. 3591++ * 3592++ * 3. All advertising materials mentioning features or use of this 3593++ * software must display the following acknowledgment: 3594++ * "This product includes software developed by the OpenSSL Project 3595++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 3596++ * 3597++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 3598++ * endorse or promote products derived from this software without 3599++ * prior written permission. For written permission, please contact 3600++ * licensing@OpenSSL.org. 3601++ * 3602++ * 5. Products derived from this software may not be called "OpenSSL" 3603++ * nor may "OpenSSL" appear in their names without prior written 3604++ * permission of the OpenSSL Project. 3605++ * 3606++ * 6. Redistributions of any form whatsoever must retain the following 3607++ * acknowledgment: 3608++ * "This product includes software developed by the OpenSSL Project 3609++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 3610++ * 3611++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 3612++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 3613++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 3614++ * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE OpenSSL PROJECT OR 3615++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 3616++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 3617++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 3618++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3619++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 3620++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 3621++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 3622++ * OF THE POSSIBILITY OF SUCH DAMAGE. 3623++ * ==================================================================== 3624++ */ 3625++ 3626++#ifndef HEADER_POLY1305_H_ 3627++#define HEADER_POLY1305_H_ 3628++ 3629++#include <stdint.h> 3630++#include <openssl/opensslconf.h> 3631++ 3632++#if defined(OPENSSL_NO_POLY1305) 3633++#error Poly1305 support is disabled. 3634++#endif 3635++ 3636++typedef unsigned char poly1305_state[512]; 3637++ 3638++/* poly1305_init sets up |state| so that it can be used to calculate an 3639++ * authentication tag with the one-time key |key|. Note that |key| is a 3640++ * one-time key and therefore there is no `reset' method because that would 3641++ * enable several messages to be authenticated with the same key. */ 3642++extern void CRYPTO_poly1305_init(poly1305_state* state, 3643++ const unsigned char key[32]); 3644++ 3645++/* poly1305_update processes |in_len| bytes from |in|. It can be called zero or 3646++ * more times after poly1305_init. */ 3647++extern void CRYPTO_poly1305_update(poly1305_state* state, 3648++ const unsigned char *in, 3649++ size_t in_len); 3650++ 3651++/* poly1305_finish completes the poly1305 calculation and writes a 16 byte 3652++ * authentication tag to |mac|. 
*/ 3653++extern void CRYPTO_poly1305_finish(poly1305_state* state, 3654++ unsigned char mac[16]); 3655++ 3656++#endif /* HEADER_POLY1305_H_ */ 3657+diff --git a/crypto/poly1305/poly1305_arm.c b/crypto/poly1305/poly1305_arm.c 3658+new file mode 100644 3659+index 0000000..adcef35 3660+--- /dev/null 3661++++ b/crypto/poly1305/poly1305_arm.c 3662+@@ -0,0 +1,327 @@ 3663++/* ==================================================================== 3664++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. 3665++ * 3666++ * Redistribution and use in source and binary forms, with or without 3667++ * modification, are permitted provided that the following conditions 3668++ * are met: 3669++ * 3670++ * 1. Redistributions of source code must retain the above copyright 3671++ * notice, this list of conditions and the following disclaimer. 3672++ * 3673++ * 2. Redistributions in binary form must reproduce the above copyright 3674++ * notice, this list of conditions and the following disclaimer in 3675++ * the documentation and/or other materials provided with the 3676++ * distribution. 3677++ * 3678++ * 3. All advertising materials mentioning features or use of this 3679++ * software must display the following acknowledgment: 3680++ * "This product includes software developed by the OpenSSL Project 3681++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 3682++ * 3683++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 3684++ * endorse or promote products derived from this software without 3685++ * prior written permission. For written permission, please contact 3686++ * licensing@OpenSSL.org. 3687++ * 3688++ * 5. Products derived from this software may not be called "OpenSSL" 3689++ * nor may "OpenSSL" appear in their names without prior written 3690++ * permission of the OpenSSL Project. 3691++ * 3692++ * 6. 
Redistributions of any form whatsoever must retain the following 3693++ * acknowledgment: 3694++ * "This product includes software developed by the OpenSSL Project 3695++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 3696++ * 3697++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 3698++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 3699++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 3700++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 3701++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 3702++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 3703++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 3704++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3705++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 3706++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 3707++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 3708++ * OF THE POSSIBILITY OF SUCH DAMAGE. 3709++ * ==================================================================== 3710++ */ 3711++ 3712++/* This implementation was taken from the public domain, neon2 version in 3713++ * SUPERCOP by D. J. Bernstein and Peter Schwabe. 
*/ 3714++ 3715++#include <stdint.h> 3716++ 3717++#include <openssl/poly1305.h> 3718++ 3719++#if !defined(OPENSSL_NO_POLY1305) 3720++ 3721++typedef struct { 3722++ uint32_t v[12]; /* for alignment; only using 10 */ 3723++} fe1305x2; 3724++ 3725++#define addmulmod openssl_poly1305_neon2_addmulmod 3726++#define blocks openssl_poly1305_neon2_blocks 3727++ 3728++extern void addmulmod(fe1305x2 *r, const fe1305x2 *x, const fe1305x2 *y, const fe1305x2 *c); 3729++ 3730++extern int blocks(fe1305x2 *h, const fe1305x2 *precomp, const unsigned char *in, unsigned int inlen); 3731++ 3732++static void freeze(fe1305x2 *r) 3733++ { 3734++ int i; 3735++ 3736++ uint32_t x0 = r->v[0]; 3737++ uint32_t x1 = r->v[2]; 3738++ uint32_t x2 = r->v[4]; 3739++ uint32_t x3 = r->v[6]; 3740++ uint32_t x4 = r->v[8]; 3741++ uint32_t y0; 3742++ uint32_t y1; 3743++ uint32_t y2; 3744++ uint32_t y3; 3745++ uint32_t y4; 3746++ uint32_t swap; 3747++ 3748++ for (i = 0;i < 3;++i) 3749++ { 3750++ x1 += x0 >> 26; x0 &= 0x3ffffff; 3751++ x2 += x1 >> 26; x1 &= 0x3ffffff; 3752++ x3 += x2 >> 26; x2 &= 0x3ffffff; 3753++ x4 += x3 >> 26; x3 &= 0x3ffffff; 3754++ x0 += 5*(x4 >> 26); x4 &= 0x3ffffff; 3755++ } 3756++ 3757++ y0 = x0 + 5; 3758++ y1 = x1 + (y0 >> 26); y0 &= 0x3ffffff; 3759++ y2 = x2 + (y1 >> 26); y1 &= 0x3ffffff; 3760++ y3 = x3 + (y2 >> 26); y2 &= 0x3ffffff; 3761++ y4 = x4 + (y3 >> 26); y3 &= 0x3ffffff; 3762++ swap = -(y4 >> 26); y4 &= 0x3ffffff; 3763++ 3764++ y0 ^= x0; 3765++ y1 ^= x1; 3766++ y2 ^= x2; 3767++ y3 ^= x3; 3768++ y4 ^= x4; 3769++ 3770++ y0 &= swap; 3771++ y1 &= swap; 3772++ y2 &= swap; 3773++ y3 &= swap; 3774++ y4 &= swap; 3775++ 3776++ y0 ^= x0; 3777++ y1 ^= x1; 3778++ y2 ^= x2; 3779++ y3 ^= x3; 3780++ y4 ^= x4; 3781++ 3782++ r->v[0] = y0; 3783++ r->v[2] = y1; 3784++ r->v[4] = y2; 3785++ r->v[6] = y3; 3786++ r->v[8] = y4; 3787++ } 3788++ 3789++static void fe1305x2_tobytearray(unsigned char *r, fe1305x2 *x) 3790++ { 3791++ uint32_t x0 = x->v[0]; 3792++ uint32_t x1 = x->v[2]; 3793++ uint32_t x2 
= x->v[4]; 3794++ uint32_t x3 = x->v[6]; 3795++ uint32_t x4 = x->v[8]; 3796++ 3797++ x1 += x0 >> 26; 3798++ x0 &= 0x3ffffff; 3799++ x2 += x1 >> 26; 3800++ x1 &= 0x3ffffff; 3801++ x3 += x2 >> 26; 3802++ x2 &= 0x3ffffff; 3803++ x4 += x3 >> 26; 3804++ x3 &= 0x3ffffff; 3805++ 3806++ *(uint32_t *) r = x0 + (x1 << 26); 3807++ *(uint32_t *) (r + 4) = (x1 >> 6) + (x2 << 20); 3808++ *(uint32_t *) (r + 8) = (x2 >> 12) + (x3 << 14); 3809++ *(uint32_t *) (r + 12) = (x3 >> 18) + (x4 << 8); 3810++ } 3811++ 3812++/* load32 exists to avoid breaking strict aliasing rules in 3813++ * fe1305x2_frombytearray. */ 3814++static uint32_t load32(unsigned char *t) 3815++ { 3816++ uint32_t tmp; 3817++ memcpy(&tmp, t, sizeof(tmp)); 3818++ return tmp; 3819++ } 3820++ 3821++static void fe1305x2_frombytearray(fe1305x2 *r, const unsigned char *x, unsigned long long xlen) 3822++ { 3823++ int i; 3824++ unsigned char t[17]; 3825++ 3826++ for (i = 0; (i < 16) && (i < xlen); i++) 3827++ t[i] = x[i]; 3828++ xlen -= i; 3829++ x += i; 3830++ t[i++] = 1; 3831++ for (; i<17; i++) 3832++ t[i] = 0; 3833++ 3834++ r->v[0] = 0x3ffffff & load32(t); 3835++ r->v[2] = 0x3ffffff & (load32(t + 3) >> 2); 3836++ r->v[4] = 0x3ffffff & (load32(t + 6) >> 4); 3837++ r->v[6] = 0x3ffffff & (load32(t + 9) >> 6); 3838++ r->v[8] = load32(t + 13); 3839++ 3840++ if (xlen) 3841++ { 3842++ for (i = 0; (i < 16) && (i < xlen); i++) 3843++ t[i] = x[i]; 3844++ t[i++] = 1; 3845++ for (; i<17; i++) 3846++ t[i] = 0; 3847++ 3848++ r->v[1] = 0x3ffffff & load32(t); 3849++ r->v[3] = 0x3ffffff & (load32(t + 3) >> 2); 3850++ r->v[5] = 0x3ffffff & (load32(t + 6) >> 4); 3851++ r->v[7] = 0x3ffffff & (load32(t + 9) >> 6); 3852++ r->v[9] = load32(t + 13); 3853++ } 3854++ else 3855++ r->v[1] = r->v[3] = r->v[5] = r->v[7] = r->v[9] = 0; 3856++ } 3857++ 3858++static const fe1305x2 zero __attribute__ ((aligned (16))); 3859++ 3860++struct poly1305_state_st { 3861++ unsigned char data[sizeof(fe1305x2[5]) + 128]; 3862++ unsigned char buf[32]; 3863++ 
unsigned int buf_used; 3864++ unsigned char key[16]; 3865++}; 3866++ 3867++void CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) 3868++ { 3869++ struct poly1305_state_st *st = (struct poly1305_state_st*) (state); 3870++ fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); 3871++ fe1305x2 *const h = r + 1; 3872++ fe1305x2 *const c = h + 1; 3873++ fe1305x2 *const precomp = c + 1; 3874++ unsigned int j; 3875++ 3876++ r->v[1] = r->v[0] = 0x3ffffff & *(uint32_t *) key; 3877++ r->v[3] = r->v[2] = 0x3ffff03 & ((*(uint32_t *) (key + 3)) >> 2); 3878++ r->v[5] = r->v[4] = 0x3ffc0ff & ((*(uint32_t *) (key + 6)) >> 4); 3879++ r->v[7] = r->v[6] = 0x3f03fff & ((*(uint32_t *) (key + 9)) >> 6); 3880++ r->v[9] = r->v[8] = 0x00fffff & ((*(uint32_t *) (key + 12)) >> 8); 3881++ 3882++ for (j = 0; j < 10; j++) 3883++ h->v[j] = 0; /* XXX: should fast-forward a bit */ 3884++ 3885++ addmulmod(precomp,r,r,&zero); /* precompute r^2 */ 3886++ addmulmod(precomp + 1,precomp,precomp,&zero); /* precompute r^4 */ 3887++ 3888++ memcpy(st->key, key + 16, 16); 3889++ st->buf_used = 0; 3890++ } 3891++ 3892++void CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *in, size_t in_len) 3893++ { 3894++ struct poly1305_state_st *st = (struct poly1305_state_st*) (state); 3895++ fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); 3896++ fe1305x2 *const h = r + 1; 3897++ fe1305x2 *const c = h + 1; 3898++ fe1305x2 *const precomp = c + 1; 3899++ unsigned int i; 3900++ 3901++ if (st->buf_used) 3902++ { 3903++ unsigned int todo = 32 - st->buf_used; 3904++ if (todo > in_len) 3905++ todo = in_len; 3906++ for (i = 0; i < todo; i++) 3907++ st->buf[st->buf_used + i] = in[i]; 3908++ st->buf_used += todo; 3909++ in_len -= todo; 3910++ in += todo; 3911++ 3912++ if (st->buf_used == sizeof(st->buf) && in_len) 3913++ { 3914++ addmulmod(h,h,precomp,&zero); 3915++ fe1305x2_frombytearray(c, st->buf, sizeof(st->buf)); 3916++ for (i = 0; i < 10; i++) 
3917++ h->v[i] += c->v[i]; 3918++ st->buf_used = 0; 3919++ } 3920++ } 3921++ 3922++ while (in_len > 32) 3923++ { 3924++ unsigned int tlen = 1048576; 3925++ if (in_len < tlen) 3926++ tlen = in_len; 3927++ tlen -= blocks(h, precomp, in, tlen); 3928++ in_len -= tlen; 3929++ in += tlen; 3930++ } 3931++ 3932++ if (in_len) 3933++ { 3934++ for (i = 0; i < in_len; i++) 3935++ st->buf[i] = in[i]; 3936++ st->buf_used = in_len; 3937++ } 3938++ } 3939++ 3940++void CRYPTO_poly1305_finish(poly1305_state* state, unsigned char mac[16]) 3941++ { 3942++ struct poly1305_state_st *st = (struct poly1305_state_st*) (state); 3943++ fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); 3944++ fe1305x2 *const h = r + 1; 3945++ fe1305x2 *const c = h + 1; 3946++ fe1305x2 *const precomp = c + 1; 3947++ 3948++ addmulmod(h,h,precomp,&zero); 3949++ 3950++ if (st->buf_used > 16) 3951++ { 3952++ fe1305x2_frombytearray(c, st->buf, st->buf_used); 3953++ precomp->v[1] = r->v[1]; 3954++ precomp->v[3] = r->v[3]; 3955++ precomp->v[5] = r->v[5]; 3956++ precomp->v[7] = r->v[7]; 3957++ precomp->v[9] = r->v[9]; 3958++ addmulmod(h,h,precomp,c); 3959++ } 3960++ else if (st->buf_used > 0) 3961++ { 3962++ fe1305x2_frombytearray(c, st->buf, st->buf_used); 3963++ r->v[1] = 1; 3964++ r->v[3] = 0; 3965++ r->v[5] = 0; 3966++ r->v[7] = 0; 3967++ r->v[9] = 0; 3968++ addmulmod(h,h,r,c); 3969++ } 3970++ 3971++ h->v[0] += h->v[1]; 3972++ h->v[2] += h->v[3]; 3973++ h->v[4] += h->v[5]; 3974++ h->v[6] += h->v[7]; 3975++ h->v[8] += h->v[9]; 3976++ freeze(h); 3977++ 3978++ fe1305x2_frombytearray(c, st->key, 16); 3979++ c->v[8] ^= (1 << 24); 3980++ 3981++ h->v[0] += c->v[0]; 3982++ h->v[2] += c->v[2]; 3983++ h->v[4] += c->v[4]; 3984++ h->v[6] += c->v[6]; 3985++ h->v[8] += c->v[8]; 3986++ fe1305x2_tobytearray(mac, h); 3987++ } 3988++ 3989++#endif /* !OPENSSL_NO_POLY1305 */ 3990+diff --git a/crypto/poly1305/poly1305_arm_asm.S b/crypto/poly1305/poly1305_arm_asm.S 3991+new file mode 100644 3992+index 
0000000..449d16f 3993+--- /dev/null 3994++++ b/crypto/poly1305/poly1305_arm_asm.S 3995+@@ -0,0 +1,2009 @@ 3996++# This implementation was taken from the public domain, neon2 version in 3997++# SUPERCOP by D. J. Bernstein and Peter Schwabe. 3998++ 3999++# qhasm: int32 input_0 4000++ 4001++# qhasm: int32 input_1 4002++ 4003++# qhasm: int32 input_2 4004++ 4005++# qhasm: int32 input_3 4006++ 4007++# qhasm: stack32 input_4 4008++ 4009++# qhasm: stack32 input_5 4010++ 4011++# qhasm: stack32 input_6 4012++ 4013++# qhasm: stack32 input_7 4014++ 4015++# qhasm: int32 caller_r4 4016++ 4017++# qhasm: int32 caller_r5 4018++ 4019++# qhasm: int32 caller_r6 4020++ 4021++# qhasm: int32 caller_r7 4022++ 4023++# qhasm: int32 caller_r8 4024++ 4025++# qhasm: int32 caller_r9 4026++ 4027++# qhasm: int32 caller_r10 4028++ 4029++# qhasm: int32 caller_r11 4030++ 4031++# qhasm: int32 caller_r12 4032++ 4033++# qhasm: int32 caller_r14 4034++ 4035++# qhasm: reg128 caller_q4 4036++ 4037++# qhasm: reg128 caller_q5 4038++ 4039++# qhasm: reg128 caller_q6 4040++ 4041++# qhasm: reg128 caller_q7 4042++ 4043++# qhasm: startcode 4044++.fpu neon 4045++.text 4046++ 4047++# qhasm: reg128 r0 4048++ 4049++# qhasm: reg128 r1 4050++ 4051++# qhasm: reg128 r2 4052++ 4053++# qhasm: reg128 r3 4054++ 4055++# qhasm: reg128 r4 4056++ 4057++# qhasm: reg128 x01 4058++ 4059++# qhasm: reg128 x23 4060++ 4061++# qhasm: reg128 x4 4062++ 4063++# qhasm: reg128 y0 4064++ 4065++# qhasm: reg128 y12 4066++ 4067++# qhasm: reg128 y34 4068++ 4069++# qhasm: reg128 5y12 4070++ 4071++# qhasm: reg128 5y34 4072++ 4073++# qhasm: stack128 y0_stack 4074++ 4075++# qhasm: stack128 y12_stack 4076++ 4077++# qhasm: stack128 y34_stack 4078++ 4079++# qhasm: stack128 5y12_stack 4080++ 4081++# qhasm: stack128 5y34_stack 4082++ 4083++# qhasm: reg128 z0 4084++ 4085++# qhasm: reg128 z12 4086++ 4087++# qhasm: reg128 z34 4088++ 4089++# qhasm: reg128 5z12 4090++ 4091++# qhasm: reg128 5z34 4092++ 4093++# qhasm: stack128 z0_stack 4094++ 4095++# qhasm: 
stack128 z12_stack 4096++ 4097++# qhasm: stack128 z34_stack 4098++ 4099++# qhasm: stack128 5z12_stack 4100++ 4101++# qhasm: stack128 5z34_stack 4102++ 4103++# qhasm: stack128 two24 4104++ 4105++# qhasm: int32 ptr 4106++ 4107++# qhasm: reg128 c01 4108++ 4109++# qhasm: reg128 c23 4110++ 4111++# qhasm: reg128 d01 4112++ 4113++# qhasm: reg128 d23 4114++ 4115++# qhasm: reg128 t0 4116++ 4117++# qhasm: reg128 t1 4118++ 4119++# qhasm: reg128 t2 4120++ 4121++# qhasm: reg128 t3 4122++ 4123++# qhasm: reg128 t4 4124++ 4125++# qhasm: reg128 mask 4126++ 4127++# qhasm: reg128 u0 4128++ 4129++# qhasm: reg128 u1 4130++ 4131++# qhasm: reg128 u2 4132++ 4133++# qhasm: reg128 u3 4134++ 4135++# qhasm: reg128 u4 4136++ 4137++# qhasm: reg128 v01 4138++ 4139++# qhasm: reg128 mid 4140++ 4141++# qhasm: reg128 v23 4142++ 4143++# qhasm: reg128 v4 4144++ 4145++# qhasm: int32 len 4146++ 4147++# qhasm: qpushenter crypto_onetimeauth_poly1305_neon2_blocks 4148++.align 4 4149++.global openssl_poly1305_neon2_blocks 4150++.type openssl_poly1305_neon2_blocks STT_FUNC 4151++openssl_poly1305_neon2_blocks: 4152++vpush {q4,q5,q6,q7} 4153++mov r12,sp 4154++sub sp,sp,#192 4155++and sp,sp,#0xffffffe0 4156++ 4157++# qhasm: len = input_3 4158++# asm 1: mov >len=int32#4,<input_3=int32#4 4159++# asm 2: mov >len=r3,<input_3=r3 4160++mov r3,r3 4161++ 4162++# qhasm: new y0 4163++ 4164++# qhasm: y0 = mem64[input_1]y0[1]; input_1 += 8 4165++# asm 1: vld1.8 {<y0=reg128#1%bot},[<input_1=int32#2]! 4166++# asm 2: vld1.8 {<y0=d0},[<input_1=r1]! 4167++vld1.8 {d0},[r1]! 4168++ 4169++# qhasm: y12 = mem128[input_1]; input_1 += 16 4170++# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<input_1=int32#2]! 4171++# asm 2: vld1.8 {>y12=d2->y12=d3},[<input_1=r1]! 4172++vld1.8 {d2-d3},[r1]! 4173++ 4174++# qhasm: y34 = mem128[input_1]; input_1 += 16 4175++# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<input_1=int32#2]! 4176++# asm 2: vld1.8 {>y34=d4->y34=d5},[<input_1=r1]! 4177++vld1.8 {d4-d5},[r1]! 
4178++ 4179++# qhasm: input_1 += 8 4180++# asm 1: add >input_1=int32#2,<input_1=int32#2,#8 4181++# asm 2: add >input_1=r1,<input_1=r1,#8 4182++add r1,r1,#8 4183++ 4184++# qhasm: new z0 4185++ 4186++# qhasm: z0 = mem64[input_1]z0[1]; input_1 += 8 4187++# asm 1: vld1.8 {<z0=reg128#4%bot},[<input_1=int32#2]! 4188++# asm 2: vld1.8 {<z0=d6},[<input_1=r1]! 4189++vld1.8 {d6},[r1]! 4190++ 4191++# qhasm: z12 = mem128[input_1]; input_1 += 16 4192++# asm 1: vld1.8 {>z12=reg128#5%bot->z12=reg128#5%top},[<input_1=int32#2]! 4193++# asm 2: vld1.8 {>z12=d8->z12=d9},[<input_1=r1]! 4194++vld1.8 {d8-d9},[r1]! 4195++ 4196++# qhasm: z34 = mem128[input_1]; input_1 += 16 4197++# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<input_1=int32#2]! 4198++# asm 2: vld1.8 {>z34=d10->z34=d11},[<input_1=r1]! 4199++vld1.8 {d10-d11},[r1]! 4200++ 4201++# qhasm: 2x mask = 0xffffffff 4202++# asm 1: vmov.i64 >mask=reg128#7,#0xffffffff 4203++# asm 2: vmov.i64 >mask=q6,#0xffffffff 4204++vmov.i64 q6,#0xffffffff 4205++ 4206++# qhasm: 2x u4 = 0xff 4207++# asm 1: vmov.i64 >u4=reg128#8,#0xff 4208++# asm 2: vmov.i64 >u4=q7,#0xff 4209++vmov.i64 q7,#0xff 4210++ 4211++# qhasm: x01 aligned= mem128[input_0];input_0+=16 4212++# asm 1: vld1.8 {>x01=reg128#9%bot->x01=reg128#9%top},[<input_0=int32#1,: 128]! 4213++# asm 2: vld1.8 {>x01=d16->x01=d17},[<input_0=r0,: 128]! 4214++vld1.8 {d16-d17},[r0,: 128]! 4215++ 4216++# qhasm: x23 aligned= mem128[input_0];input_0+=16 4217++# asm 1: vld1.8 {>x23=reg128#10%bot->x23=reg128#10%top},[<input_0=int32#1,: 128]! 4218++# asm 2: vld1.8 {>x23=d18->x23=d19},[<input_0=r0,: 128]! 4219++vld1.8 {d18-d19},[r0,: 128]! 
4220++ 4221++# qhasm: x4 aligned= mem64[input_0]x4[1] 4222++# asm 1: vld1.8 {<x4=reg128#11%bot},[<input_0=int32#1,: 64] 4223++# asm 2: vld1.8 {<x4=d20},[<input_0=r0,: 64] 4224++vld1.8 {d20},[r0,: 64] 4225++ 4226++# qhasm: input_0 -= 32 4227++# asm 1: sub >input_0=int32#1,<input_0=int32#1,#32 4228++# asm 2: sub >input_0=r0,<input_0=r0,#32 4229++sub r0,r0,#32 4230++ 4231++# qhasm: 2x mask unsigned>>=6 4232++# asm 1: vshr.u64 >mask=reg128#7,<mask=reg128#7,#6 4233++# asm 2: vshr.u64 >mask=q6,<mask=q6,#6 4234++vshr.u64 q6,q6,#6 4235++ 4236++# qhasm: 2x u4 unsigned>>= 7 4237++# asm 1: vshr.u64 >u4=reg128#8,<u4=reg128#8,#7 4238++# asm 2: vshr.u64 >u4=q7,<u4=q7,#7 4239++vshr.u64 q7,q7,#7 4240++ 4241++# qhasm: 4x 5y12 = y12 << 2 4242++# asm 1: vshl.i32 >5y12=reg128#12,<y12=reg128#2,#2 4243++# asm 2: vshl.i32 >5y12=q11,<y12=q1,#2 4244++vshl.i32 q11,q1,#2 4245++ 4246++# qhasm: 4x 5y34 = y34 << 2 4247++# asm 1: vshl.i32 >5y34=reg128#13,<y34=reg128#3,#2 4248++# asm 2: vshl.i32 >5y34=q12,<y34=q2,#2 4249++vshl.i32 q12,q2,#2 4250++ 4251++# qhasm: 4x 5y12 += y12 4252++# asm 1: vadd.i32 >5y12=reg128#12,<5y12=reg128#12,<y12=reg128#2 4253++# asm 2: vadd.i32 >5y12=q11,<5y12=q11,<y12=q1 4254++vadd.i32 q11,q11,q1 4255++ 4256++# qhasm: 4x 5y34 += y34 4257++# asm 1: vadd.i32 >5y34=reg128#13,<5y34=reg128#13,<y34=reg128#3 4258++# asm 2: vadd.i32 >5y34=q12,<5y34=q12,<y34=q2 4259++vadd.i32 q12,q12,q2 4260++ 4261++# qhasm: 2x u4 <<= 24 4262++# asm 1: vshl.i64 >u4=reg128#8,<u4=reg128#8,#24 4263++# asm 2: vshl.i64 >u4=q7,<u4=q7,#24 4264++vshl.i64 q7,q7,#24 4265++ 4266++# qhasm: 4x 5z12 = z12 << 2 4267++# asm 1: vshl.i32 >5z12=reg128#14,<z12=reg128#5,#2 4268++# asm 2: vshl.i32 >5z12=q13,<z12=q4,#2 4269++vshl.i32 q13,q4,#2 4270++ 4271++# qhasm: 4x 5z34 = z34 << 2 4272++# asm 1: vshl.i32 >5z34=reg128#15,<z34=reg128#6,#2 4273++# asm 2: vshl.i32 >5z34=q14,<z34=q5,#2 4274++vshl.i32 q14,q5,#2 4275++ 4276++# qhasm: 4x 5z12 += z12 4277++# asm 1: vadd.i32 >5z12=reg128#14,<5z12=reg128#14,<z12=reg128#5 
4278++# asm 2: vadd.i32 >5z12=q13,<5z12=q13,<z12=q4 4279++vadd.i32 q13,q13,q4 4280++ 4281++# qhasm: 4x 5z34 += z34 4282++# asm 1: vadd.i32 >5z34=reg128#15,<5z34=reg128#15,<z34=reg128#6 4283++# asm 2: vadd.i32 >5z34=q14,<5z34=q14,<z34=q5 4284++vadd.i32 q14,q14,q5 4285++ 4286++# qhasm: new two24 4287++ 4288++# qhasm: new y0_stack 4289++ 4290++# qhasm: new y12_stack 4291++ 4292++# qhasm: new y34_stack 4293++ 4294++# qhasm: new 5y12_stack 4295++ 4296++# qhasm: new 5y34_stack 4297++ 4298++# qhasm: new z0_stack 4299++ 4300++# qhasm: new z12_stack 4301++ 4302++# qhasm: new z34_stack 4303++ 4304++# qhasm: new 5z12_stack 4305++ 4306++# qhasm: new 5z34_stack 4307++ 4308++# qhasm: ptr = &two24 4309++# asm 1: lea >ptr=int32#2,<two24=stack128#1 4310++# asm 2: lea >ptr=r1,<two24=[sp,#0] 4311++add r1,sp,#0 4312++ 4313++# qhasm: mem128[ptr] aligned= u4 4314++# asm 1: vst1.8 {<u4=reg128#8%bot-<u4=reg128#8%top},[<ptr=int32#2,: 128] 4315++# asm 2: vst1.8 {<u4=d14-<u4=d15},[<ptr=r1,: 128] 4316++vst1.8 {d14-d15},[r1,: 128] 4317++ 4318++# qhasm: r4 = u4 4319++# asm 1: vmov >r4=reg128#16,<u4=reg128#8 4320++# asm 2: vmov >r4=q15,<u4=q7 4321++vmov q15,q7 4322++ 4323++# qhasm: r0 = u4 4324++# asm 1: vmov >r0=reg128#8,<u4=reg128#8 4325++# asm 2: vmov >r0=q7,<u4=q7 4326++vmov q7,q7 4327++ 4328++# qhasm: ptr = &y0_stack 4329++# asm 1: lea >ptr=int32#2,<y0_stack=stack128#2 4330++# asm 2: lea >ptr=r1,<y0_stack=[sp,#16] 4331++add r1,sp,#16 4332++ 4333++# qhasm: mem128[ptr] aligned= y0 4334++# asm 1: vst1.8 {<y0=reg128#1%bot-<y0=reg128#1%top},[<ptr=int32#2,: 128] 4335++# asm 2: vst1.8 {<y0=d0-<y0=d1},[<ptr=r1,: 128] 4336++vst1.8 {d0-d1},[r1,: 128] 4337++ 4338++# qhasm: ptr = &y12_stack 4339++# asm 1: lea >ptr=int32#2,<y12_stack=stack128#3 4340++# asm 2: lea >ptr=r1,<y12_stack=[sp,#32] 4341++add r1,sp,#32 4342++ 4343++# qhasm: mem128[ptr] aligned= y12 4344++# asm 1: vst1.8 {<y12=reg128#2%bot-<y12=reg128#2%top},[<ptr=int32#2,: 128] 4345++# asm 2: vst1.8 {<y12=d2-<y12=d3},[<ptr=r1,: 128] 4346++vst1.8 
{d2-d3},[r1,: 128] 4347++ 4348++# qhasm: ptr = &y34_stack 4349++# asm 1: lea >ptr=int32#2,<y34_stack=stack128#4 4350++# asm 2: lea >ptr=r1,<y34_stack=[sp,#48] 4351++add r1,sp,#48 4352++ 4353++# qhasm: mem128[ptr] aligned= y34 4354++# asm 1: vst1.8 {<y34=reg128#3%bot-<y34=reg128#3%top},[<ptr=int32#2,: 128] 4355++# asm 2: vst1.8 {<y34=d4-<y34=d5},[<ptr=r1,: 128] 4356++vst1.8 {d4-d5},[r1,: 128] 4357++ 4358++# qhasm: ptr = &z0_stack 4359++# asm 1: lea >ptr=int32#2,<z0_stack=stack128#7 4360++# asm 2: lea >ptr=r1,<z0_stack=[sp,#96] 4361++add r1,sp,#96 4362++ 4363++# qhasm: mem128[ptr] aligned= z0 4364++# asm 1: vst1.8 {<z0=reg128#4%bot-<z0=reg128#4%top},[<ptr=int32#2,: 128] 4365++# asm 2: vst1.8 {<z0=d6-<z0=d7},[<ptr=r1,: 128] 4366++vst1.8 {d6-d7},[r1,: 128] 4367++ 4368++# qhasm: ptr = &z12_stack 4369++# asm 1: lea >ptr=int32#2,<z12_stack=stack128#8 4370++# asm 2: lea >ptr=r1,<z12_stack=[sp,#112] 4371++add r1,sp,#112 4372++ 4373++# qhasm: mem128[ptr] aligned= z12 4374++# asm 1: vst1.8 {<z12=reg128#5%bot-<z12=reg128#5%top},[<ptr=int32#2,: 128] 4375++# asm 2: vst1.8 {<z12=d8-<z12=d9},[<ptr=r1,: 128] 4376++vst1.8 {d8-d9},[r1,: 128] 4377++ 4378++# qhasm: ptr = &z34_stack 4379++# asm 1: lea >ptr=int32#2,<z34_stack=stack128#9 4380++# asm 2: lea >ptr=r1,<z34_stack=[sp,#128] 4381++add r1,sp,#128 4382++ 4383++# qhasm: mem128[ptr] aligned= z34 4384++# asm 1: vst1.8 {<z34=reg128#6%bot-<z34=reg128#6%top},[<ptr=int32#2,: 128] 4385++# asm 2: vst1.8 {<z34=d10-<z34=d11},[<ptr=r1,: 128] 4386++vst1.8 {d10-d11},[r1,: 128] 4387++ 4388++# qhasm: ptr = &5y12_stack 4389++# asm 1: lea >ptr=int32#2,<5y12_stack=stack128#5 4390++# asm 2: lea >ptr=r1,<5y12_stack=[sp,#64] 4391++add r1,sp,#64 4392++ 4393++# qhasm: mem128[ptr] aligned= 5y12 4394++# asm 1: vst1.8 {<5y12=reg128#12%bot-<5y12=reg128#12%top},[<ptr=int32#2,: 128] 4395++# asm 2: vst1.8 {<5y12=d22-<5y12=d23},[<ptr=r1,: 128] 4396++vst1.8 {d22-d23},[r1,: 128] 4397++ 4398++# qhasm: ptr = &5y34_stack 4399++# asm 1: lea 
>ptr=int32#2,<5y34_stack=stack128#6 4400++# asm 2: lea >ptr=r1,<5y34_stack=[sp,#80] 4401++add r1,sp,#80 4402++ 4403++# qhasm: mem128[ptr] aligned= 5y34 4404++# asm 1: vst1.8 {<5y34=reg128#13%bot-<5y34=reg128#13%top},[<ptr=int32#2,: 128] 4405++# asm 2: vst1.8 {<5y34=d24-<5y34=d25},[<ptr=r1,: 128] 4406++vst1.8 {d24-d25},[r1,: 128] 4407++ 4408++# qhasm: ptr = &5z12_stack 4409++# asm 1: lea >ptr=int32#2,<5z12_stack=stack128#10 4410++# asm 2: lea >ptr=r1,<5z12_stack=[sp,#144] 4411++add r1,sp,#144 4412++ 4413++# qhasm: mem128[ptr] aligned= 5z12 4414++# asm 1: vst1.8 {<5z12=reg128#14%bot-<5z12=reg128#14%top},[<ptr=int32#2,: 128] 4415++# asm 2: vst1.8 {<5z12=d26-<5z12=d27},[<ptr=r1,: 128] 4416++vst1.8 {d26-d27},[r1,: 128] 4417++ 4418++# qhasm: ptr = &5z34_stack 4419++# asm 1: lea >ptr=int32#2,<5z34_stack=stack128#11 4420++# asm 2: lea >ptr=r1,<5z34_stack=[sp,#160] 4421++add r1,sp,#160 4422++ 4423++# qhasm: mem128[ptr] aligned= 5z34 4424++# asm 1: vst1.8 {<5z34=reg128#15%bot-<5z34=reg128#15%top},[<ptr=int32#2,: 128] 4425++# asm 2: vst1.8 {<5z34=d28-<5z34=d29},[<ptr=r1,: 128] 4426++vst1.8 {d28-d29},[r1,: 128] 4427++ 4428++# qhasm: unsigned>? len - 64 4429++# asm 1: cmp <len=int32#4,#64 4430++# asm 2: cmp <len=r3,#64 4431++cmp r3,#64 4432++ 4433++# qhasm: goto below64bytes if !unsigned> 4434++bls ._below64bytes 4435++ 4436++# qhasm: input_2 += 32 4437++# asm 1: add >input_2=int32#2,<input_2=int32#3,#32 4438++# asm 2: add >input_2=r1,<input_2=r2,#32 4439++add r1,r2,#32 4440++ 4441++# qhasm: mainloop2: 4442++._mainloop2: 4443++ 4444++# qhasm: c01 = mem128[input_2];input_2+=16 4445++# asm 1: vld1.8 {>c01=reg128#1%bot->c01=reg128#1%top},[<input_2=int32#2]! 4446++# asm 2: vld1.8 {>c01=d0->c01=d1},[<input_2=r1]! 4447++vld1.8 {d0-d1},[r1]! 4448++ 4449++# qhasm: c23 = mem128[input_2];input_2+=16 4450++# asm 1: vld1.8 {>c23=reg128#2%bot->c23=reg128#2%top},[<input_2=int32#2]! 4451++# asm 2: vld1.8 {>c23=d2->c23=d3},[<input_2=r1]! 4452++vld1.8 {d2-d3},[r1]! 
4453++ 4454++# qhasm: r4[0,1] += x01[0] unsigned* z34[2]; r4[2,3] += x01[1] unsigned* z34[3] 4455++# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%bot,<z34=reg128#6%top 4456++# asm 2: vmlal.u32 <r4=q15,<x01=d16,<z34=d11 4457++vmlal.u32 q15,d16,d11 4458++ 4459++# qhasm: ptr = &z12_stack 4460++# asm 1: lea >ptr=int32#3,<z12_stack=stack128#8 4461++# asm 2: lea >ptr=r2,<z12_stack=[sp,#112] 4462++add r2,sp,#112 4463++ 4464++# qhasm: z12 aligned= mem128[ptr] 4465++# asm 1: vld1.8 {>z12=reg128#3%bot->z12=reg128#3%top},[<ptr=int32#3,: 128] 4466++# asm 2: vld1.8 {>z12=d4->z12=d5},[<ptr=r2,: 128] 4467++vld1.8 {d4-d5},[r2,: 128] 4468++ 4469++# qhasm: r4[0,1] += x01[2] unsigned* z34[0]; r4[2,3] += x01[3] unsigned* z34[1] 4470++# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%top,<z34=reg128#6%bot 4471++# asm 2: vmlal.u32 <r4=q15,<x01=d17,<z34=d10 4472++vmlal.u32 q15,d17,d10 4473++ 4474++# qhasm: ptr = &z0_stack 4475++# asm 1: lea >ptr=int32#3,<z0_stack=stack128#7 4476++# asm 2: lea >ptr=r2,<z0_stack=[sp,#96] 4477++add r2,sp,#96 4478++ 4479++# qhasm: z0 aligned= mem128[ptr] 4480++# asm 1: vld1.8 {>z0=reg128#4%bot->z0=reg128#4%top},[<ptr=int32#3,: 128] 4481++# asm 2: vld1.8 {>z0=d6->z0=d7},[<ptr=r2,: 128] 4482++vld1.8 {d6-d7},[r2,: 128] 4483++ 4484++# qhasm: r4[0,1] += x23[0] unsigned* z12[2]; r4[2,3] += x23[1] unsigned* z12[3] 4485++# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%bot,<z12=reg128#3%top 4486++# asm 2: vmlal.u32 <r4=q15,<x23=d18,<z12=d5 4487++vmlal.u32 q15,d18,d5 4488++ 4489++# qhasm: c01 c23 = c01[0]c01[1]c01[2]c23[2]c23[0]c23[1]c01[3]c23[3] 4490++# asm 1: vtrn.32 <c01=reg128#1%top,<c23=reg128#2%top 4491++# asm 2: vtrn.32 <c01=d1,<c23=d3 4492++vtrn.32 d1,d3 4493++ 4494++# qhasm: r4[0,1] += x23[2] unsigned* z12[0]; r4[2,3] += x23[3] unsigned* z12[1] 4495++# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%top,<z12=reg128#3%bot 4496++# asm 2: vmlal.u32 <r4=q15,<x23=d19,<z12=d4 4497++vmlal.u32 q15,d19,d4 4498++ 4499++# qhasm: r4[0,1] += x4[0] unsigned* z0[0]; r4[2,3] += 
x4[1] unsigned* z0[1] 4500++# asm 1: vmlal.u32 <r4=reg128#16,<x4=reg128#11%bot,<z0=reg128#4%bot 4501++# asm 2: vmlal.u32 <r4=q15,<x4=d20,<z0=d6 4502++vmlal.u32 q15,d20,d6 4503++ 4504++# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18 4505++# asm 1: vshll.u32 >r3=reg128#5,<c23=reg128#2%top,#18 4506++# asm 2: vshll.u32 >r3=q4,<c23=d3,#18 4507++vshll.u32 q4,d3,#18 4508++ 4509++# qhasm: c01 c23 = c01[0]c23[0]c01[2]c01[3]c01[1]c23[1]c23[2]c23[3] 4510++# asm 1: vtrn.32 <c01=reg128#1%bot,<c23=reg128#2%bot 4511++# asm 2: vtrn.32 <c01=d0,<c23=d2 4512++vtrn.32 d0,d2 4513++ 4514++# qhasm: r3[0,1] += x01[0] unsigned* z34[0]; r3[2,3] += x01[1] unsigned* z34[1] 4515++# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%bot,<z34=reg128#6%bot 4516++# asm 2: vmlal.u32 <r3=q4,<x01=d16,<z34=d10 4517++vmlal.u32 q4,d16,d10 4518++ 4519++# qhasm: r3[0,1] += x01[2] unsigned* z12[2]; r3[2,3] += x01[3] unsigned* z12[3] 4520++# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%top,<z12=reg128#3%top 4521++# asm 2: vmlal.u32 <r3=q4,<x01=d17,<z12=d5 4522++vmlal.u32 q4,d17,d5 4523++ 4524++# qhasm: r0 = r0[1]c01[0]r0[2,3] 4525++# asm 1: vext.32 <r0=reg128#8%bot,<r0=reg128#8%bot,<c01=reg128#1%bot,#1 4526++# asm 2: vext.32 <r0=d14,<r0=d14,<c01=d0,#1 4527++vext.32 d14,d14,d0,#1 4528++ 4529++# qhasm: r3[0,1] += x23[0] unsigned* z12[0]; r3[2,3] += x23[1] unsigned* z12[1] 4530++# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%bot,<z12=reg128#3%bot 4531++# asm 2: vmlal.u32 <r3=q4,<x23=d18,<z12=d4 4532++vmlal.u32 q4,d18,d4 4533++ 4534++# qhasm: input_2 -= 64 4535++# asm 1: sub >input_2=int32#2,<input_2=int32#2,#64 4536++# asm 2: sub >input_2=r1,<input_2=r1,#64 4537++sub r1,r1,#64 4538++ 4539++# qhasm: r3[0,1] += x23[2] unsigned* z0[0]; r3[2,3] += x23[3] unsigned* z0[1] 4540++# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%top,<z0=reg128#4%bot 4541++# asm 2: vmlal.u32 <r3=q4,<x23=d19,<z0=d6 4542++vmlal.u32 q4,d19,d6 4543++ 4544++# qhasm: ptr = &5z34_stack 4545++# asm 1: lea >ptr=int32#3,<5z34_stack=stack128#11 4546++# 
asm 2: lea >ptr=r2,<5z34_stack=[sp,#160] 4547++add r2,sp,#160 4548++ 4549++# qhasm: 5z34 aligned= mem128[ptr] 4550++# asm 1: vld1.8 {>5z34=reg128#6%bot->5z34=reg128#6%top},[<ptr=int32#3,: 128] 4551++# asm 2: vld1.8 {>5z34=d10->5z34=d11},[<ptr=r2,: 128] 4552++vld1.8 {d10-d11},[r2,: 128] 4553++ 4554++# qhasm: r3[0,1] += x4[0] unsigned* 5z34[2]; r3[2,3] += x4[1] unsigned* 5z34[3] 4555++# asm 1: vmlal.u32 <r3=reg128#5,<x4=reg128#11%bot,<5z34=reg128#6%top 4556++# asm 2: vmlal.u32 <r3=q4,<x4=d20,<5z34=d11 4557++vmlal.u32 q4,d20,d11 4558++ 4559++# qhasm: r0 = r0[1]r0[0]r0[3]r0[2] 4560++# asm 1: vrev64.i32 >r0=reg128#8,<r0=reg128#8 4561++# asm 2: vrev64.i32 >r0=q7,<r0=q7 4562++vrev64.i32 q7,q7 4563++ 4564++# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12 4565++# asm 1: vshll.u32 >r2=reg128#14,<c01=reg128#1%top,#12 4566++# asm 2: vshll.u32 >r2=q13,<c01=d1,#12 4567++vshll.u32 q13,d1,#12 4568++ 4569++# qhasm: d01 = mem128[input_2];input_2+=16 4570++# asm 1: vld1.8 {>d01=reg128#12%bot->d01=reg128#12%top},[<input_2=int32#2]! 4571++# asm 2: vld1.8 {>d01=d22->d01=d23},[<input_2=r1]! 4572++vld1.8 {d22-d23},[r1]! 
4573++ 4574++# qhasm: r2[0,1] += x01[0] unsigned* z12[2]; r2[2,3] += x01[1] unsigned* z12[3] 4575++# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%bot,<z12=reg128#3%top 4576++# asm 2: vmlal.u32 <r2=q13,<x01=d16,<z12=d5 4577++vmlal.u32 q13,d16,d5 4578++ 4579++# qhasm: r2[0,1] += x01[2] unsigned* z12[0]; r2[2,3] += x01[3] unsigned* z12[1] 4580++# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%top,<z12=reg128#3%bot 4581++# asm 2: vmlal.u32 <r2=q13,<x01=d17,<z12=d4 4582++vmlal.u32 q13,d17,d4 4583++ 4584++# qhasm: r2[0,1] += x23[0] unsigned* z0[0]; r2[2,3] += x23[1] unsigned* z0[1] 4585++# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%bot,<z0=reg128#4%bot 4586++# asm 2: vmlal.u32 <r2=q13,<x23=d18,<z0=d6 4587++vmlal.u32 q13,d18,d6 4588++ 4589++# qhasm: r2[0,1] += x23[2] unsigned* 5z34[2]; r2[2,3] += x23[3] unsigned* 5z34[3] 4590++# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%top,<5z34=reg128#6%top 4591++# asm 2: vmlal.u32 <r2=q13,<x23=d19,<5z34=d11 4592++vmlal.u32 q13,d19,d11 4593++ 4594++# qhasm: r2[0,1] += x4[0] unsigned* 5z34[0]; r2[2,3] += x4[1] unsigned* 5z34[1] 4595++# asm 1: vmlal.u32 <r2=reg128#14,<x4=reg128#11%bot,<5z34=reg128#6%bot 4596++# asm 2: vmlal.u32 <r2=q13,<x4=d20,<5z34=d10 4597++vmlal.u32 q13,d20,d10 4598++ 4599++# qhasm: r0 = r0[0,1]c01[1]r0[2] 4600++# asm 1: vext.32 <r0=reg128#8%top,<c01=reg128#1%bot,<r0=reg128#8%top,#1 4601++# asm 2: vext.32 <r0=d15,<c01=d0,<r0=d15,#1 4602++vext.32 d15,d0,d15,#1 4603++ 4604++# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6 4605++# asm 1: vshll.u32 >r1=reg128#15,<c23=reg128#2%bot,#6 4606++# asm 2: vshll.u32 >r1=q14,<c23=d2,#6 4607++vshll.u32 q14,d2,#6 4608++ 4609++# qhasm: r1[0,1] += x01[0] unsigned* z12[0]; r1[2,3] += x01[1] unsigned* z12[1] 4610++# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%bot,<z12=reg128#3%bot 4611++# asm 2: vmlal.u32 <r1=q14,<x01=d16,<z12=d4 4612++vmlal.u32 q14,d16,d4 4613++ 4614++# qhasm: r1[0,1] += x01[2] unsigned* z0[0]; r1[2,3] += x01[3] unsigned* z0[1] 4615++# asm 1: vmlal.u32 
<r1=reg128#15,<x01=reg128#9%top,<z0=reg128#4%bot 4616++# asm 2: vmlal.u32 <r1=q14,<x01=d17,<z0=d6 4617++vmlal.u32 q14,d17,d6 4618++ 4619++# qhasm: r1[0,1] += x23[0] unsigned* 5z34[2]; r1[2,3] += x23[1] unsigned* 5z34[3] 4620++# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%bot,<5z34=reg128#6%top 4621++# asm 2: vmlal.u32 <r1=q14,<x23=d18,<5z34=d11 4622++vmlal.u32 q14,d18,d11 4623++ 4624++# qhasm: r1[0,1] += x23[2] unsigned* 5z34[0]; r1[2,3] += x23[3] unsigned* 5z34[1] 4625++# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%top,<5z34=reg128#6%bot 4626++# asm 2: vmlal.u32 <r1=q14,<x23=d19,<5z34=d10 4627++vmlal.u32 q14,d19,d10 4628++ 4629++# qhasm: ptr = &5z12_stack 4630++# asm 1: lea >ptr=int32#3,<5z12_stack=stack128#10 4631++# asm 2: lea >ptr=r2,<5z12_stack=[sp,#144] 4632++add r2,sp,#144 4633++ 4634++# qhasm: 5z12 aligned= mem128[ptr] 4635++# asm 1: vld1.8 {>5z12=reg128#1%bot->5z12=reg128#1%top},[<ptr=int32#3,: 128] 4636++# asm 2: vld1.8 {>5z12=d0->5z12=d1},[<ptr=r2,: 128] 4637++vld1.8 {d0-d1},[r2,: 128] 4638++ 4639++# qhasm: r1[0,1] += x4[0] unsigned* 5z12[2]; r1[2,3] += x4[1] unsigned* 5z12[3] 4640++# asm 1: vmlal.u32 <r1=reg128#15,<x4=reg128#11%bot,<5z12=reg128#1%top 4641++# asm 2: vmlal.u32 <r1=q14,<x4=d20,<5z12=d1 4642++vmlal.u32 q14,d20,d1 4643++ 4644++# qhasm: d23 = mem128[input_2];input_2+=16 4645++# asm 1: vld1.8 {>d23=reg128#2%bot->d23=reg128#2%top},[<input_2=int32#2]! 4646++# asm 2: vld1.8 {>d23=d2->d23=d3},[<input_2=r1]! 4647++vld1.8 {d2-d3},[r1]! 
4648++ 4649++# qhasm: input_2 += 32 4650++# asm 1: add >input_2=int32#2,<input_2=int32#2,#32 4651++# asm 2: add >input_2=r1,<input_2=r1,#32 4652++add r1,r1,#32 4653++ 4654++# qhasm: r0[0,1] += x4[0] unsigned* 5z12[0]; r0[2,3] += x4[1] unsigned* 5z12[1] 4655++# asm 1: vmlal.u32 <r0=reg128#8,<x4=reg128#11%bot,<5z12=reg128#1%bot 4656++# asm 2: vmlal.u32 <r0=q7,<x4=d20,<5z12=d0 4657++vmlal.u32 q7,d20,d0 4658++ 4659++# qhasm: r0[0,1] += x23[0] unsigned* 5z34[0]; r0[2,3] += x23[1] unsigned* 5z34[1] 4660++# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%bot,<5z34=reg128#6%bot 4661++# asm 2: vmlal.u32 <r0=q7,<x23=d18,<5z34=d10 4662++vmlal.u32 q7,d18,d10 4663++ 4664++# qhasm: d01 d23 = d01[0] d23[0] d01[1] d23[1] 4665++# asm 1: vswp <d23=reg128#2%bot,<d01=reg128#12%top 4666++# asm 2: vswp <d23=d2,<d01=d23 4667++vswp d2,d23 4668++ 4669++# qhasm: r0[0,1] += x23[2] unsigned* 5z12[2]; r0[2,3] += x23[3] unsigned* 5z12[3] 4670++# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%top,<5z12=reg128#1%top 4671++# asm 2: vmlal.u32 <r0=q7,<x23=d19,<5z12=d1 4672++vmlal.u32 q7,d19,d1 4673++ 4674++# qhasm: r0[0,1] += x01[0] unsigned* z0[0]; r0[2,3] += x01[1] unsigned* z0[1] 4675++# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%bot,<z0=reg128#4%bot 4676++# asm 2: vmlal.u32 <r0=q7,<x01=d16,<z0=d6 4677++vmlal.u32 q7,d16,d6 4678++ 4679++# qhasm: new mid 4680++ 4681++# qhasm: 2x v4 = d23 unsigned>> 40 4682++# asm 1: vshr.u64 >v4=reg128#4,<d23=reg128#2,#40 4683++# asm 2: vshr.u64 >v4=q3,<d23=q1,#40 4684++vshr.u64 q3,q1,#40 4685++ 4686++# qhasm: mid = d01[1]d23[0] mid[2,3] 4687++# asm 1: vext.32 <mid=reg128#1%bot,<d01=reg128#12%bot,<d23=reg128#2%bot,#1 4688++# asm 2: vext.32 <mid=d0,<d01=d22,<d23=d2,#1 4689++vext.32 d0,d22,d2,#1 4690++ 4691++# qhasm: new v23 4692++ 4693++# qhasm: v23[2] = d23[0,1] unsigned>> 14; v23[3] = d23[2,3] unsigned>> 14 4694++# asm 1: vshrn.u64 <v23=reg128#10%top,<d23=reg128#2,#14 4695++# asm 2: vshrn.u64 <v23=d19,<d23=q1,#14 4696++vshrn.u64 d19,q1,#14 4697++ 4698++# qhasm: mid = 
mid[0,1] d01[3]d23[2] 4699++# asm 1: vext.32 <mid=reg128#1%top,<d01=reg128#12%top,<d23=reg128#2%top,#1 4700++# asm 2: vext.32 <mid=d1,<d01=d23,<d23=d3,#1 4701++vext.32 d1,d23,d3,#1 4702++ 4703++# qhasm: new v01 4704++ 4705++# qhasm: v01[2] = d01[0,1] unsigned>> 26; v01[3] = d01[2,3] unsigned>> 26 4706++# asm 1: vshrn.u64 <v01=reg128#11%top,<d01=reg128#12,#26 4707++# asm 2: vshrn.u64 <v01=d21,<d01=q11,#26 4708++vshrn.u64 d21,q11,#26 4709++ 4710++# qhasm: v01 = d01[1]d01[0] v01[2,3] 4711++# asm 1: vext.32 <v01=reg128#11%bot,<d01=reg128#12%bot,<d01=reg128#12%bot,#1 4712++# asm 2: vext.32 <v01=d20,<d01=d22,<d01=d22,#1 4713++vext.32 d20,d22,d22,#1 4714++ 4715++# qhasm: r0[0,1] += x01[2] unsigned* 5z34[2]; r0[2,3] += x01[3] unsigned* 5z34[3] 4716++# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%top,<5z34=reg128#6%top 4717++# asm 2: vmlal.u32 <r0=q7,<x01=d17,<5z34=d11 4718++vmlal.u32 q7,d17,d11 4719++ 4720++# qhasm: v01 = v01[1]d01[2] v01[2,3] 4721++# asm 1: vext.32 <v01=reg128#11%bot,<v01=reg128#11%bot,<d01=reg128#12%top,#1 4722++# asm 2: vext.32 <v01=d20,<v01=d20,<d01=d23,#1 4723++vext.32 d20,d20,d23,#1 4724++ 4725++# qhasm: v23[0] = mid[0,1] unsigned>> 20; v23[1] = mid[2,3] unsigned>> 20 4726++# asm 1: vshrn.u64 <v23=reg128#10%bot,<mid=reg128#1,#20 4727++# asm 2: vshrn.u64 <v23=d18,<mid=q0,#20 4728++vshrn.u64 d18,q0,#20 4729++ 4730++# qhasm: v4 = v4[0]v4[2]v4[1]v4[3] 4731++# asm 1: vtrn.32 <v4=reg128#4%bot,<v4=reg128#4%top 4732++# asm 2: vtrn.32 <v4=d6,<v4=d7 4733++vtrn.32 d6,d7 4734++ 4735++# qhasm: 4x v01 &= 0x03ffffff 4736++# asm 1: vand.i32 <v01=reg128#11,#0x03ffffff 4737++# asm 2: vand.i32 <v01=q10,#0x03ffffff 4738++vand.i32 q10,#0x03ffffff 4739++ 4740++# qhasm: ptr = &y34_stack 4741++# asm 1: lea >ptr=int32#3,<y34_stack=stack128#4 4742++# asm 2: lea >ptr=r2,<y34_stack=[sp,#48] 4743++add r2,sp,#48 4744++ 4745++# qhasm: y34 aligned= mem128[ptr] 4746++# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<ptr=int32#3,: 128] 4747++# asm 2: vld1.8 
{>y34=d4->y34=d5},[<ptr=r2,: 128] 4748++vld1.8 {d4-d5},[r2,: 128] 4749++ 4750++# qhasm: 4x v23 &= 0x03ffffff 4751++# asm 1: vand.i32 <v23=reg128#10,#0x03ffffff 4752++# asm 2: vand.i32 <v23=q9,#0x03ffffff 4753++vand.i32 q9,#0x03ffffff 4754++ 4755++# qhasm: ptr = &y12_stack 4756++# asm 1: lea >ptr=int32#3,<y12_stack=stack128#3 4757++# asm 2: lea >ptr=r2,<y12_stack=[sp,#32] 4758++add r2,sp,#32 4759++ 4760++# qhasm: y12 aligned= mem128[ptr] 4761++# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<ptr=int32#3,: 128] 4762++# asm 2: vld1.8 {>y12=d2->y12=d3},[<ptr=r2,: 128] 4763++vld1.8 {d2-d3},[r2,: 128] 4764++ 4765++# qhasm: 4x v4 |= 0x01000000 4766++# asm 1: vorr.i32 <v4=reg128#4,#0x01000000 4767++# asm 2: vorr.i32 <v4=q3,#0x01000000 4768++vorr.i32 q3,#0x01000000 4769++ 4770++# qhasm: ptr = &y0_stack 4771++# asm 1: lea >ptr=int32#3,<y0_stack=stack128#2 4772++# asm 2: lea >ptr=r2,<y0_stack=[sp,#16] 4773++add r2,sp,#16 4774++ 4775++# qhasm: y0 aligned= mem128[ptr] 4776++# asm 1: vld1.8 {>y0=reg128#1%bot->y0=reg128#1%top},[<ptr=int32#3,: 128] 4777++# asm 2: vld1.8 {>y0=d0->y0=d1},[<ptr=r2,: 128] 4778++vld1.8 {d0-d1},[r2,: 128] 4779++ 4780++# qhasm: r4[0,1] += v01[0] unsigned* y34[2]; r4[2,3] += v01[1] unsigned* y34[3] 4781++# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%bot,<y34=reg128#3%top 4782++# asm 2: vmlal.u32 <r4=q15,<v01=d20,<y34=d5 4783++vmlal.u32 q15,d20,d5 4784++ 4785++# qhasm: r4[0,1] += v01[2] unsigned* y34[0]; r4[2,3] += v01[3] unsigned* y34[1] 4786++# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%top,<y34=reg128#3%bot 4787++# asm 2: vmlal.u32 <r4=q15,<v01=d21,<y34=d4 4788++vmlal.u32 q15,d21,d4 4789++ 4790++# qhasm: r4[0,1] += v23[0] unsigned* y12[2]; r4[2,3] += v23[1] unsigned* y12[3] 4791++# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%bot,<y12=reg128#2%top 4792++# asm 2: vmlal.u32 <r4=q15,<v23=d18,<y12=d3 4793++vmlal.u32 q15,d18,d3 4794++ 4795++# qhasm: r4[0,1] += v23[2] unsigned* y12[0]; r4[2,3] += v23[3] unsigned* y12[1] 4796++# asm 1: vmlal.u32 
<r4=reg128#16,<v23=reg128#10%top,<y12=reg128#2%bot 4797++# asm 2: vmlal.u32 <r4=q15,<v23=d19,<y12=d2 4798++vmlal.u32 q15,d19,d2 4799++ 4800++# qhasm: r4[0,1] += v4[0] unsigned* y0[0]; r4[2,3] += v4[1] unsigned* y0[1] 4801++# asm 1: vmlal.u32 <r4=reg128#16,<v4=reg128#4%bot,<y0=reg128#1%bot 4802++# asm 2: vmlal.u32 <r4=q15,<v4=d6,<y0=d0 4803++vmlal.u32 q15,d6,d0 4804++ 4805++# qhasm: ptr = &5y34_stack 4806++# asm 1: lea >ptr=int32#3,<5y34_stack=stack128#6 4807++# asm 2: lea >ptr=r2,<5y34_stack=[sp,#80] 4808++add r2,sp,#80 4809++ 4810++# qhasm: 5y34 aligned= mem128[ptr] 4811++# asm 1: vld1.8 {>5y34=reg128#13%bot->5y34=reg128#13%top},[<ptr=int32#3,: 128] 4812++# asm 2: vld1.8 {>5y34=d24->5y34=d25},[<ptr=r2,: 128] 4813++vld1.8 {d24-d25},[r2,: 128] 4814++ 4815++# qhasm: r3[0,1] += v01[0] unsigned* y34[0]; r3[2,3] += v01[1] unsigned* y34[1] 4816++# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%bot,<y34=reg128#3%bot 4817++# asm 2: vmlal.u32 <r3=q4,<v01=d20,<y34=d4 4818++vmlal.u32 q4,d20,d4 4819++ 4820++# qhasm: r3[0,1] += v01[2] unsigned* y12[2]; r3[2,3] += v01[3] unsigned* y12[3] 4821++# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%top,<y12=reg128#2%top 4822++# asm 2: vmlal.u32 <r3=q4,<v01=d21,<y12=d3 4823++vmlal.u32 q4,d21,d3 4824++ 4825++# qhasm: r3[0,1] += v23[0] unsigned* y12[0]; r3[2,3] += v23[1] unsigned* y12[1] 4826++# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%bot,<y12=reg128#2%bot 4827++# asm 2: vmlal.u32 <r3=q4,<v23=d18,<y12=d2 4828++vmlal.u32 q4,d18,d2 4829++ 4830++# qhasm: r3[0,1] += v23[2] unsigned* y0[0]; r3[2,3] += v23[3] unsigned* y0[1] 4831++# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%top,<y0=reg128#1%bot 4832++# asm 2: vmlal.u32 <r3=q4,<v23=d19,<y0=d0 4833++vmlal.u32 q4,d19,d0 4834++ 4835++# qhasm: r3[0,1] += v4[0] unsigned* 5y34[2]; r3[2,3] += v4[1] unsigned* 5y34[3] 4836++# asm 1: vmlal.u32 <r3=reg128#5,<v4=reg128#4%bot,<5y34=reg128#13%top 4837++# asm 2: vmlal.u32 <r3=q4,<v4=d6,<5y34=d25 4838++vmlal.u32 q4,d6,d25 4839++ 4840++# qhasm: ptr = 
&5y12_stack 4841++# asm 1: lea >ptr=int32#3,<5y12_stack=stack128#5 4842++# asm 2: lea >ptr=r2,<5y12_stack=[sp,#64] 4843++add r2,sp,#64 4844++ 4845++# qhasm: 5y12 aligned= mem128[ptr] 4846++# asm 1: vld1.8 {>5y12=reg128#12%bot->5y12=reg128#12%top},[<ptr=int32#3,: 128] 4847++# asm 2: vld1.8 {>5y12=d22->5y12=d23},[<ptr=r2,: 128] 4848++vld1.8 {d22-d23},[r2,: 128] 4849++ 4850++# qhasm: r0[0,1] += v4[0] unsigned* 5y12[0]; r0[2,3] += v4[1] unsigned* 5y12[1] 4851++# asm 1: vmlal.u32 <r0=reg128#8,<v4=reg128#4%bot,<5y12=reg128#12%bot 4852++# asm 2: vmlal.u32 <r0=q7,<v4=d6,<5y12=d22 4853++vmlal.u32 q7,d6,d22 4854++ 4855++# qhasm: r0[0,1] += v23[0] unsigned* 5y34[0]; r0[2,3] += v23[1] unsigned* 5y34[1] 4856++# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%bot,<5y34=reg128#13%bot 4857++# asm 2: vmlal.u32 <r0=q7,<v23=d18,<5y34=d24 4858++vmlal.u32 q7,d18,d24 4859++ 4860++# qhasm: r0[0,1] += v23[2] unsigned* 5y12[2]; r0[2,3] += v23[3] unsigned* 5y12[3] 4861++# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%top,<5y12=reg128#12%top 4862++# asm 2: vmlal.u32 <r0=q7,<v23=d19,<5y12=d23 4863++vmlal.u32 q7,d19,d23 4864++ 4865++# qhasm: r0[0,1] += v01[0] unsigned* y0[0]; r0[2,3] += v01[1] unsigned* y0[1] 4866++# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%bot,<y0=reg128#1%bot 4867++# asm 2: vmlal.u32 <r0=q7,<v01=d20,<y0=d0 4868++vmlal.u32 q7,d20,d0 4869++ 4870++# qhasm: r0[0,1] += v01[2] unsigned* 5y34[2]; r0[2,3] += v01[3] unsigned* 5y34[3] 4871++# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%top,<5y34=reg128#13%top 4872++# asm 2: vmlal.u32 <r0=q7,<v01=d21,<5y34=d25 4873++vmlal.u32 q7,d21,d25 4874++ 4875++# qhasm: r1[0,1] += v01[0] unsigned* y12[0]; r1[2,3] += v01[1] unsigned* y12[1] 4876++# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%bot,<y12=reg128#2%bot 4877++# asm 2: vmlal.u32 <r1=q14,<v01=d20,<y12=d2 4878++vmlal.u32 q14,d20,d2 4879++ 4880++# qhasm: r1[0,1] += v01[2] unsigned* y0[0]; r1[2,3] += v01[3] unsigned* y0[1] 4881++# asm 1: vmlal.u32 
<r1=reg128#15,<v01=reg128#11%top,<y0=reg128#1%bot 4882++# asm 2: vmlal.u32 <r1=q14,<v01=d21,<y0=d0 4883++vmlal.u32 q14,d21,d0 4884++ 4885++# qhasm: r1[0,1] += v23[0] unsigned* 5y34[2]; r1[2,3] += v23[1] unsigned* 5y34[3] 4886++# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%bot,<5y34=reg128#13%top 4887++# asm 2: vmlal.u32 <r1=q14,<v23=d18,<5y34=d25 4888++vmlal.u32 q14,d18,d25 4889++ 4890++# qhasm: r1[0,1] += v23[2] unsigned* 5y34[0]; r1[2,3] += v23[3] unsigned* 5y34[1] 4891++# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%top,<5y34=reg128#13%bot 4892++# asm 2: vmlal.u32 <r1=q14,<v23=d19,<5y34=d24 4893++vmlal.u32 q14,d19,d24 4894++ 4895++# qhasm: r1[0,1] += v4[0] unsigned* 5y12[2]; r1[2,3] += v4[1] unsigned* 5y12[3] 4896++# asm 1: vmlal.u32 <r1=reg128#15,<v4=reg128#4%bot,<5y12=reg128#12%top 4897++# asm 2: vmlal.u32 <r1=q14,<v4=d6,<5y12=d23 4898++vmlal.u32 q14,d6,d23 4899++ 4900++# qhasm: r2[0,1] += v01[0] unsigned* y12[2]; r2[2,3] += v01[1] unsigned* y12[3] 4901++# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%bot,<y12=reg128#2%top 4902++# asm 2: vmlal.u32 <r2=q13,<v01=d20,<y12=d3 4903++vmlal.u32 q13,d20,d3 4904++ 4905++# qhasm: r2[0,1] += v01[2] unsigned* y12[0]; r2[2,3] += v01[3] unsigned* y12[1] 4906++# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%top,<y12=reg128#2%bot 4907++# asm 2: vmlal.u32 <r2=q13,<v01=d21,<y12=d2 4908++vmlal.u32 q13,d21,d2 4909++ 4910++# qhasm: r2[0,1] += v23[0] unsigned* y0[0]; r2[2,3] += v23[1] unsigned* y0[1] 4911++# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%bot,<y0=reg128#1%bot 4912++# asm 2: vmlal.u32 <r2=q13,<v23=d18,<y0=d0 4913++vmlal.u32 q13,d18,d0 4914++ 4915++# qhasm: r2[0,1] += v23[2] unsigned* 5y34[2]; r2[2,3] += v23[3] unsigned* 5y34[3] 4916++# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%top,<5y34=reg128#13%top 4917++# asm 2: vmlal.u32 <r2=q13,<v23=d19,<5y34=d25 4918++vmlal.u32 q13,d19,d25 4919++ 4920++# qhasm: r2[0,1] += v4[0] unsigned* 5y34[0]; r2[2,3] += v4[1] unsigned* 5y34[1] 4921++# asm 1: vmlal.u32 
<r2=reg128#14,<v4=reg128#4%bot,<5y34=reg128#13%bot 4922++# asm 2: vmlal.u32 <r2=q13,<v4=d6,<5y34=d24 4923++vmlal.u32 q13,d6,d24 4924++ 4925++# qhasm: ptr = &two24 4926++# asm 1: lea >ptr=int32#3,<two24=stack128#1 4927++# asm 2: lea >ptr=r2,<two24=[sp,#0] 4928++add r2,sp,#0 4929++ 4930++# qhasm: 2x t1 = r0 unsigned>> 26 4931++# asm 1: vshr.u64 >t1=reg128#4,<r0=reg128#8,#26 4932++# asm 2: vshr.u64 >t1=q3,<r0=q7,#26 4933++vshr.u64 q3,q7,#26 4934++ 4935++# qhasm: len -= 64 4936++# asm 1: sub >len=int32#4,<len=int32#4,#64 4937++# asm 2: sub >len=r3,<len=r3,#64 4938++sub r3,r3,#64 4939++ 4940++# qhasm: r0 &= mask 4941++# asm 1: vand >r0=reg128#6,<r0=reg128#8,<mask=reg128#7 4942++# asm 2: vand >r0=q5,<r0=q7,<mask=q6 4943++vand q5,q7,q6 4944++ 4945++# qhasm: 2x r1 += t1 4946++# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#15,<t1=reg128#4 4947++# asm 2: vadd.i64 >r1=q3,<r1=q14,<t1=q3 4948++vadd.i64 q3,q14,q3 4949++ 4950++# qhasm: 2x t4 = r3 unsigned>> 26 4951++# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#5,#26 4952++# asm 2: vshr.u64 >t4=q7,<r3=q4,#26 4953++vshr.u64 q7,q4,#26 4954++ 4955++# qhasm: r3 &= mask 4956++# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7 4957++# asm 2: vand >r3=q4,<r3=q4,<mask=q6 4958++vand q4,q4,q6 4959++ 4960++# qhasm: 2x x4 = r4 + t4 4961++# asm 1: vadd.i64 >x4=reg128#8,<r4=reg128#16,<t4=reg128#8 4962++# asm 2: vadd.i64 >x4=q7,<r4=q15,<t4=q7 4963++vadd.i64 q7,q15,q7 4964++ 4965++# qhasm: r4 aligned= mem128[ptr] 4966++# asm 1: vld1.8 {>r4=reg128#16%bot->r4=reg128#16%top},[<ptr=int32#3,: 128] 4967++# asm 2: vld1.8 {>r4=d30->r4=d31},[<ptr=r2,: 128] 4968++vld1.8 {d30-d31},[r2,: 128] 4969++ 4970++# qhasm: 2x t2 = r1 unsigned>> 26 4971++# asm 1: vshr.u64 >t2=reg128#9,<r1=reg128#4,#26 4972++# asm 2: vshr.u64 >t2=q8,<r1=q3,#26 4973++vshr.u64 q8,q3,#26 4974++ 4975++# qhasm: r1 &= mask 4976++# asm 1: vand >r1=reg128#4,<r1=reg128#4,<mask=reg128#7 4977++# asm 2: vand >r1=q3,<r1=q3,<mask=q6 4978++vand q3,q3,q6 4979++ 4980++# qhasm: 2x t0 = x4 unsigned>> 26 
4981++# asm 1: vshr.u64 >t0=reg128#10,<x4=reg128#8,#26 4982++# asm 2: vshr.u64 >t0=q9,<x4=q7,#26 4983++vshr.u64 q9,q7,#26 4984++ 4985++# qhasm: 2x r2 += t2 4986++# asm 1: vadd.i64 >r2=reg128#9,<r2=reg128#14,<t2=reg128#9 4987++# asm 2: vadd.i64 >r2=q8,<r2=q13,<t2=q8 4988++vadd.i64 q8,q13,q8 4989++ 4990++# qhasm: x4 &= mask 4991++# asm 1: vand >x4=reg128#11,<x4=reg128#8,<mask=reg128#7 4992++# asm 2: vand >x4=q10,<x4=q7,<mask=q6 4993++vand q10,q7,q6 4994++ 4995++# qhasm: 2x x01 = r0 + t0 4996++# asm 1: vadd.i64 >x01=reg128#6,<r0=reg128#6,<t0=reg128#10 4997++# asm 2: vadd.i64 >x01=q5,<r0=q5,<t0=q9 4998++vadd.i64 q5,q5,q9 4999++ 5000++# qhasm: r0 aligned= mem128[ptr] 5001++# asm 1: vld1.8 {>r0=reg128#8%bot->r0=reg128#8%top},[<ptr=int32#3,: 128] 5002++# asm 2: vld1.8 {>r0=d14->r0=d15},[<ptr=r2,: 128] 5003++vld1.8 {d14-d15},[r2,: 128] 5004++ 5005++# qhasm: ptr = &z34_stack 5006++# asm 1: lea >ptr=int32#3,<z34_stack=stack128#9 5007++# asm 2: lea >ptr=r2,<z34_stack=[sp,#128] 5008++add r2,sp,#128 5009++ 5010++# qhasm: 2x t0 <<= 2 5011++# asm 1: vshl.i64 >t0=reg128#10,<t0=reg128#10,#2 5012++# asm 2: vshl.i64 >t0=q9,<t0=q9,#2 5013++vshl.i64 q9,q9,#2 5014++ 5015++# qhasm: 2x t3 = r2 unsigned>> 26 5016++# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#9,#26 5017++# asm 2: vshr.u64 >t3=q13,<r2=q8,#26 5018++vshr.u64 q13,q8,#26 5019++ 5020++# qhasm: 2x x01 += t0 5021++# asm 1: vadd.i64 >x01=reg128#15,<x01=reg128#6,<t0=reg128#10 5022++# asm 2: vadd.i64 >x01=q14,<x01=q5,<t0=q9 5023++vadd.i64 q14,q5,q9 5024++ 5025++# qhasm: z34 aligned= mem128[ptr] 5026++# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<ptr=int32#3,: 128] 5027++# asm 2: vld1.8 {>z34=d10->z34=d11},[<ptr=r2,: 128] 5028++vld1.8 {d10-d11},[r2,: 128] 5029++ 5030++# qhasm: x23 = r2 & mask 5031++# asm 1: vand >x23=reg128#10,<r2=reg128#9,<mask=reg128#7 5032++# asm 2: vand >x23=q9,<r2=q8,<mask=q6 5033++vand q9,q8,q6 5034++ 5035++# qhasm: 2x r3 += t3 5036++# asm 1: vadd.i64 >r3=reg128#5,<r3=reg128#5,<t3=reg128#14 5037++# asm 2: 
vadd.i64 >r3=q4,<r3=q4,<t3=q13 5038++vadd.i64 q4,q4,q13 5039++ 5040++# qhasm: input_2 += 32 5041++# asm 1: add >input_2=int32#2,<input_2=int32#2,#32 5042++# asm 2: add >input_2=r1,<input_2=r1,#32 5043++add r1,r1,#32 5044++ 5045++# qhasm: 2x t1 = x01 unsigned>> 26 5046++# asm 1: vshr.u64 >t1=reg128#14,<x01=reg128#15,#26 5047++# asm 2: vshr.u64 >t1=q13,<x01=q14,#26 5048++vshr.u64 q13,q14,#26 5049++ 5050++# qhasm: x23 = x23[0,2,1,3] 5051++# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top 5052++# asm 2: vtrn.32 <x23=d18,<x23=d19 5053++vtrn.32 d18,d19 5054++ 5055++# qhasm: x01 = x01 & mask 5056++# asm 1: vand >x01=reg128#9,<x01=reg128#15,<mask=reg128#7 5057++# asm 2: vand >x01=q8,<x01=q14,<mask=q6 5058++vand q8,q14,q6 5059++ 5060++# qhasm: 2x r1 += t1 5061++# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#4,<t1=reg128#14 5062++# asm 2: vadd.i64 >r1=q3,<r1=q3,<t1=q13 5063++vadd.i64 q3,q3,q13 5064++ 5065++# qhasm: 2x t4 = r3 unsigned>> 26 5066++# asm 1: vshr.u64 >t4=reg128#14,<r3=reg128#5,#26 5067++# asm 2: vshr.u64 >t4=q13,<r3=q4,#26 5068++vshr.u64 q13,q4,#26 5069++ 5070++# qhasm: x01 = x01[0,2,1,3] 5071++# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top 5072++# asm 2: vtrn.32 <x01=d16,<x01=d17 5073++vtrn.32 d16,d17 5074++ 5075++# qhasm: r3 &= mask 5076++# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7 5077++# asm 2: vand >r3=q4,<r3=q4,<mask=q6 5078++vand q4,q4,q6 5079++ 5080++# qhasm: r1 = r1[0,2,1,3] 5081++# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top 5082++# asm 2: vtrn.32 <r1=d6,<r1=d7 5083++vtrn.32 d6,d7 5084++ 5085++# qhasm: 2x x4 += t4 5086++# asm 1: vadd.i64 >x4=reg128#11,<x4=reg128#11,<t4=reg128#14 5087++# asm 2: vadd.i64 >x4=q10,<x4=q10,<t4=q13 5088++vadd.i64 q10,q10,q13 5089++ 5090++# qhasm: r3 = r3[0,2,1,3] 5091++# asm 1: vtrn.32 <r3=reg128#5%bot,<r3=reg128#5%top 5092++# asm 2: vtrn.32 <r3=d8,<r3=d9 5093++vtrn.32 d8,d9 5094++ 5095++# qhasm: x01 = x01[0,1] r1[0,1] 5096++# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0 5097++# 
asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0 5098++vext.32 d17,d6,d6,#0 5099++ 5100++# qhasm: x23 = x23[0,1] r3[0,1] 5101++# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#5%bot,<r3=reg128#5%bot,#0 5102++# asm 2: vext.32 <x23=d19,<r3=d8,<r3=d8,#0 5103++vext.32 d19,d8,d8,#0 5104++ 5105++# qhasm: x4 = x4[0,2,1,3] 5106++# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top 5107++# asm 2: vtrn.32 <x4=d20,<x4=d21 5108++vtrn.32 d20,d21 5109++ 5110++# qhasm: unsigned>? len - 64 5111++# asm 1: cmp <len=int32#4,#64 5112++# asm 2: cmp <len=r3,#64 5113++cmp r3,#64 5114++ 5115++# qhasm: goto mainloop2 if unsigned> 5116++bhi ._mainloop2 5117++ 5118++# qhasm: input_2 -= 32 5119++# asm 1: sub >input_2=int32#3,<input_2=int32#2,#32 5120++# asm 2: sub >input_2=r2,<input_2=r1,#32 5121++sub r2,r1,#32 5122++ 5123++# qhasm: below64bytes: 5124++._below64bytes: 5125++ 5126++# qhasm: unsigned>? len - 32 5127++# asm 1: cmp <len=int32#4,#32 5128++# asm 2: cmp <len=r3,#32 5129++cmp r3,#32 5130++ 5131++# qhasm: goto end if !unsigned> 5132++bls ._end 5133++ 5134++# qhasm: mainloop: 5135++._mainloop: 5136++ 5137++# qhasm: new r0 5138++ 5139++# qhasm: ptr = &two24 5140++# asm 1: lea >ptr=int32#2,<two24=stack128#1 5141++# asm 2: lea >ptr=r1,<two24=[sp,#0] 5142++add r1,sp,#0 5143++ 5144++# qhasm: r4 aligned= mem128[ptr] 5145++# asm 1: vld1.8 {>r4=reg128#5%bot->r4=reg128#5%top},[<ptr=int32#2,: 128] 5146++# asm 2: vld1.8 {>r4=d8->r4=d9},[<ptr=r1,: 128] 5147++vld1.8 {d8-d9},[r1,: 128] 5148++ 5149++# qhasm: u4 aligned= mem128[ptr] 5150++# asm 1: vld1.8 {>u4=reg128#6%bot->u4=reg128#6%top},[<ptr=int32#2,: 128] 5151++# asm 2: vld1.8 {>u4=d10->u4=d11},[<ptr=r1,: 128] 5152++vld1.8 {d10-d11},[r1,: 128] 5153++ 5154++# qhasm: c01 = mem128[input_2];input_2+=16 5155++# asm 1: vld1.8 {>c01=reg128#8%bot->c01=reg128#8%top},[<input_2=int32#3]! 5156++# asm 2: vld1.8 {>c01=d14->c01=d15},[<input_2=r2]! 5157++vld1.8 {d14-d15},[r2]! 
5158++ 5159++# qhasm: r4[0,1] += x01[0] unsigned* y34[2]; r4[2,3] += x01[1] unsigned* y34[3] 5160++# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%bot,<y34=reg128#3%top 5161++# asm 2: vmlal.u32 <r4=q4,<x01=d16,<y34=d5 5162++vmlal.u32 q4,d16,d5 5163++ 5164++# qhasm: c23 = mem128[input_2];input_2+=16 5165++# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_2=int32#3]! 5166++# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_2=r2]! 5167++vld1.8 {d26-d27},[r2]! 5168++ 5169++# qhasm: r4[0,1] += x01[2] unsigned* y34[0]; r4[2,3] += x01[3] unsigned* y34[1] 5170++# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%top,<y34=reg128#3%bot 5171++# asm 2: vmlal.u32 <r4=q4,<x01=d17,<y34=d4 5172++vmlal.u32 q4,d17,d4 5173++ 5174++# qhasm: r0 = u4[1]c01[0]r0[2,3] 5175++# asm 1: vext.32 <r0=reg128#4%bot,<u4=reg128#6%bot,<c01=reg128#8%bot,#1 5176++# asm 2: vext.32 <r0=d6,<u4=d10,<c01=d14,#1 5177++vext.32 d6,d10,d14,#1 5178++ 5179++# qhasm: r4[0,1] += x23[0] unsigned* y12[2]; r4[2,3] += x23[1] unsigned* y12[3] 5180++# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%bot,<y12=reg128#2%top 5181++# asm 2: vmlal.u32 <r4=q4,<x23=d18,<y12=d3 5182++vmlal.u32 q4,d18,d3 5183++ 5184++# qhasm: r0 = r0[0,1]u4[1]c23[0] 5185++# asm 1: vext.32 <r0=reg128#4%top,<u4=reg128#6%bot,<c23=reg128#14%bot,#1 5186++# asm 2: vext.32 <r0=d7,<u4=d10,<c23=d26,#1 5187++vext.32 d7,d10,d26,#1 5188++ 5189++# qhasm: r4[0,1] += x23[2] unsigned* y12[0]; r4[2,3] += x23[3] unsigned* y12[1] 5190++# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%top,<y12=reg128#2%bot 5191++# asm 2: vmlal.u32 <r4=q4,<x23=d19,<y12=d2 5192++vmlal.u32 q4,d19,d2 5193++ 5194++# qhasm: r0 = r0[1]r0[0]r0[3]r0[2] 5195++# asm 1: vrev64.i32 >r0=reg128#4,<r0=reg128#4 5196++# asm 2: vrev64.i32 >r0=q3,<r0=q3 5197++vrev64.i32 q3,q3 5198++ 5199++# qhasm: r4[0,1] += x4[0] unsigned* y0[0]; r4[2,3] += x4[1] unsigned* y0[1] 5200++# asm 1: vmlal.u32 <r4=reg128#5,<x4=reg128#11%bot,<y0=reg128#1%bot 5201++# asm 2: vmlal.u32 <r4=q4,<x4=d20,<y0=d0 5202++vmlal.u32 q4,d20,d0 
5203++ 5204++# qhasm: r0[0,1] += x4[0] unsigned* 5y12[0]; r0[2,3] += x4[1] unsigned* 5y12[1] 5205++# asm 1: vmlal.u32 <r0=reg128#4,<x4=reg128#11%bot,<5y12=reg128#12%bot 5206++# asm 2: vmlal.u32 <r0=q3,<x4=d20,<5y12=d22 5207++vmlal.u32 q3,d20,d22 5208++ 5209++# qhasm: r0[0,1] += x23[0] unsigned* 5y34[0]; r0[2,3] += x23[1] unsigned* 5y34[1] 5210++# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%bot,<5y34=reg128#13%bot 5211++# asm 2: vmlal.u32 <r0=q3,<x23=d18,<5y34=d24 5212++vmlal.u32 q3,d18,d24 5213++ 5214++# qhasm: r0[0,1] += x23[2] unsigned* 5y12[2]; r0[2,3] += x23[3] unsigned* 5y12[3] 5215++# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%top,<5y12=reg128#12%top 5216++# asm 2: vmlal.u32 <r0=q3,<x23=d19,<5y12=d23 5217++vmlal.u32 q3,d19,d23 5218++ 5219++# qhasm: c01 c23 = c01[0]c23[0]c01[2]c23[2]c01[1]c23[1]c01[3]c23[3] 5220++# asm 1: vtrn.32 <c01=reg128#8,<c23=reg128#14 5221++# asm 2: vtrn.32 <c01=q7,<c23=q13 5222++vtrn.32 q7,q13 5223++ 5224++# qhasm: r0[0,1] += x01[0] unsigned* y0[0]; r0[2,3] += x01[1] unsigned* y0[1] 5225++# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%bot,<y0=reg128#1%bot 5226++# asm 2: vmlal.u32 <r0=q3,<x01=d16,<y0=d0 5227++vmlal.u32 q3,d16,d0 5228++ 5229++# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18 5230++# asm 1: vshll.u32 >r3=reg128#6,<c23=reg128#14%top,#18 5231++# asm 2: vshll.u32 >r3=q5,<c23=d27,#18 5232++vshll.u32 q5,d27,#18 5233++ 5234++# qhasm: r0[0,1] += x01[2] unsigned* 5y34[2]; r0[2,3] += x01[3] unsigned* 5y34[3] 5235++# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%top,<5y34=reg128#13%top 5236++# asm 2: vmlal.u32 <r0=q3,<x01=d17,<5y34=d25 5237++vmlal.u32 q3,d17,d25 5238++ 5239++# qhasm: r3[0,1] += x01[0] unsigned* y34[0]; r3[2,3] += x01[1] unsigned* y34[1] 5240++# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%bot,<y34=reg128#3%bot 5241++# asm 2: vmlal.u32 <r3=q5,<x01=d16,<y34=d4 5242++vmlal.u32 q5,d16,d4 5243++ 5244++# qhasm: r3[0,1] += x01[2] unsigned* y12[2]; r3[2,3] += x01[3] unsigned* y12[3] 5245++# asm 1: vmlal.u32 
<r3=reg128#6,<x01=reg128#9%top,<y12=reg128#2%top 5246++# asm 2: vmlal.u32 <r3=q5,<x01=d17,<y12=d3 5247++vmlal.u32 q5,d17,d3 5248++ 5249++# qhasm: r3[0,1] += x23[0] unsigned* y12[0]; r3[2,3] += x23[1] unsigned* y12[1] 5250++# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%bot,<y12=reg128#2%bot 5251++# asm 2: vmlal.u32 <r3=q5,<x23=d18,<y12=d2 5252++vmlal.u32 q5,d18,d2 5253++ 5254++# qhasm: r3[0,1] += x23[2] unsigned* y0[0]; r3[2,3] += x23[3] unsigned* y0[1] 5255++# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%top,<y0=reg128#1%bot 5256++# asm 2: vmlal.u32 <r3=q5,<x23=d19,<y0=d0 5257++vmlal.u32 q5,d19,d0 5258++ 5259++# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6 5260++# asm 1: vshll.u32 >r1=reg128#14,<c23=reg128#14%bot,#6 5261++# asm 2: vshll.u32 >r1=q13,<c23=d26,#6 5262++vshll.u32 q13,d26,#6 5263++ 5264++# qhasm: r3[0,1] += x4[0] unsigned* 5y34[2]; r3[2,3] += x4[1] unsigned* 5y34[3] 5265++# asm 1: vmlal.u32 <r3=reg128#6,<x4=reg128#11%bot,<5y34=reg128#13%top 5266++# asm 2: vmlal.u32 <r3=q5,<x4=d20,<5y34=d25 5267++vmlal.u32 q5,d20,d25 5268++ 5269++# qhasm: r1[0,1] += x01[0] unsigned* y12[0]; r1[2,3] += x01[1] unsigned* y12[1] 5270++# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%bot,<y12=reg128#2%bot 5271++# asm 2: vmlal.u32 <r1=q13,<x01=d16,<y12=d2 5272++vmlal.u32 q13,d16,d2 5273++ 5274++# qhasm: r1[0,1] += x01[2] unsigned* y0[0]; r1[2,3] += x01[3] unsigned* y0[1] 5275++# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%top,<y0=reg128#1%bot 5276++# asm 2: vmlal.u32 <r1=q13,<x01=d17,<y0=d0 5277++vmlal.u32 q13,d17,d0 5278++ 5279++# qhasm: r1[0,1] += x23[0] unsigned* 5y34[2]; r1[2,3] += x23[1] unsigned* 5y34[3] 5280++# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%bot,<5y34=reg128#13%top 5281++# asm 2: vmlal.u32 <r1=q13,<x23=d18,<5y34=d25 5282++vmlal.u32 q13,d18,d25 5283++ 5284++# qhasm: r1[0,1] += x23[2] unsigned* 5y34[0]; r1[2,3] += x23[3] unsigned* 5y34[1] 5285++# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%top,<5y34=reg128#13%bot 5286++# asm 2: vmlal.u32 
<r1=q13,<x23=d19,<5y34=d24 5287++vmlal.u32 q13,d19,d24 5288++ 5289++# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12 5290++# asm 1: vshll.u32 >r2=reg128#8,<c01=reg128#8%top,#12 5291++# asm 2: vshll.u32 >r2=q7,<c01=d15,#12 5292++vshll.u32 q7,d15,#12 5293++ 5294++# qhasm: r1[0,1] += x4[0] unsigned* 5y12[2]; r1[2,3] += x4[1] unsigned* 5y12[3] 5295++# asm 1: vmlal.u32 <r1=reg128#14,<x4=reg128#11%bot,<5y12=reg128#12%top 5296++# asm 2: vmlal.u32 <r1=q13,<x4=d20,<5y12=d23 5297++vmlal.u32 q13,d20,d23 5298++ 5299++# qhasm: r2[0,1] += x01[0] unsigned* y12[2]; r2[2,3] += x01[1] unsigned* y12[3] 5300++# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%bot,<y12=reg128#2%top 5301++# asm 2: vmlal.u32 <r2=q7,<x01=d16,<y12=d3 5302++vmlal.u32 q7,d16,d3 5303++ 5304++# qhasm: r2[0,1] += x01[2] unsigned* y12[0]; r2[2,3] += x01[3] unsigned* y12[1] 5305++# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%top,<y12=reg128#2%bot 5306++# asm 2: vmlal.u32 <r2=q7,<x01=d17,<y12=d2 5307++vmlal.u32 q7,d17,d2 5308++ 5309++# qhasm: r2[0,1] += x23[0] unsigned* y0[0]; r2[2,3] += x23[1] unsigned* y0[1] 5310++# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%bot,<y0=reg128#1%bot 5311++# asm 2: vmlal.u32 <r2=q7,<x23=d18,<y0=d0 5312++vmlal.u32 q7,d18,d0 5313++ 5314++# qhasm: r2[0,1] += x23[2] unsigned* 5y34[2]; r2[2,3] += x23[3] unsigned* 5y34[3] 5315++# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%top,<5y34=reg128#13%top 5316++# asm 2: vmlal.u32 <r2=q7,<x23=d19,<5y34=d25 5317++vmlal.u32 q7,d19,d25 5318++ 5319++# qhasm: r2[0,1] += x4[0] unsigned* 5y34[0]; r2[2,3] += x4[1] unsigned* 5y34[1] 5320++# asm 1: vmlal.u32 <r2=reg128#8,<x4=reg128#11%bot,<5y34=reg128#13%bot 5321++# asm 2: vmlal.u32 <r2=q7,<x4=d20,<5y34=d24 5322++vmlal.u32 q7,d20,d24 5323++ 5324++# qhasm: 2x t1 = r0 unsigned>> 26 5325++# asm 1: vshr.u64 >t1=reg128#9,<r0=reg128#4,#26 5326++# asm 2: vshr.u64 >t1=q8,<r0=q3,#26 5327++vshr.u64 q8,q3,#26 5328++ 5329++# qhasm: r0 &= mask 5330++# asm 1: vand >r0=reg128#4,<r0=reg128#4,<mask=reg128#7 5331++# asm 2: 
vand >r0=q3,<r0=q3,<mask=q6 5332++vand q3,q3,q6 5333++ 5334++# qhasm: 2x r1 += t1 5335++# asm 1: vadd.i64 >r1=reg128#9,<r1=reg128#14,<t1=reg128#9 5336++# asm 2: vadd.i64 >r1=q8,<r1=q13,<t1=q8 5337++vadd.i64 q8,q13,q8 5338++ 5339++# qhasm: 2x t4 = r3 unsigned>> 26 5340++# asm 1: vshr.u64 >t4=reg128#10,<r3=reg128#6,#26 5341++# asm 2: vshr.u64 >t4=q9,<r3=q5,#26 5342++vshr.u64 q9,q5,#26 5343++ 5344++# qhasm: r3 &= mask 5345++# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7 5346++# asm 2: vand >r3=q5,<r3=q5,<mask=q6 5347++vand q5,q5,q6 5348++ 5349++# qhasm: 2x r4 += t4 5350++# asm 1: vadd.i64 >r4=reg128#5,<r4=reg128#5,<t4=reg128#10 5351++# asm 2: vadd.i64 >r4=q4,<r4=q4,<t4=q9 5352++vadd.i64 q4,q4,q9 5353++ 5354++# qhasm: 2x t2 = r1 unsigned>> 26 5355++# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#9,#26 5356++# asm 2: vshr.u64 >t2=q9,<r1=q8,#26 5357++vshr.u64 q9,q8,#26 5358++ 5359++# qhasm: r1 &= mask 5360++# asm 1: vand >r1=reg128#11,<r1=reg128#9,<mask=reg128#7 5361++# asm 2: vand >r1=q10,<r1=q8,<mask=q6 5362++vand q10,q8,q6 5363++ 5364++# qhasm: 2x t0 = r4 unsigned>> 26 5365++# asm 1: vshr.u64 >t0=reg128#9,<r4=reg128#5,#26 5366++# asm 2: vshr.u64 >t0=q8,<r4=q4,#26 5367++vshr.u64 q8,q4,#26 5368++ 5369++# qhasm: 2x r2 += t2 5370++# asm 1: vadd.i64 >r2=reg128#8,<r2=reg128#8,<t2=reg128#10 5371++# asm 2: vadd.i64 >r2=q7,<r2=q7,<t2=q9 5372++vadd.i64 q7,q7,q9 5373++ 5374++# qhasm: r4 &= mask 5375++# asm 1: vand >r4=reg128#5,<r4=reg128#5,<mask=reg128#7 5376++# asm 2: vand >r4=q4,<r4=q4,<mask=q6 5377++vand q4,q4,q6 5378++ 5379++# qhasm: 2x r0 += t0 5380++# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9 5381++# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8 5382++vadd.i64 q3,q3,q8 5383++ 5384++# qhasm: 2x t0 <<= 2 5385++# asm 1: vshl.i64 >t0=reg128#9,<t0=reg128#9,#2 5386++# asm 2: vshl.i64 >t0=q8,<t0=q8,#2 5387++vshl.i64 q8,q8,#2 5388++ 5389++# qhasm: 2x t3 = r2 unsigned>> 26 5390++# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#8,#26 5391++# asm 2: vshr.u64 >t3=q13,<r2=q7,#26 
5392++vshr.u64 q13,q7,#26 5393++ 5394++# qhasm: 2x r0 += t0 5395++# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9 5396++# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8 5397++vadd.i64 q3,q3,q8 5398++ 5399++# qhasm: x23 = r2 & mask 5400++# asm 1: vand >x23=reg128#10,<r2=reg128#8,<mask=reg128#7 5401++# asm 2: vand >x23=q9,<r2=q7,<mask=q6 5402++vand q9,q7,q6 5403++ 5404++# qhasm: 2x r3 += t3 5405++# asm 1: vadd.i64 >r3=reg128#6,<r3=reg128#6,<t3=reg128#14 5406++# asm 2: vadd.i64 >r3=q5,<r3=q5,<t3=q13 5407++vadd.i64 q5,q5,q13 5408++ 5409++# qhasm: 2x t1 = r0 unsigned>> 26 5410++# asm 1: vshr.u64 >t1=reg128#8,<r0=reg128#4,#26 5411++# asm 2: vshr.u64 >t1=q7,<r0=q3,#26 5412++vshr.u64 q7,q3,#26 5413++ 5414++# qhasm: x01 = r0 & mask 5415++# asm 1: vand >x01=reg128#9,<r0=reg128#4,<mask=reg128#7 5416++# asm 2: vand >x01=q8,<r0=q3,<mask=q6 5417++vand q8,q3,q6 5418++ 5419++# qhasm: 2x r1 += t1 5420++# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#11,<t1=reg128#8 5421++# asm 2: vadd.i64 >r1=q3,<r1=q10,<t1=q7 5422++vadd.i64 q3,q10,q7 5423++ 5424++# qhasm: 2x t4 = r3 unsigned>> 26 5425++# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#6,#26 5426++# asm 2: vshr.u64 >t4=q7,<r3=q5,#26 5427++vshr.u64 q7,q5,#26 5428++ 5429++# qhasm: r3 &= mask 5430++# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7 5431++# asm 2: vand >r3=q5,<r3=q5,<mask=q6 5432++vand q5,q5,q6 5433++ 5434++# qhasm: 2x x4 = r4 + t4 5435++# asm 1: vadd.i64 >x4=reg128#11,<r4=reg128#5,<t4=reg128#8 5436++# asm 2: vadd.i64 >x4=q10,<r4=q4,<t4=q7 5437++vadd.i64 q10,q4,q7 5438++ 5439++# qhasm: len -= 32 5440++# asm 1: sub >len=int32#4,<len=int32#4,#32 5441++# asm 2: sub >len=r3,<len=r3,#32 5442++sub r3,r3,#32 5443++ 5444++# qhasm: x01 = x01[0,2,1,3] 5445++# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top 5446++# asm 2: vtrn.32 <x01=d16,<x01=d17 5447++vtrn.32 d16,d17 5448++ 5449++# qhasm: x23 = x23[0,2,1,3] 5450++# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top 5451++# asm 2: vtrn.32 <x23=d18,<x23=d19 5452++vtrn.32 d18,d19 5453++ 
5454++# qhasm: r1 = r1[0,2,1,3] 5455++# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top 5456++# asm 2: vtrn.32 <r1=d6,<r1=d7 5457++vtrn.32 d6,d7 5458++ 5459++# qhasm: r3 = r3[0,2,1,3] 5460++# asm 1: vtrn.32 <r3=reg128#6%bot,<r3=reg128#6%top 5461++# asm 2: vtrn.32 <r3=d10,<r3=d11 5462++vtrn.32 d10,d11 5463++ 5464++# qhasm: x4 = x4[0,2,1,3] 5465++# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top 5466++# asm 2: vtrn.32 <x4=d20,<x4=d21 5467++vtrn.32 d20,d21 5468++ 5469++# qhasm: x01 = x01[0,1] r1[0,1] 5470++# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0 5471++# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0 5472++vext.32 d17,d6,d6,#0 5473++ 5474++# qhasm: x23 = x23[0,1] r3[0,1] 5475++# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#6%bot,<r3=reg128#6%bot,#0 5476++# asm 2: vext.32 <x23=d19,<r3=d10,<r3=d10,#0 5477++vext.32 d19,d10,d10,#0 5478++ 5479++# qhasm: unsigned>? len - 32 5480++# asm 1: cmp <len=int32#4,#32 5481++# asm 2: cmp <len=r3,#32 5482++cmp r3,#32 5483++ 5484++# qhasm: goto mainloop if unsigned> 5485++bhi ._mainloop 5486++ 5487++# qhasm: end: 5488++._end: 5489++ 5490++# qhasm: mem128[input_0] = x01;input_0+=16 5491++# asm 1: vst1.8 {<x01=reg128#9%bot-<x01=reg128#9%top},[<input_0=int32#1]! 5492++# asm 2: vst1.8 {<x01=d16-<x01=d17},[<input_0=r0]! 5493++vst1.8 {d16-d17},[r0]! 5494++ 5495++# qhasm: mem128[input_0] = x23;input_0+=16 5496++# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1]! 5497++# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0]! 5498++vst1.8 {d18-d19},[r0]! 
5499++ 5500++# qhasm: mem64[input_0] = x4[0] 5501++# asm 1: vst1.8 <x4=reg128#11%bot,[<input_0=int32#1] 5502++# asm 2: vst1.8 <x4=d20,[<input_0=r0] 5503++vst1.8 d20,[r0] 5504++ 5505++# qhasm: len = len 5506++# asm 1: mov >len=int32#1,<len=int32#4 5507++# asm 2: mov >len=r0,<len=r3 5508++mov r0,r3 5509++ 5510++# qhasm: qpopreturn len 5511++mov sp,r12 5512++vpop {q4,q5,q6,q7} 5513++bx lr 5514++ 5515++# qhasm: int32 input_0 5516++ 5517++# qhasm: int32 input_1 5518++ 5519++# qhasm: int32 input_2 5520++ 5521++# qhasm: int32 input_3 5522++ 5523++# qhasm: stack32 input_4 5524++ 5525++# qhasm: stack32 input_5 5526++ 5527++# qhasm: stack32 input_6 5528++ 5529++# qhasm: stack32 input_7 5530++ 5531++# qhasm: int32 caller_r4 5532++ 5533++# qhasm: int32 caller_r5 5534++ 5535++# qhasm: int32 caller_r6 5536++ 5537++# qhasm: int32 caller_r7 5538++ 5539++# qhasm: int32 caller_r8 5540++ 5541++# qhasm: int32 caller_r9 5542++ 5543++# qhasm: int32 caller_r10 5544++ 5545++# qhasm: int32 caller_r11 5546++ 5547++# qhasm: int32 caller_r12 5548++ 5549++# qhasm: int32 caller_r14 5550++ 5551++# qhasm: reg128 caller_q4 5552++ 5553++# qhasm: reg128 caller_q5 5554++ 5555++# qhasm: reg128 caller_q6 5556++ 5557++# qhasm: reg128 caller_q7 5558++ 5559++# qhasm: reg128 r0 5560++ 5561++# qhasm: reg128 r1 5562++ 5563++# qhasm: reg128 r2 5564++ 5565++# qhasm: reg128 r3 5566++ 5567++# qhasm: reg128 r4 5568++ 5569++# qhasm: reg128 x01 5570++ 5571++# qhasm: reg128 x23 5572++ 5573++# qhasm: reg128 x4 5574++ 5575++# qhasm: reg128 y01 5576++ 5577++# qhasm: reg128 y23 5578++ 5579++# qhasm: reg128 y4 5580++ 5581++# qhasm: reg128 _5y01 5582++ 5583++# qhasm: reg128 _5y23 5584++ 5585++# qhasm: reg128 _5y4 5586++ 5587++# qhasm: reg128 c01 5588++ 5589++# qhasm: reg128 c23 5590++ 5591++# qhasm: reg128 c4 5592++ 5593++# qhasm: reg128 t0 5594++ 5595++# qhasm: reg128 t1 5596++ 5597++# qhasm: reg128 t2 5598++ 5599++# qhasm: reg128 t3 5600++ 5601++# qhasm: reg128 t4 5602++ 5603++# qhasm: reg128 mask 5604++ 5605++# qhasm: 
enter crypto_onetimeauth_poly1305_neon2_addmulmod 5606++.align 2 5607++.global openssl_poly1305_neon2_addmulmod 5608++.type openssl_poly1305_neon2_addmulmod STT_FUNC 5609++openssl_poly1305_neon2_addmulmod: 5610++sub sp,sp,#0 5611++ 5612++# qhasm: 2x mask = 0xffffffff 5613++# asm 1: vmov.i64 >mask=reg128#1,#0xffffffff 5614++# asm 2: vmov.i64 >mask=q0,#0xffffffff 5615++vmov.i64 q0,#0xffffffff 5616++ 5617++# qhasm: y01 aligned= mem128[input_2];input_2+=16 5618++# asm 1: vld1.8 {>y01=reg128#2%bot->y01=reg128#2%top},[<input_2=int32#3,: 128]! 5619++# asm 2: vld1.8 {>y01=d2->y01=d3},[<input_2=r2,: 128]! 5620++vld1.8 {d2-d3},[r2,: 128]! 5621++ 5622++# qhasm: 4x _5y01 = y01 << 2 5623++# asm 1: vshl.i32 >_5y01=reg128#3,<y01=reg128#2,#2 5624++# asm 2: vshl.i32 >_5y01=q2,<y01=q1,#2 5625++vshl.i32 q2,q1,#2 5626++ 5627++# qhasm: y23 aligned= mem128[input_2];input_2+=16 5628++# asm 1: vld1.8 {>y23=reg128#4%bot->y23=reg128#4%top},[<input_2=int32#3,: 128]! 5629++# asm 2: vld1.8 {>y23=d6->y23=d7},[<input_2=r2,: 128]! 5630++vld1.8 {d6-d7},[r2,: 128]! 5631++ 5632++# qhasm: 4x _5y23 = y23 << 2 5633++# asm 1: vshl.i32 >_5y23=reg128#9,<y23=reg128#4,#2 5634++# asm 2: vshl.i32 >_5y23=q8,<y23=q3,#2 5635++vshl.i32 q8,q3,#2 5636++ 5637++# qhasm: y4 aligned= mem64[input_2]y4[1] 5638++# asm 1: vld1.8 {<y4=reg128#10%bot},[<input_2=int32#3,: 64] 5639++# asm 2: vld1.8 {<y4=d18},[<input_2=r2,: 64] 5640++vld1.8 {d18},[r2,: 64] 5641++ 5642++# qhasm: 4x _5y4 = y4 << 2 5643++# asm 1: vshl.i32 >_5y4=reg128#11,<y4=reg128#10,#2 5644++# asm 2: vshl.i32 >_5y4=q10,<y4=q9,#2 5645++vshl.i32 q10,q9,#2 5646++ 5647++# qhasm: x01 aligned= mem128[input_1];input_1+=16 5648++# asm 1: vld1.8 {>x01=reg128#12%bot->x01=reg128#12%top},[<input_1=int32#2,: 128]! 5649++# asm 2: vld1.8 {>x01=d22->x01=d23},[<input_1=r1,: 128]! 5650++vld1.8 {d22-d23},[r1,: 128]! 
5651++ 5652++# qhasm: 4x _5y01 += y01 5653++# asm 1: vadd.i32 >_5y01=reg128#3,<_5y01=reg128#3,<y01=reg128#2 5654++# asm 2: vadd.i32 >_5y01=q2,<_5y01=q2,<y01=q1 5655++vadd.i32 q2,q2,q1 5656++ 5657++# qhasm: x23 aligned= mem128[input_1];input_1+=16 5658++# asm 1: vld1.8 {>x23=reg128#13%bot->x23=reg128#13%top},[<input_1=int32#2,: 128]! 5659++# asm 2: vld1.8 {>x23=d24->x23=d25},[<input_1=r1,: 128]! 5660++vld1.8 {d24-d25},[r1,: 128]! 5661++ 5662++# qhasm: 4x _5y23 += y23 5663++# asm 1: vadd.i32 >_5y23=reg128#9,<_5y23=reg128#9,<y23=reg128#4 5664++# asm 2: vadd.i32 >_5y23=q8,<_5y23=q8,<y23=q3 5665++vadd.i32 q8,q8,q3 5666++ 5667++# qhasm: 4x _5y4 += y4 5668++# asm 1: vadd.i32 >_5y4=reg128#11,<_5y4=reg128#11,<y4=reg128#10 5669++# asm 2: vadd.i32 >_5y4=q10,<_5y4=q10,<y4=q9 5670++vadd.i32 q10,q10,q9 5671++ 5672++# qhasm: c01 aligned= mem128[input_3];input_3+=16 5673++# asm 1: vld1.8 {>c01=reg128#14%bot->c01=reg128#14%top},[<input_3=int32#4,: 128]! 5674++# asm 2: vld1.8 {>c01=d26->c01=d27},[<input_3=r3,: 128]! 5675++vld1.8 {d26-d27},[r3,: 128]! 5676++ 5677++# qhasm: 4x x01 += c01 5678++# asm 1: vadd.i32 >x01=reg128#12,<x01=reg128#12,<c01=reg128#14 5679++# asm 2: vadd.i32 >x01=q11,<x01=q11,<c01=q13 5680++vadd.i32 q11,q11,q13 5681++ 5682++# qhasm: c23 aligned= mem128[input_3];input_3+=16 5683++# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_3=int32#4,: 128]! 5684++# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_3=r3,: 128]! 5685++vld1.8 {d26-d27},[r3,: 128]! 
5686++ 5687++# qhasm: 4x x23 += c23 5688++# asm 1: vadd.i32 >x23=reg128#13,<x23=reg128#13,<c23=reg128#14 5689++# asm 2: vadd.i32 >x23=q12,<x23=q12,<c23=q13 5690++vadd.i32 q12,q12,q13 5691++ 5692++# qhasm: x4 aligned= mem64[input_1]x4[1] 5693++# asm 1: vld1.8 {<x4=reg128#14%bot},[<input_1=int32#2,: 64] 5694++# asm 2: vld1.8 {<x4=d26},[<input_1=r1,: 64] 5695++vld1.8 {d26},[r1,: 64] 5696++ 5697++# qhasm: 2x mask unsigned>>=6 5698++# asm 1: vshr.u64 >mask=reg128#1,<mask=reg128#1,#6 5699++# asm 2: vshr.u64 >mask=q0,<mask=q0,#6 5700++vshr.u64 q0,q0,#6 5701++ 5702++# qhasm: c4 aligned= mem64[input_3]c4[1] 5703++# asm 1: vld1.8 {<c4=reg128#15%bot},[<input_3=int32#4,: 64] 5704++# asm 2: vld1.8 {<c4=d28},[<input_3=r3,: 64] 5705++vld1.8 {d28},[r3,: 64] 5706++ 5707++# qhasm: 4x x4 += c4 5708++# asm 1: vadd.i32 >x4=reg128#14,<x4=reg128#14,<c4=reg128#15 5709++# asm 2: vadd.i32 >x4=q13,<x4=q13,<c4=q14 5710++vadd.i32 q13,q13,q14 5711++ 5712++# qhasm: r0[0,1] = x01[0] unsigned* y01[0]; r0[2,3] = x01[1] unsigned* y01[1] 5713++# asm 1: vmull.u32 >r0=reg128#15,<x01=reg128#12%bot,<y01=reg128#2%bot 5714++# asm 2: vmull.u32 >r0=q14,<x01=d22,<y01=d2 5715++vmull.u32 q14,d22,d2 5716++ 5717++# qhasm: r0[0,1] += x01[2] unsigned* _5y4[0]; r0[2,3] += x01[3] unsigned* _5y4[1] 5718++# asm 1: vmlal.u32 <r0=reg128#15,<x01=reg128#12%top,<_5y4=reg128#11%bot 5719++# asm 2: vmlal.u32 <r0=q14,<x01=d23,<_5y4=d20 5720++vmlal.u32 q14,d23,d20 5721++ 5722++# qhasm: r0[0,1] += x23[0] unsigned* _5y23[2]; r0[2,3] += x23[1] unsigned* _5y23[3] 5723++# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%bot,<_5y23=reg128#9%top 5724++# asm 2: vmlal.u32 <r0=q14,<x23=d24,<_5y23=d17 5725++vmlal.u32 q14,d24,d17 5726++ 5727++# qhasm: r0[0,1] += x23[2] unsigned* _5y23[0]; r0[2,3] += x23[3] unsigned* _5y23[1] 5728++# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%top,<_5y23=reg128#9%bot 5729++# asm 2: vmlal.u32 <r0=q14,<x23=d25,<_5y23=d16 5730++vmlal.u32 q14,d25,d16 5731++ 5732++# qhasm: r0[0,1] += x4[0] unsigned* _5y01[2]; 
r0[2,3] += x4[1] unsigned* _5y01[3] 5733++# asm 1: vmlal.u32 <r0=reg128#15,<x4=reg128#14%bot,<_5y01=reg128#3%top 5734++# asm 2: vmlal.u32 <r0=q14,<x4=d26,<_5y01=d5 5735++vmlal.u32 q14,d26,d5 5736++ 5737++# qhasm: r1[0,1] = x01[0] unsigned* y01[2]; r1[2,3] = x01[1] unsigned* y01[3] 5738++# asm 1: vmull.u32 >r1=reg128#3,<x01=reg128#12%bot,<y01=reg128#2%top 5739++# asm 2: vmull.u32 >r1=q2,<x01=d22,<y01=d3 5740++vmull.u32 q2,d22,d3 5741++ 5742++# qhasm: r1[0,1] += x01[2] unsigned* y01[0]; r1[2,3] += x01[3] unsigned* y01[1] 5743++# asm 1: vmlal.u32 <r1=reg128#3,<x01=reg128#12%top,<y01=reg128#2%bot 5744++# asm 2: vmlal.u32 <r1=q2,<x01=d23,<y01=d2 5745++vmlal.u32 q2,d23,d2 5746++ 5747++# qhasm: r1[0,1] += x23[0] unsigned* _5y4[0]; r1[2,3] += x23[1] unsigned* _5y4[1] 5748++# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%bot,<_5y4=reg128#11%bot 5749++# asm 2: vmlal.u32 <r1=q2,<x23=d24,<_5y4=d20 5750++vmlal.u32 q2,d24,d20 5751++ 5752++# qhasm: r1[0,1] += x23[2] unsigned* _5y23[2]; r1[2,3] += x23[3] unsigned* _5y23[3] 5753++# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%top,<_5y23=reg128#9%top 5754++# asm 2: vmlal.u32 <r1=q2,<x23=d25,<_5y23=d17 5755++vmlal.u32 q2,d25,d17 5756++ 5757++# qhasm: r1[0,1] += x4[0] unsigned* _5y23[0]; r1[2,3] += x4[1] unsigned* _5y23[1] 5758++# asm 1: vmlal.u32 <r1=reg128#3,<x4=reg128#14%bot,<_5y23=reg128#9%bot 5759++# asm 2: vmlal.u32 <r1=q2,<x4=d26,<_5y23=d16 5760++vmlal.u32 q2,d26,d16 5761++ 5762++# qhasm: r2[0,1] = x01[0] unsigned* y23[0]; r2[2,3] = x01[1] unsigned* y23[1] 5763++# asm 1: vmull.u32 >r2=reg128#16,<x01=reg128#12%bot,<y23=reg128#4%bot 5764++# asm 2: vmull.u32 >r2=q15,<x01=d22,<y23=d6 5765++vmull.u32 q15,d22,d6 5766++ 5767++# qhasm: r2[0,1] += x01[2] unsigned* y01[2]; r2[2,3] += x01[3] unsigned* y01[3] 5768++# asm 1: vmlal.u32 <r2=reg128#16,<x01=reg128#12%top,<y01=reg128#2%top 5769++# asm 2: vmlal.u32 <r2=q15,<x01=d23,<y01=d3 5770++vmlal.u32 q15,d23,d3 5771++ 5772++# qhasm: r2[0,1] += x23[0] unsigned* y01[0]; r2[2,3] += x23[1] 
unsigned* y01[1] 5773++# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%bot,<y01=reg128#2%bot 5774++# asm 2: vmlal.u32 <r2=q15,<x23=d24,<y01=d2 5775++vmlal.u32 q15,d24,d2 5776++ 5777++# qhasm: r2[0,1] += x23[2] unsigned* _5y4[0]; r2[2,3] += x23[3] unsigned* _5y4[1] 5778++# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%top,<_5y4=reg128#11%bot 5779++# asm 2: vmlal.u32 <r2=q15,<x23=d25,<_5y4=d20 5780++vmlal.u32 q15,d25,d20 5781++ 5782++# qhasm: r2[0,1] += x4[0] unsigned* _5y23[2]; r2[2,3] += x4[1] unsigned* _5y23[3] 5783++# asm 1: vmlal.u32 <r2=reg128#16,<x4=reg128#14%bot,<_5y23=reg128#9%top 5784++# asm 2: vmlal.u32 <r2=q15,<x4=d26,<_5y23=d17 5785++vmlal.u32 q15,d26,d17 5786++ 5787++# qhasm: r3[0,1] = x01[0] unsigned* y23[2]; r3[2,3] = x01[1] unsigned* y23[3] 5788++# asm 1: vmull.u32 >r3=reg128#9,<x01=reg128#12%bot,<y23=reg128#4%top 5789++# asm 2: vmull.u32 >r3=q8,<x01=d22,<y23=d7 5790++vmull.u32 q8,d22,d7 5791++ 5792++# qhasm: r3[0,1] += x01[2] unsigned* y23[0]; r3[2,3] += x01[3] unsigned* y23[1] 5793++# asm 1: vmlal.u32 <r3=reg128#9,<x01=reg128#12%top,<y23=reg128#4%bot 5794++# asm 2: vmlal.u32 <r3=q8,<x01=d23,<y23=d6 5795++vmlal.u32 q8,d23,d6 5796++ 5797++# qhasm: r3[0,1] += x23[0] unsigned* y01[2]; r3[2,3] += x23[1] unsigned* y01[3] 5798++# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%bot,<y01=reg128#2%top 5799++# asm 2: vmlal.u32 <r3=q8,<x23=d24,<y01=d3 5800++vmlal.u32 q8,d24,d3 5801++ 5802++# qhasm: r3[0,1] += x23[2] unsigned* y01[0]; r3[2,3] += x23[3] unsigned* y01[1] 5803++# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%top,<y01=reg128#2%bot 5804++# asm 2: vmlal.u32 <r3=q8,<x23=d25,<y01=d2 5805++vmlal.u32 q8,d25,d2 5806++ 5807++# qhasm: r3[0,1] += x4[0] unsigned* _5y4[0]; r3[2,3] += x4[1] unsigned* _5y4[1] 5808++# asm 1: vmlal.u32 <r3=reg128#9,<x4=reg128#14%bot,<_5y4=reg128#11%bot 5809++# asm 2: vmlal.u32 <r3=q8,<x4=d26,<_5y4=d20 5810++vmlal.u32 q8,d26,d20 5811++ 5812++# qhasm: r4[0,1] = x01[0] unsigned* y4[0]; r4[2,3] = x01[1] unsigned* y4[1] 5813++# asm 1: 
vmull.u32 >r4=reg128#10,<x01=reg128#12%bot,<y4=reg128#10%bot 5814++# asm 2: vmull.u32 >r4=q9,<x01=d22,<y4=d18 5815++vmull.u32 q9,d22,d18 5816++ 5817++# qhasm: r4[0,1] += x01[2] unsigned* y23[2]; r4[2,3] += x01[3] unsigned* y23[3] 5818++# asm 1: vmlal.u32 <r4=reg128#10,<x01=reg128#12%top,<y23=reg128#4%top 5819++# asm 2: vmlal.u32 <r4=q9,<x01=d23,<y23=d7 5820++vmlal.u32 q9,d23,d7 5821++ 5822++# qhasm: r4[0,1] += x23[0] unsigned* y23[0]; r4[2,3] += x23[1] unsigned* y23[1] 5823++# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%bot,<y23=reg128#4%bot 5824++# asm 2: vmlal.u32 <r4=q9,<x23=d24,<y23=d6 5825++vmlal.u32 q9,d24,d6 5826++ 5827++# qhasm: r4[0,1] += x23[2] unsigned* y01[2]; r4[2,3] += x23[3] unsigned* y01[3] 5828++# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%top,<y01=reg128#2%top 5829++# asm 2: vmlal.u32 <r4=q9,<x23=d25,<y01=d3 5830++vmlal.u32 q9,d25,d3 5831++ 5832++# qhasm: r4[0,1] += x4[0] unsigned* y01[0]; r4[2,3] += x4[1] unsigned* y01[1] 5833++# asm 1: vmlal.u32 <r4=reg128#10,<x4=reg128#14%bot,<y01=reg128#2%bot 5834++# asm 2: vmlal.u32 <r4=q9,<x4=d26,<y01=d2 5835++vmlal.u32 q9,d26,d2 5836++ 5837++# qhasm: 2x t1 = r0 unsigned>> 26 5838++# asm 1: vshr.u64 >t1=reg128#2,<r0=reg128#15,#26 5839++# asm 2: vshr.u64 >t1=q1,<r0=q14,#26 5840++vshr.u64 q1,q14,#26 5841++ 5842++# qhasm: r0 &= mask 5843++# asm 1: vand >r0=reg128#4,<r0=reg128#15,<mask=reg128#1 5844++# asm 2: vand >r0=q3,<r0=q14,<mask=q0 5845++vand q3,q14,q0 5846++ 5847++# qhasm: 2x r1 += t1 5848++# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#3,<t1=reg128#2 5849++# asm 2: vadd.i64 >r1=q1,<r1=q2,<t1=q1 5850++vadd.i64 q1,q2,q1 5851++ 5852++# qhasm: 2x t4 = r3 unsigned>> 26 5853++# asm 1: vshr.u64 >t4=reg128#3,<r3=reg128#9,#26 5854++# asm 2: vshr.u64 >t4=q2,<r3=q8,#26 5855++vshr.u64 q2,q8,#26 5856++ 5857++# qhasm: r3 &= mask 5858++# asm 1: vand >r3=reg128#9,<r3=reg128#9,<mask=reg128#1 5859++# asm 2: vand >r3=q8,<r3=q8,<mask=q0 5860++vand q8,q8,q0 5861++ 5862++# qhasm: 2x r4 += t4 5863++# asm 1: vadd.i64 
>r4=reg128#3,<r4=reg128#10,<t4=reg128#3 5864++# asm 2: vadd.i64 >r4=q2,<r4=q9,<t4=q2 5865++vadd.i64 q2,q9,q2 5866++ 5867++# qhasm: 2x t2 = r1 unsigned>> 26 5868++# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#2,#26 5869++# asm 2: vshr.u64 >t2=q9,<r1=q1,#26 5870++vshr.u64 q9,q1,#26 5871++ 5872++# qhasm: r1 &= mask 5873++# asm 1: vand >r1=reg128#2,<r1=reg128#2,<mask=reg128#1 5874++# asm 2: vand >r1=q1,<r1=q1,<mask=q0 5875++vand q1,q1,q0 5876++ 5877++# qhasm: 2x t0 = r4 unsigned>> 26 5878++# asm 1: vshr.u64 >t0=reg128#11,<r4=reg128#3,#26 5879++# asm 2: vshr.u64 >t0=q10,<r4=q2,#26 5880++vshr.u64 q10,q2,#26 5881++ 5882++# qhasm: 2x r2 += t2 5883++# asm 1: vadd.i64 >r2=reg128#10,<r2=reg128#16,<t2=reg128#10 5884++# asm 2: vadd.i64 >r2=q9,<r2=q15,<t2=q9 5885++vadd.i64 q9,q15,q9 5886++ 5887++# qhasm: r4 &= mask 5888++# asm 1: vand >r4=reg128#3,<r4=reg128#3,<mask=reg128#1 5889++# asm 2: vand >r4=q2,<r4=q2,<mask=q0 5890++vand q2,q2,q0 5891++ 5892++# qhasm: 2x r0 += t0 5893++# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11 5894++# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10 5895++vadd.i64 q3,q3,q10 5896++ 5897++# qhasm: 2x t0 <<= 2 5898++# asm 1: vshl.i64 >t0=reg128#11,<t0=reg128#11,#2 5899++# asm 2: vshl.i64 >t0=q10,<t0=q10,#2 5900++vshl.i64 q10,q10,#2 5901++ 5902++# qhasm: 2x t3 = r2 unsigned>> 26 5903++# asm 1: vshr.u64 >t3=reg128#12,<r2=reg128#10,#26 5904++# asm 2: vshr.u64 >t3=q11,<r2=q9,#26 5905++vshr.u64 q11,q9,#26 5906++ 5907++# qhasm: 2x r0 += t0 5908++# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11 5909++# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10 5910++vadd.i64 q3,q3,q10 5911++ 5912++# qhasm: x23 = r2 & mask 5913++# asm 1: vand >x23=reg128#10,<r2=reg128#10,<mask=reg128#1 5914++# asm 2: vand >x23=q9,<r2=q9,<mask=q0 5915++vand q9,q9,q0 5916++ 5917++# qhasm: 2x r3 += t3 5918++# asm 1: vadd.i64 >r3=reg128#9,<r3=reg128#9,<t3=reg128#12 5919++# asm 2: vadd.i64 >r3=q8,<r3=q8,<t3=q11 5920++vadd.i64 q8,q8,q11 5921++ 5922++# qhasm: 2x t1 = r0 unsigned>> 26 5923++# asm 1: 
vshr.u64 >t1=reg128#11,<r0=reg128#4,#26 5924++# asm 2: vshr.u64 >t1=q10,<r0=q3,#26 5925++vshr.u64 q10,q3,#26 5926++ 5927++# qhasm: x23 = x23[0,2,1,3] 5928++# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top 5929++# asm 2: vtrn.32 <x23=d18,<x23=d19 5930++vtrn.32 d18,d19 5931++ 5932++# qhasm: x01 = r0 & mask 5933++# asm 1: vand >x01=reg128#4,<r0=reg128#4,<mask=reg128#1 5934++# asm 2: vand >x01=q3,<r0=q3,<mask=q0 5935++vand q3,q3,q0 5936++ 5937++# qhasm: 2x r1 += t1 5938++# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#2,<t1=reg128#11 5939++# asm 2: vadd.i64 >r1=q1,<r1=q1,<t1=q10 5940++vadd.i64 q1,q1,q10 5941++ 5942++# qhasm: 2x t4 = r3 unsigned>> 26 5943++# asm 1: vshr.u64 >t4=reg128#11,<r3=reg128#9,#26 5944++# asm 2: vshr.u64 >t4=q10,<r3=q8,#26 5945++vshr.u64 q10,q8,#26 5946++ 5947++# qhasm: x01 = x01[0,2,1,3] 5948++# asm 1: vtrn.32 <x01=reg128#4%bot,<x01=reg128#4%top 5949++# asm 2: vtrn.32 <x01=d6,<x01=d7 5950++vtrn.32 d6,d7 5951++ 5952++# qhasm: r3 &= mask 5953++# asm 1: vand >r3=reg128#1,<r3=reg128#9,<mask=reg128#1 5954++# asm 2: vand >r3=q0,<r3=q8,<mask=q0 5955++vand q0,q8,q0 5956++ 5957++# qhasm: r1 = r1[0,2,1,3] 5958++# asm 1: vtrn.32 <r1=reg128#2%bot,<r1=reg128#2%top 5959++# asm 2: vtrn.32 <r1=d2,<r1=d3 5960++vtrn.32 d2,d3 5961++ 5962++# qhasm: 2x x4 = r4 + t4 5963++# asm 1: vadd.i64 >x4=reg128#3,<r4=reg128#3,<t4=reg128#11 5964++# asm 2: vadd.i64 >x4=q2,<r4=q2,<t4=q10 5965++vadd.i64 q2,q2,q10 5966++ 5967++# qhasm: r3 = r3[0,2,1,3] 5968++# asm 1: vtrn.32 <r3=reg128#1%bot,<r3=reg128#1%top 5969++# asm 2: vtrn.32 <r3=d0,<r3=d1 5970++vtrn.32 d0,d1 5971++ 5972++# qhasm: x01 = x01[0,1] r1[0,1] 5973++# asm 1: vext.32 <x01=reg128#4%top,<r1=reg128#2%bot,<r1=reg128#2%bot,#0 5974++# asm 2: vext.32 <x01=d7,<r1=d2,<r1=d2,#0 5975++vext.32 d7,d2,d2,#0 5976++ 5977++# qhasm: x23 = x23[0,1] r3[0,1] 5978++# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#1%bot,<r3=reg128#1%bot,#0 5979++# asm 2: vext.32 <x23=d19,<r3=d0,<r3=d0,#0 5980++vext.32 d19,d0,d0,#0 5981++ 5982++# qhasm: x4 = 
x4[0,2,1,3] 5983++# asm 1: vtrn.32 <x4=reg128#3%bot,<x4=reg128#3%top 5984++# asm 2: vtrn.32 <x4=d4,<x4=d5 5985++vtrn.32 d4,d5 5986++ 5987++# qhasm: mem128[input_0] aligned= x01;input_0+=16 5988++# asm 1: vst1.8 {<x01=reg128#4%bot-<x01=reg128#4%top},[<input_0=int32#1,: 128]! 5989++# asm 2: vst1.8 {<x01=d6-<x01=d7},[<input_0=r0,: 128]! 5990++vst1.8 {d6-d7},[r0,: 128]! 5991++ 5992++# qhasm: mem128[input_0] aligned= x23;input_0+=16 5993++# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1,: 128]! 5994++# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0,: 128]! 5995++vst1.8 {d18-d19},[r0,: 128]! 5996++ 5997++# qhasm: mem64[input_0] aligned= x4[0] 5998++# asm 1: vst1.8 <x4=reg128#3%bot,[<input_0=int32#1,: 64] 5999++# asm 2: vst1.8 <x4=d4,[<input_0=r0,: 64] 6000++vst1.8 d4,[r0,: 64] 6001++ 6002++# qhasm: return 6003++add sp,sp,#0 6004++bx lr 6005+diff --git a/crypto/poly1305/poly1305_vec.c b/crypto/poly1305/poly1305_vec.c 6006+new file mode 100644 6007+index 0000000..c546200 6008+--- /dev/null 6009++++ b/crypto/poly1305/poly1305_vec.c 6010+@@ -0,0 +1,733 @@ 6011++/* ==================================================================== 6012++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. 6013++ * 6014++ * Redistribution and use in source and binary forms, with or without 6015++ * modification, are permitted provided that the following conditions 6016++ * are met: 6017++ * 6018++ * 1. Redistributions of source code must retain the above copyright 6019++ * notice, this list of conditions and the following disclaimer. 6020++ * 6021++ * 2. Redistributions in binary form must reproduce the above copyright 6022++ * notice, this list of conditions and the following disclaimer in 6023++ * the documentation and/or other materials provided with the 6024++ * distribution. 6025++ * 6026++ * 3. 
All advertising materials mentioning features or use of this 6027++ * software must display the following acknowledgment: 6028++ * "This product includes software developed by the OpenSSL Project 6029++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 6030++ * 6031++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 6032++ * endorse or promote products derived from this software without 6033++ * prior written permission. For written permission, please contact 6034++ * licensing@OpenSSL.org. 6035++ * 6036++ * 5. Products derived from this software may not be called "OpenSSL" 6037++ * nor may "OpenSSL" appear in their names without prior written 6038++ * permission of the OpenSSL Project. 6039++ * 6040++ * 6. Redistributions of any form whatsoever must retain the following 6041++ * acknowledgment: 6042++ * "This product includes software developed by the OpenSSL Project 6043++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 6044++ * 6045++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 6046++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 6047++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 6048++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 6049++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 6050++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 6051++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 6052++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 6053++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 6054++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 6055++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 6056++ * OF THE POSSIBILITY OF SUCH DAMAGE. 
6057++ * ==================================================================== 6058++ */ 6059++ 6060++/* This implementation of poly1305 is by Andrew Moon 6061++ * (https://github.com/floodyberry/poly1305-donna) and released as public 6062++ * domain. It implements SIMD vectorization based on the algorithm described in 6063++ * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte 6064++ * block size 6065++*/ 6066++ 6067++#include <emmintrin.h> 6068++#include <stdint.h> 6069++#include <openssl/opensslconf.h> 6070++ 6071++#if !defined(OPENSSL_NO_POLY1305) 6072++ 6073++#include <openssl/poly1305.h> 6074++ 6075++#define ALIGN(x) __attribute__((aligned(x))) 6076++#define INLINE inline 6077++#define U8TO64_LE(m) (*(uint64_t*)(m)) 6078++#define U8TO32_LE(m) (*(uint32_t*)(m)) 6079++#define U64TO8_LE(m,v) (*(uint64_t*)(m)) = v 6080++ 6081++typedef __m128i xmmi; 6082++typedef unsigned __int128 uint128_t; 6083++ 6084++static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = 6085++ {(1 << 26) - 1, 0, (1 << 26) - 1, 0}; 6086++static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = {5, 0, 5, 0}; 6087++static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = 6088++ {(1 << 24), 0, (1 << 24), 0}; 6089++ 6090++static uint128_t INLINE 6091++add128(uint128_t a, uint128_t b) 6092++ { 6093++ return a + b; 6094++ } 6095++ 6096++static uint128_t INLINE 6097++add128_64(uint128_t a, uint64_t b) 6098++ { 6099++ return a + b; 6100++ } 6101++ 6102++static uint128_t INLINE 6103++mul64x64_128(uint64_t a, uint64_t b) 6104++ { 6105++ return (uint128_t)a * b; 6106++ } 6107++ 6108++static uint64_t INLINE 6109++lo128(uint128_t a) 6110++ { 6111++ return (uint64_t)a; 6112++ } 6113++ 6114++static uint64_t INLINE 6115++shr128(uint128_t v, const int shift) 6116++ { 6117++ return (uint64_t)(v >> shift); 6118++ } 6119++ 6120++static uint64_t INLINE 6121++shr128_pair(uint64_t hi, uint64_t lo, const int shift) 6122++ { 6123++ return (uint64_t)((((uint128_t)hi << 64) | 
lo) >> shift); 6124++ } 6125++ 6126++typedef struct poly1305_power_t 6127++ { 6128++ union 6129++ { 6130++ xmmi v; 6131++ uint64_t u[2]; 6132++ uint32_t d[4]; 6133++ } R20,R21,R22,R23,R24,S21,S22,S23,S24; 6134++ } poly1305_power; 6135++ 6136++typedef struct poly1305_state_internal_t 6137++ { 6138++ poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144 6139++ bytes of free storage */ 6140++ union 6141++ { 6142++ xmmi H[5]; /* 80 bytes */ 6143++ uint64_t HH[10]; 6144++ }; 6145++ /* uint64_t r0,r1,r2; [24 bytes] */ 6146++ /* uint64_t pad0,pad1; [16 bytes] */ 6147++ uint64_t started; /* 8 bytes */ 6148++ uint64_t leftover; /* 8 bytes */ 6149++ uint8_t buffer[64]; /* 64 bytes */ 6150++ } poly1305_state_internal; /* 448 bytes total + 63 bytes for 6151++ alignment = 511 bytes raw */ 6152++ 6153++static poly1305_state_internal INLINE 6154++*poly1305_aligned_state(poly1305_state *state) 6155++ { 6156++ return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63); 6157++ } 6158++ 6159++/* copy 0-63 bytes */ 6160++static void INLINE 6161++poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes) 6162++ { 6163++ size_t offset = src - dst; 6164++ if (bytes & 32) 6165++ { 6166++ _mm_storeu_si128((xmmi *)(dst + 0), _mm_loadu_si128((xmmi *)(dst + offset + 0))); 6167++ _mm_storeu_si128((xmmi *)(dst + 16), _mm_loadu_si128((xmmi *)(dst + offset + 16))); 6168++ dst += 32; 6169++ } 6170++ if (bytes & 16) 6171++ { 6172++ _mm_storeu_si128((xmmi *)dst, 6173++ _mm_loadu_si128((xmmi *)(dst + offset))); 6174++ dst += 16; 6175++ } 6176++ if (bytes & 8) 6177++ { 6178++ *(uint64_t *)dst = *(uint64_t *)(dst + offset); 6179++ dst += 8; 6180++ } 6181++ if (bytes & 4) 6182++ { 6183++ *(uint32_t *)dst = *(uint32_t *)(dst + offset); 6184++ dst += 4; 6185++ } 6186++ if (bytes & 2) 6187++ { 6188++ *(uint16_t *)dst = *(uint16_t *)(dst + offset); 6189++ dst += 2; 6190++ } 6191++ if (bytes & 1) 6192++ { 6193++ *( uint8_t *)dst = *( uint8_t *)(dst + offset); 6194++ } 6195++ } 
6196++ 6197++/* zero 0-15 bytes */ 6198++static void INLINE 6199++poly1305_block_zero(uint8_t *dst, size_t bytes) 6200++ { 6201++ if (bytes & 8) { *(uint64_t *)dst = 0; dst += 8; } 6202++ if (bytes & 4) { *(uint32_t *)dst = 0; dst += 4; } 6203++ if (bytes & 2) { *(uint16_t *)dst = 0; dst += 2; } 6204++ if (bytes & 1) { *( uint8_t *)dst = 0; } 6205++ } 6206++ 6207++static size_t INLINE 6208++poly1305_min(size_t a, size_t b) 6209++ { 6210++ return (a < b) ? a : b; 6211++ } 6212++ 6213++void 6214++CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) 6215++ { 6216++ poly1305_state_internal *st = poly1305_aligned_state(state); 6217++ poly1305_power *p; 6218++ uint64_t r0,r1,r2; 6219++ uint64_t t0,t1; 6220++ 6221++ /* clamp key */ 6222++ t0 = U8TO64_LE(key + 0); 6223++ t1 = U8TO64_LE(key + 8); 6224++ r0 = t0 & 0xffc0fffffff; t0 >>= 44; t0 |= t1 << 20; 6225++ r1 = t0 & 0xfffffc0ffff; t1 >>= 24; 6226++ r2 = t1 & 0x00ffffffc0f; 6227++ 6228++ /* store r in un-used space of st->P[1] */ 6229++ p = &st->P[1]; 6230++ p->R20.d[1] = (uint32_t)(r0 ); 6231++ p->R20.d[3] = (uint32_t)(r0 >> 32); 6232++ p->R21.d[1] = (uint32_t)(r1 ); 6233++ p->R21.d[3] = (uint32_t)(r1 >> 32); 6234++ p->R22.d[1] = (uint32_t)(r2 ); 6235++ p->R22.d[3] = (uint32_t)(r2 >> 32); 6236++ 6237++ /* store pad */ 6238++ p->R23.d[1] = U8TO32_LE(key + 16); 6239++ p->R23.d[3] = U8TO32_LE(key + 20); 6240++ p->R24.d[1] = U8TO32_LE(key + 24); 6241++ p->R24.d[3] = U8TO32_LE(key + 28); 6242++ 6243++ /* H = 0 */ 6244++ st->H[0] = _mm_setzero_si128(); 6245++ st->H[1] = _mm_setzero_si128(); 6246++ st->H[2] = _mm_setzero_si128(); 6247++ st->H[3] = _mm_setzero_si128(); 6248++ st->H[4] = _mm_setzero_si128(); 6249++ 6250++ st->started = 0; 6251++ st->leftover = 0; 6252++ } 6253++ 6254++static void 6255++poly1305_first_block(poly1305_state_internal *st, const uint8_t *m) 6256++ { 6257++ const xmmi MMASK = 6258++ _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); 6259++ const xmmi FIVE = 
_mm_load_si128((xmmi*)poly1305_x64_sse2_5); 6260++ const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); 6261++ xmmi T5,T6; 6262++ poly1305_power *p; 6263++ uint128_t d[3]; 6264++ uint64_t r0,r1,r2; 6265++ uint64_t r20,r21,r22,s22; 6266++ uint64_t pad0,pad1; 6267++ uint64_t c; 6268++ uint64_t i; 6269++ 6270++ /* pull out stored info */ 6271++ p = &st->P[1]; 6272++ 6273++ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; 6274++ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; 6275++ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; 6276++ pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; 6277++ pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; 6278++ 6279++ /* compute powers r^2,r^4 */ 6280++ r20 = r0; 6281++ r21 = r1; 6282++ r22 = r2; 6283++ for (i = 0; i < 2; i++) 6284++ { 6285++ s22 = r22 * (5 << 2); 6286++ 6287++ d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22)); 6288++ d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21)); 6289++ d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20)); 6290++ 6291++ r20 = lo128(d[0]) & 0xfffffffffff; c = shr128(d[0], 44); 6292++ d[1] = add128_64(d[1], c); r21 = lo128(d[1]) & 0xfffffffffff; c = shr128(d[1], 44); 6293++ d[2] = add128_64(d[2], c); r22 = lo128(d[2]) & 0x3ffffffffff; c = shr128(d[2], 42); 6294++ r20 += c * 5; c = (r20 >> 44); r20 = r20 & 0xfffffffffff; 6295++ r21 += c; 6296++ 6297++ p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)( r20 ) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); 6298++ p->R21.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r20 >> 26) | (r21 << 18)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); 6299++ p->R22.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 8) ) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); 6300++ p->R23.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 34) | (r22 << 10)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); 6301++ p->R24.v = 
_mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >> 16) ) ), _MM_SHUFFLE(1,0,1,0)); 6302++ p->S21.v = _mm_mul_epu32(p->R21.v, FIVE); 6303++ p->S22.v = _mm_mul_epu32(p->R22.v, FIVE); 6304++ p->S23.v = _mm_mul_epu32(p->R23.v, FIVE); 6305++ p->S24.v = _mm_mul_epu32(p->R24.v, FIVE); 6306++ p--; 6307++ } 6308++ 6309++ /* put saved info back */ 6310++ p = &st->P[1]; 6311++ p->R20.d[1] = (uint32_t)(r0 ); 6312++ p->R20.d[3] = (uint32_t)(r0 >> 32); 6313++ p->R21.d[1] = (uint32_t)(r1 ); 6314++ p->R21.d[3] = (uint32_t)(r1 >> 32); 6315++ p->R22.d[1] = (uint32_t)(r2 ); 6316++ p->R22.d[3] = (uint32_t)(r2 >> 32); 6317++ p->R23.d[1] = (uint32_t)(pad0 ); 6318++ p->R23.d[3] = (uint32_t)(pad0 >> 32); 6319++ p->R24.d[1] = (uint32_t)(pad1 ); 6320++ p->R24.d[3] = (uint32_t)(pad1 >> 32); 6321++ 6322++ /* H = [Mx,My] */ 6323++ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16))); 6324++ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24))); 6325++ st->H[0] = _mm_and_si128(MMASK, T5); 6326++ st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); 6327++ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); 6328++ st->H[2] = _mm_and_si128(MMASK, T5); 6329++ st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); 6330++ st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); 6331++ } 6332++ 6333++static void 6334++poly1305_blocks(poly1305_state_internal *st, const uint8_t *m, size_t bytes) 6335++ { 6336++ const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); 6337++ const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); 6338++ const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); 6339++ 6340++ poly1305_power *p; 6341++ xmmi H0,H1,H2,H3,H4; 6342++ xmmi T0,T1,T2,T3,T4,T5,T6; 6343++ xmmi M0,M1,M2,M3,M4; 6344++ xmmi C1,C2; 6345++ 6346++ H0 = st->H[0]; 6347++ H1 = st->H[1]; 6348++ H2 = st->H[2]; 6349++ H3 = st->H[3]; 6350++ H4 = st->H[4]; 6351++ 6352++ while 
(bytes >= 64) 6353++ { 6354++ /* H *= [r^4,r^4] */ 6355++ p = &st->P[0]; 6356++ T0 = _mm_mul_epu32(H0, p->R20.v); 6357++ T1 = _mm_mul_epu32(H0, p->R21.v); 6358++ T2 = _mm_mul_epu32(H0, p->R22.v); 6359++ T3 = _mm_mul_epu32(H0, p->R23.v); 6360++ T4 = _mm_mul_epu32(H0, p->R24.v); 6361++ T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6362++ T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6363++ T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6364++ T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6365++ T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6366++ T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6367++ T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6368++ T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6369++ T5 = _mm_mul_epu32(H1, p->R23.v); T4 = _mm_add_epi64(T4, T5); 6370++ T5 = _mm_mul_epu32(H2, p->R22.v); T4 = _mm_add_epi64(T4, T5); 6371++ T5 = _mm_mul_epu32(H3, p->R21.v); T4 = _mm_add_epi64(T4, T5); 6372++ T5 = _mm_mul_epu32(H4, p->R20.v); T4 = _mm_add_epi64(T4, T5); 6373++ 6374++ /* H += [Mx,My]*[r^2,r^2] */ 6375++ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16))); 6376++ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24))); 6377++ M0 = _mm_and_si128(MMASK, T5); 6378++ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); 6379++ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), 
_mm_slli_epi64(T6, 12)); 6380++ M2 = _mm_and_si128(MMASK, T5); 6381++ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); 6382++ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); 6383++ 6384++ p = &st->P[1]; 6385++ T5 = _mm_mul_epu32(M0, p->R20.v); T6 = _mm_mul_epu32(M0, p->R21.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6386++ T5 = _mm_mul_epu32(M1, p->S24.v); T6 = _mm_mul_epu32(M1, p->R20.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6387++ T5 = _mm_mul_epu32(M2, p->S23.v); T6 = _mm_mul_epu32(M2, p->S24.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6388++ T5 = _mm_mul_epu32(M3, p->S22.v); T6 = _mm_mul_epu32(M3, p->S23.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6389++ T5 = _mm_mul_epu32(M4, p->S21.v); T6 = _mm_mul_epu32(M4, p->S22.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6390++ T5 = _mm_mul_epu32(M0, p->R22.v); T6 = _mm_mul_epu32(M0, p->R23.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6391++ T5 = _mm_mul_epu32(M1, p->R21.v); T6 = _mm_mul_epu32(M1, p->R22.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6392++ T5 = _mm_mul_epu32(M2, p->R20.v); T6 = _mm_mul_epu32(M2, p->R21.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6393++ T5 = _mm_mul_epu32(M3, p->S24.v); T6 = _mm_mul_epu32(M3, p->R20.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6394++ T5 = _mm_mul_epu32(M4, p->S23.v); T6 = _mm_mul_epu32(M4, p->S24.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6395++ T5 = _mm_mul_epu32(M0, p->R24.v); T4 = _mm_add_epi64(T4, T5); 6396++ T5 = _mm_mul_epu32(M1, p->R23.v); T4 = _mm_add_epi64(T4, T5); 6397++ T5 = _mm_mul_epu32(M2, p->R22.v); T4 = _mm_add_epi64(T4, T5); 6398++ T5 = _mm_mul_epu32(M3, p->R21.v); T4 = _mm_add_epi64(T4, T5); 6399++ T5 = _mm_mul_epu32(M4, p->R20.v); T4 = _mm_add_epi64(T4, T5); 6400++ 6401++ /* H += [Mx,My] */ 6402++ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)), _mm_loadl_epi64((xmmi *)(m + 48))); 
6403++ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)), _mm_loadl_epi64((xmmi *)(m + 56))); 6404++ M0 = _mm_and_si128(MMASK, T5); 6405++ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); 6406++ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); 6407++ M2 = _mm_and_si128(MMASK, T5); 6408++ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); 6409++ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); 6410++ 6411++ T0 = _mm_add_epi64(T0, M0); 6412++ T1 = _mm_add_epi64(T1, M1); 6413++ T2 = _mm_add_epi64(T2, M2); 6414++ T3 = _mm_add_epi64(T3, M3); 6415++ T4 = _mm_add_epi64(T4, M4); 6416++ 6417++ /* reduce */ 6418++ C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C1); T4 = _mm_add_epi64(T4, C2); 6419++ C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); 6420++ C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C1); T1 = _mm_add_epi64(T1, C2); 6421++ C1 = _mm_srli_epi64(T3, 26); T3 = _mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C1); 6422++ 6423++ /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */ 6424++ H0 = T0; 6425++ H1 = T1; 6426++ H2 = T2; 6427++ H3 = T3; 6428++ H4 = T4; 6429++ 6430++ m += 64; 6431++ bytes -= 64; 6432++ } 6433++ 6434++ st->H[0] = H0; 6435++ st->H[1] = H1; 6436++ st->H[2] = H2; 6437++ st->H[3] = H3; 6438++ st->H[4] = H4; 6439++ } 6440++ 6441++static size_t 6442++poly1305_combine(poly1305_state_internal *st, const uint8_t *m, size_t bytes) 6443++ { 6444++ const xmmi MMASK = 6445++ _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); 6446++ const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); 6447++ const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); 6448++ 6449++ 
poly1305_power *p; 6450++ xmmi H0,H1,H2,H3,H4; 6451++ xmmi M0,M1,M2,M3,M4; 6452++ xmmi T0,T1,T2,T3,T4,T5,T6; 6453++ xmmi C1,C2; 6454++ 6455++ uint64_t r0,r1,r2; 6456++ uint64_t t0,t1,t2,t3,t4; 6457++ uint64_t c; 6458++ size_t consumed = 0; 6459++ 6460++ H0 = st->H[0]; 6461++ H1 = st->H[1]; 6462++ H2 = st->H[2]; 6463++ H3 = st->H[3]; 6464++ H4 = st->H[4]; 6465++ 6466++ /* p = [r^2,r^2] */ 6467++ p = &st->P[1]; 6468++ 6469++ if (bytes >= 32) 6470++ { 6471++ /* H *= [r^2,r^2] */ 6472++ T0 = _mm_mul_epu32(H0, p->R20.v); 6473++ T1 = _mm_mul_epu32(H0, p->R21.v); 6474++ T2 = _mm_mul_epu32(H0, p->R22.v); 6475++ T3 = _mm_mul_epu32(H0, p->R23.v); 6476++ T4 = _mm_mul_epu32(H0, p->R24.v); 6477++ T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6478++ T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6479++ T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6480++ T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6481++ T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6482++ T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6483++ T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6484++ T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6485++ T5 = _mm_mul_epu32(H1, p->R23.v); T4 = _mm_add_epi64(T4, T5); 6486++ T5 = _mm_mul_epu32(H2, p->R22.v); T4 = _mm_add_epi64(T4, T5); 6487++ T5 = _mm_mul_epu32(H3, p->R21.v); T4 = _mm_add_epi64(T4, T5); 6488++ T5 = _mm_mul_epu32(H4, p->R20.v); T4 = 
_mm_add_epi64(T4, T5); 6489++ 6490++ /* H += [Mx,My] */ 6491++ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16))); 6492++ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24))); 6493++ M0 = _mm_and_si128(MMASK, T5); 6494++ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); 6495++ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); 6496++ M2 = _mm_and_si128(MMASK, T5); 6497++ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); 6498++ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); 6499++ 6500++ T0 = _mm_add_epi64(T0, M0); 6501++ T1 = _mm_add_epi64(T1, M1); 6502++ T2 = _mm_add_epi64(T2, M2); 6503++ T3 = _mm_add_epi64(T3, M3); 6504++ T4 = _mm_add_epi64(T4, M4); 6505++ 6506++ /* reduce */ 6507++ C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C1); T4 = _mm_add_epi64(T4, C2); 6508++ C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); 6509++ C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C1); T1 = _mm_add_epi64(T1, C2); 6510++ C1 = _mm_srli_epi64(T3, 26); T3 = _mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C1); 6511++ 6512++ /* H = (H*[r^2,r^2] + [Mx,My]) */ 6513++ H0 = T0; 6514++ H1 = T1; 6515++ H2 = T2; 6516++ H3 = T3; 6517++ H4 = T4; 6518++ 6519++ consumed = 32; 6520++ } 6521++ 6522++ /* finalize, H *= [r^2,r] */ 6523++ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; 6524++ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; 6525++ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; 6526++ 6527++ p->R20.d[2] = (uint32_t)( r0 ) & 0x3ffffff; 6528++ p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff; 6529++ 
p->R22.d[2] = (uint32_t)((r1 >> 8) ) & 0x3ffffff; 6530++ p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff; 6531++ p->R24.d[2] = (uint32_t)((r2 >> 16) ) ; 6532++ p->S21.d[2] = p->R21.d[2] * 5; 6533++ p->S22.d[2] = p->R22.d[2] * 5; 6534++ p->S23.d[2] = p->R23.d[2] * 5; 6535++ p->S24.d[2] = p->R24.d[2] * 5; 6536++ 6537++ /* H *= [r^2,r] */ 6538++ T0 = _mm_mul_epu32(H0, p->R20.v); 6539++ T1 = _mm_mul_epu32(H0, p->R21.v); 6540++ T2 = _mm_mul_epu32(H0, p->R22.v); 6541++ T3 = _mm_mul_epu32(H0, p->R23.v); 6542++ T4 = _mm_mul_epu32(H0, p->R24.v); 6543++ T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6544++ T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6545++ T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6546++ T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); 6547++ T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6548++ T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6549++ T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6550++ T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); 6551++ T5 = _mm_mul_epu32(H1, p->R23.v); T4 = _mm_add_epi64(T4, T5); 6552++ T5 = _mm_mul_epu32(H2, p->R22.v); T4 = _mm_add_epi64(T4, T5); 6553++ T5 = _mm_mul_epu32(H3, p->R21.v); T4 = _mm_add_epi64(T4, T5); 6554++ T5 = _mm_mul_epu32(H4, p->R20.v); T4 = _mm_add_epi64(T4, T5); 6555++ 6556++ C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _mm_and_si128(T0, MMASK); T3 = 
_mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C1); T4 = _mm_add_epi64(T4, C2); 6557++ C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); 6558++ C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C1); T1 = _mm_add_epi64(T1, C2); 6559++ C1 = _mm_srli_epi64(T3, 26); T3 = _mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C1); 6560++ 6561++ /* H = H[0]+H[1] */ 6562++ H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8)); 6563++ H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8)); 6564++ H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8)); 6565++ H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8)); 6566++ H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8)); 6567++ 6568++ t0 = _mm_cvtsi128_si32(H0) ; c = (t0 >> 26); t0 &= 0x3ffffff; 6569++ t1 = _mm_cvtsi128_si32(H1) + c; c = (t1 >> 26); t1 &= 0x3ffffff; 6570++ t2 = _mm_cvtsi128_si32(H2) + c; c = (t2 >> 26); t2 &= 0x3ffffff; 6571++ t3 = _mm_cvtsi128_si32(H3) + c; c = (t3 >> 26); t3 &= 0x3ffffff; 6572++ t4 = _mm_cvtsi128_si32(H4) + c; c = (t4 >> 26); t4 &= 0x3ffffff; 6573++ t0 = t0 + (c * 5); c = (t0 >> 26); t0 &= 0x3ffffff; 6574++ t1 = t1 + c; 6575++ 6576++ st->HH[0] = ((t0 ) | (t1 << 26) ) & 0xfffffffffffull; 6577++ st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull; 6578++ st->HH[2] = ((t3 >> 10) | (t4 << 16) ) & 0x3ffffffffffull; 6579++ 6580++ return consumed; 6581++ } 6582++ 6583++void 6584++CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *m, 6585++ size_t bytes) 6586++ { 6587++ poly1305_state_internal *st = poly1305_aligned_state(state); 6588++ size_t want; 6589++ 6590++ /* need at least 32 initial bytes to start the accelerated branch */ 6591++ if (!st->started) 6592++ { 6593++ if ((st->leftover == 0) && (bytes > 32)) 6594++ { 6595++ poly1305_first_block(st, m); 6596++ m += 32; 
6597++ bytes -= 32; 6598++ } 6599++ else 6600++ { 6601++ want = poly1305_min(32 - st->leftover, bytes); 6602++ poly1305_block_copy(st->buffer + st->leftover, m, want); 6603++ bytes -= want; 6604++ m += want; 6605++ st->leftover += want; 6606++ if ((st->leftover < 32) || (bytes == 0)) 6607++ return; 6608++ poly1305_first_block(st, st->buffer); 6609++ st->leftover = 0; 6610++ } 6611++ st->started = 1; 6612++ } 6613++ 6614++ /* handle leftover */ 6615++ if (st->leftover) 6616++ { 6617++ want = poly1305_min(64 - st->leftover, bytes); 6618++ poly1305_block_copy(st->buffer + st->leftover, m, want); 6619++ bytes -= want; 6620++ m += want; 6621++ st->leftover += want; 6622++ if (st->leftover < 64) 6623++ return; 6624++ poly1305_blocks(st, st->buffer, 64); 6625++ st->leftover = 0; 6626++ } 6627++ 6628++ /* process 64 byte blocks */ 6629++ if (bytes >= 64) 6630++ { 6631++ want = (bytes & ~63); 6632++ poly1305_blocks(st, m, want); 6633++ m += want; 6634++ bytes -= want; 6635++ } 6636++ 6637++ if (bytes) 6638++ { 6639++ poly1305_block_copy(st->buffer + st->leftover, m, bytes); 6640++ st->leftover += bytes; 6641++ } 6642++ } 6643++ 6644++void 6645++CRYPTO_poly1305_finish(poly1305_state *state, unsigned char mac[16]) 6646++ { 6647++ poly1305_state_internal *st = poly1305_aligned_state(state); 6648++ size_t leftover = st->leftover; 6649++ uint8_t *m = st->buffer; 6650++ uint128_t d[3]; 6651++ uint64_t h0,h1,h2; 6652++ uint64_t t0,t1; 6653++ uint64_t g0,g1,g2,c,nc; 6654++ uint64_t r0,r1,r2,s1,s2; 6655++ poly1305_power *p; 6656++ 6657++ if (st->started) 6658++ { 6659++ size_t consumed = poly1305_combine(st, m, leftover); 6660++ leftover -= consumed; 6661++ m += consumed; 6662++ } 6663++ 6664++ /* st->HH will either be 0 or have the combined result */ 6665++ h0 = st->HH[0]; 6666++ h1 = st->HH[1]; 6667++ h2 = st->HH[2]; 6668++ 6669++ p = &st->P[1]; 6670++ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; 6671++ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; 
6672++ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; 6673++ s1 = r1 * (5 << 2); 6674++ s2 = r2 * (5 << 2); 6675++ 6676++ if (leftover < 16) 6677++ goto poly1305_donna_atmost15bytes; 6678++ 6679++poly1305_donna_atleast16bytes: 6680++ t0 = U8TO64_LE(m + 0); 6681++ t1 = U8TO64_LE(m + 8); 6682++ h0 += t0 & 0xfffffffffff; 6683++ t0 = shr128_pair(t1, t0, 44); 6684++ h1 += t0 & 0xfffffffffff; 6685++ h2 += (t1 >> 24) | ((uint64_t)1 << 40); 6686++ 6687++poly1305_donna_mul: 6688++ d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)), mul64x64_128(h2, s1)); 6689++ d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)), mul64x64_128(h2, s2)); 6690++ d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)), mul64x64_128(h2, r0)); 6691++ h0 = lo128(d[0]) & 0xfffffffffff; c = shr128(d[0], 44); 6692++ d[1] = add128_64(d[1], c); h1 = lo128(d[1]) & 0xfffffffffff; c = shr128(d[1], 44); 6693++ d[2] = add128_64(d[2], c); h2 = lo128(d[2]) & 0x3ffffffffff; c = shr128(d[2], 42); 6694++ h0 += c * 5; 6695++ 6696++ m += 16; 6697++ leftover -= 16; 6698++ if (leftover >= 16) goto poly1305_donna_atleast16bytes; 6699++ 6700++ /* final bytes */ 6701++poly1305_donna_atmost15bytes: 6702++ if (!leftover) goto poly1305_donna_finish; 6703++ 6704++ m[leftover++] = 1; 6705++ poly1305_block_zero(m + leftover, 16 - leftover); 6706++ leftover = 16; 6707++ 6708++ t0 = U8TO64_LE(m+0); 6709++ t1 = U8TO64_LE(m+8); 6710++ h0 += t0 & 0xfffffffffff; t0 = shr128_pair(t1, t0, 44); 6711++ h1 += t0 & 0xfffffffffff; 6712++ h2 += (t1 >> 24); 6713++ 6714++ goto poly1305_donna_mul; 6715++ 6716++poly1305_donna_finish: 6717++ c = (h0 >> 44); h0 &= 0xfffffffffff; 6718++ h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff; 6719++ h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; 6720++ h0 += c * 5; 6721++ 6722++ g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff; 6723++ g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff; 6724++ g2 = h2 + c - ((uint64_t)1 << 42); 6725++ 6726++ c = (g2 >> 63) - 1; 
6727++ nc = ~c; 6728++ h0 = (h0 & nc) | (g0 & c); 6729++ h1 = (h1 & nc) | (g1 & c); 6730++ h2 = (h2 & nc) | (g2 & c); 6731++ 6732++ /* pad */ 6733++ t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; 6734++ t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; 6735++ h0 += (t0 & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff; t0 = shr128_pair(t1, t0, 44); 6736++ h1 += (t0 & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff; t1 = (t1 >> 24); 6737++ h2 += (t1 ) + c; 6738++ 6739++ U64TO8_LE(mac + 0, ((h0 ) | (h1 << 44))); 6740++ U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24))); 6741++ } 6742++ 6743++#endif /* !OPENSSL_NO_POLY1305 */ 6744+diff --git a/crypto/poly1305/poly1305test.c b/crypto/poly1305/poly1305test.c 6745+new file mode 100644 6746+index 0000000..8dd26af 6747+--- /dev/null 6748++++ b/crypto/poly1305/poly1305test.c 6749+@@ -0,0 +1,166 @@ 6750++/* ==================================================================== 6751++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. 6752++ * 6753++ * Redistribution and use in source and binary forms, with or without 6754++ * modification, are permitted provided that the following conditions 6755++ * are met: 6756++ * 6757++ * 1. Redistributions of source code must retain the above copyright 6758++ * notice, this list of conditions and the following disclaimer. 6759++ * 6760++ * 2. Redistributions in binary form must reproduce the above copyright 6761++ * notice, this list of conditions and the following disclaimer in 6762++ * the documentation and/or other materials provided with the 6763++ * distribution. 6764++ * 6765++ * 3. All advertising materials mentioning features or use of this 6766++ * software must display the following acknowledgment: 6767++ * "This product includes software developed by the OpenSSL Project 6768++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 6769++ * 6770++ * 4. 
The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 6771++ * endorse or promote products derived from this software without 6772++ * prior written permission. For written permission, please contact 6773++ * licensing@OpenSSL.org. 6774++ * 6775++ * 5. Products derived from this software may not be called "OpenSSL" 6776++ * nor may "OpenSSL" appear in their names without prior written 6777++ * permission of the OpenSSL Project. 6778++ * 6779++ * 6. Redistributions of any form whatsoever must retain the following 6780++ * acknowledgment: 6781++ * "This product includes software developed by the OpenSSL Project 6782++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 6783++ * 6784++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 6785++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 6786++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 6787++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 6788++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 6789++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 6790++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 6791++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 6792++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 6793++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 6794++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 6795++ * OF THE POSSIBILITY OF SUCH DAMAGE. 
6796++ * ==================================================================== 6797++ */ 6798++ 6799++#include <stdio.h> 6800++#include <stdlib.h> 6801++#include <string.h> 6802++ 6803++#include <openssl/poly1305.h> 6804++ 6805++struct poly1305_test 6806++ { 6807++ const char *inputhex; 6808++ const char *keyhex; 6809++ const char *outhex; 6810++ }; 6811++ 6812++static const struct poly1305_test poly1305_tests[] = { 6813++ { 6814++ "", 6815++ "c8afaac331ee372cd6082de134943b174710130e9f6fea8d72293850a667d86c", 6816++ "4710130e9f6fea8d72293850a667d86c", 6817++ }, 6818++ { 6819++ "48656c6c6f20776f726c6421", 6820++ "746869732069732033322d62797465206b657920666f7220506f6c7931333035", 6821++ "a6f745008f81c916a20dcc74eef2b2f0", 6822++ }, 6823++ { 6824++ "0000000000000000000000000000000000000000000000000000000000000000", 6825++ "746869732069732033322d62797465206b657920666f7220506f6c7931333035", 6826++ "49ec78090e481ec6c26b33b91ccc0307", 6827++ }, 6828++ { 6829++ "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", 6830++ "746869732069732033322d62797465206b657920666f7220506f6c7931333035", 6831++ "da84bcab02676c38cdb015604274c2aa", 6832++ }, 6833++}; 6834++ 6835++static unsigned char hex_digit(char h) 6836++ { 6837++ if (h >= '0' && h <= '9') 6838++ return h - '0'; 6839++ else if (h >= 'a' && h <= 'f') 6840++ return h - 'a' + 10; 6841++ else if (h >= 'A' && h <= 'F') 6842++ return h - 'A' + 10; 6843++ else 6844++ abort(); 6845++ } 6846++ 6847++static void hex_decode(unsigned char *out, const char* hex) 6848++ { 6849++ size_t j = 0; 6850++ 6851++ while (*hex != 0) 6852++ { 6853++ unsigned char v = hex_digit(*hex++); 6854++ v <<= 4; 6855++ v |= hex_digit(*hex++); 6856++ out[j++] = v; 6857++ } 6858++ } 6859++ 6860++static void hexdump(unsigned char *a, size_t len) 6861++ { 6862++ size_t i; 6863++ 6864++ for (i = 0; i < len; i++) 6865++ printf("%02x", a[i]); 6866++ } 6867++ 6868++int main() 6869++ { 6870++ static const unsigned num_tests = 6871++ sizeof(poly1305_tests) / sizeof(struct poly1305_test); 6872++ 
unsigned i; 6873++ unsigned char key[32], out[16], expected[16]; 6874++ poly1305_state poly1305; 6875++ 6876++ for (i = 0; i < num_tests; i++) 6877++ { 6878++ const struct poly1305_test *test = &poly1305_tests[i]; 6879++ unsigned char *in; 6880++ size_t inlen = strlen(test->inputhex); 6881++ 6882++ if (strlen(test->keyhex) != sizeof(key)*2 || 6883++ strlen(test->outhex) != sizeof(out)*2 || 6884++ (inlen & 1) == 1) 6885++ return 1; 6886++ 6887++ inlen /= 2; 6888++ 6889++ hex_decode(key, test->keyhex); 6890++ hex_decode(expected, test->outhex); 6891++ 6892++ in = malloc(inlen); 6893++ 6894++ hex_decode(in, test->inputhex); 6895++ CRYPTO_poly1305_init(&poly1305, key); 6896++ CRYPTO_poly1305_update(&poly1305, in, inlen); 6897++ CRYPTO_poly1305_finish(&poly1305, out); 6898++ 6899++ if (memcmp(out, expected, sizeof(expected)) != 0) 6900++ { 6901++ printf("Poly1305 test #%d failed.\n", i); 6902++ printf("got: "); 6903++ hexdump(out, sizeof(out)); 6904++ printf("\nexpected: "); 6905++ hexdump(expected, sizeof(expected)); 6906++ printf("\n"); 6907++ return 1; 6908++ } 6909++ 6910++ free(in); 6911++ } 6912++ 6913++ printf("PASS\n"); 6914++ return 0; 6915++ } 6916+diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c 6917+index 75b6560..a042b8d 100644 6918+--- a/ssl/s3_lib.c 6919++++ b/ssl/s3_lib.c 6920+@@ -1841,7 +1841,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 6921+ SSL_AEAD, 6922+ SSL_TLSV1_2, 6923+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 6924+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 6925++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4)| 6926++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, 6927+ 128, 6928+ 128, 6929+ }, 6930+@@ -1873,7 +1874,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 6931+ SSL_AEAD, 6932+ SSL_TLSV1_2, 6933+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 6934+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 6935++ 
SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4)| 6936++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, 6937+ 128, 6938+ 128, 6939+ }, 6940+@@ -1905,7 +1907,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 6941+ SSL_AEAD, 6942+ SSL_TLSV1_2, 6943+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 6944+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 6945++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4)| 6946++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, 6947+ 128, 6948+ 128, 6949+ }, 6950+@@ -1937,7 +1940,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 6951+ SSL_AEAD, 6952+ SSL_TLSV1_2, 6953+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 6954+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 6955++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4)| 6956++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, 6957+ 128, 6958+ 128, 6959+ }, 6960+@@ -1969,7 +1973,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 6961+ SSL_AEAD, 6962+ SSL_TLSV1_2, 6963+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 6964+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 6965++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4)| 6966++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, 6967+ 128, 6968+ 128, 6969+ }, 6970+@@ -2001,7 +2006,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 6971+ SSL_AEAD, 6972+ SSL_TLSV1_2, 6973+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 6974+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 6975++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4)| 6976++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, 6977+ 128, 6978+ 128, 6979+ }, 6980+@@ -2714,7 +2720,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 6981+ SSL_AEAD, 6982+ SSL_TLSV1_2, 
6983+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 6984+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 6985++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4)| 6986++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, 6987+ 128, 6988+ 128, 6989+ }, 6990+@@ -2746,7 +2753,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 6991+ SSL_AEAD, 6992+ SSL_TLSV1_2, 6993+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 6994+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 6995++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4)| 6996++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, 6997+ 128, 6998+ 128, 6999+ }, 7000+@@ -2778,7 +2786,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 7001+ SSL_AEAD, 7002+ SSL_TLSV1_2, 7003+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 7004+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 7005++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4)| 7006++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, 7007+ 128, 7008+ 128, 7009+ }, 7010+@@ -2810,7 +2819,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 7011+ SSL_AEAD, 7012+ SSL_TLSV1_2, 7013+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 7014+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 7015++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4)| 7016++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, 7017+ 128, 7018+ 128, 7019+ }, 7020+@@ -2894,6 +2904,51 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 7021+ }, 7022+ #endif 7023+ 7024++ { 7025++ 1, 7026++ TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305, 7027++ TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305, 7028++ SSL_kEECDH, 7029++ SSL_aRSA, 7030++ SSL_CHACHA20POLY1305, 7031++ SSL_AEAD, 7032++ SSL_TLSV1_2, 7033++ SSL_NOT_EXP|SSL_HIGH, 7034++ 
SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(0), 7035++ 256, 7036++ 0, 7037++ }, 7038++ 7039++ { 7040++ 1, 7041++ TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, 7042++ TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305, 7043++ SSL_kEECDH, 7044++ SSL_aECDSA, 7045++ SSL_CHACHA20POLY1305, 7046++ SSL_AEAD, 7047++ SSL_TLSV1_2, 7048++ SSL_NOT_EXP|SSL_HIGH, 7049++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(0), 7050++ 256, 7051++ 0, 7052++ }, 7053++ 7054++ { 7055++ 1, 7056++ TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305, 7057++ TLS1_CK_DHE_RSA_CHACHA20_POLY1305, 7058++ SSL_kEDH, 7059++ SSL_aRSA, 7060++ SSL_CHACHA20POLY1305, 7061++ SSL_AEAD, 7062++ SSL_TLSV1_2, 7063++ SSL_NOT_EXP|SSL_HIGH, 7064++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(0), 7065++ 256, 7066++ 0, 7067++ }, 7068++ 7069+ /* end of list */ 7070+ }; 7071+ 7072+diff --git a/ssl/s3_pkt.c b/ssl/s3_pkt.c 7073+index 5038f6c..04b474d 100644 7074+--- a/ssl/s3_pkt.c 7075++++ b/ssl/s3_pkt.c 7076+@@ -790,8 +790,11 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, 7077+ else 7078+ eivlen = 0; 7079+ } 7080+- else if (s->aead_write_ctx != NULL) 7081++ else if (s->aead_write_ctx != NULL && 7082++ s->aead_write_ctx->variable_nonce_included_in_record) 7083++ { 7084+ eivlen = s->aead_write_ctx->variable_nonce_len; 7085++ } 7086+ else 7087+ eivlen = 0; 7088+ 7089+diff --git a/ssl/ssl.h b/ssl/ssl.h 7090+index 0644cbf..d782a98 100644 7091+--- a/ssl/ssl.h 7092++++ b/ssl/ssl.h 7093+@@ -291,6 +291,7 @@ extern "C" { 7094+ #define SSL_TXT_CAMELLIA128 "CAMELLIA128" 7095+ #define SSL_TXT_CAMELLIA256 "CAMELLIA256" 7096+ #define SSL_TXT_CAMELLIA "CAMELLIA" 7097++#define SSL_TXT_CHACHA20 "CHACHA20" 7098+ 7099+ #define SSL_TXT_MD5 "MD5" 7100+ #define SSL_TXT_SHA1 "SHA1" 7101+diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c 7102+index 7e780cd..b6370bd 100644 7103+--- a/ssl/ssl_ciph.c 7104++++ b/ssl/ssl_ciph.c 7105+@@ -298,6 
+298,7 @@ static const SSL_CIPHER cipher_aliases[]={ 7106+ {0,SSL_TXT_CAMELLIA128,0,0,0,SSL_CAMELLIA128,0,0,0,0,0,0}, 7107+ {0,SSL_TXT_CAMELLIA256,0,0,0,SSL_CAMELLIA256,0,0,0,0,0,0}, 7108+ {0,SSL_TXT_CAMELLIA ,0,0,0,SSL_CAMELLIA128|SSL_CAMELLIA256,0,0,0,0,0,0}, 7109++ {0,SSL_TXT_CHACHA20 ,0,0,0,SSL_CHACHA20POLY1305,0,0,0,0,0,0}, 7110+ 7111+ /* MAC aliases */ 7112+ {0,SSL_TXT_MD5,0, 0,0,0,SSL_MD5, 0,0,0,0,0}, 7113+@@ -523,9 +524,15 @@ int ssl_cipher_get_evp_aead(const SSL_SESSION *s, const EVP_AEAD **aead) 7114+ return 0; 7115+ 7116+ #ifndef OPENSSL_NO_AES 7117+- /* There is only one AEAD for now. */ 7118+- *aead = EVP_aead_aes_128_gcm(); 7119+- return 1; 7120++ switch (c->algorithm_enc) 7121++ { 7122++ case SSL_AES128GCM: 7123++ *aead = EVP_aead_aes_128_gcm(); 7124++ return 1; 7125++ case SSL_CHACHA20POLY1305: 7126++ *aead = EVP_aead_chacha20_poly1305(); 7127++ return 1; 7128++ } 7129+ #endif 7130+ 7131+ return 0; 7132+@@ -1715,6 +1722,9 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, char *buf, int len) 7133+ case SSL_SEED: 7134+ enc="SEED(128)"; 7135+ break; 7136++ case SSL_CHACHA20POLY1305: 7137++ enc="ChaCha20-Poly1305"; 7138++ break; 7139+ default: 7140+ enc="unknown"; 7141+ break; 7142+diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h 7143+index 63bc28b..b83d8cd 100644 7144+--- a/ssl/ssl_locl.h 7145++++ b/ssl/ssl_locl.h 7146+@@ -328,6 +328,7 @@ 7147+ #define SSL_SEED 0x00000800L 7148+ #define SSL_AES128GCM 0x00001000L 7149+ #define SSL_AES256GCM 0x00002000L 7150++#define SSL_CHACHA20POLY1305 0x00004000L 7151+ 7152+ #define SSL_AES (SSL_AES128|SSL_AES256|SSL_AES128GCM|SSL_AES256GCM) 7153+ #define SSL_CAMELLIA (SSL_CAMELLIA128|SSL_CAMELLIA256) 7154+@@ -389,6 +390,12 @@ 7155+ #define SSL_CIPHER_AEAD_FIXED_NONCE_LEN(ssl_cipher) \ 7156+ (((ssl_cipher->algorithm2 >> 24) & 0xf)*2) 7157+ 7158++/* SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD is a flag in 7159++ * SSL_CIPHER.algorithm2 which indicates that the variable part of the nonce is 7160++ * 
included as a prefix of the record. (AES-GCM, for example, does this with an 7161++ * 8-byte variable nonce.) */ 7162++#define SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD (1<<22) 7163++ 7164+ /* 7165+ * Export and cipher strength information. For each cipher we have to decide 7166+ * whether it is exportable or not. This information is likely to change 7167+@@ -605,6 +612,9 @@ struct ssl_aead_ctx_st 7168+ * records. */ 7169+ unsigned char fixed_nonce[8]; 7170+ unsigned char fixed_nonce_len, variable_nonce_len, tag_len; 7171++ /* variable_nonce_included_in_record is non-zero if the variable nonce 7172++ * for a record is included as a prefix before the ciphertext. */ 7173++ char variable_nonce_included_in_record; 7174+ }; 7175+ 7176+ #ifndef OPENSSL_NO_COMP 7177+diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c 7178+index 7af1a32..15800af 100644 7179+--- a/ssl/t1_enc.c 7180++++ b/ssl/t1_enc.c 7181+@@ -366,6 +366,8 @@ static int tls1_change_cipher_state_aead(SSL *s, char is_read, 7182+ memcpy(aead_ctx->fixed_nonce, iv, iv_len); 7183+ aead_ctx->fixed_nonce_len = iv_len; 7184+ aead_ctx->variable_nonce_len = 8; /* always the case, currently. */ 7185++ aead_ctx->variable_nonce_included_in_record = 7186++ (s->s3->tmp.new_cipher->algorithm2 & SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD) != 0; 7187+ if (aead_ctx->variable_nonce_len + aead_ctx->fixed_nonce_len != EVP_AEAD_nonce_length(aead)) 7188+ { 7189+ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD, ERR_R_INTERNAL_ERROR); 7190+@@ -863,6 +865,7 @@ int tls1_enc(SSL *s, int send) 7191+ if (send) 7192+ { 7193+ size_t len = rec->length; 7194++ size_t eivlen = 0; 7195+ in = rec->input; 7196+ out = rec->data; 7197+ 7198+@@ -878,18 +881,22 @@ int tls1_enc(SSL *s, int send) 7199+ * variable nonce. Thus we can copy the sequence number 7200+ * bytes into place without overwriting any of the 7201+ * plaintext.
*/ 7202+- memcpy(out, ad, aead->variable_nonce_len); 7203+- len -= aead->variable_nonce_len; 7204++ if (aead->variable_nonce_included_in_record) 7205++ { 7206++ memcpy(out, ad, aead->variable_nonce_len); 7207++ len -= aead->variable_nonce_len; 7208++ eivlen = aead->variable_nonce_len; 7209++ } 7210+ 7211+ ad[11] = len >> 8; 7212+ ad[12] = len & 0xff; 7213+ 7214+ n = EVP_AEAD_CTX_seal(&aead->ctx, 7215+- out + aead->variable_nonce_len, len + aead->tag_len, 7216++ out + eivlen, len + aead->tag_len, 7217+ nonce, nonce_used, 7218+- in + aead->variable_nonce_len, len, 7219++ in + eivlen, len, 7220+ ad, sizeof(ad)); 7221+- if (n >= 0) 7222++ if (n >= 0 && aead->variable_nonce_included_in_record) 7223+ n += aead->variable_nonce_len; 7224+ } 7225+ else 7226+@@ -903,12 +910,17 @@ int tls1_enc(SSL *s, int send) 7227+ 7228+ if (len < aead->variable_nonce_len) 7229+ return 0; 7230+- memcpy(nonce + nonce_used, in, aead->variable_nonce_len); 7231++ memcpy(nonce + nonce_used, 7232++ aead->variable_nonce_included_in_record ? 
in : ad, 7233++ aead->variable_nonce_len); 7234+ nonce_used += aead->variable_nonce_len; 7235+ 7236+- in += aead->variable_nonce_len; 7237+- len -= aead->variable_nonce_len; 7238+- out += aead->variable_nonce_len; 7239++ if (aead->variable_nonce_included_in_record) 7240++ { 7241++ in += aead->variable_nonce_len; 7242++ len -= aead->variable_nonce_len; 7243++ out += aead->variable_nonce_len; 7244++ } 7245+ 7246+ if (len < aead->tag_len) 7247+ return 0; 7248+diff --git a/ssl/tls1.h b/ssl/tls1.h 7249+index 8cac7df..3cbcb83 100644 7250+--- a/ssl/tls1.h 7251++++ b/ssl/tls1.h 7252+@@ -526,6 +526,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) 7253+ #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031 7254+ #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 7255+ 7256++#define TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305 0x0300CC13 7257++#define TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305 0x0300CC14 7258++#define TLS1_CK_DHE_RSA_CHACHA20_POLY1305 0x0300CC15 7259++ 7260+ /* XXX 7261+ * Inconsistency alert: 7262+ * The OpenSSL names of ciphers with ephemeral DH here include the string 7263+@@ -677,6 +681,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) 7264+ #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SHA256" 7265+ #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SHA384" 7266+ 7267++#define TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305 "ECDHE-RSA-CHACHA20-POLY1305" 7268++#define TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 "ECDHE-ECDSA-CHACHA20-POLY1305" 7269++#define TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305 "DHE-RSA-CHACHA20-POLY1305" 7270++ 7271+ #define TLS_CT_RSA_SIGN 1 7272+ #define TLS_CT_DSS_SIGN 2 7273+ #define TLS_CT_RSA_FIXED_DH 3 7274+diff --git a/test/Makefile b/test/Makefile 7275+index 4c9eabc..4790aa8 100644 7276+--- a/test/Makefile 7277++++ b/test/Makefile 7278+@@ -86,7 +86,9 @@ OBJ= $(BNTEST).o $(ECTEST).o $(ECDSATEST).o $(ECDHTEST).o 
$(IDEATEST).o \ 7279+ $(MDC2TEST).o $(RMDTEST).o \ 7280+ $(RANDTEST).o $(DHTEST).o $(ENGINETEST).o $(CASTTEST).o \ 7281+ $(BFTEST).o $(SSLTEST).o $(DSATEST).o $(EXPTEST).o $(RSATEST).o \ 7282+- $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o 7283++ $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o $(CHACHATEST).o \ 7284++ $(POLY1305TEST).o 7285++ 7286+ SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \ 7287+ $(MD2TEST).c $(MD4TEST).c $(MD5TEST).c \ 7288+ $(HMACTEST).c $(WPTEST).c \ 7289+@@ -94,7 +96,8 @@ SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \ 7290+ $(DESTEST).c $(SHATEST).c $(SHA1TEST).c $(MDC2TEST).c $(RMDTEST).c \ 7291+ $(RANDTEST).c $(DHTEST).c $(ENGINETEST).c $(CASTTEST).c \ 7292+ $(BFTEST).c $(SSLTEST).c $(DSATEST).c $(EXPTEST).c $(RSATEST).c \ 7293+- $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c 7294++ $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c \ 7295++ $(CHACHATEST).c $(POLY1305TEST).c 7296+ 7297+ EXHEADER= 7298+ HEADER= $(EXHEADER) 7299+@@ -137,7 +140,7 @@ alltests: \ 7300+ test_enc test_x509 test_rsa test_crl test_sid \ 7301+ test_gen test_req test_pkcs7 test_verify test_dh test_dsa \ 7302+ test_ss test_ca test_engine test_evp test_ssl test_tsa test_ige \ 7303+- test_jpake test_srp test_cms 7304++ test_jpake test_srp test_cms test_chacha test_poly1305 7305+ 7306+ test_evp: 7307+ ../util/shlib_wrap.sh ./$(EVPTEST) evptests.txt 7308+@@ -318,6 +321,14 @@ test_srp: $(SRPTEST)$(EXE_EXT) 7309+ @echo "Test SRP" 7310+ ../util/shlib_wrap.sh ./srptest 7311+ 7312++test_chacha: $(CHACHATEST)$(EXE_EXT) 7313++ @echo "Test ChaCha" 7314++ ../util/shlib_wrap.sh ./$(CHACHATEST) 7315++ 7316++test_poly1305: $(POLY1305TEST)$(EXE_EXT) 7317++ @echo "Test Poly1305" 7318++ ../util/shlib_wrap.sh ./$(POLY1305TEST) 7319++ 7320+ lint: 7321+ lint -DLINT $(INCLUDES) $(SRC)>fluff 7322+ 7323+@@ -394,6 +405,12 @@ $(SHA256TEST)$(EXE_EXT): $(SHA256TEST).o $(DLIBCRYPTO) 7324+ 
$(SHA512TEST)$(EXE_EXT): $(SHA512TEST).o $(DLIBCRYPTO) 7325+ @target=$(SHA512TEST); $(BUILD_CMD) 7326+ 7327++$(CHACHATEST)$(EXE_EXT): $(CHACHATEST).o $(DLIBCRYPTO) 7328++ @target=$(CHACHATEST); $(BUILD_CMD) 7329++ 7330++$(POLY1305TEST)$(EXE_EXT): $(POLY1305TEST).o $(DLIBCRYPTO) 7331++ @target=$(POLY1305TEST); $(BUILD_CMD) 7332++ 7333+ $(RMDTEST)$(EXE_EXT): $(RMDTEST).o $(DLIBCRYPTO) 7334+ @target=$(RMDTEST); $(BUILD_CMD) 7335+ 7336+-- 7337+1.8.4.1 7338+ 7339diff -burN android-openssl-lhash2/patches/neon_runtime.patch android-openssl/patches/neon_runtime.patch 7340--- android-openssl-lhash2/patches/neon_runtime.patch 1969-12-31 19:00:00.000000000 -0500 7341+++ android-openssl/patches/neon_runtime.patch 2013-11-05 16:51:28.668287449 -0500 7342@@ -0,0 +1,1123 @@ 7343+From aea47606333cfd3e7a09cab3e42e488c79a416af Mon Sep 17 00:00:00 2001 7344+From: Adam Langley <agl@chromium.org> 7345+Date: Tue, 5 Nov 2013 13:10:11 -0500 7346+Subject: [PATCH 52/52] Optional NEON support on ARM. 7347+ 7348+This patch causes ARM to build both the NEON and generic versions of 7349+ChaCha20 and Poly1305. The NEON code can be enabled at run-time by 7350+calling CRYPTO_set_NEON_capable(1).
7351+--- 7352+ .gitignore | 1 + 7353+ Configure | 2 +- 7354+ apps/speed.c | 5 + 7355+ crypto/chacha/chacha_enc.c | 18 + 7356+ crypto/chacha/chacha_vec.c | 7 + 7357+ crypto/chacha/chacha_vec_arm.s | 846 +++++++++++++++++++++++++++++++++++++++++ 7358+ crypto/cryptlib.c | 14 + 7359+ crypto/crypto.h | 8 + 7360+ crypto/poly1305/poly1305.c | 35 ++ 7361+ crypto/poly1305/poly1305_arm.c | 9 +- 7362+ 10 files changed, 941 insertions(+), 4 deletions(-) 7363+ create mode 100644 crypto/chacha/chacha_vec_arm.s 7364+ 7365+diff --git a/Configure b/Configure 7366+index 1b95384..18b7af0 100755 7367+--- a/Configure 7368++++ b/Configure 7369+@@ -136,7 +136,7 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-a 7370+ my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::::"; 7371+ my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::::"; 7372+ my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::::ghash-s390x.o:"; 7373+-my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::chacha_vec.o:poly1305_arm.o poly1305_arm_asm.o:void"; 7374++my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::chacha_vec_arm.o chacha_enc.o:poly1305.o poly1305_arm.o poly1305_arm_asm.o:void"; 7375+ my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::::32"; 7376+ my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::::64"; 7377+ 
my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::::"; 7378+diff --git a/crypto/chacha/chacha_enc.c b/crypto/chacha/chacha_enc.c 7379+index 54d1ca3..e4b648f 100644 7380+--- a/crypto/chacha/chacha_enc.c 7381++++ b/crypto/chacha/chacha_enc.c 7382+@@ -61,6 +61,7 @@ 7383+ 7384+ #if !defined(OPENSSL_NO_CHACHA) 7385+ 7386++#include <openssl/crypto.h> 7387+ #include <openssl/chacha.h> 7388+ 7389+ /* sigma contains the ChaCha constants, which happen to be an ASCII string. */ 7390+@@ -87,6 +88,15 @@ static const char sigma[16] = "expand 32-byte k"; 7391+ 7392+ typedef unsigned int uint32_t; 7393+ 7394++#if __arm__ 7395++/* Defined in chacha_vec.c */ 7396++void CRYPTO_chacha_20_neon(unsigned char *out, 7397++ const unsigned char *in, size_t in_len, 7398++ const unsigned char key[32], 7399++ const unsigned char nonce[8], 7400++ size_t counter); 7401++#endif 7402++ 7403+ /* chacha_core performs |num_rounds| rounds of ChaCha20 on the input words in 7404+ * |input| and writes the 64 output bytes to |output|. 
*/ 7405+ static void chacha_core(unsigned char output[64], const uint32_t input[16], 7406+@@ -124,6 +134,16 @@ void CRYPTO_chacha_20(unsigned char *out, 7407+ unsigned char buf[64]; 7408+ size_t todo, i; 7409+ 7410++#if __arm__ 7411++ if (CRYPTO_is_NEON_capable() && 7412++ ((intptr_t)in & 15) == 0 && 7413++ ((intptr_t)out & 15) == 0) 7414++ { 7415++ CRYPTO_chacha_20_neon(out, in, in_len, key, nonce, counter); 7416++ return; 7417++ } 7418++#endif 7419++ 7420+ input[0] = U8TO32_LITTLE(sigma + 0); 7421+ input[1] = U8TO32_LITTLE(sigma + 4); 7422+ input[2] = U8TO32_LITTLE(sigma + 8); 7423+diff --git a/crypto/chacha/chacha_vec.c b/crypto/chacha/chacha_vec.c 7424+index 33b2238..1226c39 100644 7425+--- a/crypto/chacha/chacha_vec.c 7426++++ b/crypto/chacha/chacha_vec.c 7427+@@ -154,7 +154,14 @@ typedef unsigned vec __attribute__ ((vector_size (16))); 7428+ STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \ 7429+ STORE(op + d +12, LOAD(in + d +12) ^ REVV_BE(v3)); 7430+ 7431++#if __ARM_NEON__ 7432++/* For ARM, we can't depend on NEON support, so this function is compiled with 7433++ * a different name, along with the generic code, and can be enabled at 7434++ * run-time. */ 7435++void CRYPTO_chacha_20_neon( 7436++#else 7437+ void CRYPTO_chacha_20( 7438++#endif 7439+ unsigned char *out, 7440+ const unsigned char *in, 7441+ size_t inlen, 7442+diff --git a/crypto/chacha/chacha_vec_arm.S b/crypto/chacha/chacha_vec_arm.S 7443+new file mode 100644 7444+index 0000000..24a5050 7445+--- /dev/null 7446++++ b/crypto/chacha/chacha_vec_arm.S 7447+@@ -0,0 +1,863 @@ 7448++# This file contains a pre-compiled version of chacha_vec.c for ARM. This is 7449++# needed to support switching on NEON code at runtime. If the whole of OpenSSL 7450++# were to be compiled with the needed flags to build chacha_vec.c, then it 7451++# wouldn't be possible to run on non-NEON systems. 
7452++# 7453++# This file was generated by: 7454++# 7455++# /opt/gcc-linaro-arm-linux-gnueabihf-4.7-2012.10-20121022_linux/bin/arm-linux-gnueabihf-gcc -O3 -mcpu=cortex-a8 -mfpu=neon -S chacha_vec.c -I ../../include -fpic -o chacha_vec_arm.S 7456++# 7457++# And then EABI attribute 28 was set to zero to allow linking with soft-float 7458++# code. 7459++ 7460++ .syntax unified 7461++ .cpu cortex-a8 7462++ .eabi_attribute 27, 3 7463++ .eabi_attribute 28, 0 7464++ .fpu neon 7465++ .eabi_attribute 20, 1 7466++ .eabi_attribute 21, 1 7467++ .eabi_attribute 23, 3 7468++ .eabi_attribute 24, 1 7469++ .eabi_attribute 25, 1 7470++ .eabi_attribute 26, 2 7471++ .eabi_attribute 30, 2 7472++ .eabi_attribute 34, 1 7473++ .eabi_attribute 18, 4 7474++ .thumb 7475++ .file "chacha_vec.c" 7476++ .text 7477++ .align 2 7478++ .global CRYPTO_chacha_20_neon 7479++ .thumb 7480++ .thumb_func 7481++ .type CRYPTO_chacha_20_neon, %function 7482++CRYPTO_chacha_20_neon: 7483++ @ args = 8, pretend = 0, frame = 304 7484++ @ frame_needed = 1, uses_anonymous_args = 0 7485++ @ link register save eliminated. 
7486++ push {r4, r5, r6, r7, r8, r9, sl, fp} 7487++ fstmfdd sp!, {d8, d9, d10, d11, d12, d13, d14, d15} 7488++ sub sp, sp, #304 7489++ add r7, sp, #0 7490++ movw ip, #43691 7491++ movt ip, 43690 7492++ str r2, [r7, #196] 7493++ sub sp, sp, #96 7494++ ldr r4, [r7, #196] 7495++ ldr r6, [r7, #400] 7496++ ldr r2, .L38+16 7497++ umull r4, ip, ip, r4 7498++ ldr r6, [r6, #0] 7499++ ldr r8, [r7, #400] 7500++.LPIC24: 7501++ add r2, pc 7502++ add r4, sp, #15 7503++ str r3, [r7, #244] 7504++ str r6, [r7, #176] 7505++ bic r4, r4, #15 7506++ str r0, [r7, #188] 7507++ str r4, [r7, #200] 7508++ lsrs ip, ip, #7 7509++ str r1, [r7, #184] 7510++ ldmia r2, {r0, r1, r2, r3} 7511++ ldr r4, [r8, #4] 7512++ ldr r5, [r7, #244] 7513++ vld1.64 {d24-d25}, [r5:64] 7514++ vldr d26, [r5, #16] 7515++ vldr d27, [r5, #24] 7516++ ldr r9, [r7, #200] 7517++ ldr r8, [r7, #404] 7518++ ldr r5, [r7, #176] 7519++ add r6, r9, #64 7520++ str r4, [r7, #300] 7521++ mov r4, #0 7522++ str r8, [r7, #288] 7523++ str r5, [r7, #296] 7524++ str r4, [r7, #292] 7525++ stmia r6, {r0, r1, r2, r3} 7526++ vldr d22, [r9, #64] 7527++ vldr d23, [r9, #72] 7528++ vldr d20, [r7, #288] 7529++ vldr d21, [r7, #296] 7530++ str ip, [r7, #192] 7531++ beq .L20 7532++ lsl r6, ip, #1 7533++ ldr r1, [r9, #68] 7534++ add r3, r6, ip 7535++ str r6, [r7, #180] 7536++ ldr r2, [r9, #72] 7537++ add r8, r8, #2 7538++ ldr r5, [r9, #76] 7539++ vldr d18, .L38 7540++ vldr d19, .L38+8 7541++ str r4, [r7, #240] 7542++ ldr r6, [r7, #184] 7543++ ldr r4, [r7, #188] 7544++ str r0, [r7, #224] 7545++ str r1, [r7, #220] 7546++ str r8, [r7, #208] 7547++ str r2, [r7, #216] 7548++ str r3, [r7, #204] 7549++ str r5, [r7, #212] 7550++ str r6, [r7, #252] 7551++ str r4, [r7, #248] 7552++.L4: 7553++ ldr r2, [r7, #244] 7554++ add r9, r7, #216 7555++ ldr r3, [r7, #244] 7556++ vadd.i32 q8, q10, q9 7557++ ldr r6, [r7, #208] 7558++ vmov q15, q13 @ v4si 7559++ ldr r5, [r7, #240] 7560++ vmov q3, q12 @ v4si 7561++ ldr r4, [r7, #244] 7562++ vmov q2, q11 @ v4si 7563++ adds r5, 
r5, r6 7564++ ldr r2, [r2, #8] 7565++ ldr r6, [r7, #400] 7566++ vmov q5, q10 @ v4si 7567++ ldr r3, [r3, #12] 7568++ vmov q1, q13 @ v4si 7569++ ldr r0, [r7, #244] 7570++ vmov q0, q12 @ v4si 7571++ ldr r1, [r7, #244] 7572++ vmov q4, q11 @ v4si 7573++ ldmia r9, {r9, sl, fp} 7574++ str r5, [r7, #228] 7575++ ldr r5, [r4, #24] 7576++ ldr r0, [r0, #0] 7577++ ldr r1, [r1, #4] 7578++ str r2, [r7, #264] 7579++ str r3, [r7, #236] 7580++ ldr r2, [r6, #4] 7581++ ldr r3, [r4, #28] 7582++ str r5, [r7, #280] 7583++ ldr r5, [r6, #0] 7584++ movs r6, #0 7585++ ldr ip, [r7, #228] 7586++ ldr r8, [r7, #212] 7587++ str r0, [r7, #232] 7588++ str r1, [r7, #268] 7589++ ldr r0, [r4, #16] 7590++ ldr r1, [r4, #20] 7591++ movs r4, #10 7592++ str r2, [r7, #24] 7593++ str r3, [r7, #284] 7594++ str r4, [r7, #256] 7595++ ldr r2, [r7, #264] 7596++ str r9, [r7, #276] 7597++ mov r9, r6 7598++ ldr r6, [r7, #280] 7599++ str r8, [r7, #260] 7600++ mov r8, sl 7601++ str r1, [r7, #272] 7602++ mov sl, ip 7603++ str r6, [r7, #264] 7604++ mov r6, r5 7605++ ldr r3, [r7, #236] 7606++ mov r5, r0 7607++ ldr ip, [r7, #24] 7608++ ldr r1, [r7, #268] 7609++ ldr r0, [r7, #232] 7610++ b .L39 7611++.L40: 7612++ .align 3 7613++.L38: 7614++ .word 1 7615++ .word 0 7616++ .word 0 7617++ .word 0 7618++ .word .LANCHOR0-(.LPIC24+4) 7619++.L39: 7620++.L3: 7621++ vadd.i32 q4, q4, q0 7622++ add r8, r8, r1 7623++ vadd.i32 q2, q2, q3 7624++ str r8, [r7, #268] 7625++ veor q5, q5, q4 7626++ ldr r8, [r7, #276] 7627++ veor q8, q8, q2 7628++ add fp, fp, r0 7629++ str fp, [r7, #280] 7630++ add r8, r8, r2 7631++ vrev32.16 q5, q5 7632++ str r8, [r7, #276] 7633++ vrev32.16 q8, q8 7634++ vadd.i32 q1, q1, q5 7635++ vadd.i32 q15, q15, q8 7636++ ldr r8, [r7, #280] 7637++ veor q0, q1, q0 7638++ ldr r4, [r7, #260] 7639++ veor q3, q15, q3 7640++ eor sl, sl, r8 7641++ ldr r8, [r7, #276] 7642++ add fp, r4, r3 7643++ vshl.i32 q7, q0, #12 7644++ ldr r4, [r7, #268] 7645++ vshl.i32 q6, q3, #12 7646++ eor r6, r6, r8 7647++ eor r9, r9, r4 7648++ ldr r4, 
[r7, #272] 7649++ vsri.32 q7, q0, #20 7650++ ror r8, r6, #16 7651++ ldr r6, [r7, #264] 7652++ eor ip, ip, fp 7653++ vsri.32 q6, q3, #20 7654++ ror sl, sl, #16 7655++ ror r9, r9, #16 7656++ add r5, r5, sl 7657++ vadd.i32 q4, q4, q7 7658++ str r5, [r7, #236] 7659++ vadd.i32 q2, q2, q6 7660++ add r5, r4, r9 7661++ add r4, r6, r8 7662++ ldr r6, [r7, #284] 7663++ ror ip, ip, #16 7664++ veor q5, q4, q5 7665++ veor q8, q2, q8 7666++ add r6, r6, ip 7667++ str r6, [r7, #264] 7668++ eors r1, r1, r5 7669++ ldr r6, [r7, #236] 7670++ vshl.i32 q3, q5, #8 7671++ vshl.i32 q14, q8, #8 7672++ eors r2, r2, r4 7673++ eors r0, r0, r6 7674++ ldr r6, [r7, #264] 7675++ vsri.32 q3, q5, #24 7676++ ror r1, r1, #20 7677++ eors r3, r3, r6 7678++ ldr r6, [r7, #280] 7679++ ror r0, r0, #20 7680++ vsri.32 q14, q8, #24 7681++ adds r6, r0, r6 7682++ str r6, [r7, #284] 7683++ ldr r6, [r7, #268] 7684++ vadd.i32 q1, q1, q3 7685++ vadd.i32 q15, q15, q14 7686++ ror r2, r2, #20 7687++ adds r6, r1, r6 7688++ str r6, [r7, #260] 7689++ ldr r6, [r7, #276] 7690++ veor q6, q15, q6 7691++ veor q7, q1, q7 7692++ ror r3, r3, #20 7693++ adds r6, r2, r6 7694++ str r6, [r7, #280] 7695++ ldr r6, [r7, #284] 7696++ vshl.i32 q0, q6, #7 7697++ vshl.i32 q5, q7, #7 7698++ add fp, r3, fp 7699++ eor sl, r6, sl 7700++ ldr r6, [r7, #260] 7701++ eor ip, fp, ip 7702++ vsri.32 q0, q6, #25 7703++ eor r9, r6, r9 7704++ ldr r6, [r7, #280] 7705++ ror sl, sl, #24 7706++ vsri.32 q5, q7, #25 7707++ eor r8, r6, r8 7708++ ldr r6, [r7, #236] 7709++ ror r9, r9, #24 7710++ ror ip, ip, #24 7711++ add r6, sl, r6 7712++ str r6, [r7, #276] 7713++ ldr r6, [r7, #264] 7714++ add r5, r9, r5 7715++ str r5, [r7, #272] 7716++ vext.32 q5, q5, q5, #1 7717++ add r5, ip, r6 7718++ ldr r6, [r7, #276] 7719++ vext.32 q0, q0, q0, #1 7720++ vadd.i32 q4, q4, q5 7721++ eors r0, r0, r6 7722++ ldr r6, [r7, #272] 7723++ vadd.i32 q2, q2, q0 7724++ vext.32 q3, q3, q3, #3 7725++ ror r8, r8, #24 7726++ eors r1, r1, r6 7727++ vext.32 q14, q14, q14, #3 7728++ add r4, r8, 
r4 7729++ ldr r6, [r7, #284] 7730++ veor q3, q4, q3 7731++ veor q14, q2, q14 7732++ eors r2, r2, r4 7733++ ror r1, r1, #25 7734++ vext.32 q1, q1, q1, #2 7735++ adds r6, r1, r6 7736++ str r6, [r7, #284] 7737++ vext.32 q15, q15, q15, #2 7738++ ldr r6, [r7, #260] 7739++ eors r3, r3, r5 7740++ ror r2, r2, #25 7741++ vrev32.16 q8, q14 7742++ adds r6, r2, r6 7743++ vrev32.16 q3, q3 7744++ str r6, [r7, #268] 7745++ vadd.i32 q1, q1, q3 7746++ ldr r6, [r7, #280] 7747++ vadd.i32 q15, q15, q8 7748++ ror r3, r3, #25 7749++ veor q5, q1, q5 7750++ adds r6, r3, r6 7751++ veor q0, q15, q0 7752++ str r6, [r7, #264] 7753++ ldr r6, [r7, #268] 7754++ ror r0, r0, #25 7755++ add fp, r0, fp 7756++ vshl.i32 q6, q5, #12 7757++ eor sl, r6, sl 7758++ ldr r6, [r7, #284] 7759++ vshl.i32 q14, q0, #12 7760++ eor r8, fp, r8 7761++ eor ip, r6, ip 7762++ ldr r6, [r7, #264] 7763++ vsri.32 q6, q5, #20 7764++ ror sl, sl, #16 7765++ eor r9, r6, r9 7766++ ror r6, r8, #16 7767++ vsri.32 q14, q0, #20 7768++ ldr r8, [r7, #272] 7769++ ror ip, ip, #16 7770++ add r5, sl, r5 7771++ add r8, r6, r8 7772++ add r4, ip, r4 7773++ str r4, [r7, #236] 7774++ eor r0, r8, r0 7775++ str r5, [r7, #280] 7776++ vadd.i32 q4, q4, q6 7777++ ldr r5, [r7, #236] 7778++ vadd.i32 q2, q2, q14 7779++ ldr r4, [r7, #276] 7780++ ror r0, r0, #20 7781++ veor q3, q4, q3 7782++ eors r1, r1, r5 7783++ veor q0, q2, q8 7784++ str r8, [r7, #272] 7785++ str r0, [r7, #24] 7786++ add fp, r0, fp 7787++ ldr r8, [r7, #280] 7788++ ror r9, r9, #16 7789++ ldr r0, [r7, #284] 7790++ add r4, r9, r4 7791++ str fp, [r7, #260] 7792++ ror r1, r1, #20 7793++ add fp, r1, r0 7794++ eor r2, r8, r2 7795++ ldr r0, [r7, #260] 7796++ eors r3, r3, r4 7797++ vshl.i32 q5, q3, #8 7798++ str r4, [r7, #232] 7799++ vshl.i32 q8, q0, #8 7800++ ldr r4, [r7, #268] 7801++ ldr r5, [r7, #264] 7802++ ror r2, r2, #20 7803++ ror r3, r3, #20 7804++ eors r6, r6, r0 7805++ adds r5, r3, r5 7806++ add r8, r2, r4 7807++ vsri.32 q5, q3, #24 7808++ ldr r4, [r7, #272] 7809++ eor r9, r5, r9 
7810++ eor ip, fp, ip 7811++ vsri.32 q8, q0, #24 7812++ eor sl, r8, sl 7813++ ror r6, r6, #24 7814++ ldr r0, [r7, #280] 7815++ str r5, [r7, #276] 7816++ adds r4, r6, r4 7817++ ldr r5, [r7, #236] 7818++ vadd.i32 q1, q1, q5 7819++ str r4, [r7, #272] 7820++ vadd.i32 q15, q15, q8 7821++ ldr r4, [r7, #232] 7822++ ror ip, ip, #24 7823++ ror sl, sl, #24 7824++ ror r9, r9, #24 7825++ add r5, ip, r5 7826++ add r0, sl, r0 7827++ str r5, [r7, #264] 7828++ add r5, r9, r4 7829++ str r0, [r7, #284] 7830++ veor q6, q1, q6 7831++ ldr r4, [r7, #24] 7832++ veor q14, q15, q14 7833++ ldr r0, [r7, #272] 7834++ eors r3, r3, r5 7835++ vshl.i32 q0, q6, #7 7836++ vext.32 q1, q1, q1, #2 7837++ eors r0, r0, r4 7838++ ldr r4, [r7, #284] 7839++ str r0, [r7, #280] 7840++ vshl.i32 q3, q14, #7 7841++ eors r2, r2, r4 7842++ ldr r4, [r7, #280] 7843++ ldr r0, [r7, #264] 7844++ vsri.32 q0, q6, #25 7845++ ror r2, r2, #25 7846++ ror r3, r3, #25 7847++ eors r1, r1, r0 7848++ vsri.32 q3, q14, #25 7849++ ror r0, r4, #25 7850++ ldr r4, [r7, #256] 7851++ ror r1, r1, #25 7852++ vext.32 q5, q5, q5, #1 7853++ subs r4, r4, #1 7854++ str r4, [r7, #256] 7855++ vext.32 q15, q15, q15, #2 7856++ vext.32 q8, q8, q8, #1 7857++ vext.32 q0, q0, q0, #3 7858++ vext.32 q3, q3, q3, #3 7859++ bne .L3 7860++ ldr r4, [r7, #264] 7861++ vadd.i32 q14, q10, q9 7862++ str r2, [r7, #264] 7863++ vadd.i32 q10, q10, q5 7864++ ldr r2, [r7, #252] 7865++ vld1.64 {d12-d13}, [r2:64] 7866++ ldr r2, [r7, #220] 7867++ vadd.i32 q4, q11, q4 7868++ str ip, [r7, #24] 7869++ mov ip, sl 7870++ mov sl, r8 7871++ ldr r8, [r7, #260] 7872++ add sl, sl, r2 7873++ ldr r2, [r7, #212] 7874++ str r4, [r7, #280] 7875++ vadd.i32 q0, q12, q0 7876++ ldr r4, [r7, #224] 7877++ add r8, r8, r2 7878++ ldr r2, [r7, #240] 7879++ vadd.i32 q1, q13, q1 7880++ str r0, [r7, #232] 7881++ add fp, fp, r4 7882++ mov r0, r5 7883++ ldr r4, [r7, #216] 7884++ mov r5, r6 7885++ mov r6, r9 7886++ ldr r9, [r7, #276] 7887++ adds r2, r2, #3 7888++ str r2, [r7, #240] 7889++ vadd.i32 q2, 
q11, q2 7890++ ldr r2, [r7, #252] 7891++ add r9, r9, r4 7892++ vadd.i32 q3, q12, q3 7893++ ldr r4, [r7, #228] 7894++ vadd.i32 q15, q13, q15 7895++ str r1, [r7, #268] 7896++ vadd.i32 q8, q14, q8 7897++ str r3, [r7, #236] 7898++ veor q4, q4, q6 7899++ ldr r3, [r7, #284] 7900++ ldr r1, [r7, #272] 7901++ add ip, r4, ip 7902++ ldr r4, [r7, #248] 7903++ vst1.64 {d8-d9}, [r4:64] 7904++ vldr d8, [r2, #16] 7905++ vldr d9, [r2, #24] 7906++ veor q0, q0, q4 7907++ vstr d0, [r4, #16] 7908++ vstr d1, [r4, #24] 7909++ vldr d0, [r2, #32] 7910++ vldr d1, [r2, #40] 7911++ veor q1, q1, q0 7912++ vstr d2, [r4, #32] 7913++ vstr d3, [r4, #40] 7914++ vldr d2, [r2, #48] 7915++ vldr d3, [r2, #56] 7916++ veor q10, q10, q1 7917++ vstr d20, [r4, #48] 7918++ vstr d21, [r4, #56] 7919++ vldr d8, [r2, #64] 7920++ vldr d9, [r2, #72] 7921++ veor q2, q2, q4 7922++ vstr d4, [r4, #64] 7923++ vstr d5, [r4, #72] 7924++ vldr d10, [r2, #80] 7925++ vldr d11, [r2, #88] 7926++ veor q3, q3, q5 7927++ vstr d6, [r4, #80] 7928++ vstr d7, [r4, #88] 7929++ vldr d12, [r2, #96] 7930++ vldr d13, [r2, #104] 7931++ veor q15, q15, q6 7932++ vstr d30, [r4, #96] 7933++ vstr d31, [r4, #104] 7934++ vldr d20, [r2, #112] 7935++ vldr d21, [r2, #120] 7936++ veor q8, q8, q10 7937++ vstr d16, [r4, #112] 7938++ vstr d17, [r4, #120] 7939++ ldr r4, [r2, #128] 7940++ ldr r2, [r7, #248] 7941++ vadd.i32 q10, q14, q9 7942++ eor r4, fp, r4 7943++ vadd.i32 q10, q10, q9 7944++ str r4, [r2, #128] 7945++ ldr r4, [r7, #252] 7946++ ldr r2, [r4, #132] 7947++ eor r2, sl, r2 7948++ ldr sl, [r7, #248] 7949++ str r2, [sl, #132] 7950++ ldr r2, [r4, #136] 7951++ eor r2, r9, r2 7952++ str r2, [sl, #136] 7953++ ldr r2, [r4, #140] 7954++ eor r2, r8, r2 7955++ str r2, [sl, #140] 7956++ ldr r2, [r7, #244] 7957++ ldr r4, [r4, #144] 7958++ ldr r2, [r2, #0] 7959++ str r4, [r7, #44] 7960++ ldr r4, [r7, #232] 7961++ add r8, r4, r2 7962++ ldr r2, [r7, #44] 7963++ ldr r4, [r7, #244] 7964++ eor r8, r8, r2 7965++ ldr r2, [r7, #252] 7966++ str r8, [sl, #144] 7967++ 
ldr r4, [r4, #4] 7968++ ldr r2, [r2, #148] 7969++ str r2, [r7, #40] 7970++ ldr r2, [r7, #268] 7971++ add r8, r2, r4 7972++ ldr r4, [r7, #40] 7973++ ldr r2, [r7, #244] 7974++ eor r8, r8, r4 7975++ ldr r4, [r7, #252] 7976++ str r8, [sl, #148] 7977++ ldr r2, [r2, #8] 7978++ ldr r4, [r4, #152] 7979++ str r4, [r7, #36] 7980++ ldr r4, [r7, #264] 7981++ add r8, r4, r2 7982++ ldr r2, [r7, #36] 7983++ eor r8, r8, r2 7984++ str r8, [sl, #152] 7985++ ldr r2, [r7, #252] 7986++ ldr r4, [r7, #244] 7987++ ldr r2, [r2, #156] 7988++ ldr r4, [r4, #12] 7989++ str r2, [r7, #32] 7990++ ldr r2, [r7, #236] 7991++ add r8, r2, r4 7992++ ldr r4, [r7, #32] 7993++ ldr r2, [r7, #252] 7994++ eor r8, r8, r4 7995++ str r8, [sl, #156] 7996++ ldr r8, [r7, #244] 7997++ ldr r2, [r2, #160] 7998++ ldr r4, [r8, #16] 7999++ adds r0, r0, r4 8000++ ldr r4, [r7, #252] 8001++ eors r0, r0, r2 8002++ str r0, [sl, #160] 8003++ ldr r0, [r8, #20] 8004++ ldr r2, [r4, #164] 8005++ adds r1, r1, r0 8006++ ldr r0, [r7, #280] 8007++ eors r1, r1, r2 8008++ str r1, [sl, #164] 8009++ ldr r2, [r8, #24] 8010++ ldr r1, [r4, #168] 8011++ adds r2, r0, r2 8012++ eors r2, r2, r1 8013++ str r2, [sl, #168] 8014++ ldr r1, [r8, #28] 8015++ ldr r2, [r4, #172] 8016++ adds r3, r3, r1 8017++ eors r3, r3, r2 8018++ str r3, [sl, #172] 8019++ ldr r3, [r4, #176] 8020++ eor r3, ip, r3 8021++ str r3, [sl, #176] 8022++ ldr r3, [r4, #180] 8023++ ldr r4, [r7, #400] 8024++ eors r6, r6, r3 8025++ str r6, [sl, #180] 8026++ ldr r6, [r7, #252] 8027++ ldr r2, [r4, #0] 8028++ ldr r3, [r6, #184] 8029++ adds r5, r5, r2 8030++ eors r5, r5, r3 8031++ str r5, [sl, #184] 8032++ ldr r2, [r6, #188] 8033++ adds r6, r6, #192 8034++ ldr r3, [r4, #4] 8035++ str r6, [r7, #252] 8036++ ldr r0, [r7, #24] 8037++ ldr r1, [r7, #240] 8038++ adds r4, r0, r3 8039++ eors r4, r4, r2 8040++ ldr r2, [r7, #204] 8041++ str r4, [sl, #188] 8042++ add sl, sl, #192 8043++ cmp r1, r2 8044++ str sl, [r7, #248] 8045++ bne .L4 8046++ ldr r4, [r7, #192] 8047++ ldr r3, [r7, #180] 8048++ 
ldr r6, [r7, #188] 8049++ adds r5, r3, r4 8050++ ldr r8, [r7, #184] 8051++ lsls r5, r5, #6 8052++ adds r4, r6, r5 8053++ add r5, r8, r5 8054++.L2: 8055++ ldr r9, [r7, #196] 8056++ movw r3, #43691 8057++ movt r3, 43690 8058++ ldr sl, [r7, #196] 8059++ umull r9, r3, r3, r9 8060++ lsrs r3, r3, #7 8061++ add r3, r3, r3, lsl #1 8062++ sub r3, sl, r3, lsl #6 8063++ lsrs r6, r3, #6 8064++ beq .L5 8065++ add r1, r5, #16 8066++ add r2, r4, #16 8067++ mov r0, r6 8068++ vldr d30, .L41 8069++ vldr d31, .L41+8 8070++.L6: 8071++ vmov q8, q10 @ v4si 8072++ movs r3, #10 8073++ vmov q1, q13 @ v4si 8074++ vmov q14, q12 @ v4si 8075++ vmov q3, q11 @ v4si 8076++.L7: 8077++ vadd.i32 q3, q3, q14 8078++ subs r3, r3, #1 8079++ veor q2, q8, q3 8080++ vrev32.16 q2, q2 8081++ vadd.i32 q8, q1, q2 8082++ veor q9, q8, q14 8083++ vshl.i32 q14, q9, #12 8084++ vsri.32 q14, q9, #20 8085++ vadd.i32 q3, q3, q14 8086++ veor q2, q3, q2 8087++ vshl.i32 q9, q2, #8 8088++ vsri.32 q9, q2, #24 8089++ vadd.i32 q8, q8, q9 8090++ vext.32 q9, q9, q9, #3 8091++ veor q14, q8, q14 8092++ vext.32 q1, q8, q8, #2 8093++ vshl.i32 q8, q14, #7 8094++ vsri.32 q8, q14, #25 8095++ vext.32 q8, q8, q8, #1 8096++ vadd.i32 q3, q3, q8 8097++ veor q2, q3, q9 8098++ vrev32.16 q2, q2 8099++ vadd.i32 q9, q1, q2 8100++ veor q8, q9, q8 8101++ vshl.i32 q14, q8, #12 8102++ vsri.32 q14, q8, #20 8103++ vadd.i32 q3, q3, q14 8104++ veor q2, q3, q2 8105++ vshl.i32 q8, q2, #8 8106++ vsri.32 q8, q2, #24 8107++ vadd.i32 q9, q9, q8 8108++ vext.32 q8, q8, q8, #1 8109++ veor q14, q9, q14 8110++ vext.32 q1, q9, q9, #2 8111++ vshl.i32 q9, q14, #7 8112++ vsri.32 q9, q14, #25 8113++ vext.32 q14, q9, q9, #3 8114++ bne .L7 8115++ vadd.i32 q8, q10, q8 8116++ subs r0, r0, #1 8117++ vadd.i32 q3, q11, q3 8118++ vldr d0, [r1, #-16] 8119++ vldr d1, [r1, #-8] 8120++ vadd.i32 q14, q12, q14 8121++ vadd.i32 q1, q13, q1 8122++ veor q3, q3, q0 8123++ vstr d6, [r2, #-16] 8124++ vstr d7, [r2, #-8] 8125++ vadd.i32 q10, q10, q15 8126++ vld1.64 {d8-d9}, [r1:64] 8127++ 
veor q14, q14, q4 8128++ vst1.64 {d28-d29}, [r2:64] 8129++ vldr d10, [r1, #16] 8130++ vldr d11, [r1, #24] 8131++ veor q1, q1, q5 8132++ vstr d2, [r2, #16] 8133++ vstr d3, [r2, #24] 8134++ vldr d18, [r1, #32] 8135++ vldr d19, [r1, #40] 8136++ add r1, r1, #64 8137++ veor q8, q8, q9 8138++ vstr d16, [r2, #32] 8139++ vstr d17, [r2, #40] 8140++ add r2, r2, #64 8141++ bne .L6 8142++ lsls r6, r6, #6 8143++ adds r4, r4, r6 8144++ adds r5, r5, r6 8145++.L5: 8146++ ldr r6, [r7, #196] 8147++ ands ip, r6, #63 8148++ beq .L1 8149++ vmov q8, q10 @ v4si 8150++ movs r3, #10 8151++ vmov q14, q13 @ v4si 8152++ vmov q9, q12 @ v4si 8153++ vmov q15, q11 @ v4si 8154++.L10: 8155++ vadd.i32 q15, q15, q9 8156++ subs r3, r3, #1 8157++ veor q8, q8, q15 8158++ vrev32.16 q8, q8 8159++ vadd.i32 q3, q14, q8 8160++ veor q9, q3, q9 8161++ vshl.i32 q14, q9, #12 8162++ vsri.32 q14, q9, #20 8163++ vadd.i32 q15, q15, q14 8164++ veor q9, q15, q8 8165++ vshl.i32 q8, q9, #8 8166++ vsri.32 q8, q9, #24 8167++ vadd.i32 q9, q3, q8 8168++ vext.32 q8, q8, q8, #3 8169++ veor q2, q9, q14 8170++ vext.32 q14, q9, q9, #2 8171++ vshl.i32 q9, q2, #7 8172++ vsri.32 q9, q2, #25 8173++ vext.32 q9, q9, q9, #1 8174++ vadd.i32 q15, q15, q9 8175++ veor q3, q15, q8 8176++ vrev32.16 q3, q3 8177++ vadd.i32 q14, q14, q3 8178++ veor q8, q14, q9 8179++ vshl.i32 q9, q8, #12 8180++ vsri.32 q9, q8, #20 8181++ vadd.i32 q15, q15, q9 8182++ veor q3, q15, q3 8183++ vshl.i32 q8, q3, #8 8184++ vsri.32 q8, q3, #24 8185++ vadd.i32 q14, q14, q8 8186++ vext.32 q8, q8, q8, #1 8187++ veor q3, q14, q9 8188++ vext.32 q14, q14, q14, #2 8189++ vshl.i32 q9, q3, #7 8190++ vsri.32 q9, q3, #25 8191++ vext.32 q9, q9, q9, #3 8192++ bne .L10 8193++ cmp ip, #15 8194++ vadd.i32 q11, q11, q15 8195++ bhi .L37 8196++ ldr r9, [r7, #200] 8197++ vst1.64 {d22-d23}, [r9:128] 8198++.L14: 8199++ ldr sl, [r7, #196] 8200++ and r3, sl, #48 8201++ cmp ip, r3 8202++ bls .L1 8203++ adds r0, r5, r3 8204++ adds r1, r4, r3 8205++ add r2, r0, #16 8206++ add r6, r1, #16 8207++ 
cmp r1, r2 8208++ it cc 8209++ cmpcc r0, r6 8210++ rsb r9, r3, ip 8211++ ite cc 8212++ movcc r2, #0 8213++ movcs r2, #1 8214++ cmp r9, #15 8215++ ite ls 8216++ movls r2, #0 8217++ andhi r2, r2, #1 8218++ lsr r8, r9, #4 8219++ eor r2, r2, #1 8220++ cmp r8, #0 8221++ it eq 8222++ orreq r2, r2, #1 8223++ lsl sl, r8, #4 8224++ cbnz r2, .L35 8225++ ldr fp, [r7, #200] 8226++ add r6, fp, r3 8227++.L17: 8228++ vld1.8 {q8}, [r0]! 8229++ adds r2, r2, #1 8230++ cmp r8, r2 8231++ vld1.8 {q9}, [r6]! 8232++ veor q8, q9, q8 8233++ vst1.8 {q8}, [r1]! 8234++ bhi .L17 8235++ cmp r9, sl 8236++ add r3, r3, sl 8237++ beq .L1 8238++.L35: 8239++ ldr r0, [r7, #200] 8240++.L25: 8241++ ldrb r2, [r5, r3] @ zero_extendqisi2 8242++ ldrb r1, [r3, r0] @ zero_extendqisi2 8243++ eors r2, r2, r1 8244++ strb r2, [r4, r3] 8245++ adds r3, r3, #1 8246++ cmp ip, r3 8247++ bhi .L25 8248++.L1: 8249++ add r7, r7, #304 8250++ mov sp, r7 8251++ fldmfdd sp!, {d8, d9, d10, d11, d12, d13, d14, d15} 8252++ pop {r4, r5, r6, r7, r8, r9, sl, fp} 8253++ bx lr 8254++.L37: 8255++ cmp ip, #31 8256++ vld1.64 {d0-d1}, [r5:64] 8257++ vadd.i32 q9, q12, q9 8258++ veor q11, q11, q0 8259++ vst1.64 {d22-d23}, [r4:64] 8260++ bls .L12 8261++ cmp ip, #47 8262++ vldr d2, [r5, #16] 8263++ vldr d3, [r5, #24] 8264++ vadd.i32 q13, q13, q14 8265++ veor q9, q9, q1 8266++ vstr d18, [r4, #16] 8267++ vstr d19, [r4, #24] 8268++ bls .L13 8269++ vadd.i32 q8, q8, q10 8270++ vldr d0, [r5, #32] 8271++ vldr d1, [r5, #40] 8272++ ldr r6, [r7, #200] 8273++ vstr d16, [r6, #48] 8274++ vstr d17, [r6, #56] 8275++ veor q8, q13, q0 8276++ vstr d16, [r4, #32] 8277++ vstr d17, [r4, #40] 8278++ b .L14 8279++.L12: 8280++ ldr r8, [r7, #200] 8281++ vstr d18, [r8, #16] 8282++ vstr d19, [r8, #24] 8283++ b .L14 8284++.L20: 8285++ ldr r5, [r7, #184] 8286++ ldr r4, [r7, #188] 8287++ b .L2 8288++.L13: 8289++ ldr r6, [r7, #200] 8290++ vstr d26, [r6, #32] 8291++ vstr d27, [r6, #40] 8292++ b .L14 8293++.L42: 8294++ .align 3 8295++.L41: 8296++ .word 1 8297++ .word 0 
8298++ .word 0 8299++ .word 0 8300++ .size CRYPTO_chacha_20_neon, .-CRYPTO_chacha_20_neon 8301++ .section .rodata 8302++ .align 3 8303++.LANCHOR0 = . + 0 8304++.LC0: 8305++ .word 1634760805 8306++ .word 857760878 8307++ .word 2036477234 8308++ .word 1797285236 8309++ .ident "GCC: (crosstool-NG linaro-1.13.1-4.7-2012.10-20121022 - Linaro GCC 2012.10) 4.7.3 20121001 (prerelease)" 8310++ .section .note.GNU-stack,"",%progbits 8311+diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c 8312+index 7bef015..3b6ab1d 100644 8313+--- a/crypto/cryptlib.c 8314++++ b/crypto/cryptlib.c 8315+@@ -661,6 +661,20 @@ const char *CRYPTO_get_lock_name(int type) 8316+ return(sk_OPENSSL_STRING_value(app_locks,type-CRYPTO_NUM_LOCKS)); 8317+ } 8318+ 8319++#if __arm__ 8320++static int global_arm_neon_enabled = 0; 8321++ 8322++void CRYPTO_set_NEON_capable(int on) 8323++ { 8324++ global_arm_neon_enabled = on != 0; 8325++ } 8326++ 8327++int CRYPTO_is_NEON_capable() 8328++ { 8329++ return global_arm_neon_enabled; 8330++ } 8331++#endif 8332++ 8333+ #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ 8334+ defined(__INTEL__) || \ 8335+ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) 8336+diff --git a/crypto/crypto.h b/crypto/crypto.h 8337+index e11ac73..db339c3 100644 8338+--- a/crypto/crypto.h 8339++++ b/crypto/crypto.h 8340+@@ -414,6 +414,14 @@ void CRYPTO_cleanup_all_ex_data(void); 8341+ 8342+ int CRYPTO_get_new_lockid(char *name); 8343+ 8344++/* CRYPTO_set_NEON_capable enables any NEON (ARM vector) dependent code. This 8345++ * code should be called before any non-init functions. */ 8346++void CRYPTO_set_NEON_capable(int on); 8347++ 8348++/* CRYPTO_is_NEON_capable returns the last value given to 8349++ * CRYPTO_set_NEON_capable, or else zero if it has never been called. */ 8350++int CRYPTO_is_NEON_capable(); 8351++ 8352+ int CRYPTO_num_locks(void); /* return CRYPTO_NUM_LOCKS (shared libs!) 
*/ 8353+ void CRYPTO_lock(int mode, int type,const char *file,int line); 8354+ void CRYPTO_set_locking_callback(void (*func)(int mode,int type, 8355+diff --git a/crypto/poly1305/poly1305.c b/crypto/poly1305/poly1305.c 8356+index 2e5621d..00d53bf 100644 8357+--- a/crypto/poly1305/poly1305.c 8358++++ b/crypto/poly1305/poly1305.c 8359+@@ -90,6 +90,17 @@ static void U32TO8_LE(unsigned char *m, uint32_t v) 8360+ } 8361+ #endif 8362+ 8363++#if __arm__ 8364++void CRYPTO_poly1305_init_neon(poly1305_state* state, 8365++ const unsigned char key[32]); 8366++ 8367++void CRYPTO_poly1305_update_neon(poly1305_state* state, 8368++ const unsigned char *in, 8369++ size_t in_len); 8370++ 8371++void CRYPTO_poly1305_finish_neon(poly1305_state* state, unsigned char mac[16]); 8372++#endif 8373++ 8374+ static uint64_t 8375+ mul32x32_64(uint32_t a, uint32_t b) 8376+ { 8377+@@ -207,6 +218,14 @@ void CRYPTO_poly1305_init(poly1305_state *statep, const unsigned char key[32]) 8378+ struct poly1305_state_st *state = (struct poly1305_state_st*) statep; 8379+ uint32_t t0,t1,t2,t3; 8380+ 8381++#if __arm__ 8382++ if (CRYPTO_is_NEON_capable()) 8383++ { 8384++ CRYPTO_poly1305_init_neon(statep, key); 8385++ return; 8386++ } 8387++#endif 8388++ 8389+ t0 = U8TO32_LE(key+0); 8390+ t1 = U8TO32_LE(key+4); 8391+ t2 = U8TO32_LE(key+8); 8392+@@ -241,6 +260,14 @@ void CRYPTO_poly1305_update(poly1305_state *statep, const unsigned char *in, 8393+ unsigned int i; 8394+ struct poly1305_state_st *state = (struct poly1305_state_st*) statep; 8395+ 8396++#if __arm__ 8397++ if (CRYPTO_is_NEON_capable()) 8398++ { 8399++ CRYPTO_poly1305_update_neon(statep, in, in_len); 8400++ return; 8401++ } 8402++#endif 8403++ 8404+ if (state->buf_used) 8405+ { 8406+ unsigned int todo = 16 - state->buf_used; 8407+@@ -282,6 +309,14 @@ void CRYPTO_poly1305_finish(poly1305_state *statep, unsigned char mac[16]) 8408+ uint32_t g0,g1,g2,g3,g4; 8409+ uint32_t b, nb; 8410+ 8411++#if __arm__ 8412++ if (CRYPTO_is_NEON_capable()) 8413++ { 8414++ 
CRYPTO_poly1305_finish_neon(statep, mac); 8415++ return; 8416++ } 8417++#endif 8418++ 8419+ if (state->buf_used) 8420+ poly1305_update(state, state->buf, state->buf_used); 8421+ 8422+diff --git a/crypto/poly1305/poly1305_arm.c b/crypto/poly1305/poly1305_arm.c 8423+index adcef35..34e339d 100644 8424+--- a/crypto/poly1305/poly1305_arm.c 8425++++ b/crypto/poly1305/poly1305_arm.c 8426+@@ -51,6 +51,7 @@ 8427+ * SUPERCOP by D. J. Bernstein and Peter Schwabe. */ 8428+ 8429+ #include <stdint.h> 8430++#include <string.h> 8431+ 8432+ #include <openssl/poly1305.h> 8433+ 8434+@@ -202,7 +203,8 @@ struct poly1305_state_st { 8435+ unsigned char key[16]; 8436+ }; 8437+ 8438+-void CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) 8439++void CRYPTO_poly1305_init_neon(poly1305_state *state, 8440++ const unsigned char key[32]) 8441+ { 8442+ struct poly1305_state_st *st = (struct poly1305_state_st*) (state); 8443+ fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); 8444+@@ -227,7 +229,8 @@ void CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) 8445+ st->buf_used = 0; 8446+ } 8447+ 8448+-void CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *in, size_t in_len) 8449++void CRYPTO_poly1305_update_neon(poly1305_state *state, const unsigned char *in, 8450++ size_t in_len) 8451+ { 8452+ struct poly1305_state_st *st = (struct poly1305_state_st*) (state); 8453+ fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); 8454+@@ -285,7 +288,7 @@ void CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *in, size 8455+ } 8456+ } 8457+ 8458+-void CRYPTO_poly1305_finish(poly1305_state* state, unsigned char mac[16]) 8459++void CRYPTO_poly1305_finish_neon(poly1305_state* state, unsigned char mac[16]) 8460+ { 8461+ struct poly1305_state_st *st = (struct poly1305_state_st*) (state); 8462+ fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); 8463+-- 8464+1.8.4.1 8465+ 8466diff 
-burN android-openssl-lhash2/patches/tls1_change_cipher_state_rewrite.patch android-openssl/patches/tls1_change_cipher_state_rewrite.patch 8467--- android-openssl-lhash2/patches/tls1_change_cipher_state_rewrite.patch 1969-12-31 19:00:00.000000000 -0500 8468+++ android-openssl/patches/tls1_change_cipher_state_rewrite.patch 2013-11-05 14:14:34.631283497 -0500 8469@@ -0,0 +1,567 @@ 8470+From d7f9af2d2682bc41e7bf1d669cda60f04630b04d Mon Sep 17 00:00:00 2001 8471+From: Adam Langley <agl@chromium.org> 8472+Date: Thu, 25 Jul 2013 14:57:38 -0400 8473+Subject: [PATCH 39/50] tls1_change_cipher_state_rewrite 8474+ 8475+The previous version of the function made adding AEAD changes very 8476+difficult. This change should be a semantic no-op - it should be purely 8477+a cleanup. 8478+--- 8479+ ssl/ssl.h | 1 + 8480+ ssl/ssl_err.c | 2 +- 8481+ ssl/t1_enc.c | 445 +++++++++++++++++++++++++++++++--------------------------- 8482+ 3 files changed, 240 insertions(+), 208 deletions(-) 8483+ 8484+diff --git a/ssl/ssl.h b/ssl/ssl.h 8485+index 68e5648..672f3eb 100644 8486+--- a/ssl/ssl.h 8487++++ b/ssl/ssl.h 8488+@@ -2439,6 +2439,7 @@ void ERR_load_SSL_strings(void); 8489+ #define SSL_F_SSL_WRITE 208 8490+ #define SSL_F_TLS1_CERT_VERIFY_MAC 286 8491+ #define SSL_F_TLS1_CHANGE_CIPHER_STATE 209 8492++#define SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER 338 8493+ #define SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT 274 8494+ #define SSL_F_TLS1_ENC 210 8495+ #define SSL_F_TLS1_EXPORT_KEYING_MATERIAL 314 8496+diff --git a/ssl/ssl_err.c b/ssl/ssl_err.c 8497+index fc98e6c..97b2a0d 100644 8498+--- a/ssl/ssl_err.c 8499++++ b/ssl/ssl_err.c 8500+@@ -280,7 +280,7 @@ static ERR_STRING_DATA SSL_str_functs[]= 8501+ {ERR_FUNC(SSL_F_SSL_VERIFY_CERT_CHAIN), "SSL_VERIFY_CERT_CHAIN"}, 8502+ {ERR_FUNC(SSL_F_SSL_WRITE), "SSL_write"}, 8503+ {ERR_FUNC(SSL_F_TLS1_CERT_VERIFY_MAC), "tls1_cert_verify_mac"}, 8504+-{ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE), "TLS1_CHANGE_CIPHER_STATE"}, 
8505++{ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER), "TLS1_CHANGE_CIPHER_STATE_CIPHER"}, 8506+ {ERR_FUNC(SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT), "TLS1_CHECK_SERVERHELLO_TLSEXT"}, 8507+ {ERR_FUNC(SSL_F_TLS1_ENC), "TLS1_ENC"}, 8508+ {ERR_FUNC(SSL_F_TLS1_EXPORT_KEYING_MATERIAL), "TLS1_EXPORT_KEYING_MATERIAL"}, 8509+diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c 8510+index 3649544..e1f91ba 100644 8511+--- a/ssl/t1_enc.c 8512++++ b/ssl/t1_enc.c 8513+@@ -316,56 +316,30 @@ static int tls1_generate_key_block(SSL *s, unsigned char *km, 8514+ return ret; 8515+ } 8516+ 8517+-int tls1_change_cipher_state(SSL *s, int which) 8518++/* tls1_change_cipher_state_cipher performs the work needed to switch cipher 8519++ * states when using EVP_CIPHER. The argument |is_read| is true iff this 8520++ * function is being called due to reading, as opposed to writing, a 8521++ * ChangeCipherSpec message. In order to support export ciphersuites, 8522++ * use_client_keys indicates whether the key material provided is in the 8523++ * "client write" direction. 
*/ 8524++static int tls1_change_cipher_state_cipher( 8525++ SSL *s, char is_read, char use_client_keys, 8526++ const unsigned char *mac_secret, unsigned mac_secret_len, 8527++ const unsigned char *key, unsigned key_len, 8528++ const unsigned char *iv, unsigned iv_len) 8529+ { 8530+- static const unsigned char empty[]=""; 8531+- unsigned char *p,*mac_secret; 8532+- unsigned char *exp_label; 8533+- unsigned char tmp1[EVP_MAX_KEY_LENGTH]; 8534+- unsigned char tmp2[EVP_MAX_KEY_LENGTH]; 8535+- unsigned char iv1[EVP_MAX_IV_LENGTH*2]; 8536+- unsigned char iv2[EVP_MAX_IV_LENGTH*2]; 8537+- unsigned char *ms,*key,*iv; 8538+- int client_write; 8539+- EVP_CIPHER_CTX *dd; 8540+- const EVP_CIPHER *c; 8541+-#ifndef OPENSSL_NO_COMP 8542+- const SSL_COMP *comp; 8543+-#endif 8544+- const EVP_MD *m; 8545+- int mac_type; 8546+- int *mac_secret_size; 8547++ const EVP_CIPHER *cipher = s->s3->tmp.new_sym_enc; 8548++ const char is_export = SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) != 0; 8549++ EVP_CIPHER_CTX *cipher_ctx; 8550+ EVP_MD_CTX *mac_ctx; 8551+- EVP_PKEY *mac_key; 8552+- int is_export,n,i,j,k,exp_label_len,cl; 8553+- int reuse_dd = 0; 8554++ char is_aead_cipher; 8555+ 8556+- is_export=SSL_C_IS_EXPORT(s->s3->tmp.new_cipher); 8557+- c=s->s3->tmp.new_sym_enc; 8558+- m=s->s3->tmp.new_hash; 8559+- mac_type = s->s3->tmp.new_mac_pkey_type; 8560+-#ifndef OPENSSL_NO_COMP 8561+- comp=s->s3->tmp.new_compression; 8562+-#endif 8563++ unsigned char export_tmp1[EVP_MAX_KEY_LENGTH]; 8564++ unsigned char export_tmp2[EVP_MAX_KEY_LENGTH]; 8565++ unsigned char export_iv1[EVP_MAX_IV_LENGTH * 2]; 8566++ unsigned char export_iv2[EVP_MAX_IV_LENGTH * 2]; 8567+ 8568+-#ifdef KSSL_DEBUG 8569+- printf("tls1_change_cipher_state(which= %d) w/\n", which); 8570+- printf("\talg= %ld/%ld, comp= %p\n", 8571+- s->s3->tmp.new_cipher->algorithm_mkey, 8572+- s->s3->tmp.new_cipher->algorithm_auth, 8573+- comp); 8574+- printf("\tevp_cipher == %p ==? 
&d_cbc_ede_cipher3\n", c); 8575+- printf("\tevp_cipher: nid, blksz= %d, %d, keylen=%d, ivlen=%d\n", 8576+- c->nid,c->block_size,c->key_len,c->iv_len); 8577+- printf("\tkey_block: len= %d, data= ", s->s3->tmp.key_block_length); 8578+- { 8579+- int i; 8580+- for (i=0; i<s->s3->tmp.key_block_length; i++) 8581+- printf("%02x", s->s3->tmp.key_block[i]); printf("\n"); 8582+- } 8583+-#endif /* KSSL_DEBUG */ 8584+- 8585+- if (which & SSL3_CC_READ) 8586++ if (is_read) 8587+ { 8588+ if (s->s3->tmp.new_cipher->algorithm2 & TLS1_STREAM_MAC) 8589+ s->mac_flags |= SSL_MAC_FLAG_READ_MAC_STREAM; 8590+@@ -373,206 +347,257 @@ int tls1_change_cipher_state(SSL *s, int which) 8591+ s->mac_flags &= ~SSL_MAC_FLAG_READ_MAC_STREAM; 8592+ 8593+ if (s->enc_read_ctx != NULL) 8594+- reuse_dd = 1; 8595++ EVP_CIPHER_CTX_cleanup(s->enc_read_ctx); 8596+ else if ((s->enc_read_ctx=OPENSSL_malloc(sizeof(EVP_CIPHER_CTX))) == NULL) 8597+ goto err; 8598+ else 8599+ /* make sure it's intialized in case we exit later with an error */ 8600+ EVP_CIPHER_CTX_init(s->enc_read_ctx); 8601+- dd= s->enc_read_ctx; 8602+- mac_ctx=ssl_replace_hash(&s->read_hash,NULL); 8603+-#ifndef OPENSSL_NO_COMP 8604+- if (s->expand != NULL) 8605+- { 8606+- COMP_CTX_free(s->expand); 8607+- s->expand=NULL; 8608+- } 8609+- if (comp != NULL) 8610+- { 8611+- s->expand=COMP_CTX_new(comp->method); 8612+- if (s->expand == NULL) 8613+- { 8614+- SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,SSL_R_COMPRESSION_LIBRARY_ERROR); 8615+- goto err2; 8616+- } 8617+- if (s->s3->rrec.comp == NULL) 8618+- s->s3->rrec.comp=(unsigned char *) 8619+- OPENSSL_malloc(SSL3_RT_MAX_ENCRYPTED_LENGTH); 8620+- if (s->s3->rrec.comp == NULL) 8621+- goto err; 8622+- } 8623+-#endif 8624+- /* this is done by dtls1_reset_seq_numbers for DTLS1_VERSION */ 8625+- if (s->version != DTLS1_VERSION) 8626+- memset(&(s->s3->read_sequence[0]),0,8); 8627+- mac_secret= &(s->s3->read_mac_secret[0]); 8628+- mac_secret_size=&(s->s3->read_mac_secret_size); 8629++ 8630++ cipher_ctx = 
s->enc_read_ctx; 8631++ mac_ctx = ssl_replace_hash(&s->read_hash, NULL); 8632++ 8633++ memcpy(s->s3->read_mac_secret, mac_secret, mac_secret_len); 8634++ s->s3->read_mac_secret_size = mac_secret_len; 8635+ } 8636+ else 8637+ { 8638+ if (s->s3->tmp.new_cipher->algorithm2 & TLS1_STREAM_MAC) 8639+ s->mac_flags |= SSL_MAC_FLAG_WRITE_MAC_STREAM; 8640+- else 8641++ else 8642+ s->mac_flags &= ~SSL_MAC_FLAG_WRITE_MAC_STREAM; 8643++ 8644+ if (s->enc_write_ctx != NULL) 8645+- reuse_dd = 1; 8646++ EVP_CIPHER_CTX_cleanup(s->enc_write_ctx); 8647+ else if ((s->enc_write_ctx=OPENSSL_malloc(sizeof(EVP_CIPHER_CTX))) == NULL) 8648+ goto err; 8649+ else 8650+ /* make sure it's intialized in case we exit later with an error */ 8651+ EVP_CIPHER_CTX_init(s->enc_write_ctx); 8652+- dd= s->enc_write_ctx; 8653+- mac_ctx = ssl_replace_hash(&s->write_hash,NULL); 8654+-#ifndef OPENSSL_NO_COMP 8655+- if (s->compress != NULL) 8656+- { 8657+- COMP_CTX_free(s->compress); 8658+- s->compress=NULL; 8659+- } 8660+- if (comp != NULL) 8661+- { 8662+- s->compress=COMP_CTX_new(comp->method); 8663+- if (s->compress == NULL) 8664+- { 8665+- SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,SSL_R_COMPRESSION_LIBRARY_ERROR); 8666+- goto err2; 8667+- } 8668+- } 8669+-#endif 8670+- /* this is done by dtls1_reset_seq_numbers for DTLS1_VERSION */ 8671+- if (s->version != DTLS1_VERSION) 8672+- memset(&(s->s3->write_sequence[0]),0,8); 8673+- mac_secret= &(s->s3->write_mac_secret[0]); 8674+- mac_secret_size = &(s->s3->write_mac_secret_size); 8675+- } 8676+- 8677+- if (reuse_dd) 8678+- EVP_CIPHER_CTX_cleanup(dd); 8679+ 8680+- p=s->s3->tmp.key_block; 8681+- i=*mac_secret_size=s->s3->tmp.new_mac_secret_size; 8682++ cipher_ctx = s->enc_write_ctx; 8683++ mac_ctx = ssl_replace_hash(&s->write_hash, NULL); 8684+ 8685+- cl=EVP_CIPHER_key_length(c); 8686+- j=is_export ? (cl < SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher) ? 
8687+- cl : SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher)) : cl; 8688+- /* Was j=(exp)?5:EVP_CIPHER_key_length(c); */ 8689+- /* If GCM mode only part of IV comes from PRF */ 8690+- if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) 8691+- k = EVP_GCM_TLS_FIXED_IV_LEN; 8692+- else 8693+- k=EVP_CIPHER_iv_length(c); 8694+- if ( (which == SSL3_CHANGE_CIPHER_CLIENT_WRITE) || 8695+- (which == SSL3_CHANGE_CIPHER_SERVER_READ)) 8696+- { 8697+- ms= &(p[ 0]); n=i+i; 8698+- key= &(p[ n]); n+=j+j; 8699+- iv= &(p[ n]); n+=k+k; 8700+- exp_label=(unsigned char *)TLS_MD_CLIENT_WRITE_KEY_CONST; 8701+- exp_label_len=TLS_MD_CLIENT_WRITE_KEY_CONST_SIZE; 8702+- client_write=1; 8703+- } 8704+- else 8705+- { 8706+- n=i; 8707+- ms= &(p[ n]); n+=i+j; 8708+- key= &(p[ n]); n+=j+k; 8709+- iv= &(p[ n]); n+=k; 8710+- exp_label=(unsigned char *)TLS_MD_SERVER_WRITE_KEY_CONST; 8711+- exp_label_len=TLS_MD_SERVER_WRITE_KEY_CONST_SIZE; 8712+- client_write=0; 8713++ memcpy(s->s3->write_mac_secret, mac_secret, mac_secret_len); 8714++ s->s3->write_mac_secret_size = mac_secret_len; 8715+ } 8716+ 8717+- if (n > s->s3->tmp.key_block_length) 8718+- { 8719+- SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,ERR_R_INTERNAL_ERROR); 8720+- goto err2; 8721+- } 8722+- 8723+- memcpy(mac_secret,ms,i); 8724+- 8725+- if (!(EVP_CIPHER_flags(c)&EVP_CIPH_FLAG_AEAD_CIPHER)) 8726+- { 8727+- mac_key = EVP_PKEY_new_mac_key(mac_type, NULL, 8728+- mac_secret,*mac_secret_size); 8729+- EVP_DigestSignInit(mac_ctx,NULL,m,NULL,mac_key); 8730+- EVP_PKEY_free(mac_key); 8731+- } 8732+-#ifdef TLS_DEBUG 8733+-printf("which = %04X\nmac key=",which); 8734+-{ int z; for (z=0; z<i; z++) printf("%02X%c",ms[z],((z+1)%16)?' ':'\n'); } 8735+-#endif 8736+ if (is_export) 8737+ { 8738+ /* In here I set both the read and write key/iv to the 8739+ * same value since only the correct one will be used :-). 
8740+ */ 8741++ const unsigned char *label; 8742++ unsigned label_len; 8743++ 8744++ if (use_client_keys) 8745++ { 8746++ label = (const unsigned char*) TLS_MD_CLIENT_WRITE_KEY_CONST; 8747++ label_len = TLS_MD_CLIENT_WRITE_KEY_CONST_SIZE; 8748++ } 8749++ else 8750++ { 8751++ label = (const unsigned char*) TLS_MD_SERVER_WRITE_KEY_CONST; 8752++ label_len = TLS_MD_SERVER_WRITE_KEY_CONST_SIZE; 8753++ } 8754++ 8755+ if (!tls1_PRF(ssl_get_algorithm2(s), 8756+- exp_label,exp_label_len, 8757+- s->s3->client_random,SSL3_RANDOM_SIZE, 8758+- s->s3->server_random,SSL3_RANDOM_SIZE, 8759+- NULL,0,NULL,0, 8760+- key,j,tmp1,tmp2,EVP_CIPHER_key_length(c))) 8761+- goto err2; 8762+- key=tmp1; 8763++ label, label_len, 8764++ s->s3->client_random, SSL3_RANDOM_SIZE, 8765++ s->s3->server_random, SSL3_RANDOM_SIZE, 8766++ NULL, 0, NULL, 0, 8767++ key /* secret */, key_len /* secret length */, 8768++ export_tmp1 /* output */, 8769++ export_tmp2 /* scratch space */, 8770++ EVP_CIPHER_key_length(s->s3->tmp.new_sym_enc) /* output length */)) 8771++ return 0; 8772++ key = export_tmp1; 8773+ 8774+- if (k > 0) 8775++ if (iv_len > 0) 8776+ { 8777++ static const unsigned char empty[] = ""; 8778++ 8779+ if (!tls1_PRF(ssl_get_algorithm2(s), 8780+- TLS_MD_IV_BLOCK_CONST,TLS_MD_IV_BLOCK_CONST_SIZE, 8781+- s->s3->client_random,SSL3_RANDOM_SIZE, 8782+- s->s3->server_random,SSL3_RANDOM_SIZE, 8783+- NULL,0,NULL,0, 8784+- empty,0,iv1,iv2,k*2)) 8785+- goto err2; 8786+- if (client_write) 8787+- iv=iv1; 8788++ TLS_MD_IV_BLOCK_CONST, TLS_MD_IV_BLOCK_CONST_SIZE, 8789++ s->s3->client_random, SSL3_RANDOM_SIZE, 8790++ s->s3->server_random, SSL3_RANDOM_SIZE, 8791++ NULL, 0, NULL, 0, 8792++ empty /* secret */ ,0 /* secret length */, 8793++ export_iv1 /* output */, 8794++ export_iv2 /* scratch space */, 8795++ iv_len * 2 /* output length */)) 8796++ return 0; 8797++ 8798++ if (use_client_keys) 8799++ iv = export_iv1; 8800+ else 8801+- iv= &(iv1[k]); 8802++ iv = &export_iv1[iv_len]; 8803+ } 8804+ } 8805+ 8806+- 
s->session->key_arg_length=0; 8807+-#ifdef KSSL_DEBUG 8808+- { 8809+- int i; 8810+- printf("EVP_CipherInit_ex(dd,c,key=,iv=,which)\n"); 8811+- printf("\tkey= "); for (i=0; i<c->key_len; i++) printf("%02x", key[i]); 8812+- printf("\n"); 8813+- printf("\t iv= "); for (i=0; i<c->iv_len; i++) printf("%02x", iv[i]); 8814+- printf("\n"); 8815+- } 8816+-#endif /* KSSL_DEBUG */ 8817++ /* is_aead_cipher indicates whether the EVP_CIPHER implements an AEAD 8818++ * interface. This is different from the newer EVP_AEAD interface. */ 8819++ is_aead_cipher = (EVP_CIPHER_flags(cipher) & EVP_CIPH_FLAG_AEAD_CIPHER) != 0; 8820+ 8821+- if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) 8822++ if (!is_aead_cipher) 8823+ { 8824+- EVP_CipherInit_ex(dd,c,NULL,key,NULL,(which & SSL3_CC_WRITE)); 8825+- EVP_CIPHER_CTX_ctrl(dd, EVP_CTRL_GCM_SET_IV_FIXED, k, iv); 8826++ EVP_PKEY *mac_key = 8827++ EVP_PKEY_new_mac_key(s->s3->tmp.new_mac_pkey_type, 8828++ NULL, mac_secret, mac_secret_len); 8829++ if (!mac_key) 8830++ return 0; 8831++ EVP_DigestSignInit(mac_ctx, NULL, s->s3->tmp.new_hash, NULL, mac_key); 8832++ EVP_PKEY_free(mac_key); 8833+ } 8834+- else 8835+- EVP_CipherInit_ex(dd,c,NULL,key,iv,(which & SSL3_CC_WRITE)); 8836++ 8837++ if (EVP_CIPHER_mode(cipher) == EVP_CIPH_GCM_MODE) 8838++ { 8839++ EVP_CipherInit_ex(cipher_ctx, cipher, NULL /* engine */, key, 8840++ NULL /* iv */, !is_read); 8841++ EVP_CIPHER_CTX_ctrl(cipher_ctx, EVP_CTRL_GCM_SET_IV_FIXED, iv_len, (void*) iv); 8842++ } 8843++ else 8844++ EVP_CipherInit_ex(cipher_ctx, cipher, NULL /* engine */, key, iv, !is_read); 8845+ 8846+ /* Needed for "composite" AEADs, such as RC4-HMAC-MD5 */ 8847+- if ((EVP_CIPHER_flags(c)&EVP_CIPH_FLAG_AEAD_CIPHER) && *mac_secret_size) 8848+- EVP_CIPHER_CTX_ctrl(dd,EVP_CTRL_AEAD_SET_MAC_KEY, 8849+- *mac_secret_size,mac_secret); 8850+- 8851+-#ifdef TLS_DEBUG 8852+-printf("which = %04X\nkey=",which); 8853+-{ int z; for (z=0; z<EVP_CIPHER_key_length(c); z++) printf("%02X%c",key[z],((z+1)%16)?' 
':'\n'); } 8854+-printf("\niv="); 8855+-{ int z; for (z=0; z<k; z++) printf("%02X%c",iv[z],((z+1)%16)?' ':'\n'); } 8856+-printf("\n"); 8857+-#endif 8858+- 8859+- OPENSSL_cleanse(tmp1,sizeof(tmp1)); 8860+- OPENSSL_cleanse(tmp2,sizeof(tmp1)); 8861+- OPENSSL_cleanse(iv1,sizeof(iv1)); 8862+- OPENSSL_cleanse(iv2,sizeof(iv2)); 8863+- return(1); 8864++ if (is_aead_cipher && mac_secret_len > 0) 8865++ EVP_CIPHER_CTX_ctrl(cipher_ctx, EVP_CTRL_AEAD_SET_MAC_KEY, 8866++ mac_secret_len, (void*) mac_secret); 8867++ 8868++ if (is_export) 8869++ { 8870++ OPENSSL_cleanse(export_tmp1, sizeof(export_tmp1)); 8871++ OPENSSL_cleanse(export_tmp2, sizeof(export_tmp1)); 8872++ OPENSSL_cleanse(export_iv1, sizeof(export_iv1)); 8873++ OPENSSL_cleanse(export_iv2, sizeof(export_iv2)); 8874++ } 8875++ 8876++ return 1; 8877++ 8878++err: 8879++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER, ERR_R_MALLOC_FAILURE); 8880++ return 0; 8881++ } 8882++ 8883++int tls1_change_cipher_state(SSL *s, int which) 8884++ { 8885++ /* is_read is true if we have just read a ChangeCipherSpec message - 8886++ * i.e. we need to update the read cipherspec. Otherwise we have just 8887++ * written one. */ 8888++ const char is_read = (which & SSL3_CC_READ) != 0; 8889++ /* use_client_keys is true if we wish to use the keys for the "client 8890++ * write" direction. This is the case if we're a client sending a 8891++ * ChangeCipherSpec, or a server reading a client's ChangeCipherSpec. 
*/ 8892++ const char use_client_keys = which == SSL3_CHANGE_CIPHER_CLIENT_WRITE || 8893++ which == SSL3_CHANGE_CIPHER_SERVER_READ; 8894++ const unsigned char *client_write_mac_secret, *server_write_mac_secret, *mac_secret; 8895++ const unsigned char *client_write_key, *server_write_key, *key; 8896++ const unsigned char *client_write_iv, *server_write_iv, *iv; 8897++ const EVP_CIPHER *cipher = s->s3->tmp.new_sym_enc; 8898++ unsigned key_len, iv_len, mac_secret_len; 8899++ const unsigned char *key_data; 8900++ const char is_export = SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) != 0; 8901++ 8902++ /* Update compression contexts. */ 8903++#ifndef OPENSSL_NO_COMP 8904++ const SSL_COMP *comp = s->s3->tmp.new_compression; 8905++ 8906++ if (is_read) 8907++ { 8908++ if (s->expand != NULL) 8909++ { 8910++ COMP_CTX_free(s->expand); 8911++ s->expand = NULL; 8912++ } 8913++ if (comp != NULL) 8914++ { 8915++ s->expand=COMP_CTX_new(comp->method); 8916++ if (s->expand == NULL) 8917++ { 8918++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,SSL_R_COMPRESSION_LIBRARY_ERROR); 8919++ return 0; 8920++ } 8921++ if (s->s3->rrec.comp == NULL) 8922++ s->s3->rrec.comp = 8923++ (unsigned char *)OPENSSL_malloc(SSL3_RT_MAX_ENCRYPTED_LENGTH); 8924++ if (s->s3->rrec.comp == NULL) 8925++ goto err; 8926++ } 8927++ } 8928++ else 8929++ { 8930++ if (s->compress != NULL) 8931++ { 8932++ COMP_CTX_free(s->compress); 8933++ s->compress = NULL; 8934++ } 8935++ if (comp != NULL) 8936++ { 8937++ s->compress = COMP_CTX_new(comp->method); 8938++ if (s->compress == NULL) 8939++ { 8940++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,SSL_R_COMPRESSION_LIBRARY_ERROR); 8941++ return 0; 8942++ } 8943++ } 8944++ } 8945++#endif /* OPENSSL_NO_COMP */ 8946++ 8947++ /* Reset sequence number to zero. */ 8948++ memset(is_read ? s->s3->read_sequence : s->s3->write_sequence, 0, 8); 8949++ 8950++ /* key_arg is used for SSLv2. We don't need it for TLS. 
*/ 8951++ s->session->key_arg_length = 0; 8952++ 8953++ mac_secret_len = s->s3->tmp.new_mac_secret_size; 8954++ 8955++ key_len = EVP_CIPHER_key_length(cipher); 8956++ if (is_export && key_len > SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher)) 8957++ key_len = SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher); 8958++ 8959++ if (EVP_CIPHER_mode(cipher) == EVP_CIPH_GCM_MODE) 8960++ iv_len = EVP_GCM_TLS_FIXED_IV_LEN; 8961++ else 8962++ iv_len = EVP_CIPHER_iv_length(cipher); 8963++ 8964++ key_data = s->s3->tmp.key_block; 8965++ client_write_mac_secret = key_data; key_data += mac_secret_len; 8966++ server_write_mac_secret = key_data; key_data += mac_secret_len; 8967++ client_write_key = key_data; key_data += key_len; 8968++ server_write_key = key_data; key_data += key_len; 8969++ client_write_iv = key_data; key_data += iv_len; 8970++ server_write_iv = key_data; key_data += iv_len; 8971++ 8972++ if (use_client_keys) 8973++ { 8974++ mac_secret = client_write_mac_secret; 8975++ key = client_write_key; 8976++ iv = client_write_iv; 8977++ } 8978++ else 8979++ { 8980++ mac_secret = server_write_mac_secret; 8981++ key = server_write_key; 8982++ iv = server_write_iv; 8983++ } 8984++ 8985++ if (key_data - s->s3->tmp.key_block != s->s3->tmp.key_block_length) 8986++ { 8987++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,ERR_R_INTERNAL_ERROR); 8988++ return 0; 8989++ } 8990++ 8991++ if (!tls1_change_cipher_state_cipher(s, is_read, use_client_keys, 8992++ mac_secret, mac_secret_len, 8993++ key, key_len, 8994++ iv, iv_len)) { 8995++ return 0; 8996++ } 8997++ 8998++ return 1; 8999+ err: 9000+- SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,ERR_R_MALLOC_FAILURE); 9001+-err2: 9002+- return(0); 9003++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE, ERR_R_MALLOC_FAILURE); 9004++ return 0; 9005+ } 9006+ 9007+ int tls1_setup_key_block(SSL *s) 9008+@@ -584,6 +609,7 @@ int tls1_setup_key_block(SSL *s) 9009+ SSL_COMP *comp; 9010+ int mac_type= NID_undef,mac_secret_size=0; 9011+ int ret=0; 9012++ int iv_len; 9013+ 9014+ #ifdef 
KSSL_DEBUG 9015+ printf ("tls1_setup_key_block()\n"); 9016+@@ -598,11 +624,16 @@ int tls1_setup_key_block(SSL *s) 9017+ return(0); 9018+ } 9019+ 9020++ if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) 9021++ iv_len = EVP_GCM_TLS_FIXED_IV_LEN; 9022++ else 9023++ iv_len = EVP_CIPHER_iv_length(c); 9024++ 9025+ s->s3->tmp.new_sym_enc=c; 9026+ s->s3->tmp.new_hash=hash; 9027+ s->s3->tmp.new_mac_pkey_type = mac_type; 9028+ s->s3->tmp.new_mac_secret_size = mac_secret_size; 9029+- num=EVP_CIPHER_key_length(c)+mac_secret_size+EVP_CIPHER_iv_length(c); 9030++ num=EVP_CIPHER_key_length(c)+mac_secret_size+iv_len; 9031+ num*=2; 9032+ 9033+ ssl3_cleanup_key_block(s); 9034+-- 9035+1.8.4.1 9036+ 9037diff -burN android-openssl-lhash2/patches/use_aead_for_aes_gcm.patch android-openssl/patches/use_aead_for_aes_gcm.patch 9038--- android-openssl-lhash2/patches/use_aead_for_aes_gcm.patch 1969-12-31 19:00:00.000000000 -0500 9039+++ android-openssl/patches/use_aead_for_aes_gcm.patch 2013-11-05 14:14:34.631283497 -0500 9040@@ -0,0 +1,119 @@ 9041+From 7156ca9ce97c1084d7fd010146c522633ad73e7a Mon Sep 17 00:00:00 2001 9042+From: Adam Langley <agl@chromium.org> 9043+Date: Wed, 4 Sep 2013 12:21:12 -0400 9044+Subject: [PATCH 42/50] use_aead_for_aes_gcm. 9045+ 9046+Switches AES-GCM ciphersuites to use AEAD interfaces. 9047+--- 9048+ ssl/s3_lib.c | 25 +++++++++++++++---------- 9049+ 1 file changed, 15 insertions(+), 10 deletions(-) 9050+ 9051+diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c 9052+index 2cd1654..75b6560 100644 9053+--- a/ssl/s3_lib.c 9054++++ b/ssl/s3_lib.c 9055+@@ -166,6 +166,11 @@ const char ssl3_version_str[]="SSLv3" OPENSSL_VERSION_PTEXT; 9056+ 9057+ #define SSL3_NUM_CIPHERS (sizeof(ssl3_ciphers)/sizeof(SSL_CIPHER)) 9058+ 9059++/* FIXED_NONCE_LEN is a macro that results in the correct value to set the 9060++ * fixed nonce length in SSL_CIPHER.algorithms2. It's the inverse of 9061++ * SSL_CIPHER_AEAD_FIXED_NONCE_LEN. 
*/ 9062++#define FIXED_NONCE_LEN(x) ((x/2)<<24) 9063++ 9064+ /* list of available SSLv3 ciphers (sorted by id) */ 9065+ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 9066+ 9067+@@ -1836,7 +1841,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 9068+ SSL_AEAD, 9069+ SSL_TLSV1_2, 9070+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 9071+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 9072++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 9073+ 128, 9074+ 128, 9075+ }, 9076+@@ -1868,7 +1873,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 9077+ SSL_AEAD, 9078+ SSL_TLSV1_2, 9079+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 9080+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 9081++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 9082+ 128, 9083+ 128, 9084+ }, 9085+@@ -1900,7 +1905,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 9086+ SSL_AEAD, 9087+ SSL_TLSV1_2, 9088+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 9089+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 9090++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 9091+ 128, 9092+ 128, 9093+ }, 9094+@@ -1932,7 +1937,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 9095+ SSL_AEAD, 9096+ SSL_TLSV1_2, 9097+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 9098+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 9099++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 9100+ 128, 9101+ 128, 9102+ }, 9103+@@ -1964,7 +1969,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 9104+ SSL_AEAD, 9105+ SSL_TLSV1_2, 9106+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 9107+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 9108++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 9109+ 128, 9110+ 128, 9111+ }, 9112+@@ -1996,7 +2001,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 9113+ SSL_AEAD, 9114+ SSL_TLSV1_2, 9115+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 9116+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 9117++ 
SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 9118+ 128, 9119+ 128, 9120+ }, 9121+@@ -2709,7 +2714,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 9122+ SSL_AEAD, 9123+ SSL_TLSV1_2, 9124+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 9125+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 9126++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 9127+ 128, 9128+ 128, 9129+ }, 9130+@@ -2741,7 +2746,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 9131+ SSL_AEAD, 9132+ SSL_TLSV1_2, 9133+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 9134+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 9135++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 9136+ 128, 9137+ 128, 9138+ }, 9139+@@ -2773,7 +2778,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 9140+ SSL_AEAD, 9141+ SSL_TLSV1_2, 9142+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 9143+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 9144++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 9145+ 128, 9146+ 128, 9147+ }, 9148+@@ -2805,7 +2810,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 9149+ SSL_AEAD, 9150+ SSL_TLSV1_2, 9151+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, 9152+- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 9153++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXED_NONCE_LEN(4), 9154+ 128, 9155+ 128, 9156+ }, 9157+-- 9158+1.8.4.1 9159+ 9160