1 /* 2 * AVX2 optimized hash slide, based on Intel's slide_sse implementation 3 * 4 * Copyright (C) 2017 Intel Corporation 5 * Authors: 6 * Arjan van de Ven <arjan@linux.intel.com> 7 * Jim Kukunas <james.t.kukunas@linux.intel.com> 8 * Mika T. Lindqvist <postmaster@raasu.org> 9 * 10 * For conditions of distribution and use, see copyright notice in zlib.h 11 */ 12 #include "../../zbuild.h" 13 #include "../../deflate.h" 14 15 #include <immintrin.h> 16 slide_hash_avx2(deflate_state * s)17Z_INTERNAL void slide_hash_avx2(deflate_state *s) { 18 Pos *p; 19 unsigned n; 20 uint16_t wsize = (uint16_t)s->w_size; 21 const __m256i zmm_wsize = _mm256_set1_epi16((short)wsize); 22 23 n = HASH_SIZE; 24 p = &s->head[n] - 16; 25 do { 26 __m256i value, result; 27 28 value = _mm256_loadu_si256((__m256i *)p); 29 result= _mm256_subs_epu16(value, zmm_wsize); 30 _mm256_storeu_si256((__m256i *)p, result); 31 p -= 16; 32 n -= 16; 33 } while (n > 0); 34 35 n = wsize; 36 p = &s->prev[n] - 16; 37 do { 38 __m256i value, result; 39 40 value = _mm256_loadu_si256((__m256i *)p); 41 result= _mm256_subs_epu16(value, zmm_wsize); 42 _mm256_storeu_si256((__m256i *)p, result); 43 44 p -= 16; 45 n -= 16; 46 } while (n > 0); 47 } 48