/* slide_neon.c -- Optimized hash table shifting for ARM with support for NEON instructions
 * Copyright (C) 2017-2020 Mika T. Lindqvist
 *
 * Authors:
 * Mika T. Lindqvist <postmaster@raasu.org>
 * Jun He <jun.he@arm.com>
 *
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
10
11 #if defined(ARM_NEON_SLIDEHASH)
12 #ifdef _M_ARM64
13 # include <arm64_neon.h>
14 #else
15 # include <arm_neon.h>
16 #endif
17 #include "../../zbuild.h"
18 #include "../../deflate.h"
19
/* SIMD version of hash_chain rebase.
 *
 * Subtracts window_size from every entry of the hash table using saturating
 * 16-bit subtraction: entries that pointed into the discarded part of the
 * window saturate to 0 (NIL) instead of wrapping around, which is exactly
 * the clamping behavior the scalar slide_hash loop implements with a branch.
 *
 *   table       - hash table to rebase (array of 16-bit positions)
 *   entries     - number of entries in the table
 *   window_size - amount by which the window slid
 *
 * Precondition: the unrolled loop below processes 8 vectors of 8 entries
 * (128 bytes) per iteration, so the table size must be a non-zero multiple
 * of sizeof(uint16x8_t) * 8.
 */
static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
    Z_REGISTER uint16x8_t v, *p;
    Z_REGISTER size_t n;

    size_t size = entries*sizeof(table[0]);
    /* Parenthesized: "% " and "*" have equal precedence and associate left,
     * so without parentheses this would only check size % 16 == 0. */
    Assert((size % (sizeof(uint16x8_t) * 8) == 0), "hash table size err");

    Assert(sizeof(Pos) == 2, "Wrong Pos size");
    /* Broadcast the slide distance into all 8 lanes. */
    v = vdupq_n_u16(window_size);

    p = (uint16x8_t *)table;
    n = size / (sizeof(uint16x8_t) * 8);
    do {
        p[0] = vqsubq_u16(p[0], v);
        p[1] = vqsubq_u16(p[1], v);
        p[2] = vqsubq_u16(p[2], v);
        p[3] = vqsubq_u16(p[3], v);
        p[4] = vqsubq_u16(p[4], v);
        p[5] = vqsubq_u16(p[5], v);
        p[6] = vqsubq_u16(p[6], v);
        p[7] = vqsubq_u16(p[7], v);
        p += 8;
    } while (--n);
}
45
slide_hash_neon(deflate_state * s)46 Z_INTERNAL void slide_hash_neon(deflate_state *s) {
47 unsigned int wsize = s->w_size;
48
49 slide_hash_chain(s->head, HASH_SIZE, wsize);
50 slide_hash_chain(s->prev, wsize, wsize);
51 }
52 #endif
53