1 /* compare258_sse.c -- SSE4.2 version of compare258
2 *
3 * Copyright (C) 2013 Intel Corporation. All rights reserved.
4 * Authors:
5 * Wajdi Feghali <wajdi.k.feghali@intel.com>
6 * Jim Guilford <james.guilford@intel.com>
7 * Vinodh Gopal <vinodh.gopal@intel.com>
8 * Erdinc Ozturk <erdinc.ozturk@intel.com>
9 * Jim Kukunas <james.t.kukunas@linux.intel.com>
10 *
11 * Portions are Copyright (C) 2016 12Sided Technology, LLC.
12 * Author:
13 * Phil Vachon <pvachon@12sidedtech.com>
14 *
15 * For conditions of distribution and use, see copyright notice in zlib.h
16 */
17
18 #include "../../zbuild.h"
19 #include "../../zutil.h"
20
21 #ifdef X86_SSE42_CMP_STR
22
23 #include <immintrin.h>
24 #ifdef _MSC_VER
25 # include <nmmintrin.h>
26 #endif
27
28 /* UNALIGNED_OK, SSE4.2 intrinsic comparison */
/* Count the leading bytes that are equal between src0 and src1, up to 256.
 *
 * The buffers are read in 16-byte chunks with unaligned loads, so neither
 * pointer needs any particular alignment. Up to 256 bytes are read from each
 * pointer; reading stops at the chunk that contains the first mismatch.
 *
 * Returns the index of the first differing byte (0..255), or 256 when no
 * difference is found in the first 256 bytes.
 */
static inline uint32_t compare256_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
    /* Control byte for the string-compare instruction: unsigned bytes,
     * position-by-position equality, result negated so that set bits mark
     * mismatching positions. A function-scoped enum constant is still a
     * compile-time immediate but, unlike a macro, does not leak into code
     * that follows this function. */
    enum {
        cmp_mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY
    };
    uint32_t len = 0;

    /* Two 16-byte chunks are handled per iteration. */
    do {
        __m128i xmm_src0, xmm_src1;
        uint32_t ret;

        xmm_src0 = _mm_loadu_si128((const __m128i *)src0);
        xmm_src1 = _mm_loadu_si128((const __m128i *)src1);
        /* ret is the index of the first mismatching byte in this chunk;
         * the carry-flag query tells whether any mismatch exists at all. */
        ret = (uint32_t)_mm_cmpestri(xmm_src0, 16, xmm_src1, 16, cmp_mode);
        if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, cmp_mode)) {
            return len + ret;
        }
        src0 += 16, src1 += 16, len += 16;

        xmm_src0 = _mm_loadu_si128((const __m128i *)src0);
        xmm_src1 = _mm_loadu_si128((const __m128i *)src1);
        ret = (uint32_t)_mm_cmpestri(xmm_src0, 16, xmm_src1, 16, cmp_mode);
        if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, cmp_mode)) {
            return len + ret;
        }
        src0 += 16, src1 += 16, len += 16;
    } while (len < 256);

    return 256;
}
56
/* Count the leading bytes that are equal between src0 and src1, up to 258.
 *
 * The first two bytes are checked individually; if both match, the remaining
 * bytes are compared by compare256_unaligned_sse4_static starting at offset 2.
 *
 * Returns 0 if the first bytes differ, 1 if only the first bytes match,
 * otherwise 2 plus the result of the 256-byte comparison (2..258).
 */
static inline uint32_t compare258_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
    /* Compare byte by byte rather than through a casted uint16_t load:
     * reading unsigned char storage through a uint16_t lvalue violates the
     * effective-type rules and may be misaligned. The outcome is the same. */
    if (src0[0] != src1[0]) {
        return 0;
    }
    if (src0[1] != src1[1]) {
        return 1;
    }

    return compare256_unaligned_sse4_static(src0 + 2, src1 + 2) + 2;
}
63
compare258_unaligned_sse4(const unsigned char * src0,const unsigned char * src1)64 Z_INTERNAL uint32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1) {
65 return compare258_unaligned_sse4_static(src0, src1);
66 }
67
/* These three macros are defined immediately before match_tpl.h is included.
 * NOTE(review): match_tpl.h is not visible here -- presumably it emits a
 * function named by LONGEST_MATCH that uses COMPARE256/COMPARE258 as its
 * comparison helpers; confirm against that header. */
68 #define LONGEST_MATCH longest_match_unaligned_sse4
69 #define COMPARE256 compare256_unaligned_sse4_static
70 #define COMPARE258 compare258_unaligned_sse4_static
71
72 #include "match_tpl.h"
73
74 #endif
75