/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7p7x-minmax-sse2-c24-acc2.c |
  73  …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
  74  …const __m128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
  75  …const __m128i vxi0xGHIJKLMN = _mm_unpacklo_epi8(vi0xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
  76  …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
  77  …const __m128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
  78  …const __m128i vxi1xGHIJKLMN = _mm_unpacklo_epi8(vi1xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
  79  …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
  80  …const __m128i vxi2x89ABCDEF = _mm_unpacklo_epi8(vi2x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
  81  …const __m128i vxi2xGHIJKLMN = _mm_unpacklo_epi8(vi2xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
  82  …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
  [all …]
|
D | 7p7x-minmax-ssse3-c24-acc2.c |
  73  …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
  74  …const __m128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
  75  …const __m128i vxi0xGHIJKLMN = _mm_unpacklo_epi8(vi0xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
  76  …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
  77  …const __m128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
  78  …const __m128i vxi1xGHIJKLMN = _mm_unpacklo_epi8(vi1xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
  79  …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
  80  …const __m128i vxi2x89ABCDEF = _mm_unpacklo_epi8(vi2x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
  81  …const __m128i vxi2xGHIJKLMN = _mm_unpacklo_epi8(vi2xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
  82  …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
  [all …]
|
D | 7p7x-minmax-sse2-c16-acc2.c |
  66  …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
  67  …const __m128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
  68  …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
  69  …const __m128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
  70  …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
  71  …const __m128i vxi2x89ABCDEF = _mm_unpacklo_epi8(vi2x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
  72  …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
  73  …const __m128i vxi3x89ABCDEF = _mm_unpacklo_epi8(vi3x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
  74  …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
  75  …const __m128i vxi4x89ABCDEF = _mm_unpacklo_epi8(vi4x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
  [all …]
|
D | 7p7x-minmax-ssse3-c16-acc2.c |
  66  …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
  67  …const __m128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
  68  …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
  69  …const __m128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
  70  …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
  71  …const __m128i vxi2x89ABCDEF = _mm_unpacklo_epi8(vi2x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
  72  …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
  73  …const __m128i vxi3x89ABCDEF = _mm_unpacklo_epi8(vi3x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
  74  …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
  75  …const __m128i vxi4x89ABCDEF = _mm_unpacklo_epi8(vi4x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
  [all …]
|
D | 7x-minmax-sse2-c24-acc2.c |
  90  …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  91  …const __m128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  92  …const __m128i vxi0xGHIJKLMN = _mm_unpacklo_epi8(vi0xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  93  …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  94  …const __m128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  95  …const __m128i vxi1xGHIJKLMN = _mm_unpacklo_epi8(vi1xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  96  …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  97  …const __m128i vxi2x89ABCDEF = _mm_unpacklo_epi8(vi2x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  98  …const __m128i vxi2xGHIJKLMN = _mm_unpacklo_epi8(vi2xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  99  …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  [all …]
|
D | 7p7x-minmax-sse2-c8-acc2.c |
  59  …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
  60  …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
  61  …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
  62  …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
  63  …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
  64  …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
  65  …const __m128i vxi6x01234567 = _mm_unpacklo_epi8(vi6x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
  77  const __m128i vsgnacc0x01234567 = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
  113  …128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0x01234… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
  114  …128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi1x01234… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
  [all …]
|
D | 7p7x-minmax-ssse3-c8-acc2.c |
  59  …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
  60  …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
  61  …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
  62  …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
  63  …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
  64  …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
  65  …const __m128i vxi6x01234567 = _mm_unpacklo_epi8(vi6x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
  77  const __m128i vsgnacc0x01234567 = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
  113  …128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0x01234… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
  114  …128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi1x01234… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
  [all …]
|
D | 7x-minmax-sse2-c16-acc2.c |
  83  …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  84  …const __m128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  85  …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  86  …const __m128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  87  …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  88  …const __m128i vxi2x89ABCDEF = _mm_unpacklo_epi8(vi2x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  89  …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  90  …const __m128i vxi3x89ABCDEF = _mm_unpacklo_epi8(vi3x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  91  …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  92  …const __m128i vxi4x89ABCDEF = _mm_unpacklo_epi8(vi4x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  [all …]
|
D | 7x-minmax-ssse3-c24-acc2.c |
  90  …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  91  …const __m128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  92  …const __m128i vxi0xGHIJKLMN = _mm_unpacklo_epi8(vi0xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  93  …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  94  …const __m128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  95  …const __m128i vxi1xGHIJKLMN = _mm_unpacklo_epi8(vi1xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  96  …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  97  …const __m128i vxi2x89ABCDEF = _mm_unpacklo_epi8(vi2x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  98  …const __m128i vxi2xGHIJKLMN = _mm_unpacklo_epi8(vi2xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  99  …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  [all …]
|
D | 7x-minmax-sse2-c8-acc2.c |
  76  …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
  77  …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
  78  …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
  79  …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
  80  …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
  81  …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
  82  …const __m128i vxi6x01234567 = _mm_unpacklo_epi8(vi6x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
  94  const __m128i vsgnacc0x01234567 = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
  98  const __m128i vsgnacc0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
  99  const __m128i vsgnacc4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
  [all …]
|
D | 7x-minmax-ssse3-c16-acc2.c |
  83  …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  84  …const __m128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  85  …const __m128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  86  …const __m128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  87  …const __m128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  88  …const __m128i vxi2x89ABCDEF = _mm_unpacklo_epi8(vi2x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  89  …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  90  …const __m128i vxi3x89ABCDEF = _mm_unpacklo_epi8(vi3x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  91  …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  92  …const __m128i vxi4x89ABCDEF = _mm_unpacklo_epi8(vi4x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  [all …]
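
All of the gavgpool matches above repeat one SSE2-era idiom: sign-extending packed int8 lanes to int16 before summation. _mm_cmpgt_epi8(_mm_setzero_si128(), v) produces 0xFF in every lane where v is negative, which is exactly the high byte a sign extension needs, and the unpack interleaves it in. A minimal sketch of the idiom (the helper name is illustrative, not from the kernels):

    #include <emmintrin.h>  /* SSE2 */

    /* Sign-extend the low 8 int8 lanes of v to 8 int16 lanes. */
    static inline __m128i sign_extend_lo_epi8(const __m128i v) {
      const __m128i vsign = _mm_cmpgt_epi8(_mm_setzero_si128(), v);  /* 0xFF for negative bytes */
      return _mm_unpacklo_epi8(v, vsign);  /* interleave: value byte, then sign byte */
    }

On SSE4.1 and later the same widening is a single _mm_cvtepi8_epi16(v); the compare-and-unpack form is what keeps these kernels runnable on plain SSE2/SSSE3.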
|
/external/libaom/libaom/aom_dsp/x86/ |
D | aom_subpixel_8t_intrin_sse2.c |
  48  __m128i ss_1_1 = _mm_unpacklo_epi8(ss_2, _mm_setzero_si128()); in aom_filter_block1d16_h4_sse2()
  49  __m128i ss_2_1 = _mm_unpacklo_epi8(ss_4, _mm_setzero_si128()); in aom_filter_block1d16_h4_sse2()
  56  __m128i ss_1_2 = _mm_unpacklo_epi8(ss_1, _mm_setzero_si128()); in aom_filter_block1d16_h4_sse2()
  57  __m128i ss_2_2 = _mm_unpacklo_epi8(ss_3, _mm_setzero_si128()); in aom_filter_block1d16_h4_sse2()
  72  ss_1_1 = _mm_unpacklo_epi8(ss_2, _mm_setzero_si128()); in aom_filter_block1d16_h4_sse2()
  73  ss_2_1 = _mm_unpacklo_epi8(ss_4, _mm_setzero_si128()); in aom_filter_block1d16_h4_sse2()
  80  ss_1_2 = _mm_unpacklo_epi8(ss_1, _mm_setzero_si128()); in aom_filter_block1d16_h4_sse2()
  81  ss_2_2 = _mm_unpacklo_epi8(ss_3, _mm_setzero_si128()); in aom_filter_block1d16_h4_sse2()
  145  __m128i resReg23_lo_1 = _mm_unpacklo_epi8(srcReg23_lo, _mm_setzero_si128()); in aom_filter_block1d16_v4_sse2()
  146  __m128i resReg23_lo_2 = _mm_unpackhi_epi8(srcReg23_lo, _mm_setzero_si128()); in aom_filter_block1d16_v4_sse2()
  [all …]
|
D | convolve_sse2.h |
  51  ss[0] = _mm_unpacklo_epi8(s[0], _mm_setzero_si128()); in convolve_lo_x()
  52  ss[1] = _mm_unpacklo_epi8(s[1], _mm_setzero_si128()); in convolve_lo_x()
  53  ss[2] = _mm_unpacklo_epi8(s[2], _mm_setzero_si128()); in convolve_lo_x()
  54  ss[3] = _mm_unpacklo_epi8(s[3], _mm_setzero_si128()); in convolve_lo_x()
  61  ss[0] = _mm_unpacklo_epi8(s[0], _mm_setzero_si128()); in convolve_lo_y()
  62  ss[1] = _mm_unpacklo_epi8(s[2], _mm_setzero_si128()); in convolve_lo_y()
  63  ss[2] = _mm_unpacklo_epi8(s[4], _mm_setzero_si128()); in convolve_lo_y()
  64  ss[3] = _mm_unpacklo_epi8(s[6], _mm_setzero_si128()); in convolve_lo_y()
  71  ss[0] = _mm_unpackhi_epi8(s[0], _mm_setzero_si128()); in convolve_hi_y()
  72  ss[1] = _mm_unpackhi_epi8(s[2], _mm_setzero_si128()); in convolve_hi_y()
  [all …]
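
The libaom convolution helpers widen unsigned pixel bytes instead, so the second unpack operand is simply the zero register: interleaving with _mm_setzero_si128() supplies 0x00 as every high byte. A sketch of that zero-extension counterpart (helper name illustrative):

    #include <emmintrin.h>  /* SSE2 */

    /* Zero-extend the low 8 uint8 lanes of v to 8 uint16 lanes. */
    static inline __m128i zero_extend_lo_epu8(const __m128i v) {
      return _mm_unpacklo_epi8(v, _mm_setzero_si128());
    }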
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up16x9-minmax-ssse3-mul16.c |
  95  …128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0x01234… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
  96  …128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk0x01234… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
  97  …128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
  98  …128i vxk0x89ABCDEF = _mm_unpacklo_epi8(vk0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vk0x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
  116  …128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi1x01234… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
  117  …128i vxk1x01234567 = _mm_unpacklo_epi8(vk1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk1x01234… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
  118  …128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vi1x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
  119  …128i vxk1x89ABCDEF = _mm_unpacklo_epi8(vk1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vk1x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
  137  …128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi2x01234… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
  138  …128i vxk2x01234567 = _mm_unpacklo_epi8(vk2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk2x01234… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
  [all …]
|
D | up16x9-minmax-sse2-mul16.c |
  95  …128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0x01234… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
  96  …128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk0x01234… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
  97  …128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
  98  …128i vxk0x89ABCDEF = _mm_unpacklo_epi8(vk0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vk0x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
  116  …128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi1x01234… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
  117  …128i vxk1x01234567 = _mm_unpacklo_epi8(vk1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk1x01234… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
  118  …128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vi1x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
  119  …128i vxk1x89ABCDEF = _mm_unpacklo_epi8(vk1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vk1x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
  137  …128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi2x01234… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
  138  …128i vxk2x01234567 = _mm_unpacklo_epi8(vk2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk2x01234… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
  [all …]
|
D | up24x9-minmax-ssse3-mul16.c |
  99  …128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0x01234… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
  100  …128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk0x01234… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
  101  …128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
  102  …128i vxk0x89ABCDEF = _mm_unpacklo_epi8(vk0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vk0x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
  103  …128i vxi0xGHIJKLMN = _mm_unpacklo_epi8(vi0xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0xGHIJK… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
  104  …128i vxk0xGHIJKLMN = _mm_unpacklo_epi8(vk0xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), vk0xGHIJK… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
  128  …128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi1x01234… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
  129  …128i vxk1x01234567 = _mm_unpacklo_epi8(vk1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk1x01234… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
  130  …128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vi1x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
  131  …128i vxk1x89ABCDEF = _mm_unpacklo_epi8(vk1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vk1x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
  [all …]
|
D | up24x9-minmax-sse2-mul16.c |
  99  …128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0x01234… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
  100  …128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk0x01234… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
  101  …128i vxi0x89ABCDEF = _mm_unpacklo_epi8(vi0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
  102  …128i vxk0x89ABCDEF = _mm_unpacklo_epi8(vk0x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vk0x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
  103  …128i vxi0xGHIJKLMN = _mm_unpacklo_epi8(vi0xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0xGHIJK… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
  104  …128i vxk0xGHIJKLMN = _mm_unpacklo_epi8(vk0xGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), vk0xGHIJK… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
  128  …128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi1x01234… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
  129  …128i vxk1x01234567 = _mm_unpacklo_epi8(vk1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk1x01234… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
  130  …128i vxi1x89ABCDEF = _mm_unpacklo_epi8(vi1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vi1x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
  131  …128i vxk1x89ABCDEF = _mm_unpacklo_epi8(vk1x89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vk1x89ABC… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
  [all …]
|
D | up8x9-minmax-sse2-mul16.c |
  91  …128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
  92  …128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk0x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
  104  …128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi1x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
  105  …128i vxk1x01234567 = _mm_unpacklo_epi8(vk1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk1x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
  117  …128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi2x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
  118  …128i vxk2x01234567 = _mm_unpacklo_epi8(vk2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk2x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
  130  …128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi3x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
  131  …128i vxk3x01234567 = _mm_unpacklo_epi8(vk3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk3x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
  143  …128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi4x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
  144  …128i vxk4x01234567 = _mm_unpacklo_epi8(vk4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk4x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
  [all …]
|
D | up8x9-minmax-ssse3-mul16.c |
  91  …128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
  92  …128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk0x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
  104  …128i vxi1x01234567 = _mm_unpacklo_epi8(vi1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi1x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
  105  …128i vxk1x01234567 = _mm_unpacklo_epi8(vk1x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk1x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
  117  …128i vxi2x01234567 = _mm_unpacklo_epi8(vi2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi2x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
  118  …128i vxk2x01234567 = _mm_unpacklo_epi8(vk2x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk2x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
  130  …128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi3x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
  131  …128i vxk3x01234567 = _mm_unpacklo_epi8(vk3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk3x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
  143  …128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi4x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
  144  …128i vxk4x01234567 = _mm_unpacklo_epi8(vk4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vk4x01234… in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
  [all …]
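
In the dwconv mul16 kernels both the input bytes (vi*) and the kernel bytes (vk*) are sign-extended this way, and the int16 lanes are then multiplied with _mm_mullo_epi16/_mm_mulhi_epi16 so the full 32-bit products can be reassembled and accumulated. A sketch of one such step, with illustrative names and the accumulator layout assumed from the matches above:

    #include <emmintrin.h>  /* SSE2 */

    /* Widen 8 int8 inputs and 8 int8 weights to int16, multiply, and
       accumulate the full 16x16->32 products into two int32 vectors. */
    static inline void dwconv_mul16_step(const __m128i vi, const __m128i vk,
                                         __m128i* vacc_lo, __m128i* vacc_hi) {
      const __m128i vxi = _mm_unpacklo_epi8(vi, _mm_cmpgt_epi8(_mm_setzero_si128(), vi));
      const __m128i vxk = _mm_unpacklo_epi8(vk, _mm_cmpgt_epi8(_mm_setzero_si128(), vk));
      const __m128i vprod_lo = _mm_mullo_epi16(vxi, vxk);  /* low 16 bits of each product */
      const __m128i vprod_hi = _mm_mulhi_epi16(vxi, vxk);  /* high 16 bits (signed) */
      /* Interleaving low and high halves lane-wise rebuilds int32 products. */
      *vacc_lo = _mm_add_epi32(*vacc_lo, _mm_unpacklo_epi16(vprod_lo, vprod_hi));
      *vacc_hi = _mm_add_epi32(*vacc_hi, _mm_unpackhi_epi16(vprod_lo, vprod_hi));
    }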
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | vpx_subpixel_4t_intrin_sse2.c |
  159  src_reg_m10_lo_1 = _mm_unpacklo_epi8(src_reg_m10_lo, _mm_setzero_si128()); in vpx_filter_block1d16_v4_sse2()
  160  src_reg_m10_lo_2 = _mm_unpackhi_epi8(src_reg_m10_lo, _mm_setzero_si128()); in vpx_filter_block1d16_v4_sse2()
  161  src_reg_m10_hi_1 = _mm_unpacklo_epi8(src_reg_m10_hi, _mm_setzero_si128()); in vpx_filter_block1d16_v4_sse2()
  162  src_reg_m10_hi_2 = _mm_unpackhi_epi8(src_reg_m10_hi, _mm_setzero_si128()); in vpx_filter_block1d16_v4_sse2()
  168  src_reg_01_lo_1 = _mm_unpacklo_epi8(src_reg_01_lo, _mm_setzero_si128()); in vpx_filter_block1d16_v4_sse2()
  169  src_reg_01_lo_2 = _mm_unpackhi_epi8(src_reg_01_lo, _mm_setzero_si128()); in vpx_filter_block1d16_v4_sse2()
  170  src_reg_01_hi_1 = _mm_unpacklo_epi8(src_reg_01_hi, _mm_setzero_si128()); in vpx_filter_block1d16_v4_sse2()
  171  src_reg_01_hi_2 = _mm_unpackhi_epi8(src_reg_01_hi, _mm_setzero_si128()); in vpx_filter_block1d16_v4_sse2()
  191  src_reg_12_lo_1 = _mm_unpacklo_epi8(src_reg_12_lo, _mm_setzero_si128()); in vpx_filter_block1d16_v4_sse2()
  192  src_reg_12_lo_2 = _mm_unpackhi_epi8(src_reg_12_lo, _mm_setzero_si128()); in vpx_filter_block1d16_v4_sse2()
  [all …]
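
The libvpx 4-tap filter splits each 16-byte source register into two 8-lane uint16 vectors by pairing _mm_unpacklo_epi8 with _mm_unpackhi_epi8 against zero, which is why every match above comes as a _1/_2 pair. Sketch, names illustrative:

    #include <emmintrin.h>  /* SSE2 */

    /* Split 16 uint8 lanes into two vectors of 8 uint16 lanes each. */
    static inline void split_epu8_to_epu16(const __m128i v, __m128i* lo, __m128i* hi) {
      const __m128i vzero = _mm_setzero_si128();
      *lo = _mm_unpacklo_epi8(v, vzero);  /* lanes 0..7  */
      *hi = _mm_unpackhi_epi8(v, vzero);  /* lanes 8..15 */
    }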
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-sse2-mul16-ld64-x32.c |
  48  vx01234567 = _mm_unpacklo_epi8(vx01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vx01234567)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
  49  vy01234567 = _mm_unpacklo_epi8(vy01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vy01234567)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
  50  vx89ABCDEF = _mm_unpacklo_epi8(vx89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vx89ABCDEF)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
  51  vy89ABCDEF = _mm_unpacklo_epi8(vy89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vy89ABCDEF)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
  52  vxGHIJKLMN = _mm_unpacklo_epi8(vxGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), vxGHIJKLMN)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
  53  vyGHIJKLMN = _mm_unpacklo_epi8(vyGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), vyGHIJKLMN)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
  54  vxOPQRSTUV = _mm_unpacklo_epi8(vxOPQRSTUV, _mm_cmpgt_epi8(_mm_setzero_si128(), vxOPQRSTUV)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
  55  vyOPQRSTUV = _mm_unpacklo_epi8(vyOPQRSTUV, _mm_cmpgt_epi8(_mm_setzero_si128(), vyOPQRSTUV)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
  110  …add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123)… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
  111  …add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567)… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
  [all …]
|
/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-sse2-mul16-ld64-x32.c |
  43  vx01234567 = _mm_unpacklo_epi8(vx01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vx01234567)); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
  44  vx89ABCDEF = _mm_unpacklo_epi8(vx89ABCDEF, _mm_cmpgt_epi8(_mm_setzero_si128(), vx89ABCDEF)); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
  45  vxGHIJKLMN = _mm_unpacklo_epi8(vxGHIJKLMN, _mm_cmpgt_epi8(_mm_setzero_si128(), vxGHIJKLMN)); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
  46  vxOPQRSTUV = _mm_unpacklo_epi8(vxOPQRSTUV, _mm_cmpgt_epi8(_mm_setzero_si128(), vxOPQRSTUV)); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
  76  …add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123)… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
  77  …add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567)… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
  78  …add_epi32(_mm_and_si128(vacc89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc89AB)… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
  79  …add_epi32(_mm_and_si128(vaccCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vaccCDEF)… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
  80  …add_epi32(_mm_and_si128(vaccGHIJ, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vaccGHIJ)… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
  81  …add_epi32(_mm_and_si128(vaccKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vaccKLMN)… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
  [all …]
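
The later matches in both the vadd and vaddc kernels (the _mm_cmpgt_epi32 lines against vacc*) belong to the requantization step: an arithmetic right shift that rounds to nearest, with the compare against zero biasing the remainder so negative accumulators round like positive ones. A sketch under the assumption shift >= 1; the constants mirror the vremainder_mask name visible above, but the helper itself is hypothetical:

    #include <emmintrin.h>  /* SSE2 */

    /* Rounding arithmetic right shift of 4 int32 lanes (ties away from zero). */
    static inline __m128i rounding_sra_epi32(const __m128i v, const int shift) {
      const __m128i vremainder_mask = _mm_set1_epi32((1 << shift) - 1);
      const __m128i vremainder_threshold = _mm_set1_epi32(((1 << shift) - 1) >> 1);
      /* cmpgt(0, v) is -1 for negative lanes: it debiases their remainder. */
      const __m128i vrem = _mm_add_epi32(_mm_and_si128(v, vremainder_mask),
                                         _mm_cmpgt_epi32(_mm_setzero_si128(), v));
      /* cmpgt(rem, threshold) is -1 where we must round up; subtract it. */
      return _mm_sub_epi32(_mm_sra_epi32(v, _mm_cvtsi32_si128(shift)),
                           _mm_cmpgt_epi32(vrem, vremainder_threshold));
    }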
|
/external/libaom/libaom/av1/encoder/x86/ |
D | wedge_utils_sse2.c |
  36  __m128i v_acc0_q = _mm_setzero_si128(); in av1_wedge_sse_from_residuals_sse2()
  55  const __m128i v_m0_w = _mm_unpacklo_epi8(v_m01_b, _mm_setzero_si128()); in av1_wedge_sse_from_residuals_sse2()
  56  const __m128i v_m1_w = _mm_unpackhi_epi8(v_m01_b, _mm_setzero_si128()); in av1_wedge_sse_from_residuals_sse2()
  105  __m128i v_acc0_d = _mm_setzero_si128(); in av1_wedge_sign_from_residuals_sse2()
  106  __m128i v_acc1_d = _mm_setzero_si128(); in av1_wedge_sign_from_residuals_sse2()
  130  const __m128i v_m0_w = _mm_unpacklo_epi8(v_m01_b, _mm_setzero_si128()); in av1_wedge_sign_from_residuals_sse2()
  131  const __m128i v_m1_w = _mm_unpackhi_epi8(v_m01_b, _mm_setzero_si128()); in av1_wedge_sign_from_residuals_sse2()
  132  const __m128i v_m2_w = _mm_unpacklo_epi8(v_m23_b, _mm_setzero_si128()); in av1_wedge_sign_from_residuals_sse2()
  133  const __m128i v_m3_w = _mm_unpackhi_epi8(v_m23_b, _mm_setzero_si128()); in av1_wedge_sign_from_residuals_sse2()
  134  const __m128i v_m4_w = _mm_unpacklo_epi8(v_m45_b, _mm_setzero_si128()); in av1_wedge_sign_from_residuals_sse2()
  [all …]
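
In wedge_utils_sse2.c the zero register serves two roles: initializing the accumulators (v_acc*_d, v_acc*_q) and zero-extending mask bytes (v_m*_b to v_m*_w) before the multiply-accumulate. Such an accumulator eventually has to be reduced horizontally; a sketch of that reduction for 4 int32 lanes (hypothetical helper, not from the file):

    #include <emmintrin.h>  /* SSE2 */
    #include <stdint.h>

    /* Sum the 4 int32 lanes of an accumulator into a scalar. */
    static inline int32_t hsum_epi32(__m128i v) {
      v = _mm_add_epi32(v, _mm_srli_si128(v, 8));  /* lanes 0+2 and 1+3 */
      v = _mm_add_epi32(v, _mm_srli_si128(v, 4));  /* add the remaining lane */
      return _mm_cvtsi128_si32(v);
    }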
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x4c2-minmax-ssse3-ld64.c |
  88  const __m128i vxa0 = _mm_unpacklo_epi8(va0, _mm_cmpgt_epi8(_mm_setzero_si128(), va0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
  91  const __m128i vxa1 = _mm_unpacklo_epi8(va1, _mm_cmpgt_epi8(_mm_setzero_si128(), va1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
  94  const __m128i vxa2 = _mm_unpacklo_epi8(va2, _mm_cmpgt_epi8(_mm_setzero_si128(), va2)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
  97  const __m128i vxa3 = _mm_unpacklo_epi8(va3, _mm_cmpgt_epi8(_mm_setzero_si128(), va3)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
  101  const __m128i vxb0 = _mm_unpacklo_epi8(vb0, _mm_cmpgt_epi8(_mm_setzero_si128(), vb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
  112  const __m128i vxb1 = _mm_unpacklo_epi8(vb1, _mm_cmpgt_epi8(_mm_setzero_si128(), vb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
  123  const __m128i vxb2 = _mm_unpacklo_epi8(vb2, _mm_cmpgt_epi8(_mm_setzero_si128(), vb2)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
  134  const __m128i vxb3 = _mm_unpacklo_epi8(vb3, _mm_cmpgt_epi8(_mm_setzero_si128(), vb3)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
  150  const __m128i vxa0 = _mm_unpacklo_epi8(va0, _mm_cmpgt_epi8(_mm_setzero_si128(), va0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
  153  const __m128i vxa1 = _mm_unpacklo_epi8(va1, _mm_cmpgt_epi8(_mm_setzero_si128(), va1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
  [all …]
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x4c2-minmax-sse2-ld64.c |
  71  const __m128i vxa0 = _mm_unpacklo_epi8(va0, _mm_cmpgt_epi8(_mm_setzero_si128(), va0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
  74  const __m128i vxa1 = _mm_unpacklo_epi8(va1, _mm_cmpgt_epi8(_mm_setzero_si128(), va1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
  77  const __m128i vxa2 = _mm_unpacklo_epi8(va2, _mm_cmpgt_epi8(_mm_setzero_si128(), va2)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
  80  const __m128i vxa3 = _mm_unpacklo_epi8(va3, _mm_cmpgt_epi8(_mm_setzero_si128(), va3)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
  84  const __m128i vxb0 = _mm_unpacklo_epi8(vb0, _mm_cmpgt_epi8(_mm_setzero_si128(), vb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
  95  const __m128i vxb1 = _mm_unpacklo_epi8(vb1, _mm_cmpgt_epi8(_mm_setzero_si128(), vb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
  106  const __m128i vxb2 = _mm_unpacklo_epi8(vb2, _mm_cmpgt_epi8(_mm_setzero_si128(), vb2)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
  117  const __m128i vxb3 = _mm_unpacklo_epi8(vb3, _mm_cmpgt_epi8(_mm_setzero_si128(), vb3)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
  133  const __m128i vxa0 = _mm_unpacklo_epi8(va0, _mm_cmpgt_epi8(_mm_setzero_si128(), va0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
  136  const __m128i vxa1 = _mm_unpacklo_epi8(va1, _mm_cmpgt_epi8(_mm_setzero_si128(), va1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
  [all …]
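
The gemm and igemm 4x4c2 kernels sign-extend both the activation rows (va0..va3) and the weight columns (vb0..vb3) with the same compare-and-unpack idiom, then feed the int16 lanes to _mm_madd_epi16, which multiplies pairwise and adds adjacent products: two int8 MACs per int32 lane. A reduced sketch of one accumulation step (the real kernels also broadcast 2-element groups of A with _mm_shuffle_epi32 before the madd; names illustrative):

    #include <emmintrin.h>  /* SSE2 */

    /* One K=2 step of an int8 GEMM microkernel: widen, madd, accumulate. */
    static inline __m128i gemm_c2_step(const __m128i vacc, const __m128i va, const __m128i vb) {
      const __m128i vxa = _mm_unpacklo_epi8(va, _mm_cmpgt_epi8(_mm_setzero_si128(), va));
      const __m128i vxb = _mm_unpacklo_epi8(vb, _mm_cmpgt_epi8(_mm_setzero_si128(), vb));
      return _mm_add_epi32(vacc, _mm_madd_epi16(vxa, vxb));
    }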
|