Lines Matching +full:d3 +full:- +full:scale
3 // Use of this source code is governed by a BSD-style license
8 // -----------------------------------------------------------------------------
23 //------------------------------------------------------------------------------
30 // input: 8 bytes ABCDEFGH -> output: A0E0B0F0C0G0D0H0
39 // input: 8 bytes ABCDEFGH -> output: A0B0C0D0E0F0G0H0
48 rescaler_t* frow = wrk->frow; in RescalerImportRowExpand_SSE2()
49 const rescaler_t* const frow_end = frow + wrk->dst_width * wrk->num_channels; in RescalerImportRowExpand_SSE2()
50 const int x_add = wrk->x_add; in RescalerImportRowExpand_SSE2()
55 if (wrk->src_width < 8 || accum >= (1 << 15)) { in RescalerImportRowExpand_SSE2()
61 assert(wrk->x_expand); in RescalerImportRowExpand_SSE2()
62 if (wrk->num_channels == 4) { in RescalerImportRowExpand_SSE2()
66 const __m128i mult = _mm_set1_epi32(((x_add - accum) << 16) | accum); in RescalerImportRowExpand_SSE2()
71 accum -= wrk->x_sub; in RescalerImportRowExpand_SSE2()
80 const uint8_t* const src_limit = src + wrk->src_width - 8; in RescalerImportRowExpand_SSE2()
85 const __m128i mult = _mm_cvtsi32_si128(((x_add - accum) << 16) | accum); in RescalerImportRowExpand_SSE2()
91 accum -= wrk->x_sub; in RescalerImportRowExpand_SSE2()
93 if (--left) { in RescalerImportRowExpand_SSE2()
114 const int x_sub = wrk->x_sub; in RescalerImportRowShrink_SSE2()
118 const __m128i mult1 = _mm_set1_epi32(wrk->fx_scale); in RescalerImportRowShrink_SSE2()
121 rescaler_t* frow = wrk->frow; in RescalerImportRowShrink_SSE2()
122 const rescaler_t* const frow_end = wrk->frow + 4 * wrk->dst_width; in RescalerImportRowShrink_SSE2()
124 if (wrk->num_channels != 4 || wrk->x_add > (x_sub << 7)) { in RescalerImportRowShrink_SSE2()
129 assert(!wrk->x_expand); in RescalerImportRowShrink_SSE2()
133 accum += wrk->x_add; in RescalerImportRowShrink_SSE2()
141 accum -= x_sub; in RescalerImportRowShrink_SSE2()
144 const __m128i mult = _mm_set1_epi16(-accum); in RescalerImportRowShrink_SSE2()
145 const __m128i frac0 = _mm_mullo_epi16(base, mult); // 16b x 16b -> 32b in RescalerImportRowShrink_SSE2()
151 const __m128i frow_out = _mm_sub_epi32(B0, frac); // sum * x_sub - frac in RescalerImportRowShrink_SSE2()
153 const __m128i D1 = _mm_mul_epu32(frac, mult1); // 32b x 16b -> 64b in RescalerImportRowShrink_SSE2()
167 //------------------------------------------------------------------------------
214 _mm_and_si128(_mm_slli_epi64(C2, 32 - WEBP_RESCALER_RFIX), mask); in ProcessRow_SSE2()
215 const __m128i D3 = in ProcessRow_SSE2() local
216 _mm_and_si128(_mm_slli_epi64(C3, 32 - WEBP_RESCALER_RFIX), mask); in ProcessRow_SSE2()
219 const __m128i D3 = _mm_and_si128(C3, mask); in ProcessRow_SSE2() local
222 const __m128i E1 = _mm_or_si128(D1, D3); in ProcessRow_SSE2()
230 uint8_t* const dst = wrk->dst; in RescalerExportRowExpand_SSE2()
231 rescaler_t* const irow = wrk->irow; in RescalerExportRowExpand_SSE2()
232 const int x_out_max = wrk->dst_width * wrk->num_channels; in RescalerExportRowExpand_SSE2()
233 const rescaler_t* const frow = wrk->frow; in RescalerExportRowExpand_SSE2()
234 const __m128i mult = _mm_set_epi32(0, wrk->fy_scale, 0, wrk->fy_scale); in RescalerExportRowExpand_SSE2()
237 assert(wrk->y_accum <= 0 && wrk->y_sub + wrk->y_accum >= 0); in RescalerExportRowExpand_SSE2()
238 assert(wrk->y_expand); in RescalerExportRowExpand_SSE2()
239 if (wrk->y_accum == 0) { in RescalerExportRowExpand_SSE2()
247 const int v = (int)MULT_FIX(J, wrk->fy_scale); in RescalerExportRowExpand_SSE2()
251 const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); in RescalerExportRowExpand_SSE2()
252 const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B); in RescalerExportRowExpand_SSE2()
268 const __m128i D3 = _mm_add_epi64(C3, rounder); in RescalerExportRowExpand_SSE2() local
272 const __m128i E3 = _mm_srli_epi64(D3, WEBP_RESCALER_RFIX); in RescalerExportRowExpand_SSE2()
280 const int v = (int)MULT_FIX(J, wrk->fy_scale); in RescalerExportRowExpand_SSE2()
288 uint8_t* const dst = wrk->dst; in RescalerExportRowShrink_SSE2()
289 rescaler_t* const irow = wrk->irow; in RescalerExportRowShrink_SSE2()
290 const int x_out_max = wrk->dst_width * wrk->num_channels; in RescalerExportRowShrink_SSE2()
291 const rescaler_t* const frow = wrk->frow; in RescalerExportRowShrink_SSE2()
292 const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum); in RescalerExportRowShrink_SSE2()
294 assert(wrk->y_accum <= 0); in RescalerExportRowShrink_SSE2()
295 assert(!wrk->y_expand); in RescalerExportRowShrink_SSE2()
297 const int scale_xy = wrk->fxy_scale; in RescalerExportRowShrink_SSE2()
308 const __m128i D3 = _mm_srli_epi64(B3, WEBP_RESCALER_RFIX); in RescalerExportRowShrink_SSE2() local
309 const __m128i E0 = _mm_sub_epi64(A0, D0); // irow[x] - frac in RescalerExportRowShrink_SSE2()
312 const __m128i E3 = _mm_sub_epi64(A3, D3); in RescalerExportRowShrink_SSE2()
314 const __m128i F3 = _mm_slli_epi64(D3, 32); in RescalerExportRowShrink_SSE2()
324 const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale); in RescalerExportRowShrink_SSE2()
329 const uint32_t scale = wrk->fxy_scale; in RescalerExportRowShrink_SSE2() local
330 const __m128i mult = _mm_set_epi32(0, scale, 0, scale); in RescalerExportRowShrink_SSE2()
340 const int v = (int)MULT_FIX(irow[x_out], scale); in RescalerExportRowShrink_SSE2()
351 //------------------------------------------------------------------------------