Lines Matching full:scale
145 // weight_h[1]: scale - weight_h vector
148 // weight_w[0]: weights_w and scale - weights_w interleave vector
152 const __m128i scale = _mm_set1_epi16(256); in LoadSmoothWeights4() local
155 weight_h[1] = _mm_sub_epi16(scale, weight_h[0]); in LoadSmoothWeights4()
161 weight_h[1] = _mm_sub_epi16(scale, weight_h[0]); in LoadSmoothWeights4()
166 weight_h[1] = _mm_sub_epi16(scale, weight_h[0]); in LoadSmoothWeights4()
168 weight_h[3] = _mm_sub_epi16(scale, weight_h[2]); in LoadSmoothWeights4()
216 const __m128i scale = _mm_set1_epi32(256); in Smooth4x4_SSE4_1() local
221 const __m128i inverted_weights = _mm_sub_epi32(scale, weights); in Smooth4x4_SSE4_1()
231 scaled_top_right, scale); in Smooth4x4_SSE4_1()
234 scaled_bottom_left, scaled_top_right, scale); in Smooth4x4_SSE4_1()
237 scaled_bottom_left, scaled_top_right, scale); in Smooth4x4_SSE4_1()
240 scaled_bottom_left, scaled_top_right, scale); in Smooth4x4_SSE4_1()
308 // weight_h[1]: scale - weight_h vector
315 // weight_w[0]: weights_w and scale - weights_w interleave vector, first half
316 // weight_w[1]: weights_w and scale - weights_w interleave vector, second half
550 __m128i scale = _mm_set1_epi32(256); in SmoothHorizontal4x4_SSE4_1() local
551 const __m128i inverted_weights = _mm_sub_epi32(scale, weights); in SmoothHorizontal4x4_SSE4_1()
553 scale = _mm_set1_epi32(128); in SmoothHorizontal4x4_SSE4_1()
555 WriteSmoothHorizontalSum4<0>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x4_SSE4_1()
557 WriteSmoothHorizontalSum4<0x55>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x4_SSE4_1()
559 WriteSmoothHorizontalSum4<0xAA>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x4_SSE4_1()
561 WriteSmoothHorizontalSum4<0xFF>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x4_SSE4_1()
571 __m128i scale = _mm_set1_epi32(256); in SmoothHorizontal4x8_SSE4_1() local
572 const __m128i inverted_weights = _mm_sub_epi32(scale, weights); in SmoothHorizontal4x8_SSE4_1()
574 scale = _mm_set1_epi32(128); in SmoothHorizontal4x8_SSE4_1()
578 WriteSmoothHorizontalSum4<0>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x8_SSE4_1()
580 WriteSmoothHorizontalSum4<0x55>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x8_SSE4_1()
582 WriteSmoothHorizontalSum4<0xAA>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x8_SSE4_1()
584 WriteSmoothHorizontalSum4<0xFF>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x8_SSE4_1()
588 WriteSmoothHorizontalSum4<0>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x8_SSE4_1()
590 WriteSmoothHorizontalSum4<0x55>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x8_SSE4_1()
592 WriteSmoothHorizontalSum4<0xAA>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x8_SSE4_1()
594 WriteSmoothHorizontalSum4<0xFF>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x8_SSE4_1()
604 __m128i scale = _mm_set1_epi32(256); in SmoothHorizontal4x16_SSE4_1() local
605 const __m128i inverted_weights = _mm_sub_epi32(scale, weights); in SmoothHorizontal4x16_SSE4_1()
607 scale = _mm_set1_epi32(128); in SmoothHorizontal4x16_SSE4_1()
611 WriteSmoothHorizontalSum4<0>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
613 WriteSmoothHorizontalSum4<0x55>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
615 WriteSmoothHorizontalSum4<0xAA>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
617 WriteSmoothHorizontalSum4<0xFF>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
621 WriteSmoothHorizontalSum4<0>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
623 WriteSmoothHorizontalSum4<0x55>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
625 WriteSmoothHorizontalSum4<0xAA>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
627 WriteSmoothHorizontalSum4<0xFF>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
631 WriteSmoothHorizontalSum4<0>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
633 WriteSmoothHorizontalSum4<0x55>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
635 WriteSmoothHorizontalSum4<0xAA>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
637 WriteSmoothHorizontalSum4<0xFF>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
641 WriteSmoothHorizontalSum4<0>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
643 WriteSmoothHorizontalSum4<0x55>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
645 WriteSmoothHorizontalSum4<0xAA>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
647 WriteSmoothHorizontalSum4<0xFF>(dst, left, weights, scaled_top_right, scale); in SmoothHorizontal4x16_SSE4_1()
658 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal8x4_SSE4_1() local
659 const __m128i inverted_weights = _mm_sub_epi16(scale, weights); in SmoothHorizontal8x4_SSE4_1()
661 scale = _mm_set1_epi16(128); in SmoothHorizontal8x4_SSE4_1()
665 WriteSmoothDirectionalSum8(dst, left_y, weights, scaled_top_right, scale); in SmoothHorizontal8x4_SSE4_1()
669 WriteSmoothDirectionalSum8(dst, left_y, weights, scaled_top_right, scale); in SmoothHorizontal8x4_SSE4_1()
673 WriteSmoothDirectionalSum8(dst, left_y, weights, scaled_top_right, scale); in SmoothHorizontal8x4_SSE4_1()
677 WriteSmoothDirectionalSum8(dst, left_y, weights, scaled_top_right, scale); in SmoothHorizontal8x4_SSE4_1()
688 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal8x8_SSE4_1() local
689 const __m128i inverted_weights = _mm_sub_epi16(scale, weights); in SmoothHorizontal8x8_SSE4_1()
691 scale = _mm_set1_epi16(128); in SmoothHorizontal8x8_SSE4_1()
696 WriteSmoothDirectionalSum8(dst, left_y, weights, scaled_top_right, scale); in SmoothHorizontal8x8_SSE4_1()
708 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal8x16_SSE4_1() local
709 const __m128i inverted_weights = _mm_sub_epi16(scale, weights); in SmoothHorizontal8x16_SSE4_1()
711 scale = _mm_set1_epi16(128); in SmoothHorizontal8x16_SSE4_1()
718 WriteSmoothDirectionalSum8(dst, left_y, weights, scaled_top_right, scale); in SmoothHorizontal8x16_SSE4_1()
725 WriteSmoothDirectionalSum8(dst, left_y, weights, scaled_top_right, scale); in SmoothHorizontal8x16_SSE4_1()
737 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal8x32_SSE4_1() local
738 const __m128i inverted_weights = _mm_sub_epi16(scale, weights); in SmoothHorizontal8x32_SSE4_1()
740 scale = _mm_set1_epi16(128); in SmoothHorizontal8x32_SSE4_1()
747 WriteSmoothDirectionalSum8(dst, left_y, weights, scaled_top_right, scale); in SmoothHorizontal8x32_SSE4_1()
754 WriteSmoothDirectionalSum8(dst, left_y, weights, scaled_top_right, scale); in SmoothHorizontal8x32_SSE4_1()
761 WriteSmoothDirectionalSum8(dst, left_y, weights, scaled_top_right, scale); in SmoothHorizontal8x32_SSE4_1()
768 WriteSmoothDirectionalSum8(dst, left_y, weights, scaled_top_right, scale); in SmoothHorizontal8x32_SSE4_1()
781 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal16x4_SSE4_1() local
784 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothHorizontal16x4_SSE4_1()
785 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothHorizontal16x4_SSE4_1()
790 scale = _mm_set1_epi16(128); in SmoothHorizontal16x4_SSE4_1()
795 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal16x4_SSE4_1()
800 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal16x4_SSE4_1()
805 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal16x4_SSE4_1()
810 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal16x4_SSE4_1()
821 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal16x8_SSE4_1() local
824 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothHorizontal16x8_SSE4_1()
825 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothHorizontal16x8_SSE4_1()
830 scale = _mm_set1_epi16(128); in SmoothHorizontal16x8_SSE4_1()
836 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal16x8_SSE4_1()
848 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal16x16_SSE4_1() local
851 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothHorizontal16x16_SSE4_1()
852 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothHorizontal16x16_SSE4_1()
857 scale = _mm_set1_epi16(128); in SmoothHorizontal16x16_SSE4_1()
865 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal16x16_SSE4_1()
873 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal16x16_SSE4_1()
885 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal16x32_SSE4_1() local
888 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothHorizontal16x32_SSE4_1()
889 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothHorizontal16x32_SSE4_1()
894 scale = _mm_set1_epi16(128); in SmoothHorizontal16x32_SSE4_1()
902 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal16x32_SSE4_1()
910 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal16x32_SSE4_1()
918 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal16x32_SSE4_1()
926 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal16x32_SSE4_1()
938 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal16x64_SSE4_1() local
941 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothHorizontal16x64_SSE4_1()
942 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothHorizontal16x64_SSE4_1()
947 scale = _mm_set1_epi16(128); in SmoothHorizontal16x64_SSE4_1()
956 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal16x64_SSE4_1()
971 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal32x8_SSE4_1() local
976 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothHorizontal32x8_SSE4_1()
977 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothHorizontal32x8_SSE4_1()
978 const __m128i inverted_weights3 = _mm_sub_epi16(scale, weights3); in SmoothHorizontal32x8_SSE4_1()
979 const __m128i inverted_weights4 = _mm_sub_epi16(scale, weights4); in SmoothHorizontal32x8_SSE4_1()
988 scale = _mm_set1_epi16(128); in SmoothHorizontal32x8_SSE4_1()
994 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal32x8_SSE4_1()
996 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal32x8_SSE4_1()
1010 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal32x16_SSE4_1() local
1015 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothHorizontal32x16_SSE4_1()
1016 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothHorizontal32x16_SSE4_1()
1017 const __m128i inverted_weights3 = _mm_sub_epi16(scale, weights3); in SmoothHorizontal32x16_SSE4_1()
1018 const __m128i inverted_weights4 = _mm_sub_epi16(scale, weights4); in SmoothHorizontal32x16_SSE4_1()
1027 scale = _mm_set1_epi16(128); in SmoothHorizontal32x16_SSE4_1()
1033 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal32x16_SSE4_1()
1035 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal32x16_SSE4_1()
1044 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal32x16_SSE4_1()
1046 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal32x16_SSE4_1()
1059 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal32x32_SSE4_1() local
1064 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothHorizontal32x32_SSE4_1()
1065 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothHorizontal32x32_SSE4_1()
1066 const __m128i inverted_weights3 = _mm_sub_epi16(scale, weights3); in SmoothHorizontal32x32_SSE4_1()
1067 const __m128i inverted_weights4 = _mm_sub_epi16(scale, weights4); in SmoothHorizontal32x32_SSE4_1()
1076 scale = _mm_set1_epi16(128); in SmoothHorizontal32x32_SSE4_1()
1084 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal32x32_SSE4_1()
1086 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal32x32_SSE4_1()
1094 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal32x32_SSE4_1()
1096 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal32x32_SSE4_1()
1104 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal32x32_SSE4_1()
1106 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal32x32_SSE4_1()
1114 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal32x32_SSE4_1()
1116 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal32x32_SSE4_1()
1129 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal32x64_SSE4_1() local
1134 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothHorizontal32x64_SSE4_1()
1135 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothHorizontal32x64_SSE4_1()
1136 const __m128i inverted_weights3 = _mm_sub_epi16(scale, weights3); in SmoothHorizontal32x64_SSE4_1()
1137 const __m128i inverted_weights4 = _mm_sub_epi16(scale, weights4); in SmoothHorizontal32x64_SSE4_1()
1146 scale = _mm_set1_epi16(128); in SmoothHorizontal32x64_SSE4_1()
1155 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal32x64_SSE4_1()
1157 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal32x64_SSE4_1()
1172 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal64x16_SSE4_1() local
1177 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothHorizontal64x16_SSE4_1()
1178 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothHorizontal64x16_SSE4_1()
1179 const __m128i inverted_weights3 = _mm_sub_epi16(scale, weights3); in SmoothHorizontal64x16_SSE4_1()
1180 const __m128i inverted_weights4 = _mm_sub_epi16(scale, weights4); in SmoothHorizontal64x16_SSE4_1()
1195 const __m128i inverted_weights5 = _mm_sub_epi16(scale, weights5); in SmoothHorizontal64x16_SSE4_1()
1196 const __m128i inverted_weights6 = _mm_sub_epi16(scale, weights6); in SmoothHorizontal64x16_SSE4_1()
1197 const __m128i inverted_weights7 = _mm_sub_epi16(scale, weights7); in SmoothHorizontal64x16_SSE4_1()
1198 const __m128i inverted_weights8 = _mm_sub_epi16(scale, weights8); in SmoothHorizontal64x16_SSE4_1()
1207 scale = _mm_set1_epi16(128); in SmoothHorizontal64x16_SSE4_1()
1214 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal64x16_SSE4_1()
1216 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal64x16_SSE4_1()
1218 scaled_top_right5, scaled_top_right6, scale); in SmoothHorizontal64x16_SSE4_1()
1220 scaled_top_right7, scaled_top_right8, scale); in SmoothHorizontal64x16_SSE4_1()
1228 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal64x16_SSE4_1()
1230 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal64x16_SSE4_1()
1232 scaled_top_right5, scaled_top_right6, scale); in SmoothHorizontal64x16_SSE4_1()
1234 scaled_top_right7, scaled_top_right8, scale); in SmoothHorizontal64x16_SSE4_1()
1248 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal64x32_SSE4_1() local
1253 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothHorizontal64x32_SSE4_1()
1254 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothHorizontal64x32_SSE4_1()
1255 const __m128i inverted_weights3 = _mm_sub_epi16(scale, weights3); in SmoothHorizontal64x32_SSE4_1()
1256 const __m128i inverted_weights4 = _mm_sub_epi16(scale, weights4); in SmoothHorizontal64x32_SSE4_1()
1271 const __m128i inverted_weights5 = _mm_sub_epi16(scale, weights5); in SmoothHorizontal64x32_SSE4_1()
1272 const __m128i inverted_weights6 = _mm_sub_epi16(scale, weights6); in SmoothHorizontal64x32_SSE4_1()
1273 const __m128i inverted_weights7 = _mm_sub_epi16(scale, weights7); in SmoothHorizontal64x32_SSE4_1()
1274 const __m128i inverted_weights8 = _mm_sub_epi16(scale, weights8); in SmoothHorizontal64x32_SSE4_1()
1283 scale = _mm_set1_epi16(128); in SmoothHorizontal64x32_SSE4_1()
1289 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal64x32_SSE4_1()
1291 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal64x32_SSE4_1()
1293 scaled_top_right5, scaled_top_right6, scale); in SmoothHorizontal64x32_SSE4_1()
1295 scaled_top_right7, scaled_top_right8, scale); in SmoothHorizontal64x32_SSE4_1()
1304 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal64x32_SSE4_1()
1306 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal64x32_SSE4_1()
1308 scaled_top_right5, scaled_top_right6, scale); in SmoothHorizontal64x32_SSE4_1()
1310 scaled_top_right7, scaled_top_right8, scale); in SmoothHorizontal64x32_SSE4_1()
1318 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal64x32_SSE4_1()
1320 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal64x32_SSE4_1()
1322 scaled_top_right5, scaled_top_right6, scale); in SmoothHorizontal64x32_SSE4_1()
1324 scaled_top_right7, scaled_top_right8, scale); in SmoothHorizontal64x32_SSE4_1()
1332 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal64x32_SSE4_1()
1334 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal64x32_SSE4_1()
1336 scaled_top_right5, scaled_top_right6, scale); in SmoothHorizontal64x32_SSE4_1()
1338 scaled_top_right7, scaled_top_right8, scale); in SmoothHorizontal64x32_SSE4_1()
1351 __m128i scale = _mm_set1_epi16(256); in SmoothHorizontal64x64_SSE4_1() local
1356 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothHorizontal64x64_SSE4_1()
1357 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothHorizontal64x64_SSE4_1()
1358 const __m128i inverted_weights3 = _mm_sub_epi16(scale, weights3); in SmoothHorizontal64x64_SSE4_1()
1359 const __m128i inverted_weights4 = _mm_sub_epi16(scale, weights4); in SmoothHorizontal64x64_SSE4_1()
1374 const __m128i inverted_weights5 = _mm_sub_epi16(scale, weights5); in SmoothHorizontal64x64_SSE4_1()
1375 const __m128i inverted_weights6 = _mm_sub_epi16(scale, weights6); in SmoothHorizontal64x64_SSE4_1()
1376 const __m128i inverted_weights7 = _mm_sub_epi16(scale, weights7); in SmoothHorizontal64x64_SSE4_1()
1377 const __m128i inverted_weights8 = _mm_sub_epi16(scale, weights8); in SmoothHorizontal64x64_SSE4_1()
1386 scale = _mm_set1_epi16(128); in SmoothHorizontal64x64_SSE4_1()
1395 scaled_top_right1, scaled_top_right2, scale); in SmoothHorizontal64x64_SSE4_1()
1397 scaled_top_right3, scaled_top_right4, scale); in SmoothHorizontal64x64_SSE4_1()
1399 scaled_top_right5, scaled_top_right6, scale); in SmoothHorizontal64x64_SSE4_1()
1401 scaled_top_right7, scaled_top_right8, scale); in SmoothHorizontal64x64_SSE4_1()
1527 __m128i scale = _mm_set1_epi16(256); in SmoothVertical8x4_SSE4_1() local
1528 const __m128i inverted_weights = _mm_sub_epi16(scale, weights); in SmoothVertical8x4_SSE4_1()
1531 scale = _mm_set1_epi16(128); in SmoothVertical8x4_SSE4_1()
1538 WriteSmoothDirectionalSum8(dst, top, weights_y, scaled_bottom_left_y, scale); in SmoothVertical8x4_SSE4_1()
1543 WriteSmoothDirectionalSum8(dst, top, weights_y, scaled_bottom_left_y, scale); in SmoothVertical8x4_SSE4_1()
1548 WriteSmoothDirectionalSum8(dst, top, weights_y, scaled_bottom_left_y, scale); in SmoothVertical8x4_SSE4_1()
1553 WriteSmoothDirectionalSum8(dst, top, weights_y, scaled_bottom_left_y, scale); in SmoothVertical8x4_SSE4_1()
1563 __m128i scale = _mm_set1_epi16(256); in SmoothVertical8x8_SSE4_1() local
1564 const __m128i inverted_weights = _mm_sub_epi16(scale, weights); in SmoothVertical8x8_SSE4_1()
1567 scale = _mm_set1_epi16(128); in SmoothVertical8x8_SSE4_1()
1576 scale); in SmoothVertical8x8_SSE4_1()
1591 __m128i scale = _mm_set1_epi16(256); in SmoothVertical8x16_SSE4_1() local
1592 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothVertical8x16_SSE4_1()
1593 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothVertical8x16_SSE4_1()
1598 scale = _mm_set1_epi16(128); in SmoothVertical8x16_SSE4_1()
1607 scale); in SmoothVertical8x16_SSE4_1()
1616 scale); in SmoothVertical8x16_SSE4_1()
1634 __m128i scale = _mm_set1_epi16(256); in SmoothVertical8x32_SSE4_1() local
1635 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothVertical8x32_SSE4_1()
1636 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothVertical8x32_SSE4_1()
1637 const __m128i inverted_weights3 = _mm_sub_epi16(scale, weights3); in SmoothVertical8x32_SSE4_1()
1638 const __m128i inverted_weights4 = _mm_sub_epi16(scale, weights4); in SmoothVertical8x32_SSE4_1()
1647 scale = _mm_set1_epi16(128); in SmoothVertical8x32_SSE4_1()
1656 scale); in SmoothVertical8x32_SSE4_1()
1665 scale); in SmoothVertical8x32_SSE4_1()
1674 scale); in SmoothVertical8x32_SSE4_1()
1683 scale); in SmoothVertical8x32_SSE4_1()
1696 __m128i scale = _mm_set1_epi16(256); in SmoothVertical16x4_SSE4_1() local
1697 const __m128i inverted_weights = _mm_sub_epi16(scale, weights); in SmoothVertical16x4_SSE4_1()
1700 scale = _mm_set1_epi16(128); in SmoothVertical16x4_SSE4_1()
1710 scale); in SmoothVertical16x4_SSE4_1()
1717 scale); in SmoothVertical16x4_SSE4_1()
1724 scale); in SmoothVertical16x4_SSE4_1()
1731 scale); in SmoothVertical16x4_SSE4_1()
1742 __m128i scale = _mm_set1_epi16(256); in SmoothVertical16x8_SSE4_1() local
1743 const __m128i inverted_weights = _mm_sub_epi16(scale, weights); in SmoothVertical16x8_SSE4_1()
1746 scale = _mm_set1_epi16(128); in SmoothVertical16x8_SSE4_1()
1758 scale); in SmoothVertical16x8_SSE4_1()
1771 __m128i scale = _mm_set1_epi16(256); in SmoothVertical16x16_SSE4_1() local
1775 const __m128i inverted_weights_lo = _mm_sub_epi16(scale, weights_lo); in SmoothVertical16x16_SSE4_1()
1776 const __m128i inverted_weights_hi = _mm_sub_epi16(scale, weights_hi); in SmoothVertical16x16_SSE4_1()
1781 scale = _mm_set1_epi16(128); in SmoothVertical16x16_SSE4_1()
1793 scale); in SmoothVertical16x16_SSE4_1()
1803 scale); in SmoothVertical16x16_SSE4_1()
1817 __m128i scale = _mm_set1_epi16(256); in SmoothVertical16x32_SSE4_1() local
1823 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothVertical16x32_SSE4_1()
1824 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothVertical16x32_SSE4_1()
1825 const __m128i inverted_weights3 = _mm_sub_epi16(scale, weights3); in SmoothVertical16x32_SSE4_1()
1826 const __m128i inverted_weights4 = _mm_sub_epi16(scale, weights4); in SmoothVertical16x32_SSE4_1()
1835 scale = _mm_set1_epi16(128); in SmoothVertical16x32_SSE4_1()
1847 scale); in SmoothVertical16x32_SSE4_1()
1857 scale); in SmoothVertical16x32_SSE4_1()
1867 scale); in SmoothVertical16x32_SSE4_1()
1877 scale); in SmoothVertical16x32_SSE4_1()
1889 const __m128i scale = _mm_set1_epi16(256); in SmoothVertical16x64_SSE4_1() local
1901 const __m128i inverted_weights_lo = _mm_sub_epi16(scale, weights_lo); in SmoothVertical16x64_SSE4_1()
1902 const __m128i inverted_weights_hi = _mm_sub_epi16(scale, weights_hi); in SmoothVertical16x64_SSE4_1()
1946 __m128i scale = _mm_set1_epi16(256); in SmoothVertical32x8_SSE4_1() local
1948 const __m128i inverted_weights = _mm_sub_epi16(scale, weights); in SmoothVertical32x8_SSE4_1()
1951 scale = _mm_set1_epi16(128); in SmoothVertical32x8_SSE4_1()
1959 scale); in SmoothVertical32x8_SSE4_1()
1962 scale); in SmoothVertical32x8_SSE4_1()
1985 __m128i scale = _mm_set1_epi16(256); in SmoothVertical32x16_SSE4_1() local
1986 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothVertical32x16_SSE4_1()
1987 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothVertical32x16_SSE4_1()
1992 scale = _mm_set1_epi16(128); in SmoothVertical32x16_SSE4_1()
2000 scale); in SmoothVertical32x16_SSE4_1()
2003 scale); in SmoothVertical32x16_SSE4_1()
2013 scale); in SmoothVertical32x16_SSE4_1()
2016 scale); in SmoothVertical32x16_SSE4_1()
2032 __m128i scale = _mm_set1_epi16(256); in SmoothVertical32x32_SSE4_1() local
2043 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothVertical32x32_SSE4_1()
2044 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothVertical32x32_SSE4_1()
2045 const __m128i inverted_weights3 = _mm_sub_epi16(scale, weights3); in SmoothVertical32x32_SSE4_1()
2046 const __m128i inverted_weights4 = _mm_sub_epi16(scale, weights4); in SmoothVertical32x32_SSE4_1()
2055 scale = _mm_set1_epi16(128); in SmoothVertical32x32_SSE4_1()
2063 scale); in SmoothVertical32x32_SSE4_1()
2066 scale); in SmoothVertical32x32_SSE4_1()
2076 scale); in SmoothVertical32x32_SSE4_1()
2079 scale); in SmoothVertical32x32_SSE4_1()
2089 scale); in SmoothVertical32x32_SSE4_1()
2092 scale); in SmoothVertical32x32_SSE4_1()
2102 scale); in SmoothVertical32x32_SSE4_1()
2105 scale); in SmoothVertical32x32_SSE4_1()
2125 const __m128i scale = _mm_set1_epi16(256); in SmoothVertical32x64_SSE4_1() local
2132 const __m128i inverted_weights_lo = _mm_sub_epi16(scale, weights_lo); in SmoothVertical32x64_SSE4_1()
2133 const __m128i inverted_weights_hi = _mm_sub_epi16(scale, weights_hi); in SmoothVertical32x64_SSE4_1()
2176 __m128i scale = _mm_set1_epi16(256); in SmoothVertical64x16_SSE4_1() local
2188 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothVertical64x16_SSE4_1()
2189 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothVertical64x16_SSE4_1()
2200 scale = _mm_set1_epi16(128); in SmoothVertical64x16_SSE4_1()
2208 scale); in SmoothVertical64x16_SSE4_1()
2211 scale); in SmoothVertical64x16_SSE4_1()
2214 scale); in SmoothVertical64x16_SSE4_1()
2217 scale); in SmoothVertical64x16_SSE4_1()
2227 scale); in SmoothVertical64x16_SSE4_1()
2230 scale); in SmoothVertical64x16_SSE4_1()
2233 scale); in SmoothVertical64x16_SSE4_1()
2236 scale); in SmoothVertical64x16_SSE4_1()
2268 __m128i scale = _mm_set1_epi16(256); in SmoothVertical64x32_SSE4_1() local
2269 const __m128i inverted_weights1 = _mm_sub_epi16(scale, weights1); in SmoothVertical64x32_SSE4_1()
2270 const __m128i inverted_weights2 = _mm_sub_epi16(scale, weights2); in SmoothVertical64x32_SSE4_1()
2271 const __m128i inverted_weights3 = _mm_sub_epi16(scale, weights3); in SmoothVertical64x32_SSE4_1()
2272 const __m128i inverted_weights4 = _mm_sub_epi16(scale, weights4); in SmoothVertical64x32_SSE4_1()
2281 scale = _mm_set1_epi16(128); in SmoothVertical64x32_SSE4_1()
2290 scale); in SmoothVertical64x32_SSE4_1()
2293 scale); in SmoothVertical64x32_SSE4_1()
2296 scale); in SmoothVertical64x32_SSE4_1()
2299 scale); in SmoothVertical64x32_SSE4_1()
2309 scale); in SmoothVertical64x32_SSE4_1()
2312 scale); in SmoothVertical64x32_SSE4_1()
2315 scale); in SmoothVertical64x32_SSE4_1()
2318 scale); in SmoothVertical64x32_SSE4_1()
2328 scale); in SmoothVertical64x32_SSE4_1()
2331 scale); in SmoothVertical64x32_SSE4_1()
2334 scale); in SmoothVertical64x32_SSE4_1()
2337 scale); in SmoothVertical64x32_SSE4_1()
2347 scale); in SmoothVertical64x32_SSE4_1()
2350 scale); in SmoothVertical64x32_SSE4_1()
2353 scale); in SmoothVertical64x32_SSE4_1()
2356 scale); in SmoothVertical64x32_SSE4_1()
2382 const __m128i scale = _mm_set1_epi16(256); in SmoothVertical64x64_SSE4_1() local
2389 const __m128i inverted_weights_lo = _mm_sub_epi16(scale, weights_lo); in SmoothVertical64x64_SSE4_1()
2390 const __m128i inverted_weights_hi = _mm_sub_epi16(scale, weights_hi); in SmoothVertical64x64_SSE4_1()