/*
 * Copyright 2021 The libgav1 Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LIBGAV1_SRC_DSP_X86_INTRAPRED_CFL_SSE4_H_
#define LIBGAV1_SRC_DSP_X86_INTRAPRED_CFL_SSE4_H_

#include "src/dsp/dsp.h"
#include "src/utils/cpu.h"

namespace libgav1 {
namespace dsp {

// Initializes Dsp::cfl_intra_predictors and Dsp::cfl_subsamplers. The
// LIBGAV1_Dsp8bpp_* and LIBGAV1_Dsp10bpp_* defines in the remainder of this
// header give the specifics (bitdepth, transform size and function family).
// This function is not thread-safe.
void IntraPredCflInit_SSE4_1();

}  // namespace dsp
}  // namespace libgav1

// If SSE4.1 is being targeted and a given define has not already been set
// because a higher level of optimization is enabled, signal that the SSE4.1
// implementation should be used for that entry.
35 #if LIBGAV1_TARGETING_SSE4_1 36 #ifndef LIBGAV1_Dsp8bpp_TransformSize4x4_CflSubsampler420 37 #define LIBGAV1_Dsp8bpp_TransformSize4x4_CflSubsampler420 LIBGAV1_CPU_SSE4_1 38 #endif 39 40 #ifndef LIBGAV1_Dsp8bpp_TransformSize4x8_CflSubsampler420 41 #define LIBGAV1_Dsp8bpp_TransformSize4x8_CflSubsampler420 LIBGAV1_CPU_SSE4_1 42 #endif 43 44 #ifndef LIBGAV1_Dsp8bpp_TransformSize4x16_CflSubsampler420 45 #define LIBGAV1_Dsp8bpp_TransformSize4x16_CflSubsampler420 LIBGAV1_CPU_SSE4_1 46 #endif 47 48 #ifndef LIBGAV1_Dsp8bpp_TransformSize8x4_CflSubsampler420 49 #define LIBGAV1_Dsp8bpp_TransformSize8x4_CflSubsampler420 LIBGAV1_CPU_SSE4_1 50 #endif 51 52 #ifndef LIBGAV1_Dsp8bpp_TransformSize8x8_CflSubsampler420 53 #define LIBGAV1_Dsp8bpp_TransformSize8x8_CflSubsampler420 LIBGAV1_CPU_SSE4_1 54 #endif 55 56 #ifndef LIBGAV1_Dsp8bpp_TransformSize8x16_CflSubsampler420 57 #define LIBGAV1_Dsp8bpp_TransformSize8x16_CflSubsampler420 LIBGAV1_CPU_SSE4_1 58 #endif 59 60 #ifndef LIBGAV1_Dsp8bpp_TransformSize8x32_CflSubsampler420 61 #define LIBGAV1_Dsp8bpp_TransformSize8x32_CflSubsampler420 LIBGAV1_CPU_SSE4_1 62 #endif 63 64 #ifndef LIBGAV1_Dsp8bpp_TransformSize16x4_CflSubsampler420 65 #define LIBGAV1_Dsp8bpp_TransformSize16x4_CflSubsampler420 LIBGAV1_CPU_SSE4_1 66 #endif 67 68 #ifndef LIBGAV1_Dsp8bpp_TransformSize16x8_CflSubsampler420 69 #define LIBGAV1_Dsp8bpp_TransformSize16x8_CflSubsampler420 LIBGAV1_CPU_SSE4_1 70 #endif 71 72 #ifndef LIBGAV1_Dsp8bpp_TransformSize16x16_CflSubsampler420 73 #define LIBGAV1_Dsp8bpp_TransformSize16x16_CflSubsampler420 LIBGAV1_CPU_SSE4_1 74 #endif 75 76 #ifndef LIBGAV1_Dsp8bpp_TransformSize16x32_CflSubsampler420 77 #define LIBGAV1_Dsp8bpp_TransformSize16x32_CflSubsampler420 LIBGAV1_CPU_SSE4_1 78 #endif 79 80 #ifndef LIBGAV1_Dsp8bpp_TransformSize32x8_CflSubsampler420 81 #define LIBGAV1_Dsp8bpp_TransformSize32x8_CflSubsampler420 LIBGAV1_CPU_SSE4_1 82 #endif 83 84 #ifndef LIBGAV1_Dsp8bpp_TransformSize32x16_CflSubsampler420 85 #define 
LIBGAV1_Dsp8bpp_TransformSize32x16_CflSubsampler420 LIBGAV1_CPU_SSE4_1 86 #endif 87 88 #ifndef LIBGAV1_Dsp8bpp_TransformSize32x32_CflSubsampler420 89 #define LIBGAV1_Dsp8bpp_TransformSize32x32_CflSubsampler420 LIBGAV1_CPU_SSE4_1 90 #endif 91 92 #ifndef LIBGAV1_Dsp8bpp_TransformSize4x4_CflSubsampler444 93 #define LIBGAV1_Dsp8bpp_TransformSize4x4_CflSubsampler444 LIBGAV1_CPU_SSE4_1 94 #endif 95 96 #ifndef LIBGAV1_Dsp8bpp_TransformSize4x8_CflSubsampler444 97 #define LIBGAV1_Dsp8bpp_TransformSize4x8_CflSubsampler444 LIBGAV1_CPU_SSE4_1 98 #endif 99 100 #ifndef LIBGAV1_Dsp8bpp_TransformSize4x16_CflSubsampler444 101 #define LIBGAV1_Dsp8bpp_TransformSize4x16_CflSubsampler444 LIBGAV1_CPU_SSE4_1 102 #endif 103 104 #ifndef LIBGAV1_Dsp8bpp_TransformSize8x4_CflSubsampler444 105 #define LIBGAV1_Dsp8bpp_TransformSize8x4_CflSubsampler444 LIBGAV1_CPU_SSE4_1 106 #endif 107 108 #ifndef LIBGAV1_Dsp8bpp_TransformSize8x8_CflSubsampler444 109 #define LIBGAV1_Dsp8bpp_TransformSize8x8_CflSubsampler444 LIBGAV1_CPU_SSE4_1 110 #endif 111 112 #ifndef LIBGAV1_Dsp8bpp_TransformSize8x16_CflSubsampler444 113 #define LIBGAV1_Dsp8bpp_TransformSize8x16_CflSubsampler444 LIBGAV1_CPU_SSE4_1 114 #endif 115 116 #ifndef LIBGAV1_Dsp8bpp_TransformSize8x32_CflSubsampler444 117 #define LIBGAV1_Dsp8bpp_TransformSize8x32_CflSubsampler444 LIBGAV1_CPU_SSE4_1 118 #endif 119 120 #ifndef LIBGAV1_Dsp8bpp_TransformSize16x4_CflSubsampler444 121 #define LIBGAV1_Dsp8bpp_TransformSize16x4_CflSubsampler444 LIBGAV1_CPU_SSE4_1 122 #endif 123 124 #ifndef LIBGAV1_Dsp8bpp_TransformSize16x8_CflSubsampler444 125 #define LIBGAV1_Dsp8bpp_TransformSize16x8_CflSubsampler444 LIBGAV1_CPU_SSE4_1 126 #endif 127 128 #ifndef LIBGAV1_Dsp8bpp_TransformSize16x16_CflSubsampler444 129 #define LIBGAV1_Dsp8bpp_TransformSize16x16_CflSubsampler444 LIBGAV1_CPU_SSE4_1 130 #endif 131 132 #ifndef LIBGAV1_Dsp8bpp_TransformSize16x32_CflSubsampler444 133 #define LIBGAV1_Dsp8bpp_TransformSize16x32_CflSubsampler444 LIBGAV1_CPU_SSE4_1 134 #endif 135 136 
#ifndef LIBGAV1_Dsp8bpp_TransformSize32x8_CflSubsampler444 137 #define LIBGAV1_Dsp8bpp_TransformSize32x8_CflSubsampler444 LIBGAV1_CPU_SSE4_1 138 #endif 139 140 #ifndef LIBGAV1_Dsp8bpp_TransformSize32x16_CflSubsampler444 141 #define LIBGAV1_Dsp8bpp_TransformSize32x16_CflSubsampler444 LIBGAV1_CPU_SSE4_1 142 #endif 143 144 #ifndef LIBGAV1_Dsp8bpp_TransformSize32x32_CflSubsampler444 145 #define LIBGAV1_Dsp8bpp_TransformSize32x32_CflSubsampler444 LIBGAV1_CPU_SSE4_1 146 #endif 147 148 #ifndef LIBGAV1_Dsp8bpp_TransformSize4x4_CflIntraPredictor 149 #define LIBGAV1_Dsp8bpp_TransformSize4x4_CflIntraPredictor LIBGAV1_CPU_SSE4_1 150 #endif 151 152 #ifndef LIBGAV1_Dsp8bpp_TransformSize4x8_CflIntraPredictor 153 #define LIBGAV1_Dsp8bpp_TransformSize4x8_CflIntraPredictor LIBGAV1_CPU_SSE4_1 154 #endif 155 156 #ifndef LIBGAV1_Dsp8bpp_TransformSize4x16_CflIntraPredictor 157 #define LIBGAV1_Dsp8bpp_TransformSize4x16_CflIntraPredictor LIBGAV1_CPU_SSE4_1 158 #endif 159 160 #ifndef LIBGAV1_Dsp8bpp_TransformSize8x4_CflIntraPredictor 161 #define LIBGAV1_Dsp8bpp_TransformSize8x4_CflIntraPredictor LIBGAV1_CPU_SSE4_1 162 #endif 163 164 #ifndef LIBGAV1_Dsp8bpp_TransformSize8x8_CflIntraPredictor 165 #define LIBGAV1_Dsp8bpp_TransformSize8x8_CflIntraPredictor LIBGAV1_CPU_SSE4_1 166 #endif 167 168 #ifndef LIBGAV1_Dsp8bpp_TransformSize8x16_CflIntraPredictor 169 #define LIBGAV1_Dsp8bpp_TransformSize8x16_CflIntraPredictor LIBGAV1_CPU_SSE4_1 170 #endif 171 172 #ifndef LIBGAV1_Dsp8bpp_TransformSize8x32_CflIntraPredictor 173 #define LIBGAV1_Dsp8bpp_TransformSize8x32_CflIntraPredictor LIBGAV1_CPU_SSE4_1 174 #endif 175 176 #ifndef LIBGAV1_Dsp8bpp_TransformSize16x4_CflIntraPredictor 177 #define LIBGAV1_Dsp8bpp_TransformSize16x4_CflIntraPredictor LIBGAV1_CPU_SSE4_1 178 #endif 179 180 #ifndef LIBGAV1_Dsp8bpp_TransformSize16x8_CflIntraPredictor 181 #define LIBGAV1_Dsp8bpp_TransformSize16x8_CflIntraPredictor LIBGAV1_CPU_SSE4_1 182 #endif 183 184 #ifndef LIBGAV1_Dsp8bpp_TransformSize16x16_CflIntraPredictor 185 
#define LIBGAV1_Dsp8bpp_TransformSize16x16_CflIntraPredictor LIBGAV1_CPU_SSE4_1 186 #endif 187 188 #ifndef LIBGAV1_Dsp8bpp_TransformSize16x32_CflIntraPredictor 189 #define LIBGAV1_Dsp8bpp_TransformSize16x32_CflIntraPredictor LIBGAV1_CPU_SSE4_1 190 #endif 191 192 #ifndef LIBGAV1_Dsp8bpp_TransformSize32x8_CflIntraPredictor 193 #define LIBGAV1_Dsp8bpp_TransformSize32x8_CflIntraPredictor LIBGAV1_CPU_SSE4_1 194 #endif 195 196 #ifndef LIBGAV1_Dsp8bpp_TransformSize32x16_CflIntraPredictor 197 #define LIBGAV1_Dsp8bpp_TransformSize32x16_CflIntraPredictor LIBGAV1_CPU_SSE4_1 198 #endif 199 200 #ifndef LIBGAV1_Dsp8bpp_TransformSize32x32_CflIntraPredictor 201 #define LIBGAV1_Dsp8bpp_TransformSize32x32_CflIntraPredictor LIBGAV1_CPU_SSE4_1 202 #endif 203 204 //------------------------------------------------------------------------------ 205 // 10bpp 206 207 #ifndef LIBGAV1_Dsp10bpp_TransformSize4x4_CflSubsampler420 208 #define LIBGAV1_Dsp10bpp_TransformSize4x4_CflSubsampler420 LIBGAV1_CPU_SSE4_1 209 #endif 210 211 #ifndef LIBGAV1_Dsp10bpp_TransformSize4x8_CflSubsampler420 212 #define LIBGAV1_Dsp10bpp_TransformSize4x8_CflSubsampler420 LIBGAV1_CPU_SSE4_1 213 #endif 214 215 #ifndef LIBGAV1_Dsp10bpp_TransformSize4x16_CflSubsampler420 216 #define LIBGAV1_Dsp10bpp_TransformSize4x16_CflSubsampler420 LIBGAV1_CPU_SSE4_1 217 #endif 218 219 #ifndef LIBGAV1_Dsp10bpp_TransformSize8x4_CflSubsampler420 220 #define LIBGAV1_Dsp10bpp_TransformSize8x4_CflSubsampler420 LIBGAV1_CPU_SSE4_1 221 #endif 222 223 #ifndef LIBGAV1_Dsp10bpp_TransformSize8x8_CflSubsampler420 224 #define LIBGAV1_Dsp10bpp_TransformSize8x8_CflSubsampler420 LIBGAV1_CPU_SSE4_1 225 #endif 226 227 #ifndef LIBGAV1_Dsp10bpp_TransformSize8x16_CflSubsampler420 228 #define LIBGAV1_Dsp10bpp_TransformSize8x16_CflSubsampler420 LIBGAV1_CPU_SSE4_1 229 #endif 230 231 #ifndef LIBGAV1_Dsp10bpp_TransformSize8x32_CflSubsampler420 232 #define LIBGAV1_Dsp10bpp_TransformSize8x32_CflSubsampler420 LIBGAV1_CPU_SSE4_1 233 #endif 234 235 #ifndef 
LIBGAV1_Dsp10bpp_TransformSize16x4_CflSubsampler420 236 #define LIBGAV1_Dsp10bpp_TransformSize16x4_CflSubsampler420 LIBGAV1_CPU_SSE4_1 237 #endif 238 239 #ifndef LIBGAV1_Dsp10bpp_TransformSize16x8_CflSubsampler420 240 #define LIBGAV1_Dsp10bpp_TransformSize16x8_CflSubsampler420 LIBGAV1_CPU_SSE4_1 241 #endif 242 243 #ifndef LIBGAV1_Dsp10bpp_TransformSize16x16_CflSubsampler420 244 #define LIBGAV1_Dsp10bpp_TransformSize16x16_CflSubsampler420 LIBGAV1_CPU_SSE4_1 245 #endif 246 247 #ifndef LIBGAV1_Dsp10bpp_TransformSize16x32_CflSubsampler420 248 #define LIBGAV1_Dsp10bpp_TransformSize16x32_CflSubsampler420 LIBGAV1_CPU_SSE4_1 249 #endif 250 251 #ifndef LIBGAV1_Dsp10bpp_TransformSize32x8_CflSubsampler420 252 #define LIBGAV1_Dsp10bpp_TransformSize32x8_CflSubsampler420 LIBGAV1_CPU_SSE4_1 253 #endif 254 255 #ifndef LIBGAV1_Dsp10bpp_TransformSize32x16_CflSubsampler420 256 #define LIBGAV1_Dsp10bpp_TransformSize32x16_CflSubsampler420 LIBGAV1_CPU_SSE4_1 257 #endif 258 259 #ifndef LIBGAV1_Dsp10bpp_TransformSize32x32_CflSubsampler420 260 #define LIBGAV1_Dsp10bpp_TransformSize32x32_CflSubsampler420 LIBGAV1_CPU_SSE4_1 261 #endif 262 263 #ifndef LIBGAV1_Dsp10bpp_TransformSize4x4_CflSubsampler444 264 #define LIBGAV1_Dsp10bpp_TransformSize4x4_CflSubsampler444 LIBGAV1_CPU_SSE4_1 265 #endif 266 267 #ifndef LIBGAV1_Dsp10bpp_TransformSize4x8_CflSubsampler444 268 #define LIBGAV1_Dsp10bpp_TransformSize4x8_CflSubsampler444 LIBGAV1_CPU_SSE4_1 269 #endif 270 271 #ifndef LIBGAV1_Dsp10bpp_TransformSize4x16_CflSubsampler444 272 #define LIBGAV1_Dsp10bpp_TransformSize4x16_CflSubsampler444 LIBGAV1_CPU_SSE4_1 273 #endif 274 275 #ifndef LIBGAV1_Dsp10bpp_TransformSize8x4_CflSubsampler444 276 #define LIBGAV1_Dsp10bpp_TransformSize8x4_CflSubsampler444 LIBGAV1_CPU_SSE4_1 277 #endif 278 279 #ifndef LIBGAV1_Dsp10bpp_TransformSize8x8_CflSubsampler444 280 #define LIBGAV1_Dsp10bpp_TransformSize8x8_CflSubsampler444 LIBGAV1_CPU_SSE4_1 281 #endif 282 283 #ifndef LIBGAV1_Dsp10bpp_TransformSize8x16_CflSubsampler444 284 
#define LIBGAV1_Dsp10bpp_TransformSize8x16_CflSubsampler444 LIBGAV1_CPU_SSE4_1 285 #endif 286 287 #ifndef LIBGAV1_Dsp10bpp_TransformSize8x32_CflSubsampler444 288 #define LIBGAV1_Dsp10bpp_TransformSize8x32_CflSubsampler444 LIBGAV1_CPU_SSE4_1 289 #endif 290 291 #ifndef LIBGAV1_Dsp10bpp_TransformSize16x4_CflSubsampler444 292 #define LIBGAV1_Dsp10bpp_TransformSize16x4_CflSubsampler444 LIBGAV1_CPU_SSE4_1 293 #endif 294 295 #ifndef LIBGAV1_Dsp10bpp_TransformSize16x8_CflSubsampler444 296 #define LIBGAV1_Dsp10bpp_TransformSize16x8_CflSubsampler444 LIBGAV1_CPU_SSE4_1 297 #endif 298 299 #ifndef LIBGAV1_Dsp10bpp_TransformSize16x16_CflSubsampler444 300 #define LIBGAV1_Dsp10bpp_TransformSize16x16_CflSubsampler444 LIBGAV1_CPU_SSE4_1 301 #endif 302 303 #ifndef LIBGAV1_Dsp10bpp_TransformSize16x32_CflSubsampler444 304 #define LIBGAV1_Dsp10bpp_TransformSize16x32_CflSubsampler444 LIBGAV1_CPU_SSE4_1 305 #endif 306 307 #ifndef LIBGAV1_Dsp10bpp_TransformSize32x8_CflSubsampler444 308 #define LIBGAV1_Dsp10bpp_TransformSize32x8_CflSubsampler444 LIBGAV1_CPU_SSE4_1 309 #endif 310 311 #ifndef LIBGAV1_Dsp10bpp_TransformSize32x16_CflSubsampler444 312 #define LIBGAV1_Dsp10bpp_TransformSize32x16_CflSubsampler444 LIBGAV1_CPU_SSE4_1 313 #endif 314 315 #ifndef LIBGAV1_Dsp10bpp_TransformSize32x32_CflSubsampler444 316 #define LIBGAV1_Dsp10bpp_TransformSize32x32_CflSubsampler444 LIBGAV1_CPU_SSE4_1 317 #endif 318 319 #ifndef LIBGAV1_Dsp10bpp_TransformSize4x4_CflIntraPredictor 320 #define LIBGAV1_Dsp10bpp_TransformSize4x4_CflIntraPredictor LIBGAV1_CPU_SSE4_1 321 #endif 322 323 #ifndef LIBGAV1_Dsp10bpp_TransformSize4x8_CflIntraPredictor 324 #define LIBGAV1_Dsp10bpp_TransformSize4x8_CflIntraPredictor LIBGAV1_CPU_SSE4_1 325 #endif 326 327 #ifndef LIBGAV1_Dsp10bpp_TransformSize4x16_CflIntraPredictor 328 #define LIBGAV1_Dsp10bpp_TransformSize4x16_CflIntraPredictor LIBGAV1_CPU_SSE4_1 329 #endif 330 331 #ifndef LIBGAV1_Dsp10bpp_TransformSize8x4_CflIntraPredictor 332 #define 
LIBGAV1_Dsp10bpp_TransformSize8x4_CflIntraPredictor LIBGAV1_CPU_SSE4_1 333 #endif 334 335 #ifndef LIBGAV1_Dsp10bpp_TransformSize8x8_CflIntraPredictor 336 #define LIBGAV1_Dsp10bpp_TransformSize8x8_CflIntraPredictor LIBGAV1_CPU_SSE4_1 337 #endif 338 339 #ifndef LIBGAV1_Dsp10bpp_TransformSize8x16_CflIntraPredictor 340 #define LIBGAV1_Dsp10bpp_TransformSize8x16_CflIntraPredictor LIBGAV1_CPU_SSE4_1 341 #endif 342 343 #ifndef LIBGAV1_Dsp10bpp_TransformSize8x32_CflIntraPredictor 344 #define LIBGAV1_Dsp10bpp_TransformSize8x32_CflIntraPredictor LIBGAV1_CPU_SSE4_1 345 #endif 346 347 #ifndef LIBGAV1_Dsp10bpp_TransformSize16x4_CflIntraPredictor 348 #define LIBGAV1_Dsp10bpp_TransformSize16x4_CflIntraPredictor LIBGAV1_CPU_SSE4_1 349 #endif 350 351 #ifndef LIBGAV1_Dsp10bpp_TransformSize16x8_CflIntraPredictor 352 #define LIBGAV1_Dsp10bpp_TransformSize16x8_CflIntraPredictor LIBGAV1_CPU_SSE4_1 353 #endif 354 355 #ifndef LIBGAV1_Dsp10bpp_TransformSize16x16_CflIntraPredictor 356 #define LIBGAV1_Dsp10bpp_TransformSize16x16_CflIntraPredictor LIBGAV1_CPU_SSE4_1 357 #endif 358 359 #ifndef LIBGAV1_Dsp10bpp_TransformSize16x32_CflIntraPredictor 360 #define LIBGAV1_Dsp10bpp_TransformSize16x32_CflIntraPredictor LIBGAV1_CPU_SSE4_1 361 #endif 362 363 #ifndef LIBGAV1_Dsp10bpp_TransformSize32x8_CflIntraPredictor 364 #define LIBGAV1_Dsp10bpp_TransformSize32x8_CflIntraPredictor LIBGAV1_CPU_SSE4_1 365 #endif 366 367 #ifndef LIBGAV1_Dsp10bpp_TransformSize32x16_CflIntraPredictor 368 #define LIBGAV1_Dsp10bpp_TransformSize32x16_CflIntraPredictor LIBGAV1_CPU_SSE4_1 369 #endif 370 371 #ifndef LIBGAV1_Dsp10bpp_TransformSize32x32_CflIntraPredictor 372 #define LIBGAV1_Dsp10bpp_TransformSize32x32_CflIntraPredictor LIBGAV1_CPU_SSE4_1 373 #endif 374 #endif // LIBGAV1_TARGETING_SSE4_1 375 376 #endif // LIBGAV1_SRC_DSP_X86_INTRAPRED_CFL_SSE4_H_ 377