1 /*******************************************************************************
2 * Copyright 2001-2018 Intel Corporation
3 * All Rights Reserved.
4 *
5 * If this software was obtained under the Intel Simplified Software License,
6 * the following terms apply:
7 *
8 * The source code, information and material ("Material") contained herein is
9 * owned by Intel Corporation or its suppliers or licensors, and title to such
10 * Material remains with Intel Corporation or its suppliers or licensors. The
11 * Material contains proprietary information of Intel or its suppliers and
12 * licensors. The Material is protected by worldwide copyright laws and treaty
13 * provisions. No part of the Material may be used, copied, reproduced,
14 * modified, published, uploaded, posted, transmitted, distributed or disclosed
15 * in any way without Intel's prior express written permission. No license under
16 * any patent, copyright or other intellectual property rights in the Material
17 * is granted to or conferred upon you, either expressly, by implication,
18 * inducement, estoppel or otherwise. Any license under such intellectual
19 * property rights must be express and approved by Intel in writing.
20 *
21 * Unless otherwise agreed by Intel in writing, you may not remove or alter this
22 * notice or any other notice embedded in Materials by Intel or Intel's
23 * suppliers or licensors in any way.
24 *
25 *
26 * If this software was obtained under the Apache License, Version 2.0 (the
27 * "License"), the following terms apply:
28 *
29 * You may not use this file except in compliance with the License. You may
30 * obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
31 *
32 *
33 * Unless required by applicable law or agreed to in writing, software
34 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
35 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
36 *
37 * See the License for the specific language governing permissions and
38 * limitations under the License.
39 *******************************************************************************/
40
41 #if defined( _OPENMP )
42 #include <omp.h>
43 #endif
44
45 #include "owndefs.h"
46 #include "ippcpdefs.h"
47 #include "ippcp.h"
48 #ifdef _PCS
49 #undef _PCS
50 #define _MY_PCS_DISABLED
51 #endif
52 #include "dispatcher.h"
53 #ifdef _MY_PCS_DISABLED
54 #define _PCS
55 #endif
56 #if defined( _IPP_DATA )
57
58 static Ipp64u cpFeatures = 0;
59 static Ipp64u cpFeaturesMask = 0;
60
61 static int cpGetFeatures( Ipp64u* pFeaturesMask );
62 extern void IPP_CDECL cpGetReg( int* buf, int valEAX, int valECX );
63 extern int IPP_CDECL cp_is_avx_extension();
64 extern int IPP_CDECL cp_is_avx512_extension();
65 IppStatus owncpSetCpuFeaturesAndIdx( Ipp64u cpuFeatures, int* index );
66
67 IPPFUN( Ipp64u, ippcpGetEnabledCpuFeatures, ( void ))
68 {
69 return cpFeaturesMask;
70 }
71
72 /*===================================================================*/
73 IPPFUN( IppStatus, ippcpGetCpuFeatures, ( Ipp64u* pFeaturesMask ))
74 {
IPP_BAD_PTR1_RET(pFeaturesMask)75 IPP_BAD_PTR1_RET( pFeaturesMask )
76 {
77 if( 0 != cpFeatures){
78 *pFeaturesMask = cpFeatures;// & cpFeaturesMask;
79 } else {
80 int ret = cpGetFeatures( pFeaturesMask );
81 if( !ret ) return ippStsNotSupportedCpu;
82 }
83 return ippStsNoErr;
84 }
85 }
86
87 /*===================================================================*/
88
/* Returns 1 when every bit of Feature is set in the enabled-feature mask, 0 otherwise. */
int cpGetFeature( Ipp64u Feature )
{
    return ( Feature == ( cpFeaturesMask & Feature )) ? 1 : 0;
}
97
/* Same check as cpGetFeature, provided under the k0_ symbol prefix. */
int k0_cpGetFeature( Ipp64u Feature )
{
    return ( Feature == ( cpFeaturesMask & Feature )) ? 1 : 0;
}
/* Same check as cpGetFeature, provided under the n0_ symbol prefix. */
int n0_cpGetFeature( Ipp64u Feature )
{
    return ( Feature == ( cpFeaturesMask & Feature )) ? 1 : 0;
}
/* Same check as cpGetFeature, provided under the l9_ symbol prefix. */
int l9_cpGetFeature( Ipp64u Feature )
{
    return ( Feature == ( cpFeaturesMask & Feature )) ? 1 : 0;
}
/* Same check as cpGetFeature, provided under the e9_ symbol prefix. */
int e9_cpGetFeature( Ipp64u Feature )
{
    return ( Feature == ( cpFeaturesMask & Feature )) ? 1 : 0;
}
/* Same check as cpGetFeature, provided under the y8_ symbol prefix. */
int y8_cpGetFeature( Ipp64u Feature )
{
    return ( Feature == ( cpFeaturesMask & Feature )) ? 1 : 0;
}
113
/* Same check as cpGetFeature, provided under the h9_ symbol prefix. */
int h9_cpGetFeature( Ipp64u Feature )
{
    return ( Feature == ( cpFeaturesMask & Feature )) ? 1 : 0;
}
/* Same check as cpGetFeature, provided under the g9_ symbol prefix. */
int g9_cpGetFeature( Ipp64u Feature )
{
    return ( Feature == ( cpFeaturesMask & Feature )) ? 1 : 0;
}
/* Same check as cpGetFeature, provided under the p8_ symbol prefix. */
int p8_cpGetFeature( Ipp64u Feature )
{
    return ( Feature == ( cpFeaturesMask & Feature )) ? 1 : 0;
}
123
124 /*===================================================================*/
/* Single-bit masks used to test 32-bit CPUID register values.
   Every constant carries the 'u' suffix so that all 32 masks share one
   unsigned type; without it BIT00..BIT30 are signed int while BIT31 is
   already unsigned, which mixes signedness in the register tests. */
#define BIT00 0x00000001u
#define BIT01 0x00000002u
#define BIT02 0x00000004u
#define BIT03 0x00000008u
#define BIT04 0x00000010u
#define BIT05 0x00000020u
#define BIT06 0x00000040u
#define BIT07 0x00000080u
#define BIT08 0x00000100u
#define BIT09 0x00000200u
#define BIT10 0x00000400u
#define BIT11 0x00000800u
#define BIT12 0x00001000u
#define BIT13 0x00002000u
#define BIT14 0x00004000u
#define BIT15 0x00008000u
#define BIT16 0x00010000u
#define BIT17 0x00020000u
#define BIT18 0x00040000u
#define BIT19 0x00080000u
#define BIT20 0x00100000u
#define BIT21 0x00200000u
#define BIT22 0x00400000u
#define BIT23 0x00800000u
#define BIT24 0x01000000u
#define BIT25 0x02000000u
#define BIT26 0x04000000u
#define BIT27 0x08000000u
#define BIT28 0x10000000u
#define BIT29 0x20000000u
#define BIT30 0x40000000u
#define BIT31 0x80000000u
157
158
/*
// Name:       cpGetFeatures
// Purpose:    builds the CPU feature mask from CPUID leaves 1, 7 and
//             0x80000001 plus the two OS-support helpers, then caches it
// Returns:    always 1 (a CPUID-availability check that returns 0 could be
//             added at the top of the function if ever needed)
// Parameters:
//    pFeaturesMask   receives the detected feature mask
//
// Notes:      overwrites both cpFeatures and cpFeaturesMask, i.e. all detected
//             features become enabled
*/
static int cpGetFeatures( Ipp64u* pFeaturesMask )
{
    Ipp32u regs[4];                       /* [0]=EAX, [1]=EBX, [2]=ECX, [3]=EDX */
    Ipp32u maxBasicLeaf, maxExtLeaf;
    Ipp32u leaf1Eax, leaf1Ecx, leaf1Edx;
    Ipp32u leaf7Ebx, leaf7Ecx, leaf7Edx;
    Ipp32u extEcx;
    Ipp64u features = 0;
    /* Intel(R) AVX2 is reported only when FMA, 256-bit integer AVX and GPR BMI are all present */
    int hasFMA, hasInt256 = 0, hasBMI = 0;

    cpGetReg( (int*)regs, 0, 0 );              /* highest basic leaf */
    maxBasicLeaf = regs[0];
    cpGetReg( (int*)regs, 0x80000000, 0 );     /* highest extended leaf */
    maxExtLeaf = regs[0];

    /* ---- CPUID.eax = 1 ---- */
    cpGetReg( (int*)regs, 1, 0 );
    leaf1Eax = (Ipp32u)regs[0];
    leaf1Ecx = (Ipp32u)regs[2];
    leaf1Edx = (Ipp32u)regs[3];

    /* edx */
    if( leaf1Edx & BIT23 ) features |= ippCPUID_MMX;     /* edx[23] - MMX(TM) Technology */
    if( leaf1Edx & BIT25 ) features |= ippCPUID_SSE;     /* edx[25] - Intel(R) SSE */
    if( leaf1Edx & BIT26 ) features |= ippCPUID_SSE2;    /* edx[26] - Intel(R) SSE2 */
    /* ecx */
    if( leaf1Ecx & BIT00 ) features |= ippCPUID_SSE3;    /* ecx[0]  - Intel(R) SSE3 (formerly codenamed Prescott) */
    if( leaf1Ecx & BIT09 ) features |= ippCPUID_SSSE3;   /* ecx[9]  - SSSE3 (formerly codenamed Merom) */
    if( leaf1Ecx & BIT22 ) features |= ippCPUID_MOVBE;   /* ecx[22] - MOVBE instruction (Intel Atom(R) processor) */
    if( leaf1Ecx & BIT19 ) features |= ippCPUID_SSE41;   /* ecx[19] - Intel(R) SSE4.1 (formerly codenamed Penryn) */
    if( leaf1Ecx & BIT20 ) features |= ippCPUID_SSE42;   /* ecx[20] - Intel(R) SSE4.2 (formerly codenamed Nehalem) */
    if( leaf1Ecx & BIT28 ) features |= ippCPUID_AVX;     /* ecx[28] - Intel(R) AVX (formerly codenamed Sandy Bridge) */
    /* the OS helper is consulted only when ecx[27] and ecx[28] are both set */
    if(( BIT27 | BIT28 ) == ( leaf1Ecx & ( BIT27 | BIT28 ))) {
        const Ipp32u osAvx = (Ipp32u)cp_is_avx_extension();
        if( osAvx & BIT00 ) features |= ippAVX_ENABLEDBYOS;   /* Intel(R) AVX is supported by OS */
    }
    if( leaf1Ecx & BIT25 ) features |= ippCPUID_AES;     /* ecx[25] - Intel(R) AES New Instructions */
    if( leaf1Ecx & BIT01 ) features |= ippCPUID_CLMUL;   /* ecx[1]  - PCLMULQDQ instruction */
    if( leaf1Ecx & BIT30 ) features |= ippCPUID_RDRAND;  /* ecx[30] - RDRAND instruction */
    if( leaf1Ecx & BIT29 ) features |= ippCPUID_F16C;    /* ecx[29] - F16C instructions */
    hasFMA = ( leaf1Ecx & BIT12 ) ? 1 : 0;               /* ecx[12] - FMA 128 & 256 bit */

    /* ---- CPUID.eax = 7 ---- */
    if( maxBasicLeaf >= 7 ) {
        cpGetReg( (int*)regs, 0x7, 0 );
        leaf7Ebx = (Ipp32u)regs[1];
        leaf7Ecx = (Ipp32u)regs[2];
        leaf7Edx = (Ipp32u)regs[3];

        /* ebx[5] - Intel(R) AVX2 (int 256 bits) */
        hasInt256 = ( leaf7Ebx & BIT05 ) ? 1 : 0;
        /* VEX-encoded GPR instructions (GPR BMI):
           ebx[3] - ANDN, BEXTR, BLSI, BLSMK, BLSR, TZCNT
           ebx[8] - BZHI, MULX, PDEP, PEXT, RORX, SARX, SHLX, SHRX */
        hasBMI = (( leaf7Ebx & BIT03 ) && ( leaf7Ebx & BIT08 )) ? 1 : 0;

        /* instruction extensions of the architecture formerly codenamed Broadwell */
        if( leaf7Ebx & BIT19 ) features |= ippCPUID_ADCOX;    /* ebx[19] - ADOX/ADCX instructions */
        if( leaf7Ebx & BIT18 ) features |= ippCPUID_RDSEED;   /* ebx[18] - RDSEED instruction */
        if( leaf7Ebx & BIT29 ) features |= ippCPUID_SHA;      /* ebx[29] - Intel(R) Secure Hash Algorithm Extensions */

        /* Intel(R) AVX-512 extensions */
        if( cp_is_avx512_extension() ) {
            features |= ippAVX512_ENABLEDBYOS;                /* Intel(R) AVX-512 is supported by OS */
        }
        if( leaf7Ebx & BIT16 ) features |= ippCPUID_AVX512F;        /* ebx[16] - Foundation */
        if( leaf7Ebx & BIT26 ) features |= ippCPUID_AVX512PF;       /* ebx[26] - Prefetch instructions */
        if( leaf7Ebx & BIT27 ) features |= ippCPUID_AVX512ER;       /* ebx[27] - Exponential and Reciprocal instructions */
        if( leaf7Ebx & BIT28 ) features |= ippCPUID_AVX512CD;       /* ebx[28] - Conflict Detection */
        if( leaf7Ebx & BIT17 ) features |= ippCPUID_AVX512DQ;       /* ebx[17] - Dword & Quadword */
        if( leaf7Ebx & BIT30 ) features |= ippCPUID_AVX512BW;       /* ebx[30] - Byte & Word */
        if( leaf7Ebx & BIT31 ) features |= ippCPUID_AVX512VL;       /* ebx[31] - Vector Length extensions */
        if( leaf7Ecx & BIT01 ) features |= ippCPUID_AVX512VBMI;     /* ecx[1]  - Vector Byte Manipulation Instructions */
        if( leaf7Edx & BIT02 ) features |= ippCPUID_AVX512_4VNNIW;  /* edx[2]  - deep learning enhanced word variable precision */
        if( leaf7Edx & BIT03 ) features |= ippCPUID_AVX512_4FMADDPS;/* edx[3]  - deep learning floating-point single precision */
        /* ippCPUID_MPX together with ippCPUID_AVX can be used to tell that the
           architecture is at least the one formerly codenamed Skylake */
        if( leaf7Ebx & BIT14 ) features |= ippCPUID_MPX;            /* ebx[14] - Intel(R) Memory Protection Extensions */
        if( leaf7Ebx & BIT21 ) features |= ippCPUID_AVX512IFMA;     /* ebx[21] - IFMA PMADD52 */
    }

    /* the Intel(R) AVX2 flag is derived, not read from a single bit */
    if( hasFMA && hasInt256 && hasBMI ) {
        features |= ippCPUID_AVX2;
    }

    /* ---- CPUID.eax = 0x80000001 ---- */
    if( maxExtLeaf >= 0x80000001 ) {
        cpGetReg( (int*)regs, 0x80000001, 0 );
        extEcx = (Ipp32u)regs[2];
        if( extEcx & BIT08 ) features |= ippCPUID_PREFETCHW;   /* ecx[8] - PREFETCHW instruction */
    }

    /* architecture formerly codenamed Knights Corner: bits 4..11 of leaf-1 EAX equal 0xb1 */
    if( 0xb1 == (( leaf1Eax >> 4 ) & 0xFF )) {
        features |= ippCPUID_KNC;
    }

    cpFeatures     = features;
    cpFeaturesMask = features;     /* all CPU features are enabled by default */
    *pFeaturesMask = features;
    return 1;
}
247
/* Thread count reported by ippcpGetEnabledNumThreads(); starts at 1. */
static int cpthreads_omp_of_n_ipp = 1;
/* Library index selected by the static-library ippcpSetCpuFeatures(); -1 until it has run. */
int ippcpJumpIndexForMergedLibs = -1;
250
251 IPPFUN( int, ippcpGetEnabledNumThreads,( void ))
252 {
253 return cpthreads_omp_of_n_ipp;
254 }
255
256
/* AVX-512 feature set that identifies an Intel(R) Xeon(R) processor */
#define AVX3X_FEATURES ( ippCPUID_AVX512F  | \
                         ippCPUID_AVX512CD | \
                         ippCPUID_AVX512VL | \
                         ippCPUID_AVX512BW | \
                         ippCPUID_AVX512DQ )
/* AVX-512 feature set that identifies Intel(R) Many Integrated Core Architecture */
#define AVX3M_FEATURES ( ippCPUID_AVX512F  | \
                         ippCPUID_AVX512CD | \
                         ippCPUID_AVX512PF | \
                         ippCPUID_AVX512ER )
261
262
owncpFeaturesToIdx(Ipp64u * cpuFeatures,int * index)263 IppStatus owncpFeaturesToIdx( Ipp64u* cpuFeatures, int* index )
264 {
265 IppStatus ownStatus = ippStsNoErr;
266 Ipp64u mask = 0;
267
268 *index = 0;
269
270 if(( AVX3X_FEATURES == ( *cpuFeatures & AVX3X_FEATURES ))&&
271 ( ippAVX512_ENABLEDBYOS & cpFeatures )){ /* Intel(R) architecture formerlySkylake ia32=S0, x64=K0 */
272 mask = AVX3X_MSK;
273 *index = LIB_AVX3X;
274 } else
275 if(( AVX3M_FEATURES == ( *cpuFeatures & AVX3M_FEATURES ))&&
276 ( ippAVX512_ENABLEDBYOS & cpFeatures )){ /* Intel(R) architecture formerly codenamed Knights Landing ia32=i0, x64=N0 */
277 mask = AVX3M_MSK;
278 *index = LIB_AVX3M;
279 } else
280 if(( ippCPUID_AVX2 == ( *cpuFeatures & ippCPUID_AVX2 ))&&
281 ( ippAVX_ENABLEDBYOS & cpFeatures )){ /* Intel(R) architecture formerly codenamed Haswell ia32=H9, x64=L9 */
282 mask = AVX2_MSK;
283 *index = LIB_AVX2;
284 } else
285 if(( ippCPUID_AVX == ( *cpuFeatures & ippCPUID_AVX ))&&
286 ( ippAVX_ENABLEDBYOS & cpFeatures )){ /* Intel(R) architecture formerly codenamed Sandy Bridge ia32=G9, x64=E9 */
287 mask = AVX_MSK;
288 *index = LIB_AVX;
289 } else
290 if( ippCPUID_SSE42 == ( *cpuFeatures & ippCPUID_SSE42 )){ /* Intel(R) architecture formerly codenamed Nehalem or Intel(R) architecture formerly codenamed Westmer = Intel(R) architecture formerly codenamed Penryn + Intel(R) SSE4.2 + ?Intel(R) instruction PCLMULQDQ + ?(Intel(R) AES New Instructions) + ?(Intel(R) Secure Hash Algorithm Extensions) */
291 mask = SSE42_MSK; /* or new Intel Atom(R) processor formerly codenamed Silvermont */
292 *index = LIB_SSE42;
293 } else
294 if( ippCPUID_SSE41 == ( *cpuFeatures & ippCPUID_SSE41 )){ /* Intel(R) architecture formerly codenamed Penryn ia32=P8, x64=Y8 */
295 mask = SSE41_MSK;
296 *index = LIB_SSE41;
297 } else
298 if( ippCPUID_MOVBE == ( *cpuFeatures & ippCPUID_MOVBE )) { /* Intel Atom(R) processor formerly codenamed Silverthorne ia32=S8, x64=N8 */
299 mask = ATOM_MSK;
300 *index = LIB_ATOM;
301 } else
302 if( ippCPUID_SSSE3 == ( *cpuFeatures & ippCPUID_SSSE3 )) { /* Intel(R) architecture formerly codenamed Merom ia32=V8, x64=U8 (letters etymology is unknown) */
303 mask = SSSE3_MSK;
304 *index = LIB_SSSE3;
305 } else
306 if( ippCPUID_SSE3 == ( *cpuFeatures & ippCPUID_SSE3 )) { /* Intel(R) architecture formerly codenamed Prescott ia32=W7, x64=M7 */
307 mask = SSE3_MSK;
308 *index = LIB_SSE3;
309 } else
310 if( ippCPUID_SSE2 == ( *cpuFeatures & ippCPUID_SSE2 )) { /* Intel(R) architecture formerly codenamed Willamette ia32=W7, x64=PX */
311 mask = SSE2_MSK;
312 *index = LIB_SSE2;
313 } else
314 if( ippCPUID_SSE == ( *cpuFeatures & ippCPUID_SSE )) { /* Intel(R) Pentium(R) processor III ia32=PX only */
315 mask = SSE_MSK;
316 *index = LIB_SSE;
317 #if (defined( _WIN32E ) || defined( linux32e ) || defined( OSXEM64T )) && !(defined( _ARCH_LRB2 ))
318 ownStatus = ippStsNotSupportedCpu; /* the lowest CPU supported by Intel(R) Integrated Performance Primitives (Intel(R) IPP) must at least support Intel(R) SSE2 for x64 */
319 #endif
320 } else
321 if( ippCPUID_MMX >= ( *cpuFeatures & ippCPUID_MMX )) { /* not supported, PX dispatched */
322 mask = MMX_MSK;
323 *index = LIB_MMX;
324 ownStatus = ippStsNotSupportedCpu; /* the lowest CPU supported by Intel(R) IPP must at least support Intel(R) SSE for ia32 or Intel(R) SSE2 for x64 */
325 }
326 #if defined ( _IPP_QUARK)
327 else {
328 mask = PX_MSK;
329 *index = LIB_PX;
330 ownStatus = ippStsNoErr; /* the lowest CPU supported by Intel(R) IPP must at least support Intel(R) SSE for ia32 or Intel(R) SSE2 for x64 */
331 }
332 #endif
333
334 if(( mask != ( *cpuFeatures & mask ))&&( ownStatus == ippStsNoErr ))
335 ownStatus = ippStsFeaturesCombination; /* warning if combination of features is incomplete */
336 *cpuFeatures |= mask;
337 return ownStatus;
338 }
339
340 #ifdef _PCS
341
342 extern IppStatus (IPP_STDCALL *pcpSetCpuFeatures)( Ipp64u cpuFeatures );
343 extern IppStatus (IPP_STDCALL *pcpSetNumThreads)( int numThr );
344 extern IppStatus (IPP_STDCALL *pcpGetNumThreads)( int* pNumThr );
345
346 IPPFUN( IppStatus, ippcpSetNumThreads, ( int numThr ))
347 {
348 IppStatus status = ippStsNoErr;
349
350 if (pcpSetNumThreads != 0)
351 {
352 status = pcpSetNumThreads(numThr);
353 if (status == ippStsNoErr)
354 {
355 cpthreads_omp_of_n_ipp = numThr;
356 }
357 }
358 return status;
359 }
360
361 IPPFUN( IppStatus, ippcpGetNumThreads, (int* pNumThr) )
362 {
363 IppStatus status = ippStsNoErr;
364
365 IPP_BAD_PTR1_RET( pNumThr )
366
367 if (pcpGetNumThreads != 0)
368 {
369 status = pcpGetNumThreads(pNumThr);
370 }
371 return status;
372 }
373 #else
374
375
376 IPPFUN( IppStatus, ippcpSetNumThreads, ( int numThr ))
377 {
378 IppStatus status = ippStsNoErr;
379 #if defined( _OPENMP )
380 IPP_BAD_SIZE_RET( numThr )
381 cpthreads_omp_of_n_ipp = numThr;
382 status = ippStsNoErr;
383 #else
384 UNREFERENCED_PARAMETER(numThr);
385 status = ippStsNoOperation;
386 #endif
387 return status;
388 }
389
390 IPPFUN( IppStatus, ippcpGetNumThreads, (int* pNumThr) )
391 {
392 IppStatus status = ippStsNoErr;
393 IPP_BAD_PTR1_RET( pNumThr )
394
395 #if defined( _OPENMP )
396 *pNumThr = cpthreads_omp_of_n_ipp;
397 status = ippStsNoErr;
398 #else
399 *pNumThr = 1;
400 status = ippStsNoOperation;
401 #endif
402 return status;
403 }
404
405 #endif /* #ifdef _PCS */
406
407 #ifdef _IPP_DYNAMIC
408
409 typedef IppStatus (IPP_STDCALL *DYN_RELOAD)( int );
410 static DYN_RELOAD IppDispatcher; /* ippCP only */
411 static int currentCpu = -1; /* control for disabling the same DLL re-loading */
412
/*
// Name:       owncpRegisterLib
// Purpose:    registers the reload callback and clears the backend pointers
// Parameters:
//    reload   the DynReload() function (defined in ippmain.gen; each domain
//             has its own DynReload())
*/
void owncpRegisterLib( DYN_RELOAD reload )
{
    IppDispatcher = reload;

    /* backend entry points start out unset */
    pcpGetNumThreads  = 0;
    pcpSetNumThreads  = 0;
    pcpSetCpuFeatures = 0;
}
422
owncpUnregisterLib(void)423 void owncpUnregisterLib( void )
424 {
425 IppDispatcher = 0;
426 currentCpu = -1;
427
428 pcpSetCpuFeatures = 0;
429 pcpSetNumThreads = 0;
430 pcpGetNumThreads = 0;
431
432 return;
433 }
434
435 IPPFUN( IppStatus, ippcpSetCpuFeatures,( Ipp64u cpuFeatures ))
436 {
437 IppStatus status, ownStatus;
438 int index = 0;
439
440 ownStatus = owncpSetCpuFeaturesAndIdx( cpuFeatures, &index );
441 if(( IppDispatcher )&&( currentCpu != index )) {
442 status = IppDispatcher( index );
443 currentCpu = index;
444 } else
445 status = ippStsNoErr;
446
447 #ifdef _PCS
448 if (pcpSetCpuFeatures != 0 && status >= ippStsNoErr)
449 {
450 /* Pass down features to Waterfall dll */
451 status = pcpSetCpuFeatures(cpuFeatures);
452 }
453 if (pcpSetNumThreads != 0 && status >= ippStsNoErr)
454 {
455 /* Pass down features to Waterfall dll */
456 status = pcpSetNumThreads(cpthreads_omp_of_n_ipp);
457 }
458 #endif
459
460 if( status != ippStsNoErr && status != ippStsNoOperation)
461 return status;
462 else
463 return ownStatus;
464 }
465
466 IPPFUN( IppStatus, ippcpInit,( void ))
467 {
468 int index = 0;
469 IppStatus status, statusf, statusi;
470 Ipp64u cpuFeatures;
471
472 statusf = ippcpGetCpuFeatures( &cpuFeatures );
473 statusi = owncpSetCpuFeaturesAndIdx( cpuFeatures, &index ); /* ownSetFeatures instead of ippSetFeatures because need unconditional initialization, */
474 if( IppDispatcher ) status = IppDispatcher( index ); /* call DynReload() function for each domain */
475 else status = ippStsNoErr;
476 currentCpu = index;
477 if( ippStsNoErr != statusf ) return statusf;
478 if( ippStsNoErr != statusi ) return statusi;
479 if( ippStsNoErr != status ) return status;
480 return ippStsNoErr;
481 }
482
483
484 #else /* _IPP_DYNAMIC */
485
486 IPPFUN( IppStatus, ippcpInit,( void ))
487 {
488 Ipp64u cpuFeatures;
489
490 #if defined( _OPENMP )
491 ippcpSetNumThreads( IPP_MIN( omp_get_num_procs(), omp_get_max_threads()));
492 #endif
493 ippcpGetCpuFeatures( &cpuFeatures );
494 return ippcpSetCpuFeatures( cpuFeatures );
495 }
496
497
498 IPPFUN( IppStatus, ippcpSetCpuFeatures,( Ipp64u cpuFeatures ))
499 {
500 IppStatus ownStatus;
501 int index = 0;
502
503 #if defined( _OPENMP )
504 ippcpSetNumThreads( IPP_MIN( omp_get_num_procs(), omp_get_max_threads()));
505 #endif
506 ownStatus = owncpSetCpuFeaturesAndIdx( cpuFeatures, &index );
507 ippcpJumpIndexForMergedLibs = index;
508 cpFeaturesMask = cpuFeatures;
509 return ownStatus;
510 }
511
512 #endif
513
owncpSetCpuFeaturesAndIdx(Ipp64u cpuFeatures,int * index)514 IppStatus owncpSetCpuFeaturesAndIdx( Ipp64u cpuFeatures, int* index )
515 {
516 Ipp64u tmp;
517 IppStatus tmpStatus;
518 *index = 0;
519
520 if( ippCPUID_NOCHECK & cpuFeatures ){
521 // if NOCHECK is set - static variable cpFeatures is initialized unconditionally and real CPU features from CPUID are ignored;
522 // the one who uses this method of initialization must understand what and why it does and the possible unpredictable consequences.
523 // the only one known purpose for this approach - environments where CPUID instruction is disabled (for example Intel(R) Software Guard Extensions).
524 cpuFeatures &= ( IPP_MAX_64U ^ ippCPUID_NOCHECK );
525 cpFeatures = cpuFeatures;
526 } else
527 // if( 0 == cpFeatures ) //do cpFeatures restore unconditionally - to protect from possible previous NOCHECK
528 {
529 // if library has not been initialized yet
530 cpGetFeatures( &tmp );
531 }
532 tmpStatus = owncpFeaturesToIdx( &cpuFeatures, index );
533 cpFeaturesMask = cpuFeatures;
534
535 return tmpStatus;
536 }
537
538 static struct {
539 int sts;
540 const char *msg;
541 } ippcpMsg[] = {
542 /* ippStatus */
543 /* -9999 */ ippStsCpuNotSupportedErr, "ippStsCpuNotSupportedErr: The target CPU is not supported",
544 /* -9702 */ MSG_NO_SHARED, "No shared libraries were found in the Waterfall procedure",
545 /* -9701 */ MSG_NO_DLL, "No DLLs were found in the Waterfall procedure",
546 /* -9700 */ MSG_LOAD_DLL_ERR, "Error at loading of %s library",
547 /* -1016 */ ippStsQuadraticNonResidueErr, "ippStsQuadraticNonResidueErr: SQRT operation on quadratic non-residue value",
548 /* -1015 */ ippStsPointAtInfinity, "ippStsPointAtInfinity: Point at infinity is detected",
549 /* -1014 */ ippStsOFBSizeErr, "ippStsOFBSizeErr: Incorrect value for crypto OFB block size",
550 /* -1013 */ ippStsIncompleteContextErr, "ippStsIncompleteContextErr: Crypto: set up of context is not complete",
551 /* -1012 */ ippStsCTRSizeErr, "ippStsCTRSizeErr: Incorrect value for crypto CTR block size",
552 /* -1011 */ ippStsEphemeralKeyErr, "ippStsEphemeralKeyErr: ECC: Invalid ephemeral key",
553 /* -1010 */ ippStsMessageErr, "ippStsMessageErr: ECC: Invalid message digest",
554 /* -1009 */ ippStsShareKeyErr, "ippStsShareKeyErr: ECC: Invalid share key",
555 /* -1008 */ ippStsIvalidPrivateKey, "ippStsIvalidPrivateKey ECC: Invalid private key",
556 /* -1007 */ ippStsOutOfECErr, "ippStsOutOfECErr: ECC: Point out of EC",
557 /* -1006 */ ippStsECCInvalidFlagErr, "ippStsECCInvalidFlagErr: ECC: Invalid Flag",
558 /* -1005 */ ippStsUnderRunErr, "ippStsUnderRunErr: Error in data under run",
559 /* -1004 */ ippStsPaddingErr, "ippStsPaddingErr: Detected padding error indicates the possible data corruption",
560 /* -1003 */ ippStsCFBSizeErr, "ippStsCFBSizeErr: Incorrect value for crypto CFB block size",
561 /* -1002 */ ippStsPaddingSchemeErr, "ippStsPaddingSchemeErr: Invalid padding scheme",
562 /* -1001 */ ippStsBadModulusErr, "ippStsBadModulusErr: Bad modulus caused a failure in module inversion",
563 /* -216 */ ippStsUnknownStatusCodeErr, "ippStsUnknownStatusCodeErr: Unknown status code",
564 /* -221 */ ippStsLoadDynErr, "ippStsLoadDynErr: Error when loading the dynamic library",
565 /* -15 */ ippStsLengthErr, "ippStsLengthErr: Incorrect value for string length",
566 /* -14 */ ippStsNotSupportedModeErr, "ippStsNotSupportedModeErr: The requested mode is currently not supported",
567 /* -13 */ ippStsContextMatchErr, "ippStsContextMatchErr: Context parameter does not match the operation",
568 /* -12 */ ippStsScaleRangeErr, "ippStsScaleRangeErr: Scale bounds are out of range",
569 /* -11 */ ippStsOutOfRangeErr, "ippStsOutOfRangeErr: Argument is out of range, or point is outside the image",
570 /* -10 */ ippStsDivByZeroErr, "ippStsDivByZeroErr: An attempt to divide by zero",
571 /* -9 */ ippStsMemAllocErr, "ippStsMemAllocErr: Memory allocated for the operation is not enough",
572 /* -8 */ ippStsNullPtrErr, "ippStsNullPtrErr: Null pointer error",
573 /* -7 */ ippStsRangeErr, "ippStsRangeErr: Incorrect values for bounds: the lower bound is greater than the upper bound",
574 /* -6 */ ippStsSizeErr, "ippStsSizeErr: Incorrect value for data size",
575 /* -5 */ ippStsBadArgErr, "ippStsBadArgErr: Incorrect arg/param of the function",
576 /* -4 */ ippStsNoMemErr, "ippStsNoMemErr: Not enough memory for the operation",
577 /* -2 */ ippStsErr, "ippStsErr: Unknown/unspecified error, -2",
578 /* 0 */ ippStsNoErr, "ippStsNoErr: No errors",
579 /* 1 */ ippStsNoOperation, "ippStsNoOperation: No operation has been executed",
580 /* 2 */ ippStsDivByZero, "ippStsDivByZero: Zero value(s) for the divisor in the Div function",
581 /* 25 */ ippStsInsufficientEntropy, "ippStsInsufficientEntropy: Generation of the prime/key failed due to insufficient entropy in the random seed and stimulus bit string",
582 /* 36 */ ippStsNotSupportedCpu, "The CPU is not supported",
583 /* 36 */ ippStsFeaturesCombination, "Wrong combination of features",
584 };
585
586 /* /////////////////////////////////////////////////////////////////////////////
587 // Name: ippcpGetStatusString
588 // Purpose: transformation of a code of a status Intel(R) IPP to string
589 // Returns:
590 // Parameters:
591 // StsCode Intel(R) IPP status code
592 //
593 // Notes: not necessary to release the returned string
594 */
595 IPPFUN( const char*, ippcpGetStatusString, ( IppStatus StsCode ) )
596 {
597 unsigned int i;
598 for( i=0; i<IPP_COUNT_OF( ippcpMsg ); i++ ) {
599 if( StsCode == ippcpMsg[i].sts ) {
600 return ippcpMsg[i].msg;
601 }
602 }
603 return ippcpGetStatusString( ippStsUnknownStatusCodeErr );
604 }
605
606 extern Ipp64u IPP_CDECL cp_get_pentium_counter (void);
607
608 /* /////////////////////////////////////////////////////////////////////////////
609 // Name: ippcpGetCpuClocks
610 // Purpose: time stamp counter (TSC) register reading
611 // Returns: TSC value
612 //
613 // Note: An hardware exception is possible if TSC reading is not supported by
614 // the current chipset
615 */
616 IPPFUN( Ipp64u, ippcpGetCpuClocks, (void) )
617 {
618 return (Ipp64u)cp_get_pentium_counter();
619 }
620
621 #endif /* _IPP_DATA */
622