1 /**************************************************************************** 2 * 3 * ftcalc.h 4 * 5 * Arithmetic computations (specification). 6 * 7 * Copyright 1996-2018 by 8 * David Turner, Robert Wilhelm, and Werner Lemberg. 9 * 10 * This file is part of the FreeType project, and may only be used, 11 * modified, and distributed under the terms of the FreeType project 12 * license, LICENSE.TXT. By continuing to use, modify, or distribute 13 * this file you indicate that you have read the license and 14 * understand and accept it fully. 15 * 16 */ 17 18 19 #ifndef FTCALC_H_ 20 #define FTCALC_H_ 21 22 23 #include <ft2build.h> 24 #include FT_FREETYPE_H 25 26 27 FT_BEGIN_HEADER 28 29 30 /************************************************************************** 31 * 32 * FT_MulDiv() and FT_MulFix() are declared in freetype.h. 33 * 34 */ 35 36 #ifndef FT_CONFIG_OPTION_NO_ASSEMBLER 37 /* Provide assembler fragments for performance-critical functions. */ 38 /* These must be defined `static __inline__' with GCC. */ 39 40 #if defined( __CC_ARM ) || defined( __ARMCC__ ) /* RVCT */ 41 42 #define FT_MULFIX_ASSEMBLER FT_MulFix_arm 43 44 /* documentation is in freetype.h */ 45 46 static __inline FT_Int32 FT_MulFix_arm(FT_Int32 a,FT_Int32 b)47 FT_MulFix_arm( FT_Int32 a, 48 FT_Int32 b ) 49 { 50 FT_Int32 t, t2; 51 52 53 __asm 54 { 55 smull t2, t, b, a /* (lo=t2,hi=t) = a*b */ 56 mov a, t, asr #31 /* a = (hi >> 31) */ 57 add a, a, #0x8000 /* a += 0x8000 */ 58 adds t2, t2, a /* t2 += a */ 59 adc t, t, #0 /* t += carry */ 60 mov a, t2, lsr #16 /* a = t2 >> 16 */ 61 orr a, a, t, lsl #16 /* a |= t << 16 */ 62 } 63 return a; 64 } 65 66 #endif /* __CC_ARM || __ARMCC__ */ 67 68 69 #ifdef __GNUC__ 70 71 #if defined( __arm__ ) && \ 72 ( !defined( __thumb__ ) || defined( __thumb2__ ) ) && \ 73 !( defined( __CC_ARM ) || defined( __ARMCC__ ) ) 74 75 #define FT_MULFIX_ASSEMBLER FT_MulFix_arm 76 77 /* documentation is in freetype.h */ 78 79 static __inline__ FT_Int32 FT_MulFix_arm(FT_Int32 a,FT_Int32 b)80 FT_MulFix_arm( FT_Int32 a, 81 FT_Int32 b ) 82 { 83 FT_Int32 t, t2; 84 85 86 __asm__ __volatile__ ( 87 "smull %1, %2, %4, %3\n\t" /* (lo=%1,hi=%2) = a*b */ 88 "mov %0, %2, asr #31\n\t" /* %0 = (hi >> 31) */ 89 #if defined( __clang__ ) && defined( __thumb2__ ) 90 "add.w %0, %0, #0x8000\n\t" /* %0 += 0x8000 */ 91 #else 92 "add %0, %0, #0x8000\n\t" /* %0 += 0x8000 */ 93 #endif 94 "adds %1, %1, %0\n\t" /* %1 += %0 */ 95 "adc %2, %2, #0\n\t" /* %2 += carry */ 96 "mov %0, %1, lsr #16\n\t" /* %0 = %1 >> 16 */ 97 "orr %0, %0, %2, lsl #16\n\t" /* %0 |= %2 << 16 */ 98 : "=r"(a), "=&r"(t2), "=&r"(t) 99 : "r"(a), "r"(b) 100 : "cc" ); 101 return a; 102 } 103 104 #endif /* __arm__ && */ 105 /* ( __thumb2__ || !__thumb__ ) && */ 106 /* !( __CC_ARM || __ARMCC__ ) */ 107 108 109 #if defined( __i386__ ) 110 111 #define FT_MULFIX_ASSEMBLER FT_MulFix_i386 112 113 /* documentation is in freetype.h */ 114 115 static __inline__ FT_Int32 FT_MulFix_i386(FT_Int32 a,FT_Int32 b)116 FT_MulFix_i386( FT_Int32 a, 117 FT_Int32 b ) 118 { 119 FT_Int32 result; 120 121 122 __asm__ __volatile__ ( 123 "imul %%edx\n" 124 "movl %%edx, %%ecx\n" 125 "sarl $31, %%ecx\n" 126 "addl $0x8000, %%ecx\n" 127 "addl %%ecx, %%eax\n" 128 "adcl $0, %%edx\n" 129 "shrl $16, %%eax\n" 130 "shll $16, %%edx\n" 131 "addl %%edx, %%eax\n" 132 : "=a"(result), "=d"(b) 133 : "a"(a), "d"(b) 134 : "%ecx", "cc" ); 135 return result; 136 } 137 138 #endif /* i386 */ 139 140 #endif /* __GNUC__ */ 141 142 143 #ifdef _MSC_VER /* Visual C++ */ 144 145 #ifdef _M_IX86 146 147 #define FT_MULFIX_ASSEMBLER FT_MulFix_i386 148 149 /* documentation is in freetype.h */ 150 151 static __inline FT_Int32 FT_MulFix_i386(FT_Int32 a,FT_Int32 b)152 FT_MulFix_i386( FT_Int32 a, 153 FT_Int32 b ) 154 { 155 FT_Int32 result; 156 157 __asm 158 { 159 mov eax, a 160 mov edx, b 161 imul edx 162 mov ecx, edx 163 sar ecx, 31 164 add ecx, 8000h 165 add eax, ecx 166 adc edx, 0 167 shr eax, 16 168 shl edx, 16 169 add eax, edx 170 mov result, eax 171 } 172 return result; 173 } 174 175 #endif /* _M_IX86 */ 176 177 #endif /* _MSC_VER */ 178 179 180 #if defined( __GNUC__ ) && defined( __x86_64__ ) 181 182 #define FT_MULFIX_ASSEMBLER FT_MulFix_x86_64 183 184 static __inline__ FT_Int32 FT_MulFix_x86_64(FT_Int32 a,FT_Int32 b)185 FT_MulFix_x86_64( FT_Int32 a, 186 FT_Int32 b ) 187 { 188 /* Temporarily disable the warning that C90 doesn't support */ 189 /* `long long'. */ 190 #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 6 ) 191 #pragma GCC diagnostic push 192 #pragma GCC diagnostic ignored "-Wlong-long" 193 #endif 194 195 #if 1 196 /* Technically not an assembly fragment, but GCC does a really good */ 197 /* job at inlining it and generating good machine code for it. */ 198 long long ret, tmp; 199 200 201 ret = (long long)a * b; 202 tmp = ret >> 63; 203 ret += 0x8000 + tmp; 204 205 return (FT_Int32)( ret >> 16 ); 206 #else 207 208 /* For some reason, GCC 4.6 on Ubuntu 12.04 generates invalid machine */ 209 /* code from the lines below. The main issue is that `wide_a' is not */ 210 /* properly initialized by sign-extending `a'. Instead, the generated */ 211 /* machine code assumes that the register that contains `a' on input */ 212 /* can be used directly as a 64-bit value, which is wrong most of the */ 213 /* time. */ 214 long long wide_a = (long long)a; 215 long long wide_b = (long long)b; 216 long long result; 217 218 219 __asm__ __volatile__ ( 220 "imul %2, %1\n" 221 "mov %1, %0\n" 222 "sar $63, %0\n" 223 "lea 0x8000(%1, %0), %0\n" 224 "sar $16, %0\n" 225 : "=&r"(result), "=&r"(wide_a) 226 : "r"(wide_b) 227 : "cc" ); 228 229 return (FT_Int32)result; 230 #endif 231 232 #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 6 ) 233 #pragma GCC diagnostic pop 234 #endif 235 } 236 237 #endif /* __GNUC__ && __x86_64__ */ 238 239 #endif /* !FT_CONFIG_OPTION_NO_ASSEMBLER */ 240 241 242 #ifdef FT_CONFIG_OPTION_INLINE_MULFIX 243 #ifdef FT_MULFIX_ASSEMBLER 244 #define FT_MulFix( a, b ) FT_MULFIX_ASSEMBLER( (FT_Int32)(a), (FT_Int32)(b) ) 245 #endif 246 #endif 247 248 249 /************************************************************************** 250 * 251 * @function: 252 * FT_MulDiv_No_Round 253 * 254 * @description: 255 * A very simple function used to perform the computation `(a*b)/c' 256 * (without rounding) with maximum accuracy (it uses a 64-bit 257 * intermediate integer whenever necessary). 258 * 259 * This function isn't necessarily as fast as some processor-specific 260 * operations, but is at least completely portable. 261 * 262 * @input: 263 * a :: 264 * The first multiplier. 265 * b :: 266 * The second multiplier. 267 * c :: 268 * The divisor. 269 * 270 * @return: 271 * The result of `(a*b)/c'. This function never traps when trying to 272 * divide by zero; it simply returns `MaxInt' or `MinInt' depending 273 * on the signs of `a' and `b'. 274 */ 275 FT_BASE( FT_Long ) 276 FT_MulDiv_No_Round( FT_Long a, 277 FT_Long b, 278 FT_Long c ); 279 280 281 /* 282 * A variant of FT_Matrix_Multiply which scales its result afterwards. 283 * The idea is that both `a' and `b' are scaled by factors of 10 so that 284 * the values are as precise as possible to get a correct result during 285 * the 64bit multiplication. Let `sa' and `sb' be the scaling factors of 286 * `a' and `b', respectively, then the scaling factor of the result is 287 * `sa*sb'. 288 */ 289 FT_BASE( void ) 290 FT_Matrix_Multiply_Scaled( const FT_Matrix* a, 291 FT_Matrix *b, 292 FT_Long scaling ); 293 294 295 /* 296 * Check a matrix. If the transformation would lead to extreme shear or 297 * extreme scaling, for example, return 0. If everything is OK, return 1. 298 * 299 * Based on geometric considerations we use the following inequality to 300 * identify a degenerate matrix. 301 * 302 * 50 * abs(xx*yy - xy*yx) < xx^2 + xy^2 + yx^2 + yy^2 303 * 304 * Value 50 is heuristic. 305 */ 306 FT_BASE( FT_Bool ) 307 FT_Matrix_Check( const FT_Matrix* matrix ); 308 309 310 /* 311 * A variant of FT_Vector_Transform. See comments for 312 * FT_Matrix_Multiply_Scaled. 313 */ 314 FT_BASE( void ) 315 FT_Vector_Transform_Scaled( FT_Vector* vector, 316 const FT_Matrix* matrix, 317 FT_Long scaling ); 318 319 320 /* 321 * This function normalizes a vector and returns its original length. 322 * The normalized vector is a 16.16 fixed-point unit vector with length 323 * close to 0x10000. The accuracy of the returned length is limited to 324 * 16 bits also. The function utilizes quick inverse square root 325 * approximation without divisions and square roots relying on Newton's 326 * iterations instead. 327 */ 328 FT_BASE( FT_UInt32 ) 329 FT_Vector_NormLen( FT_Vector* vector ); 330 331 332 /* 333 * Return -1, 0, or +1, depending on the orientation of a given corner. 334 * We use the Cartesian coordinate system, with positive vertical values 335 * going upwards. The function returns +1 if the corner turns to the 336 * left, -1 to the right, and 0 for undecidable cases. 337 */ 338 FT_BASE( FT_Int ) 339 ft_corner_orientation( FT_Pos in_x, 340 FT_Pos in_y, 341 FT_Pos out_x, 342 FT_Pos out_y ); 343 344 345 /* 346 * Return TRUE if a corner is flat or nearly flat. This is equivalent to 347 * saying that the corner point is close to its neighbors, or inside an 348 * ellipse defined by the neighbor focal points to be more precise. 349 */ 350 FT_BASE( FT_Int ) 351 ft_corner_is_flat( FT_Pos in_x, 352 FT_Pos in_y, 353 FT_Pos out_x, 354 FT_Pos out_y ); 355 356 357 /* 358 * Return the most significant bit index. 359 */ 360 361 #ifndef FT_CONFIG_OPTION_NO_ASSEMBLER 362 363 #if defined( __GNUC__ ) && \ 364 ( __GNUC__ > 3 || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4 ) ) 365 366 #if FT_SIZEOF_INT == 4 367 368 #define FT_MSB( x ) ( 31 - __builtin_clz( x ) ) 369 370 #elif FT_SIZEOF_LONG == 4 371 372 #define FT_MSB( x ) ( 31 - __builtin_clzl( x ) ) 373 374 #endif /* __GNUC__ */ 375 376 377 #elif defined( _MSC_VER ) && ( _MSC_VER >= 1400 ) 378 379 #if FT_SIZEOF_INT == 4 380 381 #include <intrin.h> 382 383 static __inline FT_Int32 FT_MSB_i386(FT_UInt32 x)384 FT_MSB_i386( FT_UInt32 x ) 385 { 386 unsigned long where; 387 388 389 /* not available in older VC versions */ 390 _BitScanReverse( &where, x ); 391 392 return (FT_Int32)where; 393 } 394 395 #define FT_MSB( x ) ( FT_MSB_i386( x ) ) 396 397 #endif 398 399 #endif /* _MSC_VER */ 400 401 402 #endif /* !FT_CONFIG_OPTION_NO_ASSEMBLER */ 403 404 #ifndef FT_MSB 405 406 FT_BASE( FT_Int ) 407 FT_MSB( FT_UInt32 z ); 408 409 #endif 410 411 412 /* 413 * Return sqrt(x*x+y*y), which is the same as `FT_Vector_Length' but uses 414 * two fixed-point arguments instead. 415 */ 416 FT_BASE( FT_Fixed ) 417 FT_Hypot( FT_Fixed x, 418 FT_Fixed y ); 419 420 421 #if 0 422 423 /************************************************************************** 424 * 425 * @function: 426 * FT_SqrtFixed 427 * 428 * @description: 429 * Computes the square root of a 16.16 fixed-point value. 430 * 431 * @input: 432 * x :: 433 * The value to compute the root for. 434 * 435 * @return: 436 * The result of `sqrt(x)'. 437 * 438 * @note: 439 * This function is not very fast. 440 */ 441 FT_BASE( FT_Int32 ) 442 FT_SqrtFixed( FT_Int32 x ); 443 444 #endif /* 0 */ 445 446 447 #define INT_TO_F26DOT6( x ) ( (FT_Long)(x) * 64 ) /* << 6 */ 448 #define INT_TO_F2DOT14( x ) ( (FT_Long)(x) * 16384 ) /* << 14 */ 449 #define INT_TO_FIXED( x ) ( (FT_Long)(x) * 65536 ) /* << 16 */ 450 #define F2DOT14_TO_FIXED( x ) ( (FT_Long)(x) * 4 ) /* << 2 */ 451 #define FIXED_TO_INT( x ) ( FT_RoundFix( x ) >> 16 ) 452 453 #define ROUND_F26DOT6( x ) ( x >= 0 ? ( ( (x) + 32 ) & -64 ) \ 454 : ( -( ( 32 - (x) ) & -64 ) ) ) 455 456 /* 457 * The following macros have two purposes. 458 * 459 * - Tag places where overflow is expected and harmless. 460 * 461 * - Avoid run-time sanitizer errors. 462 * 463 * Use with care! 464 */ 465 #define ADD_INT( a, b ) \ 466 (FT_Int)( (FT_UInt)(a) + (FT_UInt)(b) ) 467 #define SUB_INT( a, b ) \ 468 (FT_Int)( (FT_UInt)(a) - (FT_UInt)(b) ) 469 #define MUL_INT( a, b ) \ 470 (FT_Int)( (FT_UInt)(a) * (FT_UInt)(b) ) 471 #define NEG_INT( a ) \ 472 (FT_Int)( (FT_UInt)0 - (FT_UInt)(a) ) 473 474 #define ADD_LONG( a, b ) \ 475 (FT_Long)( (FT_ULong)(a) + (FT_ULong)(b) ) 476 #define SUB_LONG( a, b ) \ 477 (FT_Long)( (FT_ULong)(a) - (FT_ULong)(b) ) 478 #define MUL_LONG( a, b ) \ 479 (FT_Long)( (FT_ULong)(a) * (FT_ULong)(b) ) 480 #define NEG_LONG( a ) \ 481 (FT_Long)( (FT_ULong)0 - (FT_ULong)(a) ) 482 483 #define ADD_INT32( a, b ) \ 484 (FT_Int32)( (FT_UInt32)(a) + (FT_UInt32)(b) ) 485 #define SUB_INT32( a, b ) \ 486 (FT_Int32)( (FT_UInt32)(a) - (FT_UInt32)(b) ) 487 #define MUL_INT32( a, b ) \ 488 (FT_Int32)( (FT_UInt32)(a) * (FT_UInt32)(b) ) 489 #define NEG_INT32( a ) \ 490 (FT_Int32)( (FT_UInt32)0 - (FT_UInt32)(a) ) 491 492 493 FT_END_HEADER 494 495 #endif /* FTCALC_H_ */ 496 497 498 /* END */ 499