1 /**************************************************************************** 2 * 3 * ftcalc.h 4 * 5 * Arithmetic computations (specification). 6 * 7 * Copyright (C) 1996-2021 by 8 * David Turner, Robert Wilhelm, and Werner Lemberg. 9 * 10 * This file is part of the FreeType project, and may only be used, 11 * modified, and distributed under the terms of the FreeType project 12 * license, LICENSE.TXT. By continuing to use, modify, or distribute 13 * this file you indicate that you have read the license and 14 * understand and accept it fully. 15 * 16 */ 17 18 19 #ifndef FTCALC_H_ 20 #define FTCALC_H_ 21 22 23 #include <freetype/freetype.h> 24 25 #include "compiler-macros.h" 26 27 FT_BEGIN_HEADER 28 29 30 /************************************************************************** 31 * 32 * FT_MulDiv() and FT_MulFix() are declared in freetype.h. 33 * 34 */ 35 36 #ifndef FT_CONFIG_OPTION_NO_ASSEMBLER 37 /* Provide assembler fragments for performance-critical functions. */ 38 /* These must be defined `static __inline__' with GCC. */ 39 40 #if defined( __CC_ARM ) || defined( __ARMCC__ ) /* RVCT */ 41 42 #define FT_MULFIX_ASSEMBLER FT_MulFix_arm 43 44 /* documentation is in freetype.h */ 45 46 static __inline FT_Int32 FT_MulFix_arm(FT_Int32 a,FT_Int32 b)47 FT_MulFix_arm( FT_Int32 a, 48 FT_Int32 b ) 49 { 50 FT_Int32 t, t2; 51 52 53 __asm 54 { 55 smull t2, t, b, a /* (lo=t2,hi=t) = a*b */ 56 mov a, t, asr #31 /* a = (hi >> 31) */ 57 add a, a, #0x8000 /* a += 0x8000 */ 58 adds t2, t2, a /* t2 += a */ 59 adc t, t, #0 /* t += carry */ 60 mov a, t2, lsr #16 /* a = t2 >> 16 */ 61 orr a, a, t, lsl #16 /* a |= t << 16 */ 62 } 63 return a; 64 } 65 66 #endif /* __CC_ARM || __ARMCC__ */ 67 68 69 #ifdef __GNUC__ 70 71 #if defined( __arm__ ) && \ 72 ( !defined( __thumb__ ) || defined( __thumb2__ ) ) && \ 73 !( defined( __CC_ARM ) || defined( __ARMCC__ ) ) 74 75 #define FT_MULFIX_ASSEMBLER FT_MulFix_arm 76 77 /* documentation is in freetype.h */ 78 79 static __inline__ FT_Int32 FT_MulFix_arm(FT_Int32 a,FT_Int32 b)80 FT_MulFix_arm( FT_Int32 a, 81 FT_Int32 b ) 82 { 83 FT_Int32 t, t2; 84 85 86 __asm__ __volatile__ ( 87 "smull %1, %2, %4, %3\n\t" /* (lo=%1,hi=%2) = a*b */ 88 "mov %0, %2, asr #31\n\t" /* %0 = (hi >> 31) */ 89 #if defined( __clang__ ) && defined( __thumb2__ ) 90 "add.w %0, %0, #0x8000\n\t" /* %0 += 0x8000 */ 91 #else 92 "add %0, %0, #0x8000\n\t" /* %0 += 0x8000 */ 93 #endif 94 "adds %1, %1, %0\n\t" /* %1 += %0 */ 95 "adc %2, %2, #0\n\t" /* %2 += carry */ 96 "mov %0, %1, lsr #16\n\t" /* %0 = %1 >> 16 */ 97 "orr %0, %0, %2, lsl #16\n\t" /* %0 |= %2 << 16 */ 98 : "=r"(a), "=&r"(t2), "=&r"(t) 99 : "r"(a), "r"(b) 100 : "cc" ); 101 return a; 102 } 103 104 #endif /* __arm__ && */ 105 /* ( __thumb2__ || !__thumb__ ) && */ 106 /* !( __CC_ARM || __ARMCC__ ) */ 107 108 109 #if defined( __i386__ ) 110 111 #define FT_MULFIX_ASSEMBLER FT_MulFix_i386 112 113 /* documentation is in freetype.h */ 114 115 static __inline__ FT_Int32 FT_MulFix_i386(FT_Int32 a,FT_Int32 b)116 FT_MulFix_i386( FT_Int32 a, 117 FT_Int32 b ) 118 { 119 FT_Int32 result; 120 121 122 __asm__ __volatile__ ( 123 "imul %%edx\n" 124 "movl %%edx, %%ecx\n" 125 "sarl $31, %%ecx\n" 126 "addl $0x8000, %%ecx\n" 127 "addl %%ecx, %%eax\n" 128 "adcl $0, %%edx\n" 129 "shrl $16, %%eax\n" 130 "shll $16, %%edx\n" 131 "addl %%edx, %%eax\n" 132 : "=a"(result), "=d"(b) 133 : "a"(a), "d"(b) 134 : "%ecx", "cc" ); 135 return result; 136 } 137 138 #endif /* i386 */ 139 140 #endif /* __GNUC__ */ 141 142 143 #ifdef _MSC_VER /* Visual C++ */ 144 145 #ifdef _M_IX86 146 147 #define FT_MULFIX_ASSEMBLER FT_MulFix_i386 148 149 /* documentation is in freetype.h */ 150 151 static __inline FT_Int32 FT_MulFix_i386(FT_Int32 a,FT_Int32 b)152 FT_MulFix_i386( FT_Int32 a, 153 FT_Int32 b ) 154 { 155 FT_Int32 result; 156 157 __asm 158 { 159 mov eax, a 160 mov edx, b 161 imul edx 162 mov ecx, edx 163 sar ecx, 31 164 add ecx, 8000h 165 add eax, ecx 166 adc edx, 0 167 shr eax, 16 168 shl edx, 16 169 add eax, edx 170 mov result, eax 171 } 172 return result; 173 } 174 175 #endif /* _M_IX86 */ 176 177 #endif /* _MSC_VER */ 178 179 180 #if defined( __GNUC__ ) && defined( __x86_64__ ) 181 182 #define FT_MULFIX_ASSEMBLER FT_MulFix_x86_64 183 184 static __inline__ FT_Int32 FT_MulFix_x86_64(FT_Int32 a,FT_Int32 b)185 FT_MulFix_x86_64( FT_Int32 a, 186 FT_Int32 b ) 187 { 188 /* Temporarily disable the warning that C90 doesn't support */ 189 /* `long long'. */ 190 #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 6 ) 191 #pragma GCC diagnostic push 192 #pragma GCC diagnostic ignored "-Wlong-long" 193 #endif 194 195 #if 1 196 /* Technically not an assembly fragment, but GCC does a really good */ 197 /* job at inlining it and generating good machine code for it. */ 198 long long ret, tmp; 199 200 201 ret = (long long)a * b; 202 tmp = ret >> 63; 203 ret += 0x8000 + tmp; 204 205 return (FT_Int32)( ret >> 16 ); 206 #else 207 208 /* For some reason, GCC 4.6 on Ubuntu 12.04 generates invalid machine */ 209 /* code from the lines below. The main issue is that `wide_a' is not */ 210 /* properly initialized by sign-extending `a'. Instead, the generated */ 211 /* machine code assumes that the register that contains `a' on input */ 212 /* can be used directly as a 64-bit value, which is wrong most of the */ 213 /* time. */ 214 long long wide_a = (long long)a; 215 long long wide_b = (long long)b; 216 long long result; 217 218 219 __asm__ __volatile__ ( 220 "imul %2, %1\n" 221 "mov %1, %0\n" 222 "sar $63, %0\n" 223 "lea 0x8000(%1, %0), %0\n" 224 "sar $16, %0\n" 225 : "=&r"(result), "=&r"(wide_a) 226 : "r"(wide_b) 227 : "cc" ); 228 229 return (FT_Int32)result; 230 #endif 231 232 #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 6 ) 233 #pragma GCC diagnostic pop 234 #endif 235 } 236 237 #endif /* __GNUC__ && __x86_64__ */ 238 239 #endif /* !FT_CONFIG_OPTION_NO_ASSEMBLER */ 240 241 242 #ifdef FT_CONFIG_OPTION_INLINE_MULFIX 243 #ifdef FT_MULFIX_ASSEMBLER 244 #define FT_MulFix( a, b ) FT_MULFIX_ASSEMBLER( (FT_Int32)(a), (FT_Int32)(b) ) 245 #endif 246 #endif 247 248 249 /************************************************************************** 250 * 251 * @function: 252 * FT_MulDiv_No_Round 253 * 254 * @description: 255 * A very simple function used to perform the computation '(a*b)/c' 256 * (without rounding) with maximum accuracy (it uses a 64-bit 257 * intermediate integer whenever necessary). 258 * 259 * This function isn't necessarily as fast as some processor-specific 260 * operations, but is at least completely portable. 261 * 262 * @input: 263 * a :: 264 * The first multiplier. 265 * b :: 266 * The second multiplier. 267 * c :: 268 * The divisor. 269 * 270 * @return: 271 * The result of '(a*b)/c'. This function never traps when trying to 272 * divide by zero; it simply returns 'MaxInt' or 'MinInt' depending on 273 * the signs of 'a' and 'b'. 274 */ 275 FT_BASE( FT_Long ) 276 FT_MulDiv_No_Round( FT_Long a, 277 FT_Long b, 278 FT_Long c ); 279 280 281 /* 282 * A variant of FT_Matrix_Multiply which scales its result afterwards. The 283 * idea is that both `a' and `b' are scaled by factors of 10 so that the 284 * values are as precise as possible to get a correct result during the 285 * 64bit multiplication. Let `sa' and `sb' be the scaling factors of `a' 286 * and `b', respectively, then the scaling factor of the result is `sa*sb'. 287 */ 288 FT_BASE( void ) 289 FT_Matrix_Multiply_Scaled( const FT_Matrix* a, 290 FT_Matrix *b, 291 FT_Long scaling ); 292 293 294 /* 295 * Check a matrix. If the transformation would lead to extreme shear or 296 * extreme scaling, for example, return 0. If everything is OK, return 1. 297 * 298 * Based on geometric considerations we use the following inequality to 299 * identify a degenerate matrix. 300 * 301 * 50 * abs(xx*yy - xy*yx) < xx^2 + xy^2 + yx^2 + yy^2 302 * 303 * Value 50 is heuristic. 304 */ 305 FT_BASE( FT_Bool ) 306 FT_Matrix_Check( const FT_Matrix* matrix ); 307 308 309 /* 310 * A variant of FT_Vector_Transform. See comments for 311 * FT_Matrix_Multiply_Scaled. 312 */ 313 FT_BASE( void ) 314 FT_Vector_Transform_Scaled( FT_Vector* vector, 315 const FT_Matrix* matrix, 316 FT_Long scaling ); 317 318 319 /* 320 * This function normalizes a vector and returns its original length. The 321 * normalized vector is a 16.16 fixed-point unit vector with length close 322 * to 0x10000. The accuracy of the returned length is limited to 16 bits 323 * also. The function utilizes quick inverse square root approximation 324 * without divisions and square roots relying on Newton's iterations 325 * instead. 326 */ 327 FT_BASE( FT_UInt32 ) 328 FT_Vector_NormLen( FT_Vector* vector ); 329 330 331 /* 332 * Return -1, 0, or +1, depending on the orientation of a given corner. We 333 * use the Cartesian coordinate system, with positive vertical values going 334 * upwards. The function returns +1 if the corner turns to the left, -1 to 335 * the right, and 0 for undecidable cases. 336 */ 337 FT_BASE( FT_Int ) 338 ft_corner_orientation( FT_Pos in_x, 339 FT_Pos in_y, 340 FT_Pos out_x, 341 FT_Pos out_y ); 342 343 344 /* 345 * Return TRUE if a corner is flat or nearly flat. This is equivalent to 346 * saying that the corner point is close to its neighbors, or inside an 347 * ellipse defined by the neighbor focal points to be more precise. 348 */ 349 FT_BASE( FT_Int ) 350 ft_corner_is_flat( FT_Pos in_x, 351 FT_Pos in_y, 352 FT_Pos out_x, 353 FT_Pos out_y ); 354 355 356 /* 357 * Return the most significant bit index. 358 */ 359 360 #ifndef FT_CONFIG_OPTION_NO_ASSEMBLER 361 362 #if defined( __clang__ ) || ( defined( __GNUC__ ) && \ 363 ( __GNUC__ > 3 || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4 ) ) ) 364 365 #if FT_SIZEOF_INT == 4 366 367 #define FT_MSB( x ) ( 31 - __builtin_clz( x ) ) 368 369 #elif FT_SIZEOF_LONG == 4 370 371 #define FT_MSB( x ) ( 31 - __builtin_clzl( x ) ) 372 373 #endif 374 375 #elif defined( _MSC_VER ) && _MSC_VER >= 1400 376 377 #if defined( _WIN32_WCE ) 378 379 #include <cmnintrin.h> 380 #pragma intrinsic( _CountLeadingZeros ) 381 382 #define FT_MSB( x ) ( 31 - _CountLeadingZeros( x ) ) 383 384 #elif defined( _M_ARM64 ) || defined( _M_ARM ) 385 386 #include <intrin.h> 387 #pragma intrinsic( _CountLeadingZeros ) 388 389 #define FT_MSB( x ) ( 31 - _CountLeadingZeros( x ) ) 390 391 #elif defined( _M_IX86 ) || defined( _M_AMD64 ) || defined( _M_IA64 ) 392 393 #include <intrin.h> 394 #pragma intrinsic( _BitScanReverse ) 395 396 static __inline FT_Int32 FT_MSB_i386(FT_UInt32 x)397 FT_MSB_i386( FT_UInt32 x ) 398 { 399 unsigned long where; 400 401 402 _BitScanReverse( &where, x ); 403 404 return (FT_Int32)where; 405 } 406 407 #define FT_MSB( x ) FT_MSB_i386( x ) 408 409 #endif 410 411 #elif defined( __DECC ) || defined( __DECCXX ) 412 413 #include <builtins.h> 414 415 #define FT_MSB( x ) (FT_Int)( 63 - _leadz( x ) ) 416 417 #elif defined( _CRAYC ) 418 419 #include <intrinsics.h> 420 421 #define FT_MSB( x ) (FT_Int)( 31 - _leadz32( x ) ) 422 423 #endif /* FT_MSB macro definitions */ 424 425 #endif /* !FT_CONFIG_OPTION_NO_ASSEMBLER */ 426 427 428 #ifndef FT_MSB 429 430 FT_BASE( FT_Int ) 431 FT_MSB( FT_UInt32 z ); 432 433 #endif 434 435 436 /* 437 * Return sqrt(x*x+y*y), which is the same as `FT_Vector_Length' but uses 438 * two fixed-point arguments instead. 439 */ 440 FT_BASE( FT_Fixed ) 441 FT_Hypot( FT_Fixed x, 442 FT_Fixed y ); 443 444 445 #if 0 446 447 /************************************************************************** 448 * 449 * @function: 450 * FT_SqrtFixed 451 * 452 * @description: 453 * Computes the square root of a 16.16 fixed-point value. 454 * 455 * @input: 456 * x :: 457 * The value to compute the root for. 458 * 459 * @return: 460 * The result of 'sqrt(x)'. 461 * 462 * @note: 463 * This function is not very fast. 464 */ 465 FT_BASE( FT_Int32 ) 466 FT_SqrtFixed( FT_Int32 x ); 467 468 #endif /* 0 */ 469 470 471 #define INT_TO_F26DOT6( x ) ( (FT_Long)(x) * 64 ) /* << 6 */ 472 #define INT_TO_F2DOT14( x ) ( (FT_Long)(x) * 16384 ) /* << 14 */ 473 #define INT_TO_FIXED( x ) ( (FT_Long)(x) * 65536 ) /* << 16 */ 474 #define F2DOT14_TO_FIXED( x ) ( (FT_Long)(x) * 4 ) /* << 2 */ 475 #define FIXED_TO_INT( x ) ( FT_RoundFix( x ) >> 16 ) 476 477 #define ROUND_F26DOT6( x ) ( ( (x) + 32 - ( x < 0 ) ) & -64 ) 478 479 /* 480 * The following macros have two purposes. 481 * 482 * - Tag places where overflow is expected and harmless. 483 * 484 * - Avoid run-time sanitizer errors. 485 * 486 * Use with care! 487 */ 488 #define ADD_INT( a, b ) \ 489 (FT_Int)( (FT_UInt)(a) + (FT_UInt)(b) ) 490 #define SUB_INT( a, b ) \ 491 (FT_Int)( (FT_UInt)(a) - (FT_UInt)(b) ) 492 #define MUL_INT( a, b ) \ 493 (FT_Int)( (FT_UInt)(a) * (FT_UInt)(b) ) 494 #define NEG_INT( a ) \ 495 (FT_Int)( (FT_UInt)0 - (FT_UInt)(a) ) 496 497 #define ADD_LONG( a, b ) \ 498 (FT_Long)( (FT_ULong)(a) + (FT_ULong)(b) ) 499 #define SUB_LONG( a, b ) \ 500 (FT_Long)( (FT_ULong)(a) - (FT_ULong)(b) ) 501 #define MUL_LONG( a, b ) \ 502 (FT_Long)( (FT_ULong)(a) * (FT_ULong)(b) ) 503 #define NEG_LONG( a ) \ 504 (FT_Long)( (FT_ULong)0 - (FT_ULong)(a) ) 505 506 #define ADD_INT32( a, b ) \ 507 (FT_Int32)( (FT_UInt32)(a) + (FT_UInt32)(b) ) 508 #define SUB_INT32( a, b ) \ 509 (FT_Int32)( (FT_UInt32)(a) - (FT_UInt32)(b) ) 510 #define MUL_INT32( a, b ) \ 511 (FT_Int32)( (FT_UInt32)(a) * (FT_UInt32)(b) ) 512 #define NEG_INT32( a ) \ 513 (FT_Int32)( (FT_UInt32)0 - (FT_UInt32)(a) ) 514 515 #ifdef FT_INT64 516 517 #define ADD_INT64( a, b ) \ 518 (FT_Int64)( (FT_UInt64)(a) + (FT_UInt64)(b) ) 519 #define SUB_INT64( a, b ) \ 520 (FT_Int64)( (FT_UInt64)(a) - (FT_UInt64)(b) ) 521 #define MUL_INT64( a, b ) \ 522 (FT_Int64)( (FT_UInt64)(a) * (FT_UInt64)(b) ) 523 #define NEG_INT64( a ) \ 524 (FT_Int64)( (FT_UInt64)0 - (FT_UInt64)(a) ) 525 526 #endif /* FT_INT64 */ 527 528 529 FT_END_HEADER 530 531 #endif /* FTCALC_H_ */ 532 533 534 /* END */ 535