//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Double Precision Divide
//
// Entry point: __hexagon_divdf3
// Aliases:     __qdsp_divdf3, __hexagon_fast_divdf3, __hexagon_fast2_divdf3
//
//   In:    r1:0 = dividend (A), r3:2 = divisor (B), as 64-bit double patterns
//          (52 mantissa bits, 11 exponent bits, bias 0x3ff - see DF_* below)
//   Out:   r1:0 = quotient
//   Uses:  r0-r15, r28, p0-p3; USR is read, and sticky FP flags may be set
//   Exit:  jumpr r31 on every path
//
// Outline of the main path (both operands normal):
//   1. Save sign and exponents, turn both operands into integer significands.
//   2. Build a single-precision reciprocal estimate of the divisor
//      (sfrecipa followed by two refinement steps) and convert it to a
//      fixed-point integer RECIPEST.
//   3. Run four DIV_ITER1B steps; each one takes a partial quotient from
//      RECIPEST * REMHI, accumulates it into Q and updates the remainder.
//   4. One final compare/subtract corrects Q, the low bit of Q is set when
//      the remainder is nonzero, Q gets the result sign, is converted with
//      convert_d2df, and the exponent difference is added to the high word.
//   Exponent differences outside the safe window go to .Ldiv_ovf_unf;
//   zero / infinite / NaN / subnormal operands go to .Ldiv_abnormal.
//
// Reading notes: the instructions between { and } form one packet. This code
// depends on every instruction in a packet seeing the register values from
// before the packet (for example "QL = QH" next to "QH ^= ..."), and on two
// compares that write the same predicate in one packet being ANDed together
// (see the Hexagon Programmer's Reference Manual for both rules).
// Register names are preprocessor aliases that are #undef'ed and re-#define'd
// as the role of a register changes, so the same register has several names.

// Operands on entry
#define A r1:0
#define AH r1
#define AL r0

#define B r3:2
#define BH r3
#define BL r2

// Quotient accumulator (64-bit integer)
#define Q r5:4
#define QH r5
#define QL r4

// Scratch product / partial quotient
#define PROD r7:6
#define PRODHI r7
#define PRODLO r6

// Single-precision values used for the reciprocal estimate
#define SFONE r8
#define SFDEN r9
#define SFERROR r10
#define SFRECIP r11

// Exponent fields of B (high register) and A (low register)
#define EXPBA r13:12
#define EXPB r13
#define EXPA r12

#define REMSUB2 r15:14



// XOR of the two high words; its top bit is the sign of the result
#define SIGN r28

#define Q_POSITIVE p3
#define NORMAL p2
#define NO_OVF_UNF p1
#define P_TMP p0

#define RECIPEST_SHIFT 3
#define QADJ 61

// Class masks for dfclass
#define DFCLASS_NORMAL 0x02
#define DFCLASS_NUMBER 0x0F
#define DFCLASS_INFINITE 0x08
#define DFCLASS_ZERO 0x01
// NaN class bit: the one class bit that is not part of DFCLASS_NUMBER
#define DFCLASS_NAN 0x10
#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)

#define DF_MANTBITS 52
#define DF_EXPBITS 11
#define SF_MANTBITS 23
#define SF_EXPBITS 8
#define DF_BIAS 0x3ff

// Bit offset of the 2-bit rounding-mode field inside USR
#define SR_ROUND_OFF 22

// Sticky FP status masks that this routine ORs into USR by hand.
// Per the Hexagon USR layout: 0x04 = divide-by-zero, 0x08 = overflow,
// 0x10 = underflow, 0x20 = inexact.
#define USR_DBZ 0x04
#define USR_OVF_INEXACT 0x28
#define USR_UNF_INEXACT 0x30

#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG

    .text
    .global __hexagon_divdf3
    .type __hexagon_divdf3,@function
    Q6_ALIAS(divdf3)
    FAST_ALIAS(divdf3)
    FAST2_ALIAS(divdf3)
    .p2align 5
__hexagon_divdf3:
    {
        // NORMAL = (A is normal) AND (B is normal)
        NORMAL = dfclass(A,#DFCLASS_NORMAL)
        NORMAL = dfclass(B,#DFCLASS_NORMAL)
        // keep both high words (sign + exponent) for later exponent extraction
        EXPBA = combine(BH,AH)
        SIGN = xor(AH,BH)
    }
    // From here on r1:0 is the running remainder and r3:2 the denominator.
#undef A
#undef AH
#undef AL
#undef B
#undef BH
#undef BL
#define REM r1:0
#define REMHI r1
#define REMLO r0
#define DENOM r3:2
#define DENOMHI r3
#define DENOMLO r2
    {
        if (!NORMAL) jump .Ldiv_abnormal
        // top SF_MANTBITS bits of the divisor mantissa
        PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
        // 0x3f800000 is 1.0f; the extra low bit is masked off again
        // (and(SFONE,#-2)) wherever an exact 1.0f is needed
        SFONE = ##0x3f800001
    }
    {
        // single-precision value with exponent of 1.0f and the divisor's
        // leading mantissa bits
        SFDEN = or(SFONE,PRODLO)
        EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
        EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
        // sign bit of SIGN clear => quotient is positive
        Q_POSITIVE = cmp.gt(SIGN,#-1)
    }
#undef SIGN
#define ONE r28
    // .Ldiv_abnormal jumps back in here after normalizing subnormal operands.
    // Expected on entry: REM/DENOM hold the operands, EXPA/EXPB the exponents,
    // SFONE/SFDEN and Q_POSITIVE are set up.
.Ldenorm_continue:
    {
        // initial reciprocal approximation of SFDEN
        SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
        SFERROR = and(SFONE,#-2)
        ONE = #1
        // exponent of the quotient = exponent(A) - exponent(B)
        EXPA = sub(EXPA,EXPB)
    }
#undef EXPB
#define RECIPEST r13
    {
        // refinement step 1: error = 1.0 - recip * den
        SFERROR -= sfmpy(SFRECIP,SFDEN):lib
        // overwrite sign+exponent with 0x001: leaves the integer significand
        // with the hidden bit set
        REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
        // leading one of the fixed-point estimate; mantissa is inserted below
        RECIPEST = ##0x00800000 << RECIPEST_SHIFT
    }
    {
        // recip += recip * error
        SFRECIP += sfmpy(SFRECIP,SFERROR):lib
        DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
        SFERROR = and(SFONE,#-2)
    }
    {
        // refinement step 2: error = 1.0 - recip * den
        SFERROR -= sfmpy(SFRECIP,SFDEN):lib
        // bounds for the exponent-difference check in the next packet
        QH = #-DF_BIAS+1
        QL = #DF_BIAS-1
    }
    {
        SFRECIP += sfmpy(SFRECIP,SFERROR):lib
        // NO_OVF_UNF = (EXPA > -DF_BIAS+1) AND NOT (EXPA > DF_BIAS-1)
        NO_OVF_UNF = cmp.gt(EXPA,QH)
        NO_OVF_UNF = !cmp.gt(EXPA,QL)
    }
    {
        // place the refined mantissa below the leading one
        RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
        Q = #0
        // EXPA is later added to the exponent field of convert_d2df(Q);
        // QADJ is taken off here for that final addition
        EXPA = add(EXPA,#-QADJ)
    }
#undef SFERROR
#undef SFRECIP
#define TMP r10
#define TMP1 r11
    {
        // lower the estimate slightly
        // NOTE(review): presumably so a partial quotient never overshoots
        RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
    }

// One division step (three packets):
//   1. PROD = RECIPEST * REMHI (taken before the shift); REM <<= REMSHIFT
//   2. keep only PRODHI as the partial quotient; REM -= PRODHI * DENOMLO
//   3. Q += QSHIFTINSN(PROD, QSHIFT); REM -= (PRODHI * DENOMHI) << 32
// EXTRA is pasted into the last packet. "# ## x" produces "#x", since a
// bare '#' would be the stringify operator inside a macro body.
#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
    { \
        PROD = mpyu(RECIPEST,REMHI); \
        REM = asl(REM,# ## ( REMSHIFT )); \
    }; \
    { \
        PRODLO = # ## 0; \
        REM -= mpyu(PRODHI,DENOMLO); \
        REMSUB2 = mpyu(PRODHI,DENOMHI); \
    }; \
    { \
        Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
        REM -= asl(REMSUB2, # ## 32); \
        EXTRA \
    }


    // Partial quotients are added at successively lower positions in Q.
    // The last step also clears PROD for the correction below.
    DIV_ITER1B(ASL,14,15,)
    DIV_ITER1B(ASR,1,15,)
    DIV_ITER1B(ASR,16,15,)
    DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)

#undef REMSUB2
#define TMPPAIR r15:14
#define TMPPAIRHI r15
#define TMPPAIRLO r14
#undef RECIPEST
#define EXPB r13
    {
        // compare or sub with carry
        TMPPAIR = sub(REM,DENOM)
        P_TMP = cmp.gtu(DENOM,REM)
        // set up amt to add to q
        // (PROD is zero here, so PROD becomes 2 when REM >= DENOM)
        if (!P_TMP.new) PRODLO = #2
    }
    {
        Q = add(Q,PROD)
        if (!P_TMP) REM = TMPPAIR
        TMPPAIR = #0
    }
    {
        // nonzero remainder: set the low bit of Q
        P_TMP = cmp.eq(REM,TMPPAIR)
        if (!P_TMP.new) QL = or(QL,ONE)
    }
    {
        PROD = neg(Q)
    }
    {
        // give Q the sign of the result
        if (!Q_POSITIVE) Q = PROD
    }
#undef REM
#undef REMHI
#undef REMLO
#undef DENOM
#undef DENOMLO
#undef DENOMHI
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
    {
        // integer quotient -> double
        A = convert_d2df(Q)
        if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
    }
    {
        // add the adjusted exponent difference into the exponent field
        AH += asl(EXPA,#DF_MANTBITS-32)
        jumpr r31
    }

    // Exponent difference was outside the safe window: decide between
    // overflow, a normal result (possibly rounded up to min normal), and
    // underflow.
.Ldiv_ovf_unf:
    {
        AH += asl(EXPA,#DF_MANTBITS-32)
        // exponent field of the converted Q (AH as it was before this packet)
        EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
    }
    {
        PROD = abs(Q)
        // exponent the result would have with unlimited exponent range
        EXPA = add(EXPA,EXPB)
    }
    {
        P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS)        // overflow
        if (P_TMP.new) jump:nt .Ldiv_ovf
    }
    {
        P_TMP = cmp.gt(EXPA,#0)
        if (P_TMP.new) jump:nt .Lpossible_unf         // round up to normal possible...
    }
    // Underflow
    // We know what the infinite range exponent should be (EXPA)
    // Q is 2's complement, PROD is abs(Q)
    // Normalize Q, shift right, add a high bit, convert, change exponent

#define FUDGE1 7 // how much to shift right
#define FUDGE2 4 // how many guard/round to keep at lsbs

    {
        EXPB = add(clb(PROD),#-1)                     // doesn't need to be added in since
        EXPA = sub(#FUDGE1,EXPA)                      // we extract post-converted exponent
        TMP = USR
        TMP1 = #63
    }
    {
        // right-shift amount, capped at 63
        EXPB = min(EXPA,TMP1)
        // USR value with underflow + inexact set, used if bits are lost
        TMP1 = or(TMP,#USR_UNF_INEXACT)
        // normalize; uses the clb-based EXPB from the previous packet
        PROD = asl(PROD,EXPB)
        EXPA = #0
    }
    {
        TMPPAIR = extractu(PROD,EXPBA)                // bits that will get shifted out
        PROD = lsr(PROD,EXPB)                         // shift out bits
        B = #1
    }
    {
        // any shifted-out bit set: fold it into the low bit as a sticky bit
        P_TMP = cmp.gtu(B,TMPPAIR)
        if (!P_TMP.new) PRODLO = or(BL,PRODLO)
        // "add a high bit" from the plan above
        PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)
    }
    {
        Q = neg(PROD)
        // guard/round/sticky bits all clear: leave USR alone; otherwise
        // write back the copy with underflow + inexact set
        P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)
        if (!P_TMP.new) TMP = TMP1
    }
    {
        USR = TMP
        if (Q_POSITIVE) Q = PROD
        // exponent correction applied after the conversion below
        TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)
    }
    {
        A = convert_d2df(Q)
    }
    {
        AH += asl(TMP,#DF_MANTBITS-32)
        jumpr r31
    }


.Lpossible_unf:
    // If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal
    // The answer is correct, but we need to raise Underflow
    {
        B = extractu(A,#63,#0)                        // abs(A)
        TMPPAIR = combine(##0x00100000,#0)            // min normal
        TMP = #0x7FFF
    }
    {
        P_TMP = dfcmp.eq(TMPPAIR,B)                   // Is everything zero in the rounded value...
        P_TMP = bitsset(PRODHI,TMP)                   // but a bunch of bits set in the unrounded abs(quotient)?
    }

#if (__HEXAGON_ARCH__ == 60)
        TMP = USR                                     // If not, just return
        if (!P_TMP) jumpr r31                         // Else, we want to set Unf+Inexact
                                                      // Note that inexact is already set...
#else
    {
        if (!P_TMP) jumpr r31                         // If not, just return
        TMP = USR                                     // Else, we want to set Unf+Inexact
    }                                                 // Note that inexact is already set...
#endif
    {
        TMP = or(TMP,#USR_UNF_INEXACT)
    }
    {
        USR = TMP
    }
    {
        // NOTE(review): result unused; presumably same purpose as the
        // "get exceptions" compare in .Ldiv_ovf - confirm
        p0 = dfcmp.eq(A,A)
        jumpr r31
    }

.Ldiv_ovf:

    // Raise Overflow, and choose the correct overflow value (saturated normal or infinity)

    {
        TMP = USR
        B = combine(##0x7fefffff,#-1)                 // largest finite magnitude
        AH = mux(Q_POSITIVE,#0,#-1)                   // sign of the result in bit 31
    }
    {
        PROD = combine(##0x7ff00000,#0)               // infinity
        QH = extractu(TMP,#2,#SR_ROUND_OFF)           // rounding mode field
        TMP = or(TMP,#USR_OVF_INEXACT)
    }
    {
        USR = TMP
        // QH = rounding mode with its low bit flipped for negative results,
        // QL = the unmodified rounding mode (QH as it was before this packet)
        QH ^= lsr(AH,#31)
        QL = QH
    }
    {
        p0 = !cmp.eq(QL,#1)                           // if not round-to-zero
        p0 = !cmp.eq(QH,#2)                           // and not rounding the other way
        if (p0.new) B = PROD                          // go to inf
        p0 = dfcmp.eq(B,B)                            // get exceptions
    }
    {
        // keep the sign bit of A, take the magnitude from B
        A = insert(B,#63,#0)
        jumpr r31
    }

#undef ONE
#define SIGN r28
#undef NORMAL
#undef NO_OVF_UNF
#define P_INF p1
#define P_ZERO p2
    // At least one operand is not a normal number. Sort out NaN, inf/inf,
    // 0/0, zero results and infinite results; what is left involves a
    // subnormal operand, which is normalized and sent back to the main path.
.Ldiv_abnormal:
    {
        // P_TMP = both operands are numbers (neither is a NaN)
        P_TMP = dfclass(A,#DFCLASS_NUMBER)
        P_TMP = dfclass(B,#DFCLASS_NUMBER)
        Q_POSITIVE = cmp.gt(SIGN,#-1)
    }
    {
        // both infinite
        P_INF = dfclass(A,#DFCLASS_INFINITE)
        P_INF = dfclass(B,#DFCLASS_INFINITE)
    }
    {
        // both zero
        P_ZERO = dfclass(A,#DFCLASS_ZERO)
        P_ZERO = dfclass(B,#DFCLASS_ZERO)
    }
    {
        if (!P_TMP) jump .Ldiv_nan
        if (P_INF) jump .Ldiv_invalid                 // inf / inf
    }
    {
        if (P_ZERO) jump .Ldiv_invalid                // 0 / 0
    }
    {
        // false when A is zero or B is infinite: the result is zero
        P_ZERO = dfclass(A,#DFCLASS_NONZERO)          // nonzero
        P_ZERO = dfclass(B,#DFCLASS_NONINFINITE)      // non-infinite
    }
    {
        // false when A is infinite or B is zero: the result is infinite
        P_INF = dfclass(A,#DFCLASS_NONINFINITE)       // non-infinite
        P_INF = dfclass(B,#DFCLASS_NONZERO)           // nonzero
    }
    {
        if (!P_ZERO) jump .Ldiv_zero_result
        if (!P_INF) jump .Ldiv_inf_result
    }
    // Now we've narrowed it down to (de)normal / (de)normal
    // Set up A/EXPA B/EXPB and go back
#undef P_ZERO
#undef P_INF
#define P_TMP2 p1
    {
        P_TMP = dfclass(A,#DFCLASS_NORMAL)
        P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
        TMP = ##0x00100000                            // hidden-bit position in a high word
    }
    {
        EXPBA = combine(BH,AH)
        AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)  // clear out hidden bit, sign bit
        BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)  // clear out hidden bit, sign bit
    }
    {
        if (P_TMP) AH = or(AH,TMP)                    // if normal, add back in hidden bit
        if (P_TMP2) BH = or(BH,TMP)                   // if normal, add back in hidden bit
    }
    {
        // left shift that brings the leading one up to the hidden-bit position
        QH = add(clb(A),#-DF_EXPBITS)
        QL = add(clb(B),#-DF_EXPBITS)
        TMP = #1
    }
    {
        EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
        EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
    }
    {
        A = asl(A,QH)
        B = asl(B,QL)
        // for a subnormal operand the effective exponent is 1 - shift
        if (!P_TMP) EXPA = sub(TMP,QH)
        if (!P_TMP2) EXPB = sub(TMP,QL)
    }   // recreate values needed by resume code
    {
        PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
    }
    {
        SFDEN = or(SFONE,PRODLO)
        jump .Ldenorm_continue
    }

    // Zero result: return a zero whose sign is the XOR of the operand signs.
.Ldiv_zero_result:
    {
        AH = xor(AH,BH)
        B = #0
    }
    {
        A = insert(B,#63,#0)
        jumpr r31
    }
    // Infinite result: return an infinity whose sign is the XOR of the
    // operand signs. The divide-by-zero flag is set only when B is zero
    // and A is not infinite.
.Ldiv_inf_result:
    {
        p2 = dfclass(B,#DFCLASS_ZERO)
        p2 = dfclass(A,#DFCLASS_NONINFINITE)
    }
    {
        TMP = USR
        if (!p2) jump 1f
        AH = xor(AH,BH)
    }
    {
        TMP = or(TMP,#USR_DBZ)                        // DBZ
    }
    {
        USR = TMP
    }
1:
    {
        B = combine(##0x7ff00000,#0)                  // infinity
        p0 = dfcmp.uo(B,B)                            // take possible exception
    }
    {
        A = insert(B,#63,#0)
        jumpr r31
    }
    // At least one operand is a NaN. Copy a NaN over any non-NaN operand,
    // then return the all-ones pattern.
.Ldiv_nan:
    {
        p0 = dfclass(A,#DFCLASS_NAN)
        p1 = dfclass(B,#DFCLASS_NAN)
        if (!p0.new) A = B
        if (!p1.new) B = A
    }
    {
        QH = convert_df2sf(A)                         // get possible invalid exceptions
        QL = convert_df2sf(B)
    }
    {
        A = #-1
        jumpr r31
    }

    // inf / inf or 0 / 0
.Ldiv_invalid:
    {
        // single-precision NaN pattern (exponent all ones, low mantissa bit set)
        TMP = ##0x7f800001
    }
    {
        A = convert_sf2df(TMP)                        // get invalid, get DF qNaN
        jumpr r31
    }
END(__hexagon_divdf3)