1 /* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------=== 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is dual licensed under the MIT and the University of Illinois Open 6 * Source Licenses. See LICENSE.TXT for details. 7 * 8 * ===----------------------------------------------------------------------=== 9 * 10 * This file implements __udivmodti4 for the compiler_rt library. 11 * 12 * ===----------------------------------------------------------------------=== 13 */ 14 15 #include "int_lib.h" 16 17 #if __x86_64 18 19 /* Effects: if rem != 0, *rem = a % b 20 * Returns: a / b 21 */ 22 23 /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ 24 25 tu_int __udivmodti4(tu_int a,tu_int b,tu_int * rem)26__udivmodti4(tu_int a, tu_int b, tu_int* rem) 27 { 28 const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; 29 const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT; 30 utwords n; 31 n.all = a; 32 utwords d; 33 d.all = b; 34 utwords q; 35 utwords r; 36 unsigned sr; 37 /* special cases, X is unknown, K != 0 */ 38 if (n.s.high == 0) 39 { 40 if (d.s.high == 0) 41 { 42 /* 0 X 43 * --- 44 * 0 X 45 */ 46 if (rem) 47 *rem = n.s.low % d.s.low; 48 return n.s.low / d.s.low; 49 } 50 /* 0 X 51 * --- 52 * K X 53 */ 54 if (rem) 55 *rem = n.s.low; 56 return 0; 57 } 58 /* n.s.high != 0 */ 59 if (d.s.low == 0) 60 { 61 if (d.s.high == 0) 62 { 63 /* K X 64 * --- 65 * 0 0 66 */ 67 if (rem) 68 *rem = n.s.high % d.s.low; 69 return n.s.high / d.s.low; 70 } 71 /* d.s.high != 0 */ 72 if (n.s.low == 0) 73 { 74 /* K 0 75 * --- 76 * K 0 77 */ 78 if (rem) 79 { 80 r.s.high = n.s.high % d.s.high; 81 r.s.low = 0; 82 *rem = r.all; 83 } 84 return n.s.high / d.s.high; 85 } 86 /* K K 87 * --- 88 * K 0 89 */ 90 if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ 91 { 92 if (rem) 93 { 94 r.s.low = n.s.low; 95 r.s.high = n.s.high & (d.s.high - 1); 96 *rem = r.all; 97 } 98 return n.s.high >> __builtin_ctzll(d.s.high); 99 } 100 /* K K 101 * --- 102 * K 0 103 */ 104 sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); 105 /* 0 <= sr <= n_udword_bits - 2 or sr large */ 106 if (sr > n_udword_bits - 2) 107 { 108 if (rem) 109 *rem = n.all; 110 return 0; 111 } 112 ++sr; 113 /* 1 <= sr <= n_udword_bits - 1 */ 114 /* q.all = n.all << (n_utword_bits - sr); */ 115 q.s.low = 0; 116 q.s.high = n.s.low << (n_udword_bits - sr); 117 /* r.all = n.all >> sr; */ 118 r.s.high = n.s.high >> sr; 119 r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 120 } 121 else /* d.s.low != 0 */ 122 { 123 if (d.s.high == 0) 124 { 125 /* K X 126 * --- 127 * 0 K 128 */ 129 if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ 130 { 131 if (rem) 132 *rem = n.s.low & (d.s.low - 1); 133 if (d.s.low == 1) 134 return n.all; 135 sr = __builtin_ctzll(d.s.low); 136 q.s.high = n.s.high >> sr; 137 q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 138 return q.all; 139 } 140 /* K X 141 * --- 142 * 0 K 143 */ 144 sr = 1 + n_udword_bits + __builtin_clzll(d.s.low) 145 - __builtin_clzll(n.s.high); 146 /* 2 <= sr <= n_utword_bits - 1 147 * q.all = n.all << (n_utword_bits - sr); 148 * r.all = n.all >> sr; 149 * if (sr == n_udword_bits) 150 * { 151 * q.s.low = 0; 152 * q.s.high = n.s.low; 153 * r.s.high = 0; 154 * r.s.low = n.s.high; 155 * } 156 * else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1 157 * { 158 * q.s.low = 0; 159 * q.s.high = n.s.low << (n_udword_bits - sr); 160 * r.s.high = n.s.high >> sr; 161 * r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 162 * } 163 * else // n_udword_bits + 1 <= sr <= n_utword_bits - 1 164 * { 165 * q.s.low = n.s.low << (n_utword_bits - sr); 166 * q.s.high = (n.s.high << (n_utword_bits - sr)) | 167 * (n.s.low >> (sr - n_udword_bits)); 168 * r.s.high = 0; 169 * r.s.low = n.s.high >> (sr - n_udword_bits); 170 * } 171 */ 172 q.s.low = (n.s.low << (n_utword_bits - sr)) & 173 ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1)); 174 q.s.high = ((n.s.low << ( n_udword_bits - sr)) & 175 ((di_int)(int)(sr - n_udword_bits - 1) >> (n_udword_bits-1))) | 176 (((n.s.high << (n_utword_bits - sr)) | 177 (n.s.low >> (sr - n_udword_bits))) & 178 ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1))); 179 r.s.high = (n.s.high >> sr) & 180 ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)); 181 r.s.low = ((n.s.high >> (sr - n_udword_bits)) & 182 ((di_int)(int)(n_udword_bits - sr - 1) >> (n_udword_bits-1))) | 183 (((n.s.high << (n_udword_bits - sr)) | 184 (n.s.low >> sr)) & 185 ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1))); 186 } 187 else 188 { 189 /* K X 190 * --- 191 * K K 192 */ 193 sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); 194 /*0 <= sr <= n_udword_bits - 1 or sr large */ 195 if (sr > n_udword_bits - 1) 196 { 197 if (rem) 198 *rem = n.all; 199 return 0; 200 } 201 ++sr; 202 /* 1 <= sr <= n_udword_bits */ 203 /* q.all = n.all << (n_utword_bits - sr); */ 204 q.s.low = 0; 205 q.s.high = n.s.low << (n_udword_bits - sr); 206 /* r.all = n.all >> sr; 207 * if (sr < n_udword_bits) 208 * { 209 * r.s.high = n.s.high >> sr; 210 * r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 211 * } 212 * else 213 * { 214 * r.s.high = 0; 215 * r.s.low = n.s.high; 216 * } 217 */ 218 r.s.high = (n.s.high >> sr) & 219 ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)); 220 r.s.low = (n.s.high << (n_udword_bits - sr)) | 221 ((n.s.low >> sr) & 222 ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1))); 223 } 224 } 225 /* Not a special case 226 * q and r are initialized with: 227 * q.all = n.all << (n_utword_bits - sr); 228 * r.all = n.all >> sr; 229 * 1 <= sr <= n_utword_bits - 1 230 */ 231 su_int carry = 0; 232 for (; sr > 0; --sr) 233 { 234 /* r:q = ((r:q) << 1) | carry */ 235 r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1)); 236 r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1)); 237 q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1)); 238 q.s.low = (q.s.low << 1) | carry; 239 /* carry = 0; 240 * if (r.all >= d.all) 241 * { 242 * r.all -= d.all; 243 * carry = 1; 244 * } 245 */ 246 const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1); 247 carry = s & 1; 248 r.all -= d.all & s; 249 } 250 q.all = (q.all << 1) | carry; 251 if (rem) 252 *rem = r.all; 253 return q.all; 254 } 255 256 #endif /* __x86_64 */ 257