1 package org.bouncycastle.math.ec; 2 3 import org.bouncycastle.util.Arrays; 4 5 import java.math.BigInteger; 6 7 class LongArray implements Cloneable 8 { 9 // private static long DEINTERLEAVE_MASK = 0x5555555555555555L; 10 11 /* 12 * This expands 8 bit indices into 16 bit contents (high bit 14), by inserting 0s between bits. 13 * In a binary field, this operation is the same as squaring an 8 bit number. 14 * 15 * NOTE: All entries are positive so sign-extension is not an issue. 16 */ 17 private static final short[] INTERLEAVE2_TABLE = new short[] 18 { 19 0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015, 20 0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055, 21 0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115, 22 0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155, 23 0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415, 24 0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455, 25 0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515, 26 0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555, 27 0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015, 28 0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055, 29 0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115, 30 0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155, 31 0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415, 32 0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455, 33 0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515, 34 0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555, 35 0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015, 36 0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055, 37 0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115, 38 0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155, 39 0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415, 40 0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455, 41 0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515, 42 0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555, 43 0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015, 44 0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055, 45 0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115, 46 0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155, 47 0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415, 48 0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455, 49 0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515, 50 0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555 51 }; 52 53 /* 54 * This expands 7 bit indices into 21 bit contents (high bit 18), by inserting 0s between bits. 55 */ 56 private static final int[] INTERLEAVE3_TABLE = new int[] 57 { 58 0x00000, 0x00001, 0x00008, 0x00009, 0x00040, 0x00041, 0x00048, 0x00049, 59 0x00200, 0x00201, 0x00208, 0x00209, 0x00240, 0x00241, 0x00248, 0x00249, 60 0x01000, 0x01001, 0x01008, 0x01009, 0x01040, 0x01041, 0x01048, 0x01049, 61 0x01200, 0x01201, 0x01208, 0x01209, 0x01240, 0x01241, 0x01248, 0x01249, 62 0x08000, 0x08001, 0x08008, 0x08009, 0x08040, 0x08041, 0x08048, 0x08049, 63 0x08200, 0x08201, 0x08208, 0x08209, 0x08240, 0x08241, 0x08248, 0x08249, 64 0x09000, 0x09001, 0x09008, 0x09009, 0x09040, 0x09041, 0x09048, 0x09049, 65 0x09200, 0x09201, 0x09208, 0x09209, 0x09240, 0x09241, 0x09248, 0x09249, 66 0x40000, 0x40001, 0x40008, 0x40009, 0x40040, 0x40041, 0x40048, 0x40049, 67 0x40200, 0x40201, 0x40208, 0x40209, 0x40240, 0x40241, 0x40248, 0x40249, 68 0x41000, 0x41001, 0x41008, 0x41009, 0x41040, 0x41041, 0x41048, 0x41049, 69 0x41200, 0x41201, 0x41208, 0x41209, 0x41240, 0x41241, 0x41248, 0x41249, 70 0x48000, 0x48001, 0x48008, 0x48009, 0x48040, 0x48041, 0x48048, 0x48049, 71 0x48200, 0x48201, 0x48208, 0x48209, 0x48240, 0x48241, 0x48248, 0x48249, 72 0x49000, 0x49001, 0x49008, 0x49009, 0x49040, 0x49041, 0x49048, 0x49049, 73 0x49200, 0x49201, 0x49208, 0x49209, 0x49240, 0x49241, 0x49248, 0x49249 74 }; 75 76 /* 77 * This expands 8 bit indices into 32 bit contents (high bit 28), by inserting 0s between bits. 78 */ 79 private static final int[] INTERLEAVE4_TABLE = new int[] 80 { 81 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, 0x00000101, 0x00000110, 0x00000111, 82 0x00001000, 0x00001001, 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, 0x00001111, 83 0x00010000, 0x00010001, 0x00010010, 0x00010011, 0x00010100, 0x00010101, 0x00010110, 0x00010111, 84 0x00011000, 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101, 0x00011110, 0x00011111, 85 0x00100000, 0x00100001, 0x00100010, 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111, 86 0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100, 0x00101101, 0x00101110, 0x00101111, 87 0x00110000, 0x00110001, 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110, 0x00110111, 88 0x00111000, 0x00111001, 0x00111010, 0x00111011, 0x00111100, 0x00111101, 0x00111110, 0x00111111, 89 0x01000000, 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101, 0x01000110, 0x01000111, 90 0x01001000, 0x01001001, 0x01001010, 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111, 91 0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100, 0x01010101, 0x01010110, 0x01010111, 92 0x01011000, 0x01011001, 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110, 0x01011111, 93 0x01100000, 0x01100001, 0x01100010, 0x01100011, 0x01100100, 0x01100101, 0x01100110, 0x01100111, 94 0x01101000, 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101, 0x01101110, 0x01101111, 95 0x01110000, 0x01110001, 0x01110010, 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111, 96 0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100, 0x01111101, 0x01111110, 0x01111111, 97 0x10000000, 0x10000001, 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110, 0x10000111, 98 0x10001000, 0x10001001, 0x10001010, 0x10001011, 0x10001100, 0x10001101, 0x10001110, 0x10001111, 99 0x10010000, 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101, 0x10010110, 0x10010111, 100 0x10011000, 0x10011001, 0x10011010, 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111, 101 0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100, 0x10100101, 0x10100110, 0x10100111, 102 0x10101000, 0x10101001, 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110, 0x10101111, 103 0x10110000, 0x10110001, 0x10110010, 0x10110011, 0x10110100, 0x10110101, 0x10110110, 0x10110111, 104 0x10111000, 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101, 0x10111110, 0x10111111, 105 0x11000000, 0x11000001, 0x11000010, 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111, 106 0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100, 0x11001101, 0x11001110, 0x11001111, 107 0x11010000, 0x11010001, 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110, 0x11010111, 108 0x11011000, 0x11011001, 0x11011010, 0x11011011, 0x11011100, 0x11011101, 0x11011110, 0x11011111, 109 0x11100000, 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101, 0x11100110, 0x11100111, 110 0x11101000, 0x11101001, 0x11101010, 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111, 111 0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100, 0x11110101, 0x11110110, 0x11110111, 112 0x11111000, 0x11111001, 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110, 0x11111111 113 }; 114 115 /* 116 * This expands 7 bit indices into 35 bit contents (high bit 30), by inserting 0s between bits. 117 */ 118 private static final int[] INTERLEAVE5_TABLE = new int[] { 119 0x00000000, 0x00000001, 0x00000020, 0x00000021, 0x00000400, 0x00000401, 0x00000420, 0x00000421, 120 0x00008000, 0x00008001, 0x00008020, 0x00008021, 0x00008400, 0x00008401, 0x00008420, 0x00008421, 121 0x00100000, 0x00100001, 0x00100020, 0x00100021, 0x00100400, 0x00100401, 0x00100420, 0x00100421, 122 0x00108000, 0x00108001, 0x00108020, 0x00108021, 0x00108400, 0x00108401, 0x00108420, 0x00108421, 123 0x02000000, 0x02000001, 0x02000020, 0x02000021, 0x02000400, 0x02000401, 0x02000420, 0x02000421, 124 0x02008000, 0x02008001, 0x02008020, 0x02008021, 0x02008400, 0x02008401, 0x02008420, 0x02008421, 125 0x02100000, 0x02100001, 0x02100020, 0x02100021, 0x02100400, 0x02100401, 0x02100420, 0x02100421, 126 0x02108000, 0x02108001, 0x02108020, 0x02108021, 0x02108400, 0x02108401, 0x02108420, 0x02108421, 127 0x40000000, 0x40000001, 0x40000020, 0x40000021, 0x40000400, 0x40000401, 0x40000420, 0x40000421, 128 0x40008000, 0x40008001, 0x40008020, 0x40008021, 0x40008400, 0x40008401, 0x40008420, 0x40008421, 129 0x40100000, 0x40100001, 0x40100020, 0x40100021, 0x40100400, 0x40100401, 0x40100420, 0x40100421, 130 0x40108000, 0x40108001, 0x40108020, 0x40108021, 0x40108400, 0x40108401, 0x40108420, 0x40108421, 131 0x42000000, 0x42000001, 0x42000020, 0x42000021, 0x42000400, 0x42000401, 0x42000420, 0x42000421, 132 0x42008000, 0x42008001, 0x42008020, 0x42008021, 0x42008400, 0x42008401, 0x42008420, 0x42008421, 133 0x42100000, 0x42100001, 0x42100020, 0x42100021, 0x42100400, 0x42100401, 0x42100420, 0x42100421, 134 0x42108000, 0x42108001, 0x42108020, 0x42108021, 0x42108400, 0x42108401, 0x42108420, 0x42108421 135 }; 136 137 /* 138 * This expands 9 bit indices into 63 bit (long) contents (high bit 56), by inserting 0s between bits. 139 */ 140 private static final long[] INTERLEAVE7_TABLE = new long[] 141 { 142 0x0000000000000000L, 0x0000000000000001L, 0x0000000000000080L, 0x0000000000000081L, 143 0x0000000000004000L, 0x0000000000004001L, 0x0000000000004080L, 0x0000000000004081L, 144 0x0000000000200000L, 0x0000000000200001L, 0x0000000000200080L, 0x0000000000200081L, 145 0x0000000000204000L, 0x0000000000204001L, 0x0000000000204080L, 0x0000000000204081L, 146 0x0000000010000000L, 0x0000000010000001L, 0x0000000010000080L, 0x0000000010000081L, 147 0x0000000010004000L, 0x0000000010004001L, 0x0000000010004080L, 0x0000000010004081L, 148 0x0000000010200000L, 0x0000000010200001L, 0x0000000010200080L, 0x0000000010200081L, 149 0x0000000010204000L, 0x0000000010204001L, 0x0000000010204080L, 0x0000000010204081L, 150 0x0000000800000000L, 0x0000000800000001L, 0x0000000800000080L, 0x0000000800000081L, 151 0x0000000800004000L, 0x0000000800004001L, 0x0000000800004080L, 0x0000000800004081L, 152 0x0000000800200000L, 0x0000000800200001L, 0x0000000800200080L, 0x0000000800200081L, 153 0x0000000800204000L, 0x0000000800204001L, 0x0000000800204080L, 0x0000000800204081L, 154 0x0000000810000000L, 0x0000000810000001L, 0x0000000810000080L, 0x0000000810000081L, 155 0x0000000810004000L, 0x0000000810004001L, 0x0000000810004080L, 0x0000000810004081L, 156 0x0000000810200000L, 0x0000000810200001L, 0x0000000810200080L, 0x0000000810200081L, 157 0x0000000810204000L, 0x0000000810204001L, 0x0000000810204080L, 0x0000000810204081L, 158 0x0000040000000000L, 0x0000040000000001L, 0x0000040000000080L, 0x0000040000000081L, 159 0x0000040000004000L, 0x0000040000004001L, 0x0000040000004080L, 0x0000040000004081L, 160 0x0000040000200000L, 0x0000040000200001L, 0x0000040000200080L, 0x0000040000200081L, 161 0x0000040000204000L, 0x0000040000204001L, 0x0000040000204080L, 0x0000040000204081L, 162 0x0000040010000000L, 0x0000040010000001L, 0x0000040010000080L, 0x0000040010000081L, 163 0x0000040010004000L, 0x0000040010004001L, 0x0000040010004080L, 0x0000040010004081L, 164 0x0000040010200000L, 0x0000040010200001L, 0x0000040010200080L, 0x0000040010200081L, 165 0x0000040010204000L, 0x0000040010204001L, 0x0000040010204080L, 0x0000040010204081L, 166 0x0000040800000000L, 0x0000040800000001L, 0x0000040800000080L, 0x0000040800000081L, 167 0x0000040800004000L, 0x0000040800004001L, 0x0000040800004080L, 0x0000040800004081L, 168 0x0000040800200000L, 0x0000040800200001L, 0x0000040800200080L, 0x0000040800200081L, 169 0x0000040800204000L, 0x0000040800204001L, 0x0000040800204080L, 0x0000040800204081L, 170 0x0000040810000000L, 0x0000040810000001L, 0x0000040810000080L, 0x0000040810000081L, 171 0x0000040810004000L, 0x0000040810004001L, 0x0000040810004080L, 0x0000040810004081L, 172 0x0000040810200000L, 0x0000040810200001L, 0x0000040810200080L, 0x0000040810200081L, 173 0x0000040810204000L, 0x0000040810204001L, 0x0000040810204080L, 0x0000040810204081L, 174 0x0002000000000000L, 0x0002000000000001L, 0x0002000000000080L, 0x0002000000000081L, 175 0x0002000000004000L, 0x0002000000004001L, 0x0002000000004080L, 0x0002000000004081L, 176 0x0002000000200000L, 0x0002000000200001L, 0x0002000000200080L, 0x0002000000200081L, 177 0x0002000000204000L, 0x0002000000204001L, 0x0002000000204080L, 0x0002000000204081L, 178 0x0002000010000000L, 0x0002000010000001L, 0x0002000010000080L, 0x0002000010000081L, 179 0x0002000010004000L, 0x0002000010004001L, 0x0002000010004080L, 0x0002000010004081L, 180 0x0002000010200000L, 0x0002000010200001L, 0x0002000010200080L, 0x0002000010200081L, 181 0x0002000010204000L, 0x0002000010204001L, 0x0002000010204080L, 0x0002000010204081L, 182 0x0002000800000000L, 0x0002000800000001L, 0x0002000800000080L, 0x0002000800000081L, 183 0x0002000800004000L, 0x0002000800004001L, 0x0002000800004080L, 0x0002000800004081L, 184 0x0002000800200000L, 0x0002000800200001L, 0x0002000800200080L, 0x0002000800200081L, 185 0x0002000800204000L, 0x0002000800204001L, 0x0002000800204080L, 0x0002000800204081L, 186 0x0002000810000000L, 0x0002000810000001L, 0x0002000810000080L, 0x0002000810000081L, 187 0x0002000810004000L, 0x0002000810004001L, 0x0002000810004080L, 0x0002000810004081L, 188 0x0002000810200000L, 0x0002000810200001L, 0x0002000810200080L, 0x0002000810200081L, 189 0x0002000810204000L, 0x0002000810204001L, 0x0002000810204080L, 0x0002000810204081L, 190 0x0002040000000000L, 0x0002040000000001L, 0x0002040000000080L, 0x0002040000000081L, 191 0x0002040000004000L, 0x0002040000004001L, 0x0002040000004080L, 0x0002040000004081L, 192 0x0002040000200000L, 0x0002040000200001L, 0x0002040000200080L, 0x0002040000200081L, 193 0x0002040000204000L, 0x0002040000204001L, 0x0002040000204080L, 0x0002040000204081L, 194 0x0002040010000000L, 0x0002040010000001L, 0x0002040010000080L, 0x0002040010000081L, 195 0x0002040010004000L, 0x0002040010004001L, 0x0002040010004080L, 0x0002040010004081L, 196 0x0002040010200000L, 0x0002040010200001L, 0x0002040010200080L, 0x0002040010200081L, 197 0x0002040010204000L, 0x0002040010204001L, 0x0002040010204080L, 0x0002040010204081L, 198 0x0002040800000000L, 0x0002040800000001L, 0x0002040800000080L, 0x0002040800000081L, 199 0x0002040800004000L, 0x0002040800004001L, 0x0002040800004080L, 0x0002040800004081L, 200 0x0002040800200000L, 0x0002040800200001L, 0x0002040800200080L, 0x0002040800200081L, 201 0x0002040800204000L, 0x0002040800204001L, 0x0002040800204080L, 0x0002040800204081L, 202 0x0002040810000000L, 0x0002040810000001L, 0x0002040810000080L, 0x0002040810000081L, 203 0x0002040810004000L, 0x0002040810004001L, 0x0002040810004080L, 0x0002040810004081L, 204 0x0002040810200000L, 0x0002040810200001L, 0x0002040810200080L, 0x0002040810200081L, 205 0x0002040810204000L, 0x0002040810204001L, 0x0002040810204080L, 0x0002040810204081L, 206 0x0100000000000000L, 0x0100000000000001L, 0x0100000000000080L, 0x0100000000000081L, 207 0x0100000000004000L, 0x0100000000004001L, 0x0100000000004080L, 0x0100000000004081L, 208 0x0100000000200000L, 0x0100000000200001L, 0x0100000000200080L, 0x0100000000200081L, 209 0x0100000000204000L, 0x0100000000204001L, 0x0100000000204080L, 0x0100000000204081L, 210 0x0100000010000000L, 0x0100000010000001L, 0x0100000010000080L, 0x0100000010000081L, 211 0x0100000010004000L, 0x0100000010004001L, 0x0100000010004080L, 0x0100000010004081L, 212 0x0100000010200000L, 0x0100000010200001L, 0x0100000010200080L, 0x0100000010200081L, 213 0x0100000010204000L, 0x0100000010204001L, 0x0100000010204080L, 0x0100000010204081L, 214 0x0100000800000000L, 0x0100000800000001L, 0x0100000800000080L, 0x0100000800000081L, 215 0x0100000800004000L, 0x0100000800004001L, 0x0100000800004080L, 0x0100000800004081L, 216 0x0100000800200000L, 0x0100000800200001L, 0x0100000800200080L, 0x0100000800200081L, 217 0x0100000800204000L, 0x0100000800204001L, 0x0100000800204080L, 0x0100000800204081L, 218 0x0100000810000000L, 0x0100000810000001L, 0x0100000810000080L, 0x0100000810000081L, 219 0x0100000810004000L, 0x0100000810004001L, 0x0100000810004080L, 0x0100000810004081L, 220 0x0100000810200000L, 0x0100000810200001L, 0x0100000810200080L, 0x0100000810200081L, 221 0x0100000810204000L, 0x0100000810204001L, 0x0100000810204080L, 0x0100000810204081L, 222 0x0100040000000000L, 0x0100040000000001L, 0x0100040000000080L, 0x0100040000000081L, 223 0x0100040000004000L, 0x0100040000004001L, 0x0100040000004080L, 0x0100040000004081L, 224 0x0100040000200000L, 0x0100040000200001L, 0x0100040000200080L, 0x0100040000200081L, 225 0x0100040000204000L, 0x0100040000204001L, 0x0100040000204080L, 0x0100040000204081L, 226 0x0100040010000000L, 0x0100040010000001L, 0x0100040010000080L, 0x0100040010000081L, 227 0x0100040010004000L, 0x0100040010004001L, 0x0100040010004080L, 0x0100040010004081L, 228 0x0100040010200000L, 0x0100040010200001L, 0x0100040010200080L, 0x0100040010200081L, 229 0x0100040010204000L, 0x0100040010204001L, 0x0100040010204080L, 0x0100040010204081L, 230 0x0100040800000000L, 0x0100040800000001L, 0x0100040800000080L, 0x0100040800000081L, 231 0x0100040800004000L, 0x0100040800004001L, 0x0100040800004080L, 0x0100040800004081L, 232 0x0100040800200000L, 0x0100040800200001L, 0x0100040800200080L, 0x0100040800200081L, 233 0x0100040800204000L, 0x0100040800204001L, 0x0100040800204080L, 0x0100040800204081L, 234 0x0100040810000000L, 0x0100040810000001L, 0x0100040810000080L, 0x0100040810000081L, 235 0x0100040810004000L, 0x0100040810004001L, 0x0100040810004080L, 0x0100040810004081L, 236 0x0100040810200000L, 0x0100040810200001L, 0x0100040810200080L, 0x0100040810200081L, 237 0x0100040810204000L, 0x0100040810204001L, 0x0100040810204080L, 0x0100040810204081L, 238 0x0102000000000000L, 0x0102000000000001L, 0x0102000000000080L, 0x0102000000000081L, 239 0x0102000000004000L, 0x0102000000004001L, 0x0102000000004080L, 0x0102000000004081L, 240 0x0102000000200000L, 0x0102000000200001L, 0x0102000000200080L, 0x0102000000200081L, 241 0x0102000000204000L, 0x0102000000204001L, 0x0102000000204080L, 0x0102000000204081L, 242 0x0102000010000000L, 0x0102000010000001L, 0x0102000010000080L, 0x0102000010000081L, 243 0x0102000010004000L, 0x0102000010004001L, 0x0102000010004080L, 0x0102000010004081L, 244 0x0102000010200000L, 0x0102000010200001L, 0x0102000010200080L, 0x0102000010200081L, 245 0x0102000010204000L, 0x0102000010204001L, 0x0102000010204080L, 0x0102000010204081L, 246 0x0102000800000000L, 0x0102000800000001L, 0x0102000800000080L, 0x0102000800000081L, 247 0x0102000800004000L, 0x0102000800004001L, 0x0102000800004080L, 0x0102000800004081L, 248 0x0102000800200000L, 0x0102000800200001L, 0x0102000800200080L, 0x0102000800200081L, 249 0x0102000800204000L, 0x0102000800204001L, 0x0102000800204080L, 0x0102000800204081L, 250 0x0102000810000000L, 0x0102000810000001L, 0x0102000810000080L, 0x0102000810000081L, 251 0x0102000810004000L, 0x0102000810004001L, 0x0102000810004080L, 0x0102000810004081L, 252 0x0102000810200000L, 0x0102000810200001L, 0x0102000810200080L, 0x0102000810200081L, 253 0x0102000810204000L, 0x0102000810204001L, 0x0102000810204080L, 0x0102000810204081L, 254 0x0102040000000000L, 0x0102040000000001L, 0x0102040000000080L, 0x0102040000000081L, 255 0x0102040000004000L, 0x0102040000004001L, 0x0102040000004080L, 0x0102040000004081L, 256 0x0102040000200000L, 0x0102040000200001L, 0x0102040000200080L, 0x0102040000200081L, 257 0x0102040000204000L, 0x0102040000204001L, 0x0102040000204080L, 0x0102040000204081L, 258 0x0102040010000000L, 0x0102040010000001L, 0x0102040010000080L, 0x0102040010000081L, 259 0x0102040010004000L, 0x0102040010004001L, 0x0102040010004080L, 0x0102040010004081L, 260 0x0102040010200000L, 0x0102040010200001L, 0x0102040010200080L, 0x0102040010200081L, 261 0x0102040010204000L, 0x0102040010204001L, 0x0102040010204080L, 0x0102040010204081L, 262 0x0102040800000000L, 0x0102040800000001L, 0x0102040800000080L, 0x0102040800000081L, 263 0x0102040800004000L, 0x0102040800004001L, 0x0102040800004080L, 0x0102040800004081L, 264 0x0102040800200000L, 0x0102040800200001L, 0x0102040800200080L, 0x0102040800200081L, 265 0x0102040800204000L, 0x0102040800204001L, 0x0102040800204080L, 0x0102040800204081L, 266 0x0102040810000000L, 0x0102040810000001L, 0x0102040810000080L, 0x0102040810000081L, 267 0x0102040810004000L, 0x0102040810004001L, 0x0102040810004080L, 0x0102040810004081L, 268 0x0102040810200000L, 0x0102040810200001L, 0x0102040810200080L, 0x0102040810200081L, 269 0x0102040810204000L, 0x0102040810204001L, 0x0102040810204080L, 0x0102040810204081L 270 }; 271 272 // For toString(); must have length 64 273 private static final String ZEROES = "0000000000000000000000000000000000000000000000000000000000000000"; 274 275 final static byte[] bitLengths = 276 { 277 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 278 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 279 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 280 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 281 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 282 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 283 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 284 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 285 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 286 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 287 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 288 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 289 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 290 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 291 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 292 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 293 }; 294 295 // TODO make m fixed for the LongArray, and hence compute T once and for all 296 297 private long[] m_ints; 298 LongArray(int intLen)299 public LongArray(int intLen) 300 { 301 m_ints = new long[intLen]; 302 } 303 LongArray(long[] ints)304 public LongArray(long[] ints) 305 { 306 m_ints = ints; 307 } 308 LongArray(long[] ints, int off, int len)309 public LongArray(long[] ints, int off, int len) 310 { 311 if (off == 0 && len == ints.length) 312 { 313 m_ints = ints; 314 } 315 else 316 { 317 m_ints = new long[len]; 318 System.arraycopy(ints, off, m_ints, 0, len); 319 } 320 } 321 LongArray(BigInteger bigInt)322 public LongArray(BigInteger bigInt) 323 { 324 if (bigInt == null || bigInt.signum() < 0) 325 { 326 throw new IllegalArgumentException("invalid F2m field value"); 327 } 328 329 if (bigInt.signum() == 0) 330 { 331 m_ints = new long[] { 0L }; 332 return; 333 } 334 335 byte[] barr = bigInt.toByteArray(); 336 int barrLen = barr.length; 337 int barrStart = 0; 338 if (barr[0] == 0) 339 { 340 // First byte is 0 to enforce highest (=sign) bit is zero. 341 // In this case ignore barr[0]. 342 barrLen--; 343 barrStart = 1; 344 } 345 int intLen = (barrLen + 7) / 8; 346 m_ints = new long[intLen]; 347 348 int iarrJ = intLen - 1; 349 int rem = barrLen % 8 + barrStart; 350 long temp = 0; 351 int barrI = barrStart; 352 if (barrStart < rem) 353 { 354 for (; barrI < rem; barrI++) 355 { 356 temp <<= 8; 357 int barrBarrI = barr[barrI] & 0xFF; 358 temp |= barrBarrI; 359 } 360 m_ints[iarrJ--] = temp; 361 } 362 363 for (; iarrJ >= 0; iarrJ--) 364 { 365 temp = 0; 366 for (int i = 0; i < 8; i++) 367 { 368 temp <<= 8; 369 int barrBarrI = barr[barrI++] & 0xFF; 370 temp |= barrBarrI; 371 } 372 m_ints[iarrJ] = temp; 373 } 374 } 375 isOne()376 public boolean isOne() 377 { 378 long[] a = m_ints; 379 if (a[0] != 1L) 380 { 381 return false; 382 } 383 for (int i = 1; i < a.length; ++i) 384 { 385 if (a[i] != 0L) 386 { 387 return false; 388 } 389 } 390 return true; 391 } 392 isZero()393 public boolean isZero() 394 { 395 long[] a = m_ints; 396 for (int i = 0; i < a.length; ++i) 397 { 398 if (a[i] != 0L) 399 { 400 return false; 401 } 402 } 403 return true; 404 } 405 getUsedLength()406 public int getUsedLength() 407 { 408 return getUsedLengthFrom(m_ints.length); 409 } 410 getUsedLengthFrom(int from)411 public int getUsedLengthFrom(int from) 412 { 413 long[] a = m_ints; 414 from = Math.min(from, a.length); 415 416 if (from < 1) 417 { 418 return 0; 419 } 420 421 // Check if first element will act as sentinel 422 if (a[0] != 0) 423 { 424 while (a[--from] == 0) 425 { 426 } 427 return from + 1; 428 } 429 430 do 431 { 432 if (a[--from] != 0) 433 { 434 return from + 1; 435 } 436 } 437 while (from > 0); 438 439 return 0; 440 } 441 degree()442 public int degree() 443 { 444 int i = m_ints.length; 445 long w; 446 do 447 { 448 if (i == 0) 449 { 450 return 0; 451 } 452 w = m_ints[--i]; 453 } 454 while (w == 0); 455 456 return (i << 6) + bitLength(w); 457 } 458 degreeFrom(int limit)459 private int degreeFrom(int limit) 460 { 461 int i = (limit + 62) >>> 6; 462 long w; 463 do 464 { 465 if (i == 0) 466 { 467 return 0; 468 } 469 w = m_ints[--i]; 470 } 471 while (w == 0); 472 473 return (i << 6) + bitLength(w); 474 } 475 476 // private int lowestCoefficient() 477 // { 478 // for (int i = 0; i < m_ints.length; ++i) 479 // { 480 // long mi = m_ints[i]; 481 // if (mi != 0) 482 // { 483 // int j = 0; 484 // while ((mi & 0xFFL) == 0) 485 // { 486 // j += 8; 487 // mi >>>= 8; 488 // } 489 // while ((mi & 1L) == 0) 490 // { 491 // ++j; 492 // mi >>>= 1; 493 // } 494 // return (i << 6) + j; 495 // } 496 // } 497 // return -1; 498 // } 499 bitLength(long w)500 private static int bitLength(long w) 501 { 502 int u = (int)(w >>> 32), b; 503 if (u == 0) 504 { 505 u = (int)w; 506 b = 0; 507 } 508 else 509 { 510 b = 32; 511 } 512 513 int t = u >>> 16, k; 514 if (t == 0) 515 { 516 t = u >>> 8; 517 k = (t == 0) ? bitLengths[u] : 8 + bitLengths[t]; 518 } 519 else 520 { 521 int v = t >>> 8; 522 k = (v == 0) ? 16 + bitLengths[t] : 24 + bitLengths[v]; 523 } 524 525 return b + k; 526 } 527 resizedInts(int newLen)528 private long[] resizedInts(int newLen) 529 { 530 long[] newInts = new long[newLen]; 531 System.arraycopy(m_ints, 0, newInts, 0, Math.min(m_ints.length, newLen)); 532 return newInts; 533 } 534 toBigInteger()535 public BigInteger toBigInteger() 536 { 537 int usedLen = getUsedLength(); 538 if (usedLen == 0) 539 { 540 return ECConstants.ZERO; 541 } 542 543 long highestInt = m_ints[usedLen - 1]; 544 byte[] temp = new byte[8]; 545 int barrI = 0; 546 boolean trailingZeroBytesDone = false; 547 for (int j = 7; j >= 0; j--) 548 { 549 byte thisByte = (byte)(highestInt >>> (8 * j)); 550 if (trailingZeroBytesDone || (thisByte != 0)) 551 { 552 trailingZeroBytesDone = true; 553 temp[barrI++] = thisByte; 554 } 555 } 556 557 int barrLen = 8 * (usedLen - 1) + barrI; 558 byte[] barr = new byte[barrLen]; 559 for (int j = 0; j < barrI; j++) 560 { 561 barr[j] = temp[j]; 562 } 563 // Highest value int is done now 564 565 for (int iarrJ = usedLen - 2; iarrJ >= 0; iarrJ--) 566 { 567 long mi = m_ints[iarrJ]; 568 for (int j = 7; j >= 0; j--) 569 { 570 barr[barrI++] = (byte)(mi >>> (8 * j)); 571 } 572 } 573 return new BigInteger(1, barr); 574 } 575 576 // private static long shiftUp(long[] x, int xOff, int count) 577 // { 578 // long prev = 0; 579 // for (int i = 0; i < count; ++i) 580 // { 581 // long next = x[xOff + i]; 582 // x[xOff + i] = (next << 1) | prev; 583 // prev = next >>> 63; 584 // } 585 // return prev; 586 // } 587 shiftUp(long[] x, int xOff, int count, int shift)588 private static long shiftUp(long[] x, int xOff, int count, int shift) 589 { 590 int shiftInv = 64 - shift; 591 long prev = 0; 592 for (int i = 0; i < count; ++i) 593 { 594 long next = x[xOff + i]; 595 x[xOff + i] = (next << shift) | prev; 596 prev = next >>> shiftInv; 597 } 598 return prev; 599 } 600 shiftUp(long[] x, int xOff, long[] z, int zOff, int count, int shift)601 private static long shiftUp(long[] x, int xOff, long[] z, int zOff, int count, int shift) 602 { 603 int shiftInv = 64 - shift; 604 long prev = 0; 605 for (int i = 0; i < count; ++i) 606 { 607 long next = x[xOff + i]; 608 z[zOff + i] = (next << shift) | prev; 609 prev = next >>> shiftInv; 610 } 611 return prev; 612 } 613 addOne()614 public LongArray addOne() 615 { 616 if (m_ints.length == 0) 617 { 618 return new LongArray(new long[]{ 1L }); 619 } 620 621 int resultLen = Math.max(1, getUsedLength()); 622 long[] ints = resizedInts(resultLen); 623 ints[0] ^= 1L; 624 return new LongArray(ints); 625 } 626 627 // private void addShiftedByBits(LongArray other, int bits) 628 // { 629 // int words = bits >>> 6; 630 // int shift = bits & 0x3F; 631 // 632 // if (shift == 0) 633 // { 634 // addShiftedByWords(other, words); 635 // return; 636 // } 637 // 638 // int otherUsedLen = other.getUsedLength(); 639 // if (otherUsedLen == 0) 640 // { 641 // return; 642 // } 643 // 644 // int minLen = otherUsedLen + words + 1; 645 // if (minLen > m_ints.length) 646 // { 647 // m_ints = resizedInts(minLen); 648 // } 649 // 650 // long carry = addShiftedByBits(m_ints, words, other.m_ints, 0, otherUsedLen, shift); 651 // m_ints[otherUsedLen + words] ^= carry; 652 // } 653 addShiftedByBitsSafe(LongArray other, int otherDegree, int bits)654 private void addShiftedByBitsSafe(LongArray other, int otherDegree, int bits) 655 { 656 int otherLen = (otherDegree + 63) >>> 6; 657 658 int words = bits >>> 6; 659 int shift = bits & 0x3F; 660 661 if (shift == 0) 662 { 663 add(m_ints, words, other.m_ints, 0, otherLen); 664 return; 665 } 666 667 long carry = addShiftedUp(m_ints, words, other.m_ints, 0, otherLen, shift); 668 if (carry != 0L) 669 { 670 m_ints[otherLen + words] ^= carry; 671 } 672 } 673 addShiftedUp(long[] x, int xOff, long[] y, int yOff, int count, int shift)674 private static long addShiftedUp(long[] x, int xOff, long[] y, int yOff, int count, int shift) 675 { 676 int shiftInv = 64 - shift; 677 long prev = 0; 678 for (int i = 0; i < count; ++i) 679 { 680 long next = y[yOff + i]; 681 x[xOff + i] ^= (next << shift) | prev; 682 prev = next >>> shiftInv; 683 } 684 return prev; 685 } 686 addShiftedDown(long[] x, int xOff, long[] y, int yOff, int count, int shift)687 private static long addShiftedDown(long[] x, int xOff, long[] y, int yOff, int count, int shift) 688 { 689 int shiftInv = 64 - shift; 690 long prev = 0; 691 int i = count; 692 while (--i >= 0) 693 { 694 long next = y[yOff + i]; 695 x[xOff + i] ^= (next >>> shift) | prev; 696 prev = next << shiftInv; 697 } 698 return prev; 699 } 700 addShiftedByWords(LongArray other, int words)701 public void addShiftedByWords(LongArray other, int words) 702 { 703 int otherUsedLen = other.getUsedLength(); 704 if (otherUsedLen == 0) 705 { 706 return; 707 } 708 709 int minLen = otherUsedLen + words; 710 if (minLen > m_ints.length) 711 { 712 m_ints = resizedInts(minLen); 713 } 714 715 add(m_ints, words, other.m_ints, 0, otherUsedLen); 716 } 717 add(long[] x, int xOff, long[] y, int yOff, int count)718 private static void add(long[] x, int xOff, long[] y, int yOff, int count) 719 { 720 for (int i = 0; i < count; ++i) 721 { 722 x[xOff + i] ^= y[yOff + i]; 723 } 724 } 725 add(long[] x, int xOff, long[] y, int yOff, long[] z, int zOff, int count)726 private static void add(long[] x, int xOff, long[] y, int yOff, long[] z, int zOff, int count) 727 { 728 for (int i = 0; i < count; ++i) 729 { 730 z[zOff + i] = x[xOff + i] ^ y[yOff + i]; 731 } 732 } 733 addBoth(long[] x, int xOff, long[] y1, int y1Off, long[] y2, int y2Off, int count)734 private static void addBoth(long[] x, int xOff, long[] y1, int y1Off, long[] y2, int y2Off, int count) 735 { 736 for (int i = 0; i < count; ++i) 737 { 738 x[xOff + i] ^= y1[y1Off + i] ^ y2[y2Off + i]; 739 } 740 } 741 distribute(long[] x, int src, int dst1, int dst2, int count)742 private static void distribute(long[] x, int src, int dst1, int dst2, int count) 743 { 744 for (int i = 0; i < count; ++i) 745 { 746 long v = x[src + i]; 747 x[dst1 + i] ^= v; 748 x[dst2 + i] ^= v; 749 } 750 } 751 getLength()752 public int getLength() 753 { 754 return m_ints.length; 755 } 756 flipWord(long[] buf, int off, int bit, long word)757 private static void flipWord(long[] buf, int off, int bit, long word) 758 { 759 int n = off + (bit >>> 6); 760 int shift = bit & 0x3F; 761 if (shift == 0) 762 { 763 buf[n] ^= word; 764 } 765 else 766 { 767 buf[n] ^= word << shift; 768 word >>>= (64 - shift); 769 if (word != 0) 770 { 771 buf[++n] ^= word; 772 } 773 } 774 } 775 776 // private static long getWord(long[] buf, int off, int len, int bit) 777 // { 778 // int n = off + (bit >>> 6); 779 // int shift = bit & 0x3F; 780 // if (shift == 0) 781 // { 782 // return buf[n]; 783 // } 784 // long result = buf[n] >>> shift; 785 // if (++n < len) 786 // { 787 // result |= buf[n] << (64 - shift); 788 // } 789 // return result; 790 // } 791 testBitZero()792 public boolean testBitZero() 793 { 794 return m_ints.length > 0 && (m_ints[0] & 1L) != 0; 795 } 796 testBit(long[] buf, int off, int n)797 private static boolean testBit(long[] buf, int off, int n) 798 { 799 // theInt = n / 64 800 int theInt = n >>> 6; 801 // theBit = n % 64 802 int theBit = n & 0x3F; 803 long tester = 1L << theBit; 804 return (buf[off + theInt] & tester) != 0; 805 } 806 flipBit(long[] buf, int off, int n)807 private static void flipBit(long[] buf, int off, int n) 808 { 809 // theInt = n / 64 810 int theInt = n >>> 6; 811 // theBit = n % 64 812 int theBit = n & 0x3F; 813 long flipper = 1L << theBit; 814 buf[off + theInt] ^= flipper; 815 } 816 817 // private static void setBit(long[] buf, int off, int n) 818 // { 819 // // theInt = n / 64 820 // int theInt = n >>> 6; 821 // // theBit = n % 64 822 // int theBit = n & 0x3F; 823 // long setter = 1L << theBit; 824 // buf[off + theInt] |= setter; 825 // } 826 // 827 // private static void clearBit(long[] buf, int off, int n) 828 // { 829 // // theInt = n / 64 830 // int theInt = n >>> 6; 831 // // theBit = n % 64 832 // int theBit = n & 0x3F; 833 // long setter = 1L << theBit; 834 // buf[off + theInt] &= ~setter; 835 // } 836 multiplyWord(long a, long[] b, int bLen, long[] c, int cOff)837 private static void multiplyWord(long a, long[] b, int bLen, long[] c, int cOff) 838 { 839 if ((a & 1L) != 0L) 840 { 841 add(c, cOff, b, 0, bLen); 842 } 843 int k = 1; 844 while ((a >>>= 1) != 0L) 845 { 846 if ((a & 1L) != 0L) 847 { 848 long carry = addShiftedUp(c, cOff, b, 0, bLen, k); 849 if (carry != 0L) 850 { 851 c[cOff + bLen] ^= carry; 852 } 853 } 854 ++k; 855 } 856 } 857 modMultiplyLD(LongArray other, int m, int[] ks)858 public LongArray modMultiplyLD(LongArray other, int m, int[] ks) 859 { 860 /* 861 * Find out the degree of each argument and handle the zero cases 862 */ 863 int aDeg = degree(); 864 if (aDeg == 0) 865 { 866 return this; 867 } 868 int bDeg = other.degree(); 869 if (bDeg == 0) 870 { 871 return other; 872 } 873 874 /* 875 * Swap if necessary so that A is the smaller argument 876 */ 877 LongArray A = this, B = other; 878 if (aDeg > bDeg) 879 { 880 A = other; B = this; 881 int tmp = aDeg; aDeg = bDeg; bDeg = tmp; 882 } 883 884 /* 885 * Establish the word lengths of the arguments and result 886 */ 887 int aLen = (aDeg + 63) >>> 6; 888 int bLen = (bDeg + 63) >>> 6; 889 int cLen = (aDeg + bDeg + 62) >>> 6; 890 891 if (aLen == 1) 892 { 893 long a0 = A.m_ints[0]; 894 if (a0 == 1L) 895 { 896 return B; 897 } 898 899 /* 900 * Fast path for small A, with performance dependent only on the number of set bits 901 */ 902 long[] c0 = new long[cLen]; 903 multiplyWord(a0, B.m_ints, bLen, c0, 0); 904 905 /* 906 * Reduce the raw answer against the reduction coefficients 907 */ 908 return reduceResult(c0, 0, cLen, m, ks); 909 } 910 911 /* 912 * Determine if B will get bigger during shifting 913 */ 914 int bMax = (bDeg + 7 + 63) >>> 6; 915 916 /* 917 * Lookup table for the offset of each B in the tables 918 */ 919 int[] ti = new int[16]; 920 921 /* 922 * Precompute table of all 4-bit products of B 923 */ 924 long[] T0 = new long[bMax << 4]; 925 int tOff = bMax; 926 ti[1] = tOff; 927 System.arraycopy(B.m_ints, 0, T0, tOff, bLen); 928 for (int i = 2; i < 16; ++i) 929 { 930 ti[i] = (tOff += bMax); 931 if ((i & 1) == 0) 932 { 933 shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1); 934 } 935 else 936 { 937 add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax); 938 } 939 } 940 941 /* 942 * Second table with all 4-bit products of B shifted 4 bits 943 */ 944 long[] T1 = new long[T0.length]; 945 shiftUp(T0, 0, T1, 0, T0.length, 4); 946 // shiftUp(T0, bMax, T1, bMax, tOff, 4); 947 948 long[] a = A.m_ints; 949 long[] c = new long[cLen]; 950 951 int MASK = 0xF; 952 953 /* 954 * Lopez-Dahab algorithm 955 */ 956 957 for (int k = 56; k >= 0; k -= 8) 958 { 959 for (int j = 1; j < aLen; j += 2) 960 { 961 int aVal = (int)(a[j] >>> k); 962 int u = aVal & MASK; 963 int v = (aVal >>> 4) & MASK; 964 addBoth(c, j - 1, T0, ti[u], T1, ti[v], bMax); 965 } 966 shiftUp(c, 0, cLen, 8); 967 } 968 969 for (int k = 56; k >= 0; k -= 8) 970 { 971 for (int j = 0; j < aLen; j += 2) 972 { 973 int aVal = (int)(a[j] >>> k); 974 int u = aVal & MASK; 975 int v = (aVal >>> 4) & MASK; 976 addBoth(c, j, T0, ti[u], T1, ti[v], bMax); 977 } 978 if (k > 0) 979 { 980 shiftUp(c, 0, cLen, 8); 981 } 982 } 983 984 /* 985 * Finally the raw answer is collected, reduce it against the reduction coefficients 986 */ 987 return reduceResult(c, 0, cLen, m, ks); 988 } 989 modMultiply(LongArray other, int m, int[] ks)990 public LongArray modMultiply(LongArray other, int m, int[] ks) 991 { 992 /* 993 * Find out the degree of each argument and handle the zero cases 994 */ 995 int aDeg = degree(); 996 if (aDeg == 0) 997 { 998 return this; 999 } 1000 int bDeg = other.degree(); 1001 if (bDeg == 0) 1002 { 1003 return other; 1004 } 1005 1006 /* 1007 * Swap if necessary so that A is the smaller argument 1008 */ 1009 LongArray A = this, B = other; 1010 if (aDeg > bDeg) 1011 { 1012 A = other; B = this; 1013 int tmp = aDeg; aDeg = bDeg; bDeg = tmp; 1014 } 1015 1016 /* 1017 * Establish the word lengths of the arguments and result 1018 */ 1019 int aLen = (aDeg + 63) >>> 6; 1020 int bLen = (bDeg + 63) >>> 6; 1021 int cLen = (aDeg + bDeg + 62) >>> 6; 1022 1023 if (aLen == 1) 1024 { 1025 long a0 = A.m_ints[0]; 1026 if (a0 == 1L) 1027 { 1028 return B; 1029 } 1030 1031 /* 1032 * Fast path for small A, with performance dependent only on the number of set bits 1033 */ 1034 long[] c0 = new long[cLen]; 1035 multiplyWord(a0, B.m_ints, bLen, c0, 0); 1036 1037 /* 1038 * Reduce the raw answer against the reduction coefficients 1039 */ 1040 return reduceResult(c0, 0, cLen, m, ks); 1041 } 1042 1043 /* 1044 * Determine if B will get bigger during shifting 1045 */ 1046 int bMax = (bDeg + 7 + 63) >>> 6; 1047 1048 /* 1049 * Lookup table for the offset of each B in the tables 1050 */ 1051 int[] ti = new int[16]; 1052 1053 /* 1054 * Precompute table of all 4-bit products of B 1055 */ 1056 long[] T0 = new long[bMax << 4]; 1057 int tOff = bMax; 1058 ti[1] = tOff; 1059 System.arraycopy(B.m_ints, 0, T0, tOff, bLen); 1060 for (int i = 2; i < 16; ++i) 1061 { 1062 ti[i] = (tOff += bMax); 1063 if ((i & 1) == 0) 1064 { 1065 shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1); 1066 } 1067 else 1068 { 1069 add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax); 1070 } 1071 } 1072 1073 /* 1074 * Second table with all 4-bit products of B shifted 4 bits 1075 */ 1076 long[] T1 = new long[T0.length]; 1077 shiftUp(T0, 0, T1, 0, T0.length, 4); 1078 // shiftUp(T0, bMax, T1, bMax, tOff, 4); 1079 1080 long[] a = A.m_ints; 1081 long[] c = new long[cLen << 3]; 1082 1083 int MASK = 0xF; 1084 1085 /* 1086 * Lopez-Dahab (Modified) algorithm 1087 */ 1088 1089 for (int aPos = 0; aPos < aLen; ++aPos) 1090 { 1091 long aVal = a[aPos]; 1092 int cOff = aPos; 1093 for (;;) 1094 { 1095 int u = (int)aVal & MASK; 1096 aVal >>>= 4; 1097 int v = (int)aVal & MASK; 1098 addBoth(c, cOff, T0, ti[u], T1, ti[v], bMax); 1099 aVal >>>= 4; 1100 if (aVal == 0L) 1101 { 1102 break; 1103 } 1104 cOff += cLen; 1105 } 1106 } 1107 1108 { 1109 int cOff = c.length; 1110 while ((cOff -= cLen) != 0) 1111 { 1112 addShiftedUp(c, cOff - cLen, c, cOff, cLen, 8); 1113 } 1114 } 1115 1116 /* 1117 * Finally the raw answer is collected, reduce it against the reduction coefficients 1118 */ 1119 return reduceResult(c, 0, cLen, m, ks); 1120 } 1121 modMultiplyAlt(LongArray other, int m, int[] ks)1122 public LongArray modMultiplyAlt(LongArray other, int m, int[] ks) 1123 { 1124 /* 1125 * Find out the degree of each argument and handle the zero cases 1126 */ 1127 int aDeg = degree(); 1128 if (aDeg == 0) 1129 { 1130 return this; 1131 } 1132 int bDeg = other.degree(); 1133 if (bDeg == 0) 1134 { 1135 return other; 1136 } 1137 1138 /* 1139 * Swap if necessary so that A is the smaller argument 1140 */ 1141 LongArray A = this, B = other; 1142 if (aDeg > bDeg) 1143 { 1144 A = other; B = this; 1145 int tmp = aDeg; aDeg = bDeg; bDeg = tmp; 1146 } 1147 1148 /* 1149 * Establish the word lengths of the arguments and result 1150 */ 1151 int aLen = (aDeg + 63) >>> 6; 1152 int bLen = (bDeg + 63) >>> 6; 1153 int cLen = (aDeg + bDeg + 62) >>> 6; 1154 1155 if (aLen == 1) 1156 { 1157 long a0 = A.m_ints[0]; 1158 if (a0 == 1L) 1159 { 1160 return B; 1161 } 1162 1163 /* 1164 * Fast path for small A, with performance dependent only on the number of set bits 1165 */ 1166 long[] c0 = new long[cLen]; 1167 multiplyWord(a0, B.m_ints, bLen, c0, 0); 1168 1169 /* 1170 * Reduce the raw answer against the reduction coefficients 1171 */ 1172 return reduceResult(c0, 0, cLen, m, ks); 1173 } 1174 1175 // NOTE: This works, but is slower than width 4 processing 1176 // if (aLen == 2) 1177 // { 1178 // /* 1179 // * Use common-multiplicand optimization to save ~1/4 of the adds 1180 // */ 1181 // long a1 = A.m_ints[0], a2 = A.m_ints[1]; 1182 // long aa = a1 & a2; a1 ^= aa; a2 ^= aa; 1183 // 1184 // long[] b = B.m_ints; 1185 // long[] c = new long[cLen]; 1186 // multiplyWord(aa, b, bLen, c, 1); 1187 // add(c, 0, c, 1, cLen - 1); 1188 // multiplyWord(a1, b, bLen, c, 0); 1189 // multiplyWord(a2, b, bLen, c, 1); 1190 // 1191 // /* 1192 // * Reduce the raw answer against the reduction coefficients 1193 // */ 1194 // return reduceResult(c, 0, cLen, m, ks); 1195 // } 1196 1197 /* 1198 * Determine the parameters of the interleaved window algorithm: the 'width' in bits to 1199 * process together, the number of evaluation 'positions' implied by that width, and the 1200 * 'top' position at which the regular window algorithm stops. 1201 */ 1202 int width, positions, top, banks; 1203 1204 // NOTE: width 4 is the fastest over the entire range of sizes used in current crypto 1205 // width = 1; positions = 64; top = 64; banks = 4; 1206 // width = 2; positions = 32; top = 64; banks = 4; 1207 // width = 3; positions = 21; top = 63; banks = 3; 1208 width = 4; positions = 16; top = 64; banks = 8; 1209 // width = 5; positions = 13; top = 65; banks = 7; 1210 // width = 7; positions = 9; top = 63; banks = 9; 1211 // width = 8; positions = 8; top = 64; banks = 8; 1212 1213 /* 1214 * Determine if B will get bigger during shifting 1215 */ 1216 int shifts = top < 64 ? positions : positions - 1; 1217 int bMax = (bDeg + shifts + 63) >>> 6; 1218 1219 int bTotal = bMax * banks, stride = width * banks; 1220 1221 /* 1222 * Create a single temporary buffer, with an offset table to find the positions of things in it 1223 */ 1224 int[] ci = new int[1 << width]; 1225 int cTotal = aLen; 1226 { 1227 ci[0] = cTotal; 1228 cTotal += bTotal; 1229 ci[1] = cTotal; 1230 for (int i = 2; i < ci.length; ++i) 1231 { 1232 cTotal += cLen; 1233 ci[i] = cTotal; 1234 } 1235 cTotal += cLen; 1236 } 1237 // NOTE: Provide a safe dump for "high zeroes" since we are adding 'bMax' and not 'bLen' 1238 ++cTotal; 1239 1240 long[] c = new long[cTotal]; 1241 1242 // Prepare A in interleaved form, according to the chosen width 1243 interleave(A.m_ints, 0, c, 0, aLen, width); 1244 1245 // Make a working copy of B, since we will be shifting it 1246 { 1247 int bOff = aLen; 1248 System.arraycopy(B.m_ints, 0, c, bOff, bLen); 1249 for (int bank = 1; bank < banks; ++bank) 1250 { 1251 shiftUp(c, aLen, c, bOff += bMax, bMax, bank); 1252 } 1253 } 1254 1255 /* 1256 * The main loop analyzes the interleaved windows in A, and for each non-zero window 1257 * a single word-array XOR is performed to a carefully selected slice of 'c'. The loop is 1258 * breadth-first, checking the lowest window in each word, then looping again for the 1259 * next higher window position. 1260 */ 1261 int MASK = (1 << width) - 1; 1262 1263 int k = 0; 1264 for (;;) 1265 { 1266 int aPos = 0; 1267 do 1268 { 1269 long aVal = c[aPos] >>> k; 1270 int bank = 0, bOff = aLen; 1271 for (;;) 1272 { 1273 int index = (int)(aVal) & MASK; 1274 if (index != 0) 1275 { 1276 /* 1277 * Add to a 'c' buffer based on the bit-pattern of 'index'. Since A is in 1278 * interleaved form, the bits represent the current B shifted by 0, 'positions', 1279 * 'positions' * 2, ..., 'positions' * ('width' - 1) 1280 */ 1281 add(c, aPos + ci[index], c, bOff, bMax); 1282 } 1283 if (++bank == banks) 1284 { 1285 break; 1286 } 1287 bOff += bMax; 1288 aVal >>>= width; 1289 } 1290 } 1291 while (++aPos < aLen); 1292 1293 if ((k += stride) >= top) 1294 { 1295 if (k >= 64) 1296 { 1297 break; 1298 } 1299 1300 /* 1301 * Adjustment for window setups with top == 63, the final bit (if any) is processed 1302 * as the top-bit of a window 1303 */ 1304 k = 64 - width; 1305 MASK &= MASK << (top - k); 1306 } 1307 1308 /* 1309 * After each position has been checked for all words of A, B is shifted up 1 place 1310 */ 1311 shiftUp(c, aLen, bTotal, banks); 1312 } 1313 1314 int ciPos = ci.length; 1315 while (--ciPos > 1) 1316 { 1317 if ((ciPos & 1L) == 0L) 1318 { 1319 /* 1320 * For even numbers, shift contents and add to the half-position 1321 */ 1322 addShiftedUp(c, ci[ciPos >>> 1], c, ci[ciPos], cLen, positions); 1323 } 1324 else 1325 { 1326 /* 1327 * For odd numbers, 'distribute' contents to the result and the next-lowest position 1328 */ 1329 distribute(c, ci[ciPos], ci[ciPos - 1], ci[1], cLen); 1330 } 1331 } 1332 1333 /* 1334 * Finally the raw answer is collected, reduce it against the reduction coefficients 1335 */ 1336 return reduceResult(c, ci[1], cLen, m, ks); 1337 } 1338 modReduce(int m, int[] ks)1339 public LongArray modReduce(int m, int[] ks) 1340 { 1341 long[] buf = Arrays.clone(m_ints); 1342 int rLen = reduceInPlace(buf, 0, buf.length, m, ks); 1343 return new LongArray(buf, 0, rLen); 1344 } 1345 multiply(LongArray other, int m, int[] ks)1346 public LongArray multiply(LongArray other, int m, int[] ks) 1347 { 1348 /* 1349 * Find out the degree of each argument and handle the zero cases 1350 */ 1351 int aDeg = degree(); 1352 if (aDeg == 0) 1353 { 1354 return this; 1355 } 1356 int bDeg = other.degree(); 1357 if (bDeg == 0) 1358 { 1359 return other; 1360 } 1361 1362 /* 1363 * Swap if necessary so that A is the smaller argument 1364 */ 1365 LongArray A = this, B = other; 1366 if (aDeg > bDeg) 1367 { 1368 A = other; B = this; 1369 int tmp = aDeg; aDeg = bDeg; bDeg = tmp; 1370 } 1371 1372 /* 1373 * Establish the word lengths of the arguments and result 1374 */ 1375 int aLen = (aDeg + 63) >>> 6; 1376 int bLen = (bDeg + 63) >>> 6; 1377 int cLen = (aDeg + bDeg + 62) >>> 6; 1378 1379 if (aLen == 1) 1380 { 1381 long a0 = A.m_ints[0]; 1382 if (a0 == 1L) 1383 { 1384 return B; 1385 } 1386 1387 /* 1388 * Fast path for small A, with performance dependent only on the number of set bits 1389 */ 1390 long[] c0 = new long[cLen]; 1391 multiplyWord(a0, B.m_ints, bLen, c0, 0); 1392 1393 /* 1394 * Reduce the raw answer against the reduction coefficients 1395 */ 1396 // return reduceResult(c0, 0, cLen, m, ks); 1397 return new LongArray(c0, 0, cLen); 1398 } 1399 1400 /* 1401 * Determine if B will get bigger during shifting 1402 */ 1403 int bMax = (bDeg + 7 + 63) >>> 6; 1404 1405 /* 1406 * Lookup table for the offset of each B in the tables 1407 */ 1408 int[] ti = new int[16]; 1409 1410 /* 1411 * Precompute table of all 4-bit products of B 1412 */ 1413 long[] T0 = new long[bMax << 4]; 1414 int tOff = bMax; 1415 ti[1] = tOff; 1416 System.arraycopy(B.m_ints, 0, T0, tOff, bLen); 1417 for (int i = 2; i < 16; ++i) 1418 { 1419 ti[i] = (tOff += bMax); 1420 if ((i & 1) == 0) 1421 { 1422 shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1); 1423 } 1424 else 1425 { 1426 add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax); 1427 } 1428 } 1429 1430 /* 1431 * Second table with all 4-bit products of B shifted 4 bits 1432 */ 1433 long[] T1 = new long[T0.length]; 1434 shiftUp(T0, 0, T1, 0, T0.length, 4); 1435 // shiftUp(T0, bMax, T1, bMax, tOff, 4); 1436 1437 long[] a = A.m_ints; 1438 long[] c = new long[cLen << 3]; 1439 1440 int MASK = 0xF; 1441 1442 /* 1443 * Lopez-Dahab (Modified) algorithm 1444 */ 1445 1446 for (int aPos = 0; aPos < aLen; ++aPos) 1447 { 1448 long aVal = a[aPos]; 1449 int cOff = aPos; 1450 for (;;) 1451 { 1452 int u = (int)aVal & MASK; 1453 aVal >>>= 4; 1454 int v = (int)aVal & MASK; 1455 addBoth(c, cOff, T0, ti[u], T1, ti[v], bMax); 1456 aVal >>>= 4; 1457 if (aVal == 0L) 1458 { 1459 break; 1460 } 1461 cOff += cLen; 1462 } 1463 } 1464 1465 { 1466 int cOff = c.length; 1467 while ((cOff -= cLen) != 0) 1468 { 1469 addShiftedUp(c, cOff - cLen, c, cOff, cLen, 8); 1470 } 1471 } 1472 1473 /* 1474 * Finally the raw answer is collected, reduce it against the reduction coefficients 1475 */ 1476 // return reduceResult(c, 0, cLen, m, ks); 1477 return new LongArray(c, 0, cLen); 1478 } 1479 reduce(int m, int[] ks)1480 public void reduce(int m, int[] ks) 1481 { 1482 long[] buf = m_ints; 1483 int rLen = reduceInPlace(buf, 0, buf.length, m, ks); 1484 if (rLen < buf.length) 1485 { 1486 m_ints = new long[rLen]; 1487 System.arraycopy(buf, 0, m_ints, 0, rLen); 1488 } 1489 } 1490 reduceResult(long[] buf, int off, int len, int m, int[] ks)1491 private static LongArray reduceResult(long[] buf, int off, int len, int m, int[] ks) 1492 { 1493 int rLen = reduceInPlace(buf, off, len, m, ks); 1494 return new LongArray(buf, off, rLen); 1495 } 1496 1497 // private static void deInterleave(long[] x, int xOff, long[] z, int zOff, int count, int rounds) 1498 // { 1499 // for (int i = 0; i < count; ++i) 1500 // { 1501 // z[zOff + i] = deInterleave(x[zOff + i], rounds); 1502 // } 1503 // } 1504 // 1505 // private static long deInterleave(long x, int rounds) 1506 // { 1507 // while (--rounds >= 0) 1508 // { 1509 // x = deInterleave32(x & DEINTERLEAVE_MASK) | (deInterleave32((x >>> 1) & DEINTERLEAVE_MASK) << 32); 1510 // } 1511 // return x; 1512 // } 1513 // 1514 // private static long deInterleave32(long x) 1515 // { 1516 // x = (x | (x >>> 1)) & 0x3333333333333333L; 1517 // x = (x | (x >>> 2)) & 0x0F0F0F0F0F0F0F0FL; 1518 // x = (x | (x >>> 4)) & 0x00FF00FF00FF00FFL; 1519 // x = (x | (x >>> 8)) & 0x0000FFFF0000FFFFL; 1520 // x = (x | (x >>> 16)) & 0x00000000FFFFFFFFL; 1521 // return x; 1522 // } 1523 reduceInPlace(long[] buf, int off, int len, int m, int[] ks)1524 private static int reduceInPlace(long[] buf, int off, int len, int m, int[] ks) 1525 { 1526 int mLen = (m + 63) >>> 6; 1527 if (len < mLen) 1528 { 1529 return len; 1530 } 1531 1532 int numBits = Math.min(len << 6, (m << 1) - 1); // TODO use actual degree? 1533 int excessBits = (len << 6) - numBits; 1534 while (excessBits >= 64) 1535 { 1536 --len; 1537 excessBits -= 64; 1538 } 1539 1540 int kLen = ks.length, kMax = ks[kLen - 1], kNext = kLen > 1 ? ks[kLen - 2] : 0; 1541 int wordWiseLimit = Math.max(m, kMax + 64); 1542 int vectorableWords = (excessBits + Math.min(numBits - wordWiseLimit, m - kNext)) >> 6; 1543 if (vectorableWords > 1) 1544 { 1545 int vectorWiseWords = len - vectorableWords; 1546 reduceVectorWise(buf, off, len, vectorWiseWords, m, ks); 1547 while (len > vectorWiseWords) 1548 { 1549 buf[off + --len] = 0L; 1550 } 1551 numBits = vectorWiseWords << 6; 1552 } 1553 1554 if (numBits > wordWiseLimit) 1555 { 1556 reduceWordWise(buf, off, len, wordWiseLimit, m, ks); 1557 numBits = wordWiseLimit; 1558 } 1559 1560 if (numBits > m) 1561 { 1562 reduceBitWise(buf, off, numBits, m, ks); 1563 } 1564 1565 return mLen; 1566 } 1567 reduceBitWise(long[] buf, int off, int bitlength, int m, int[] ks)1568 private static void reduceBitWise(long[] buf, int off, int bitlength, int m, int[] ks) 1569 { 1570 while (--bitlength >= m) 1571 { 1572 if (testBit(buf, off, bitlength)) 1573 { 1574 reduceBit(buf, off, bitlength, m, ks); 1575 } 1576 } 1577 } 1578 reduceBit(long[] buf, int off, int bit, int m, int[] ks)1579 private static void reduceBit(long[] buf, int off, int bit, int m, int[] ks) 1580 { 1581 flipBit(buf, off, bit); 1582 int n = bit - m; 1583 int j = ks.length; 1584 while (--j >= 0) 1585 { 1586 flipBit(buf, off, ks[j] + n); 1587 } 1588 flipBit(buf, off, n); 1589 } 1590 reduceWordWise(long[] buf, int off, int len, int toBit, int m, int[] ks)1591 private static void reduceWordWise(long[] buf, int off, int len, int toBit, int m, int[] ks) 1592 { 1593 int toPos = toBit >>> 6; 1594 1595 while (--len > toPos) 1596 { 1597 long word = buf[off + len]; 1598 if (word != 0) 1599 { 1600 buf[off + len] = 0; 1601 reduceWord(buf, off, (len << 6), word, m, ks); 1602 } 1603 } 1604 1605 { 1606 int partial = toBit & 0x3F; 1607 long word = buf[off + toPos] >>> partial; 1608 if (word != 0) 1609 { 1610 buf[off + toPos] ^= word << partial; 1611 reduceWord(buf, off, toBit, word, m, ks); 1612 } 1613 } 1614 } 1615 reduceWord(long[] buf, int off, int bit, long word, int m, int[] ks)1616 private static void reduceWord(long[] buf, int off, int bit, long word, int m, int[] ks) 1617 { 1618 int offset = bit - m; 1619 int j = ks.length; 1620 while (--j >= 0) 1621 { 1622 flipWord(buf, off, offset + ks[j], word); 1623 } 1624 flipWord(buf, off, offset, word); 1625 } 1626 reduceVectorWise(long[] buf, int off, int len, int words, int m, int[] ks)1627 private static void reduceVectorWise(long[] buf, int off, int len, int words, int m, int[] ks) 1628 { 1629 /* 1630 * NOTE: It's important we go from highest coefficient to lowest, because for the highest 1631 * one (only) we allow the ranges to partially overlap, and therefore any changes must take 1632 * effect for the subsequent lower coefficients. 1633 */ 1634 int baseBit = (words << 6) - m; 1635 int j = ks.length; 1636 while (--j >= 0) 1637 { 1638 flipVector(buf, off, buf, off + words, len - words, baseBit + ks[j]); 1639 } 1640 flipVector(buf, off, buf, off + words, len - words, baseBit); 1641 } 1642 flipVector(long[] x, int xOff, long[] y, int yOff, int yLen, int bits)1643 private static void flipVector(long[] x, int xOff, long[] y, int yOff, int yLen, int bits) 1644 { 1645 xOff += bits >>> 6; 1646 bits &= 0x3F; 1647 1648 if (bits == 0) 1649 { 1650 add(x, xOff, y, yOff, yLen); 1651 } 1652 else 1653 { 1654 long carry = addShiftedDown(x, xOff + 1, y, yOff, yLen, 64 - bits); 1655 x[xOff] ^= carry; 1656 } 1657 } 1658 modSquare(int m, int[] ks)1659 public LongArray modSquare(int m, int[] ks) 1660 { 1661 int len = getUsedLength(); 1662 if (len == 0) 1663 { 1664 return this; 1665 } 1666 1667 int _2len = len << 1; 1668 long[] r = new long[_2len]; 1669 1670 int pos = 0; 1671 while (pos < _2len) 1672 { 1673 long mi = m_ints[pos >>> 1]; 1674 r[pos++] = interleave2_32to64((int)mi); 1675 r[pos++] = interleave2_32to64((int)(mi >>> 32)); 1676 } 1677 1678 return new LongArray(r, 0, reduceInPlace(r, 0, r.length, m, ks)); 1679 } 1680 modSquareN(int n, int m, int[] ks)1681 public LongArray modSquareN(int n, int m, int[] ks) 1682 { 1683 int len = getUsedLength(); 1684 if (len == 0) 1685 { 1686 return this; 1687 } 1688 1689 int mLen = (m + 63) >>> 6; 1690 long[] r = new long[mLen << 1]; 1691 System.arraycopy(m_ints, 0, r, 0, len); 1692 1693 while (--n >= 0) 1694 { 1695 squareInPlace(r, len, m, ks); 1696 len = reduceInPlace(r, 0, r.length, m, ks); 1697 } 1698 1699 return new LongArray(r, 0, len); 1700 } 1701 square(int m, int[] ks)1702 public LongArray square(int m, int[] ks) 1703 { 1704 int len = getUsedLength(); 1705 if (len == 0) 1706 { 1707 return this; 1708 } 1709 1710 int _2len = len << 1; 1711 long[] r = new long[_2len]; 1712 1713 int pos = 0; 1714 while (pos < _2len) 1715 { 1716 long mi = m_ints[pos >>> 1]; 1717 r[pos++] = interleave2_32to64((int)mi); 1718 r[pos++] = interleave2_32to64((int)(mi >>> 32)); 1719 } 1720 1721 return new LongArray(r, 0, r.length); 1722 } 1723 squareInPlace(long[] x, int xLen, int m, int[] ks)1724 private static void squareInPlace(long[] x, int xLen, int m, int[] ks) 1725 { 1726 int pos = xLen << 1; 1727 while (--xLen >= 0) 1728 { 1729 long xVal = x[xLen]; 1730 x[--pos] = interleave2_32to64((int)(xVal >>> 32)); 1731 x[--pos] = interleave2_32to64((int)xVal); 1732 } 1733 } 1734 interleave(long[] x, int xOff, long[] z, int zOff, int count, int width)1735 private static void interleave(long[] x, int xOff, long[] z, int zOff, int count, int width) 1736 { 1737 switch (width) 1738 { 1739 case 3: 1740 interleave3(x, xOff, z, zOff, count); 1741 break; 1742 case 5: 1743 interleave5(x, xOff, z, zOff, count); 1744 break; 1745 case 7: 1746 interleave7(x, xOff, z, zOff, count); 1747 break; 1748 default: 1749 interleave2_n(x, xOff, z, zOff, count, bitLengths[width] - 1); 1750 break; 1751 } 1752 } 1753 interleave3(long[] x, int xOff, long[] z, int zOff, int count)1754 private static void interleave3(long[] x, int xOff, long[] z, int zOff, int count) 1755 { 1756 for (int i = 0; i < count; ++i) 1757 { 1758 z[zOff + i] = interleave3(x[xOff + i]); 1759 } 1760 } 1761 interleave3(long x)1762 private static long interleave3(long x) 1763 { 1764 long z = x & (1L << 63); 1765 return z 1766 | interleave3_21to63((int)x & 0x1FFFFF) 1767 | interleave3_21to63((int)(x >>> 21) & 0x1FFFFF) << 1 1768 | interleave3_21to63((int)(x >>> 42) & 0x1FFFFF) << 2; 1769 1770 // int zPos = 0, wPos = 0, xPos = 0; 1771 // for (;;) 1772 // { 1773 // z |= ((x >>> xPos) & 1L) << zPos; 1774 // if (++zPos == 63) 1775 // { 1776 // String sz2 = Long.toBinaryString(z); 1777 // return z; 1778 // } 1779 // if ((xPos += 21) >= 63) 1780 // { 1781 // xPos = ++wPos; 1782 // } 1783 // } 1784 } 1785 interleave3_21to63(int x)1786 private static long interleave3_21to63(int x) 1787 { 1788 int r00 = INTERLEAVE3_TABLE[x & 0x7F]; 1789 int r21 = INTERLEAVE3_TABLE[(x >>> 7) & 0x7F]; 1790 int r42 = INTERLEAVE3_TABLE[x >>> 14]; 1791 return (r42 & 0xFFFFFFFFL) << 42 | (r21 & 0xFFFFFFFFL) << 21 | (r00 & 0xFFFFFFFFL); 1792 } 1793 interleave5(long[] x, int xOff, long[] z, int zOff, int count)1794 private static void interleave5(long[] x, int xOff, long[] z, int zOff, int count) 1795 { 1796 for (int i = 0; i < count; ++i) 1797 { 1798 z[zOff + i] = interleave5(x[xOff + i]); 1799 } 1800 } 1801 interleave5(long x)1802 private static long interleave5(long x) 1803 { 1804 return interleave3_13to65((int)x & 0x1FFF) 1805 | interleave3_13to65((int)(x >>> 13) & 0x1FFF) << 1 1806 | interleave3_13to65((int)(x >>> 26) & 0x1FFF) << 2 1807 | interleave3_13to65((int)(x >>> 39) & 0x1FFF) << 3 1808 | interleave3_13to65((int)(x >>> 52) & 0x1FFF) << 4; 1809 1810 // long z = 0; 1811 // int zPos = 0, wPos = 0, xPos = 0; 1812 // for (;;) 1813 // { 1814 // z |= ((x >>> xPos) & 1L) << zPos; 1815 // if (++zPos == 64) 1816 // { 1817 // return z; 1818 // } 1819 // if ((xPos += 13) >= 64) 1820 // { 1821 // xPos = ++wPos; 1822 // } 1823 // } 1824 } 1825 interleave3_13to65(int x)1826 private static long interleave3_13to65(int x) 1827 { 1828 int r00 = INTERLEAVE5_TABLE[x & 0x7F]; 1829 int r35 = INTERLEAVE5_TABLE[x >>> 7]; 1830 return (r35 & 0xFFFFFFFFL) << 35 | (r00 & 0xFFFFFFFFL); 1831 } 1832 interleave7(long[] x, int xOff, long[] z, int zOff, int count)1833 private static void interleave7(long[] x, int xOff, long[] z, int zOff, int count) 1834 { 1835 for (int i = 0; i < count; ++i) 1836 { 1837 z[zOff + i] = interleave7(x[xOff + i]); 1838 } 1839 } 1840 interleave7(long x)1841 private static long interleave7(long x) 1842 { 1843 long z = x & (1L << 63); 1844 return z 1845 | INTERLEAVE7_TABLE[(int)x & 0x1FF] 1846 | INTERLEAVE7_TABLE[(int)(x >>> 9) & 0x1FF] << 1 1847 | INTERLEAVE7_TABLE[(int)(x >>> 18) & 0x1FF] << 2 1848 | INTERLEAVE7_TABLE[(int)(x >>> 27) & 0x1FF] << 3 1849 | INTERLEAVE7_TABLE[(int)(x >>> 36) & 0x1FF] << 4 1850 | INTERLEAVE7_TABLE[(int)(x >>> 45) & 0x1FF] << 5 1851 | INTERLEAVE7_TABLE[(int)(x >>> 54) & 0x1FF] << 6; 1852 1853 // int zPos = 0, wPos = 0, xPos = 0; 1854 // for (;;) 1855 // { 1856 // z |= ((x >>> xPos) & 1L) << zPos; 1857 // if (++zPos == 63) 1858 // { 1859 // return z; 1860 // } 1861 // if ((xPos += 9) >= 63) 1862 // { 1863 // xPos = ++wPos; 1864 // } 1865 // } 1866 } 1867 interleave2_n(long[] x, int xOff, long[] z, int zOff, int count, int rounds)1868 private static void interleave2_n(long[] x, int xOff, long[] z, int zOff, int count, int rounds) 1869 { 1870 for (int i = 0; i < count; ++i) 1871 { 1872 z[zOff + i] = interleave2_n(x[xOff + i], rounds); 1873 } 1874 } 1875 interleave2_n(long x, int rounds)1876 private static long interleave2_n(long x, int rounds) 1877 { 1878 while (rounds > 1) 1879 { 1880 rounds -= 2; 1881 x = interleave4_16to64((int)x & 0xFFFF) 1882 | interleave4_16to64((int)(x >>> 16) & 0xFFFF) << 1 1883 | interleave4_16to64((int)(x >>> 32) & 0xFFFF) << 2 1884 | interleave4_16to64((int)(x >>> 48) & 0xFFFF) << 3; 1885 } 1886 if (rounds > 0) 1887 { 1888 x = interleave2_32to64((int)x) | interleave2_32to64((int)(x >>> 32)) << 1; 1889 } 1890 return x; 1891 } 1892 interleave4_16to64(int x)1893 private static long interleave4_16to64(int x) 1894 { 1895 int r00 = INTERLEAVE4_TABLE[x & 0xFF]; 1896 int r32 = INTERLEAVE4_TABLE[x >>> 8]; 1897 return (r32 & 0xFFFFFFFFL) << 32 | (r00 & 0xFFFFFFFFL); 1898 } 1899 interleave2_32to64(int x)1900 private static long interleave2_32to64(int x) 1901 { 1902 int r00 = INTERLEAVE2_TABLE[x & 0xFF] | INTERLEAVE2_TABLE[(x >>> 8) & 0xFF] << 16; 1903 int r32 = INTERLEAVE2_TABLE[(x >>> 16) & 0xFF] | INTERLEAVE2_TABLE[x >>> 24] << 16; 1904 return (r32 & 0xFFFFFFFFL) << 32 | (r00 & 0xFFFFFFFFL); 1905 } 1906 1907 // private static LongArray expItohTsujii2(LongArray B, int n, int m, int[] ks) 1908 // { 1909 // LongArray t1 = B, t3 = new LongArray(new long[]{ 1L }); 1910 // int scale = 1; 1911 // 1912 // int numTerms = n; 1913 // while (numTerms > 1) 1914 // { 1915 // if ((numTerms & 1) != 0) 1916 // { 1917 // t3 = t3.modMultiply(t1, m, ks); 1918 // t1 = t1.modSquareN(scale, m, ks); 1919 // } 1920 // 1921 // LongArray t2 = t1.modSquareN(scale, m, ks); 1922 // t1 = t1.modMultiply(t2, m, ks); 1923 // numTerms >>>= 1; scale <<= 1; 1924 // } 1925 // 1926 // return t3.modMultiply(t1, m, ks); 1927 // } 1928 // 1929 // private static LongArray expItohTsujii23(LongArray B, int n, int m, int[] ks) 1930 // { 1931 // LongArray t1 = B, t3 = new LongArray(new long[]{ 1L }); 1932 // int scale = 1; 1933 // 1934 // int numTerms = n; 1935 // while (numTerms > 1) 1936 // { 1937 // boolean m03 = numTerms % 3 == 0; 1938 // boolean m14 = !m03 && (numTerms & 1) != 0; 1939 // 1940 // if (m14) 1941 // { 1942 // t3 = t3.modMultiply(t1, m, ks); 1943 // t1 = t1.modSquareN(scale, m, ks); 1944 // } 1945 // 1946 // LongArray t2 = t1.modSquareN(scale, m, ks); 1947 // t1 = t1.modMultiply(t2, m, ks); 1948 // 1949 // if (m03) 1950 // { 1951 // t2 = t2.modSquareN(scale, m, ks); 1952 // t1 = t1.modMultiply(t2, m, ks); 1953 // numTerms /= 3; scale *= 3; 1954 // } 1955 // else 1956 // { 1957 // numTerms >>>= 1; scale <<= 1; 1958 // } 1959 // } 1960 // 1961 // return t3.modMultiply(t1, m, ks); 1962 // } 1963 // 1964 // private static LongArray expItohTsujii235(LongArray B, int n, int m, int[] ks) 1965 // { 1966 // LongArray t1 = B, t4 = new LongArray(new long[]{ 1L }); 1967 // int scale = 1; 1968 // 1969 // int numTerms = n; 1970 // while (numTerms > 1) 1971 // { 1972 // if (numTerms % 5 == 0) 1973 // { 1974 //// t1 = expItohTsujii23(t1, 5, m, ks); 1975 // 1976 // LongArray t3 = t1; 1977 // t1 = t1.modSquareN(scale, m, ks); 1978 // 1979 // LongArray t2 = t1.modSquareN(scale, m, ks); 1980 // t1 = t1.modMultiply(t2, m, ks); 1981 // t2 = t1.modSquareN(scale << 1, m, ks); 1982 // t1 = t1.modMultiply(t2, m, ks); 1983 // 1984 // t1 = t1.modMultiply(t3, m, ks); 1985 // 1986 // numTerms /= 5; scale *= 5; 1987 // continue; 1988 // } 1989 // 1990 // boolean m03 = numTerms % 3 == 0; 1991 // boolean m14 = !m03 && (numTerms & 1) != 0; 1992 // 1993 // if (m14) 1994 // { 1995 // t4 = t4.modMultiply(t1, m, ks); 1996 // t1 = t1.modSquareN(scale, m, ks); 1997 // } 1998 // 1999 // LongArray t2 = t1.modSquareN(scale, m, ks); 2000 // t1 = t1.modMultiply(t2, m, ks); 2001 // 2002 // if (m03) 2003 // { 2004 // t2 = t2.modSquareN(scale, m, ks); 2005 // t1 = t1.modMultiply(t2, m, ks); 2006 // numTerms /= 3; scale *= 3; 2007 // } 2008 // else 2009 // { 2010 // numTerms >>>= 1; scale <<= 1; 2011 // } 2012 // } 2013 // 2014 // return t4.modMultiply(t1, m, ks); 2015 // } 2016 modInverse(int m, int[] ks)2017 public LongArray modInverse(int m, int[] ks) 2018 { 2019 /* 2020 * Fermat's Little Theorem 2021 */ 2022 // LongArray A = this; 2023 // LongArray B = A.modSquare(m, ks); 2024 // LongArray R0 = B, R1 = B; 2025 // for (int i = 2; i < m; ++i) 2026 // { 2027 // R1 = R1.modSquare(m, ks); 2028 // R0 = R0.modMultiply(R1, m, ks); 2029 // } 2030 // 2031 // return R0; 2032 2033 /* 2034 * Itoh-Tsujii 2035 */ 2036 // LongArray B = modSquare(m, ks); 2037 // switch (m) 2038 // { 2039 // case 409: 2040 // return expItohTsujii23(B, m - 1, m, ks); 2041 // case 571: 2042 // return expItohTsujii235(B, m - 1, m, ks); 2043 // case 163: 2044 // case 233: 2045 // case 283: 2046 // default: 2047 // return expItohTsujii2(B, m - 1, m, ks); 2048 // } 2049 2050 /* 2051 * Inversion in F2m using the extended Euclidean algorithm 2052 * 2053 * Input: A nonzero polynomial a(z) of degree at most m-1 2054 * Output: a(z)^(-1) mod f(z) 2055 */ 2056 int uzDegree = degree(); 2057 if (uzDegree == 0) 2058 { 2059 throw new IllegalStateException(); 2060 } 2061 if (uzDegree == 1) 2062 { 2063 return this; 2064 } 2065 2066 // u(z) := a(z) 2067 LongArray uz = (LongArray)clone(); 2068 2069 int t = (m + 63) >>> 6; 2070 2071 // v(z) := f(z) 2072 LongArray vz = new LongArray(t); 2073 reduceBit(vz.m_ints, 0, m, m, ks); 2074 2075 // g1(z) := 1, g2(z) := 0 2076 LongArray g1z = new LongArray(t); 2077 g1z.m_ints[0] = 1L; 2078 LongArray g2z = new LongArray(t); 2079 2080 int[] uvDeg = new int[]{ uzDegree, m + 1 }; 2081 LongArray[] uv = new LongArray[]{ uz, vz }; 2082 2083 int[] ggDeg = new int[]{ 1, 0 }; 2084 LongArray[] gg = new LongArray[]{ g1z, g2z }; 2085 2086 int b = 1; 2087 int duv1 = uvDeg[b]; 2088 int dgg1 = ggDeg[b]; 2089 int j = duv1 - uvDeg[1 - b]; 2090 2091 for (;;) 2092 { 2093 if (j < 0) 2094 { 2095 j = -j; 2096 uvDeg[b] = duv1; 2097 ggDeg[b] = dgg1; 2098 b = 1 - b; 2099 duv1 = uvDeg[b]; 2100 dgg1 = ggDeg[b]; 2101 } 2102 2103 uv[b].addShiftedByBitsSafe(uv[1 - b], uvDeg[1 - b], j); 2104 2105 int duv2 = uv[b].degreeFrom(duv1); 2106 if (duv2 == 0) 2107 { 2108 return gg[1 - b]; 2109 } 2110 2111 { 2112 int dgg2 = ggDeg[1 - b]; 2113 gg[b].addShiftedByBitsSafe(gg[1 - b], dgg2, j); 2114 dgg2 += j; 2115 2116 if (dgg2 > dgg1) 2117 { 2118 dgg1 = dgg2; 2119 } 2120 else if (dgg2 == dgg1) 2121 { 2122 dgg1 = gg[b].degreeFrom(dgg1); 2123 } 2124 } 2125 2126 j += (duv2 - duv1); 2127 duv1 = duv2; 2128 } 2129 } 2130 equals(Object o)2131 public boolean equals(Object o) 2132 { 2133 if (!(o instanceof LongArray)) 2134 { 2135 return false; 2136 } 2137 LongArray other = (LongArray) o; 2138 int usedLen = getUsedLength(); 2139 if (other.getUsedLength() != usedLen) 2140 { 2141 return false; 2142 } 2143 for (int i = 0; i < usedLen; i++) 2144 { 2145 if (m_ints[i] != other.m_ints[i]) 2146 { 2147 return false; 2148 } 2149 } 2150 return true; 2151 } 2152 hashCode()2153 public int hashCode() 2154 { 2155 int usedLen = getUsedLength(); 2156 int hash = 1; 2157 for (int i = 0; i < usedLen; i++) 2158 { 2159 long mi = m_ints[i]; 2160 hash *= 31; 2161 hash ^= (int)mi; 2162 hash *= 31; 2163 hash ^= (int)(mi >>> 32); 2164 } 2165 return hash; 2166 } 2167 clone()2168 public Object clone() 2169 { 2170 return new LongArray(Arrays.clone(m_ints)); 2171 } 2172 toString()2173 public String toString() 2174 { 2175 int i = getUsedLength(); 2176 if (i == 0) 2177 { 2178 return "0"; 2179 } 2180 2181 StringBuffer sb = new StringBuffer(Long.toBinaryString(m_ints[--i])); 2182 while (--i >= 0) 2183 { 2184 String s = Long.toBinaryString(m_ints[i]); 2185 2186 // Add leading zeroes, except for highest significant word 2187 int len = s.length(); 2188 if (len < 64) 2189 { 2190 sb.append(ZEROES.substring(len)); 2191 } 2192 2193 sb.append(s); 2194 } 2195 return sb.toString(); 2196 } 2197 }