// Set up a sparse array of all Unicode code points listed in the index.
// This is used for lookup in iso2022jpEncoder.
// `jis0208` is expected to be defined before this script runs; it is read
// as an array whose index is a pointer and whose value is a Unicode code
// point (or null for an unassigned pointer).
// NOTE: top-level `var` is kept on purpose so the tables remain ordinary
// script globals, exactly as before.
var jis0208CPs = []; // index is unicode cp, value is pointer
for (var p = 0; p < jis0208.length; p++) {
  // When a code point appears under several pointers, the first pointer wins.
  if (jis0208[p] != null && jis0208CPs[jis0208[p]] == null) {
    jis0208CPs[jis0208[p]] = p;
  }
}

// Set up mappings for half/full width katakana.
// Index is a katakana index pointer, value is Unicode code point (decimal).
// This is copy-pasted from the JSON version of the index belonging to the
// Encoding spec. iso2022jpEncoder indexes it with (cp - 0xFF61) for code
// points in the range U+FF61..U+FF9F.
var iso2022jpkatakana = [
  12290, 12300, 12301, 12289, 12539, 12530, 12449, 12451,
  12453, 12455, 12457, 12515, 12517, 12519, 12483, 12540,
  12450, 12452, 12454, 12456, 12458, 12459, 12461, 12463,
  12465, 12467, 12469, 12471, 12473, 12475, 12477, 12479,
  12481, 12484, 12486, 12488, 12490, 12491, 12492, 12493,
  12494, 12495, 12498, 12501, 12504, 12507, 12510, 12511,
  12512, 12513, 12514, 12516, 12518, 12520, 12521, 12522,
  12523, 12524, 12525, 12527, 12531, 12443, 12444
];

/**
 * Converts a string into an array of Unicode code point values.
 *
 * This is needed because JavaScript strings are sequences of UTF-16 code
 * units, so a supplementary character occupies two of them. The string
 * iterator combines each valid surrogate pair into one code point.
 *
 * An unpaired surrogate is returned as its own code unit value instead of
 * being dropped, so no input is silently lost; iso2022jpEncoder then
 * rejects it like any other code point missing from the index.
 *
 * @param {string} chars - a string of Unicode characters
 * @returns {number[]} code point values, in input order
 */
function chars2cps(chars) {
  return Array.from(chars, (ch) => ch.codePointAt(0));
}

/**
 * Encodes a string as ISO-2022-JP and returns the bytes as text.
 *
 * The encoder keeps a state ("ascii", "roman" or "jis0208"). Whenever the
 * next code point cannot be written in the current state, the matching
 * escape sequence is emitted, the state changes, and the same code point is
 * examined again. At the end of the input the state is returned to "ascii".
 *
 * @param {string} stream - the text to encode
 * @returns {?string} upper-case hex byte values separated by single spaces
 *   (values below 0x10 are NOT zero-padded), or null if any code point
 *   cannot be encoded
 */
function iso2022jpEncoder(stream) {
  const cps = chars2cps(stream);
  let out = "";
  let encState = "ascii";
  let i = 0; // cursor into cps; only advanced once a code point is emitted

  while (i < cps.length) {
    const cp = cps[i];
    const isAscii = cp >= 0x00 && cp <= 0x7f;

    // SO, SI and ESC are rejected while in the ascii or roman state.
    if (
      (encState === "ascii" || encState === "roman") &&
      (cp === 0x0e || cp === 0x0f || cp === 0x1b)
    ) {
      return null;
    }

    if (encState === "ascii" && isAscii) {
      out += " " + cp.toString(16).toUpperCase();
      i++;
      continue;
    }

    if (encState === "roman") {
      // In the roman state 0x5C and 0x7E are not written directly; those
      // byte values are produced by U+00A5 and U+203E instead.
      if (isAscii && cp !== 0x5c && cp !== 0x7e) {
        out += " " + cp.toString(16).toUpperCase();
        i++;
        continue;
      }
      if (cp === 0xa5) {
        out += " 5C";
        i++;
        continue;
      }
      if (cp === 0x203e) {
        out += " 7E";
        i++;
        continue;
      }
    }

    // An ASCII code point that was not written above: switch to the ascii
    // state and look at the same code point again.
    if (encState !== "ascii" && isAscii) {
      encState = "ascii";
      out += " 1B 28 42";
      continue;
    }

    // U+00A5 / U+203E outside the roman state: switch to roman and retry.
    if ((cp === 0xa5 || cp === 0x203e) && encState !== "roman") {
      encState = "roman";
      out += " 1B 28 4A";
      continue;
    }

    // Everything else has to come from the jis0208 index.
    let lookup = cp;
    if (lookup === 0x2212) {
      lookup = 0xff0d;
    }
    if (lookup >= 0xff61 && lookup <= 0xff9f) {
      lookup = iso2022jpkatakana[lookup - 0xff61];
    }
    const ptr = jis0208CPs[lookup];
    if (ptr == null) {
      // Not in the index: unencodable. This is checked before any escape
      // sequence for the jis0208 state is written.
      return null;
    }

    if (encState !== "jis0208") {
      encState = "jis0208";
      out += " 1B 24 42";
      continue;
    }

    const lead = Math.floor(ptr / 94) + 0x21;
    const trail = (ptr % 94) + 0x21;
    out +=
      " " +
      lead.toString(16).toUpperCase() +
      " " +
      trail.toString(16).toUpperCase();
    i++;
  }

  // End of input: return to the ascii state if we are not already in it.
  if (encState !== "ascii") {
    out += " 1B 28 42";
  }
  return out.trim();
}

/**
 * Converts a string of ASCII characters to hex byte codes.
 *
 * @param {string} str - the characters to convert
 * @returns {string} the upper-case hex value of every UTF-16 code unit,
 *   each followed by one space (so the result ends with a space); values
 *   are not zero-padded
 */
function convertToHex(str) {
  let out = "";
  for (let c = 0; c < str.length; c++) {
    out += str.charCodeAt(c).toString(16).toUpperCase() + " ";
  }
  return out;
}

/**
 * Normalizes a partially percent-encoded string to a fully percent-encoded
 * one so that two such strings can be compared.
 *
 * Every "%XX" escape (exactly two hex digits) is decoded first; then every
 * UTF-16 code unit of the decoded text is written as "%" followed by its
 * upper-case hex value (not zero-padded). Finally a trailing "%1B%28%42"
 * is removed.
 *
 * A "%" that is not followed by two hex digits is kept as a literal "%"
 * rather than being passed to String.fromCodePoint, which would throw.
 *
 * @param {string} str - the string to normalize
 * @returns {string} the normalized percent-encoded string
 */
function normalizeStr(str) {
  let decoded = "";
  for (let c = 0; c < str.length; c++) {
    const hexPair = str.slice(c + 1, c + 3);
    if (str.charAt(c) === "%" && /^[0-9A-Fa-f]{2}$/.test(hexPair)) {
      decoded += String.fromCodePoint(parseInt(hexPair, 16));
      c += 2; // skip the two hex digits that were just consumed
    } else {
      decoded += str.charAt(c);
    }
  }

  let result = "";
  for (let o = 0; o < decoded.length; o++) {
    result += "%" + decoded.charCodeAt(o).toString(16).toUpperCase();
  }
  return result.replace(/%1B%28%42$/, "");
}