• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Reverse lookup table for the jis0208 index, used by iso2022jpEncoder.
// It is a sparse array: the index is a Unicode code point and the value is
// the pointer (position) of that code point in `jis0208`.
// `jis0208` itself is not defined in this section and must already be loaded.
var jis0208CPs = [];
// `let`/`const` keep the loop bookkeeping out of the global scope; only
// `jis0208CPs` is meant to be visible to the rest of the page.
for (let pointer = 0; pointer < jis0208.length; pointer++) {
    const codePoint = jis0208[pointer];
    // Skip empty slots, and keep the FIRST pointer when a code point is
    // listed more than once in the index.
    if (codePoint != null && jis0208CPs[codePoint] == null) {
        jis0208CPs[codePoint] = pointer;
    }
}
9
// Half-width -> full-width katakana mapping table.
// The array index is a katakana index pointer; the value is the Unicode
// code point (decimal) stored at that pointer.
// The values were copied from the JSON version of the index that belongs to
// the Encoding spec.
var iso2022jpkatakana = [
    /*  0- 7 */ 12290, 12300, 12301, 12289, 12539, 12530, 12449, 12451,
    /*  8-15 */ 12453, 12455, 12457, 12515, 12517, 12519, 12483, 12540,
    /* 16-23 */ 12450, 12452, 12454, 12456, 12458, 12459, 12461, 12463,
    /* 24-31 */ 12465, 12467, 12469, 12471, 12473, 12475, 12477, 12479,
    /* 32-39 */ 12481, 12484, 12486, 12488, 12490, 12491, 12492, 12493,
    /* 40-47 */ 12494, 12495, 12498, 12501, 12504, 12507, 12510, 12511,
    /* 48-55 */ 12512, 12513, 12514, 12516, 12518, 12520, 12521, 12522,
    /* 56-62 */ 12523, 12524, 12525, 12527, 12531, 12443, 12444
];
78
/**
 * Converts a JavaScript string into an array of Unicode code point values.
 *
 * This is needed because JavaScript strings are sequences of UTF-16 code
 * units, so a supplementary character occupies two units (a surrogate pair).
 *
 * A high surrogate that is not followed by a low surrogate is reported with
 * alert() and left out of the result. This now also covers a high surrogate
 * at the very end of the input, which used to be dropped without any report.
 * A low surrogate with no preceding high surrogate is passed through as-is.
 *
 * @param {string} chars - a string of Unicode characters
 * @returns {number[]} the code points of `chars`, in order
 */
function chars2cps(chars) {
    var pendingHigh = 0; // high surrogate waiting for its low half, 0 if none
    var out = [];

    function reportUnpaired(high) {
        alert(
            "Error in chars2cps: surrogate out of range " +
                high.toString(16) +
                "!"
        );
    }

    // charCodeAt() always yields a value in 0..0xFFFF for a valid index, so
    // no separate range check on the code unit is required.
    for (var i = 0; i < chars.length; i++) {
        var unit = chars.charCodeAt(i);
        if (pendingHigh !== 0) {
            if (0xdc00 <= unit && unit <= 0xdfff) {
                // Combine the surrogate pair into one supplementary code point.
                out.push(
                    0x10000 + ((pendingHigh - 0xd800) << 10) + (unit - 0xdc00)
                );
                pendingHigh = 0;
                continue;
            }
            // The previous high surrogate had no partner: report and drop
            // it, then handle the current unit normally below.
            reportUnpaired(pendingHigh);
            pendingHigh = 0;
        }
        if (0xd800 <= unit && unit <= 0xdbff) {
            pendingHigh = unit;
        } else {
            out.push(unit);
        }
    }

    // A high surrogate as the last code unit can never be completed.
    if (pendingHigh !== 0) {
        reportUnpaired(pendingHigh);
    }
    return out;
}
114
/**
 * Encodes a string as ISO-2022-JP and returns the bytes as text.
 *
 * The encoder is a small state machine with the states "ascii", "roman" and
 * "jis0208". Whenever the next code point needs a different state, the
 * matching escape sequence is emitted, the state is changed, and the same
 * code point is examined again. The order of the checks appears to mirror
 * the ISO-2022-JP encoder steps of the Encoding spec.
 *
 * Relies on `chars2cps`, `iso2022jpkatakana` and `jis0208CPs` being defined
 * elsewhere in the page.
 *
 * The input is walked with a read cursor rather than shift()/unshift() on
 * the code point array, so "look at this code point again" costs nothing and
 * long inputs do not pay for repeated array re-indexing.
 *
 * @param {string} stream - the text to encode
 * @returns {?string} space-separated uppercase hex byte values (values below
 *     0x10 are not zero-padded), an empty string for empty input, or null if
 *     the text contains a code point that cannot be encoded
 */
function iso2022jpEncoder(stream) {
    const cps = chars2cps(stream);
    const bytes = []; // hex strings, joined with single spaces at the end
    let encState = "ascii";
    let pos = 0; // index of the code point currently being examined

    while (pos < cps.length) {
        let cp = cps[pos];

        // SO, SI and ESC cannot be encoded in the single-byte states.
        if (
            (encState === "ascii" || encState === "roman") &&
            (cp === 0x0e || cp === 0x0f || cp === 0x1b)
        ) {
            return null;
        }

        const isAscii = cp >= 0x00 && cp <= 0x7f;

        if (encState === "ascii" && isAscii) {
            bytes.push(cp.toString(16).toUpperCase());
            pos++;
            continue;
        }

        if (encState === "roman") {
            // In the roman state 0x5C and 0x7E stand for U+00A5 and U+203E,
            // so the ASCII code points 0x5C and 0x7E cannot be written here.
            if (isAscii && cp !== 0x5c && cp !== 0x7e) {
                bytes.push(cp.toString(16).toUpperCase());
                pos++;
                continue;
            }
            if (cp === 0xa5) {
                bytes.push("5C");
                pos++;
                continue;
            }
            if (cp === 0x203e) {
                bytes.push("7E");
                pos++;
                continue;
            }
        }

        // ASCII outside the ascii state: switch (ESC ( B) and re-examine.
        if (encState !== "ascii" && isAscii) {
            encState = "ascii";
            bytes.push("1B 28 42");
            continue;
        }

        // U+00A5 / U+203E outside the roman state: switch (ESC ( J) and
        // re-examine.
        if ((cp === 0xa5 || cp === 0x203e) && encState !== "roman") {
            encState = "roman";
            bytes.push("1B 28 4A");
            continue;
        }

        // U+2212 is encoded as U+FF0D.
        if (cp === 0x2212) {
            cp = 0xff0d;
        }
        // U+FF61..U+FF9F are replaced through the katakana table.
        if (cp >= 0xff61 && cp <= 0xff9f) {
            cp = iso2022jpkatakana[cp - 0xff61];
        }

        const ptr = jis0208CPs[cp];
        if (ptr == null) {
            // Not in the jis0208 lookup table: the text cannot be encoded.
            return null;
        }

        // Switch to jis0208 (ESC $ B) and re-examine. The two mappings
        // above give the same result when applied to the original code
        // point again, so the cursor simply stays where it is.
        if (encState !== "jis0208") {
            encState = "jis0208";
            bytes.push("1B 24 42");
            continue;
        }

        const lead = Math.floor(ptr / 94) + 0x21;
        const trail = (ptr % 94) + 0x21;
        bytes.push(lead.toString(16).toUpperCase());
        bytes.push(trail.toString(16).toUpperCase());
        pos++;
    }

    // End of input: always finish in the ascii state.
    if (encState !== "ascii") {
        bytes.push("1B 28 42");
    }
    return bytes.join(" ");
}
206
/**
 * Converts a string of ASCII characters to hex byte codes.
 *
 * Each UTF-16 code unit of `str` is written as uppercase hexadecimal
 * followed by one space, so the result ends with a trailing space.
 * Values below 0x10 are not zero-padded.
 *
 * @param {string} str - the text to convert
 * @returns {string} e.g. "41 5A " for "AZ"; "" for an empty string
 */
function convertToHex(str) {
    const pieces = Array.from({ length: str.length }, function (_, index) {
        const hex = str.charCodeAt(index).toString(16);
        return hex.toUpperCase() + " ";
    });
    return pieces.join("");
}
221
/**
 * Brings a (partly) percent-encoded string into one canonical form so that
 * two encodings of the same bytes compare equal.
 *
 * Step 1: every "%" followed by two characters that are not "%" and that
 *         parse as a hexadecimal number is replaced by the character with
 *         that code. A "%" that is not followed by a hex value (for example
 *         a "%" at the very end, or "%zz") is kept as a literal "%" instead
 *         of throwing a RangeError from String.fromCodePoint(NaN).
 * Step 2: every code unit of the result is written as "%" plus its uppercase
 *         hex value (values below 0x10 are not zero-padded, matching the
 *         unpadded byte format used by the encoder in this file).
 * Step 3: a trailing "%1B%28%42" is removed. Those are the bytes the encoder
 *         in this file emits to return to its "ascii" state.
 *
 * @param {string} str - text that may contain %XX escapes
 * @returns {string} the fully percent-encoded, normalized form
 */
function normalizeStr(str) {
    let decoded = "";
    for (let c = 0; c < str.length; c++) {
        const ch = str.charAt(c);
        if (
            ch == "%" &&
            str.charAt(c + 1) != "%" &&
            str.charAt(c + 2) != "%"
        ) {
            const value = parseInt(str.charAt(c + 1) + str.charAt(c + 2), 16);
            // NaN (no hex digits) and negative values ("-1") fail this test
            // and fall through to the literal branch below.
            if (value >= 0) {
                decoded += String.fromCodePoint(value);
                c += 2; // skip the two characters that were consumed
                continue;
            }
        }
        decoded += ch;
    }

    let result = "";
    for (let o = 0; o < decoded.length; o++) {
        result += "%" + decoded.charCodeAt(o).toString(16).toUpperCase();
    }
    return result.replace(/%1B%28%42$/, "");
}
247