• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"use strict"
2
3var defaults = require('defaults')
4var combining = require('./combining')
5
// Widths used when the caller supplies no configuration of its own:
//   nul     - width reported for the null character (U+0000)
//   control - width reported for other C0/C1 control characters and DEL
var DEFAULTS = { nul: 0, control: 0 }
10
11module.exports = function wcwidth(str) {
12  return wcswidth(str, DEFAULTS)
13}
14
/**
 * Build a measuring function bound to custom options.
 *
 * The supplied options (or an empty object when none are given) are passed
 * through the `defaults` helper together with DEFAULTS; the result is used
 * for every call of the returned function.
 *
 * @param {Object} [opts] - overrides such as `nul` and `control`.
 * @returns {function(string|number): number} a wcwidth-style function.
 */
module.exports.config = function(opts) {
  var settings = defaults(opts || {}, DEFAULTS)

  function wcwidth(str) {
    return wcswidth(str, settings)
  }

  return wcwidth
}
21
/*
 *  The following functions define the column width of an ISO 10646
 *  character as follows:
 *  - The null character (U+0000) has the configured `nul` width
 *    (0 by default).
 *  - Other C0/C1 control characters and DEL have the configured
 *    `control` width (0 by default). Configuring a negative `control`
 *    width, e.g. -1, makes any string containing such a character
 *    measure as -1.
 *  - Non-spacing and enclosing combining characters (general category
 *    code Mn or Me in the Unicode database) have a column width of 0.
 *  - SOFT HYPHEN (U+00AD) has a column width of 1.
 *  - Other format characters (general category code Cf in the Unicode
 *    database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
 *  - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
 *    have a column width of 0.
 *  - Spacing characters in the East Asian Wide (W) or East Asian
 *    Full-width (F) category as defined in Unicode Technical Report #11
 *    have a column width of 2.
 *  - All remaining characters (including all printable ISO 8859-1 and
 *    WGL4 characters, Unicode control characters, etc.) have a column
 *    width of 1.
 *  This implementation assumes that characters are encoded in ISO 10646.
 */
45
/**
 * Sum the column widths of every UTF-16 code unit in a string.
 *
 * The string is walked with charCodeAt, one code unit at a time, so a
 * character outside the Basic Multilingual Plane is measured as its two
 * surrogate code units rather than as a single code point.
 *
 * @param {string|number} str - text to measure; a non-string value is
 *   forwarded as-is to wcwidth and treated as a single character code.
 * @param {Object} opts - options forwarded to wcwidth.
 * @returns {number} the accumulated width, or -1 as soon as any code unit
 *   reports a negative width.
 */
function wcswidth(str, opts) {
  if (typeof str !== 'string') {
    return wcwidth(str, opts)
  }

  var total = 0
  var index = 0
  while (index < str.length) {
    var cell = wcwidth(str.charCodeAt(index), opts)
    if (cell < 0) {
      // A single unprintable unit makes the whole string unmeasurable.
      return -1
    }
    total += cell
    index += 1
  }

  return total
}
58
/**
 * Column width of a single character code.
 *
 * @param {number} ucs - character code to classify.
 * @param {Object} opts - `opts.nul` is returned for code 0 and
 *   `opts.control` for the remaining C0/C1 control codes and DEL.
 * @returns {number} the configured value for nul/control codes, 0 when
 *   bisearch finds the code in the combining table, 2 for the wide
 *   ranges listed below, otherwise 1.
 */
function wcwidth(ucs, opts) {
  // 8-bit control characters are answered from the options
  if (ucs === 0) return opts.nul

  var isControl = ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)
  if (isControl) return opts.control

  // non-spacing characters are looked up in the combining table
  if (bisearch(ucs)) return 0

  // From here on ucs is neither combining nor a C0/C1 control character.
  // Nothing below 0x1100 is wide. Written as a negated >= so that values
  // which fail every comparison fall through to width 1.
  if (!(ucs >= 0x1100)) return 1

  // Hangul Jamo init. consonants
  if (ucs <= 0x115f) return 2

  // Loose equality is kept on purpose so coercion of non-number input
  // behaves exactly as before.
  if (ucs == 0x2329 || ucs == 0x232a) return 2

  // CJK ... Yi, except U+303F
  if (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) return 2

  // Hangul Syllables
  if (ucs >= 0xac00 && ucs <= 0xd7a3) return 2

  // CJK Compatibility Ideographs
  if (ucs >= 0xf900 && ucs <= 0xfaff) return 2

  // Vertical forms
  if (ucs >= 0xfe10 && ucs <= 0xfe19) return 2

  // CJK Compatibility Forms
  if (ucs >= 0xfe30 && ucs <= 0xfe6f) return 2

  // Fullwidth Forms
  if (ucs >= 0xff00 && ucs <= 0xff60) return 2
  if (ucs >= 0xffe0 && ucs <= 0xffe6) return 2

  // Supplementary planes 2 and 3
  if (ucs >= 0x20000 && ucs <= 0x2fffd) return 2
  if (ucs >= 0x30000 && ucs <= 0x3fffd) return 2

  return 1
}
83
/**
 * Report whether a character code falls inside any range of the
 * `combining` table.
 *
 * NOTE(review): the binary search assumes `combining` is a non-empty array
 * of [first, last] pairs sorted in ascending order - confirm against
 * ./combining.
 *
 * @param {number} ucs - character code to look up.
 * @returns {boolean} true when some range contains `ucs`, false otherwise.
 */
function bisearch(ucs) {
  var last = combining.length - 1

  // Cheap rejection of anything outside the span covered by the table.
  if (ucs < combining[0][0] || ucs > combining[last][1]) {
    return false
  }

  var lo = 0
  var hi = last
  while (lo <= hi) {
    var middle = Math.floor((lo + hi) / 2)
    var range = combining[middle]

    if (ucs > range[1]) {
      // target lies above this range: continue in the upper half
      lo = middle + 1
    } else if (ucs < range[0]) {
      // target lies below this range: continue in the lower half
      hi = middle - 1
    } else {
      return true
    }
  }

  return false
}
100