// Copyright 2013 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Tests the new String.prototype.normalize method.


// Common use case when searching for a 'not very exact' match.
// These are examples of data one might encounter in real use.
//
// Each assertion takes two spellings of the same text, normalizes both to
// NFKD and requires the results to be equal. The function is invoked
// immediately, so the checks run at load time and testRealUseCases itself
// holds the (undefined) return value.
var testRealUseCases = function() {
  // Vietnamese legacy text: old Windows 9x / non-Unicode applications use the
  // windows-1258 code page, which is neither precomposed nor decomposed.
  assertEquals('ti\u00ea\u0301ng Vi\u00ea\u0323t'.normalize('NFKD'),
    'ti\u1ebfng Vi\u1ec7t'.normalize('NFKD')); // all precomposed

  // Various kinds of spaces
  assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space
    'Google\u00a0Maps'.normalize('NFKD')); // non-breaking space
  assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space
    'Google\u2002Maps'.normalize('NFKD')); // en-space
  assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space
    'Google\u2003Maps'.normalize('NFKD')); // em-space
  // Same form on both sides, like every other pair in this function.
  assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space
    'Google\u3000Maps'.normalize('NFKD')); // ideographic space

  // Latin small ligature "fi"
  assertEquals('fi'.normalize('NFKD'), '\ufb01'.normalize('NFKD'));

  // ŀ, Latin small L with middle dot, used in Catalan and often represented
  // as decomposed for non-Unicode environments ( l + ·)
  assertEquals('l\u00b7'.normalize('NFKD'), '\u0140'.normalize('NFKD'));

  // Legacy text, Japanese narrow Kana (MS-DOS & Win 3.x time)
  assertEquals('\u30d1\u30bd\u30b3\u30f3'.normalize('NFKD'), // パソコン  :  wide
    '\uff8a\uff9f\uff7f\uff7a\uff9d'.normalize('NFKD')); // ﾊﾟｿｺﾝ  :  narrow
  // Also for Japanese, Latin fullwidth forms vs. ASCII
  assertEquals('ABCD'.normalize('NFKD'),
    '\uff21\uff22\uff23\uff24'.normalize('NFKD')); // ＡＢＣＤ, fullwidth
}();


// Error paths of String.prototype.normalize. The function is invoked
// immediately, so the checks run at load time.
//
// Each case is passed to assertThrows as a function literal rather than as a
// source string, so nothing has to be eval'd by the harness.
var testEdgeCases = function() {
  // Make sure we throw RangeError, as the standard requires.
  assertThrows(function() { ''.normalize(1234); }, RangeError);
  assertThrows(function() { ''.normalize('BAD'); }, RangeError);

  // For the remaining cases we only check that something is thrown; the
  // error type is deliberately left unspecified.
  assertThrows(function() { s.normalize(); }); // s is not defined
  assertThrows(function() { var s = null; s.normalize(); });
  assertThrows(function() { var s = undefined; s.normalize(); });
  // no normalize for non-strings
  assertThrows(function() { var s = 1234; s.normalize(); });
}();


// Several kinds of mappings. No need to be comprehensive, we don't test
// the ICU functionality, we only test C - JavaScript 'glue'
//
// Each row has five entries, in this order:
//   [0] original, [1] NFC, [2] NFD, [3] NFKC, [4] NFKD
var testData = [
  // original, NFC, NFD, NFKC, NFKD
  ['\u00c7', // Ç : Combining sequence, Latin 1
    '\u00c7', '\u0043\u0327',
    '\u00c7', '\u0043\u0327'],
  ['\u0218', // Ș : Combining sequence, non-Latin 1
    '\u0218', '\u0053\u0326',
    '\u0218', '\u0053\u0326'],
  ['\uac00', // 가 : Hangul
    '\uac00', '\u1100\u1161',
    '\uac00', '\u1100\u1161'],
  ['\uff76', // ｶ : Narrow Kana
    '\uff76', '\uff76',
    '\u30ab', '\u30ab'],
  ['\u00bc', // ¼ : Fractions
    '\u00bc', '\u00bc',
    '\u0031\u2044\u0034', '\u0031\u2044\u0034'],
  ['\u01c6', // ǆ : Latin ligature
    '\u01c6', '\u01c6',
    '\u0064\u017e', '\u0064\u007a\u030c'],
  ['s\u0307\u0323', // s + dot above + dot below, ordering of combining marks
    '\u1e69', 's\u0323\u0307',
    '\u1e69', 's\u0323\u0307'],
  ['\u3300', // ㌀ : Squared characters
    '\u3300', '\u3300',
    '\u30a2\u30d1\u30fc\u30c8', // アパート
    '\u30a2\u30cf\u309a\u30fc\u30c8'], // アパート, with パ decomposed
  ['\ufe37', // ︷ : Vertical forms
    '\ufe37', '\ufe37',
    '{', '{'],
  ['\u2079', // ⁹ : superscript 9
    '\u2079', '\u2079',
    '9', '9'],
  ['\ufee5\ufee6\ufee7\ufee8', // Arabic forms
    '\ufee5\ufee6\ufee7\ufee8', '\ufee5\ufee6\ufee7\ufee8',
    '\u0646\u0646\u0646\u0646', '\u0646\u0646\u0646\u0646'],
  ['\u2460', // ① : Circled
    '\u2460', '\u2460',
    '1', '1'],
  ['\u210c', // ℌ : Font variants
    '\u210c', '\u210c',
    'H', 'H'],
  ['\u2126', // Ω : Singleton, OHM sign vs. Greek capital letter OMEGA
    '\u03a9', '\u03a9',
    '\u03a9', '\u03a9'],
  ['\ufdfb', // Long ligature, ARABIC LIGATURE JALLAJALALOUHOU
    '\ufdfb', '\ufdfb',
    '\u062C\u0644\u0020\u062C\u0644\u0627\u0644\u0647',
    '\u062C\u0644\u0020\u062C\u0644\u0627\u0644\u0647']
];

// Runs every row of testData through String.prototype.normalize. The
// function is invoked immediately, so the checks run at load time.
//
// Row layout used below: index 0 is the original string, and kNFC, kNFD,
// kNFKC and kNFKD index the expected result for each normalization form.
var testArray = function() {
  var kNFC = 1, kNFD = 2, kNFKC = 3, kNFKD = 4;
  for (var i = 0; i < testData.length; ++i) {
    var row = testData[i];
    // the original, NFC and NFD should normalize to the same thing
    for (var column = 0; column < 3; ++column) {
      var str = row[column];
      // Expected value first, matching mjsunit's assertEquals(expected, found),
      // so a failure message labels the two values correctly.
      assertEquals(row[kNFC], str.normalize()); // defaults to NFC
      assertEquals(row[kNFC], str.normalize('NFC'));
      assertEquals(row[kNFD], str.normalize('NFD'));
      assertEquals(row[kNFKC], str.normalize('NFKC'));
      assertEquals(row[kNFKD], str.normalize('NFKD'));
    }
  }
}();
