/**
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Options accepted by the Pinyin converter.
 *
 * @typedef {Object} Option
 * @property {Boolean} [checkPolyphone=false] Whether to check for polyphonic characters.
 * @property {Number} [charCase=0] Output case mode: 0 - first letter capitalized;
 *     1 - all lowercase; 2 - all uppercase.
 */
import { PinyinDict } from './PinyinDict';

// Inclusive code-point range handled by the dictionaries. Index 0 of
// `char_dict` corresponds to CJK_FIRST (see `getChar`, which indexes the
// dictionary with `unicode - CJK_FIRST`).
const CJK_FIRST = 0x4e00; // 19968, '一'
const CJK_LAST = 0x9fa5; // 40869

/**
 * Tell whether a UTF-16 code unit lies in the range covered by the dictionaries.
 *
 * Both bounds are inclusive. The previous check (`> 19968 && < 40869`) skipped
 * the first and last characters of the range, so '一' was never converted.
 *
 * @param {number} unicode - UTF-16 code unit of the character
 * @return {boolean} true when the code unit is inside [CJK_FIRST, CJK_LAST]
 */
function isConvertibleChar(unicode) {
  return unicode >= CJK_FIRST && unicode <= CJK_LAST;
}

/**
 * Converts Chinese characters to pinyin, either as initials
 * (`getCamelChars`) or as full syllables (`getFullChars`).
 */
class Pinyin {
  private options;
  private char_dict;
  private full_dict;
  private polyphone;

  /**
   * Constructor.
   *
   * @param {object} options - the options for chinese transform to pinyin
   */
  constructor(options) {
    this.setOptions(options);
    this.initialize();
  }

  /**
   * Set the conversion options, filling in defaults for missing keys.
   *
   * @param {object} options - the options for chinese transform to pinyin
   */
  setOptions(options) {
    // Object.assign skips null/undefined sources, so no explicit fallback
    // object is needed and the parameter is not reassigned.
    this.options = Object.assign({ checkPolyphone: false, charCase: 0 }, options);
  }

  /**
   * Bind the lookup tables exported by PinyinDict to this instance.
   */
  initialize() {
    this.char_dict = PinyinDict.char_dict;
    this.full_dict = PinyinDict.full_dict;
    this.polyphone = PinyinDict.polyphone;
  }

  /**
   * Get the pinyin initials of a string.
   *
   * Characters outside the dictionary range are passed through unchanged.
   *
   * @param {string} str - The input Chinese string
   * @return {string|string[]} A single string when `checkPolyphone` is off;
   *     an array with every combination of candidate initials when it is on.
   * @throws {Error} when `str` is not a string
   */
  getCamelChars(str) {
    if (typeof str !== 'string') {
      throw new Error('getCamelChars need string param!');
    }
    const chars = [];
    for (let i = 0; i < str.length; i++) {
      chars.push(this.getChar(str.charAt(i)));
    }
    return this.applyCase(this.getResult(chars));
  }

  /**
   * Get the full pinyin of a string.
   *
   * Characters outside the dictionary range are passed through unchanged.
   * Characters inside the range that have no entry in `full_dict` are omitted
   * from the output.
   *
   * @param {string} str - The input Chinese string.
   * @return {string} The converted string, with case adjusted per `charCase`.
   */
  getFullChars(str) {
    let result = '';
    for (let i = 0; i < str.length; i++) {
      const ch = str.charAt(i);
      if (isConvertibleChar(ch.charCodeAt(0))) {
        const name = this.getFullChar(ch);
        if (name !== false) {
          result += name;
        }
      } else {
        result += ch;
      }
    }
    return this.applyCase(result);
  }

  /**
   * Look up the full pinyin syllable of one character.
   *
   * Scans `full_dict` and returns the first key whose value contains `ch`.
   *
   * @param {string} ch - A single character
   * @return {string|boolean} The capitalized syllable, or `false` when no
   *     entry contains the character.
   */
  getFullChar(ch) {
    for (const key in this.full_dict) {
      if (this.full_dict[key].indexOf(ch) !== -1) {
        return this.capitalize(key);
      }
    }
    return false;
  }

  /**
   * Upper-case the first character of a string.
   *
   * @param {string} str - A non-empty string
   * @return {string} `str` with its first character in upper case
   * @throws {Error} when `str` is empty
   */
  capitalize(str) {
    if (str.length <= 0) {
      throw new Error('The length of str should be greater than 0');
    }
    return str.charAt(0).toUpperCase() + str.slice(1);
  }

  /**
   * Get the pinyin initial(s) of one character.
   *
   * @param {string} ch - A single character
   * @return {string} `ch` itself when it is outside the dictionary range;
   *     otherwise the entry from `polyphone` (when `checkPolyphone` is on and
   *     an entry exists) or the initial from `char_dict`.
   */
  getChar(ch) {
    const unicode = ch.charCodeAt(0);
    if (!isConvertibleChar(unicode)) {
      return ch;
    }
    // charAt yields '' past the end of the dictionary; keep the original
    // character in that case rather than silently dropping it.
    const initial = this.char_dict.charAt(unicode - CJK_FIRST) || ch;
    if (!this.options.checkPolyphone) {
      return initial;
    }
    return this.polyphone[unicode] || initial;
  }

  /**
   * Combine per-character initials into the final result.
   *
   * @param {string[]} chars - One entry per input character; an entry longer
   *     than one character lists alternative initials for that position.
   * @return {string|string[]} The joined string when `checkPolyphone` is off;
   *     otherwise every combination obtained by picking one alternative per
   *     position (alternatives vary slowest, existing prefixes fastest).
   */
  getResult(chars) {
    if (!this.options.checkPolyphone) {
      return chars.join('');
    }
    let result = [''];
    for (let i = 0; i < chars.length; i++) {
      const candidates = chars[i];
      if (candidates.length <= 1) {
        // Single (or empty) candidate: extend every combination in place.
        // Treating '' here avoids wiping out all combinations.
        result = result.map((prefix) => prefix + candidates);
      } else {
        const expanded = [];
        for (let j = 0; j < candidates.length; j++) {
          const letter = candidates.charAt(j);
          for (let k = 0; k < result.length; k++) {
            expanded.push(result[k] + letter);
          }
        }
        result = expanded;
      }
    }
    return result;
  }

  /**
   * Apply the `charCase` option to a conversion result.
   *
   * Accepts both result shapes: a string, or the array of strings produced
   * in polyphone mode (calling toLowerCase/toUpperCase directly on the array
   * would throw).
   *
   * @param {string|string[]} result - The conversion result
   * @return {string|string[]} The result in the requested case; unchanged
   *     when `charCase` is neither 1 nor 2.
   */
  private applyCase(result) {
    let convert;
    switch (this.options.charCase) {
      case 1:
        convert = (text) => text.toLowerCase();
        break;
      case 2:
        convert = (text) => text.toUpperCase();
        break;
      default:
        return result;
    }
    return Array.isArray(result) ? result.map(convert) : convert(result);
  }
}

export default Pinyin;