/**
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Pinyin.
 *
 * @typedef Option.
 * @type Object.
 * @property {Boolean} [checkPolyphone=false] Whether to check for polyphonic words.
 * @property {Number} [charCase=0] Output pinyin case mode: 0 - first letter capitalized; 1 - all lowercase; 2 - all uppercase.
 */
import { PinyinDict } from './PinyinDict';
import { Log } from './Log';

const TAG = 'Pinyin';

/**
 * First and last UTF-16 code units that are looked up in the dictionaries
 * (19968 === 0x4E00, 40869 === 0x9FA5). `char_dict` is indexed by
 * `code - CJK_FIRST`, so index 0 belongs to CJK_FIRST itself and the range
 * has to be treated as inclusive.
 * NOTE(review): assumes PinyinDict.char_dict has an entry for every code in
 * this range - confirm against PinyinDict.
 */
const CJK_FIRST = 19968;
const CJK_LAST = 40869;

/**
 * Tell whether a code unit falls inside the range served by the dictionaries.
 *
 * @param {number} code - UTF-16 code unit (NaN for an empty string).
 * @return {boolean} true when CJK_FIRST <= code <= CJK_LAST.
 */
function isDictionaryChar(code) {
  return code >= CJK_FIRST && code <= CJK_LAST;
}

class Pinyin {
  private options;
  private char_dict;
  private full_dict;
  private polyphone;

  /**
   * Constructor.
   *
   * @param {object} options - the options for chinese transform to pinyin
   */
  constructor(options) {
    this.setOptions(options);
    this.initialize();
  }

  /**
   * Set params. Missing keys fall back to
   * `{ checkPolyphone: false, charCase: 0 }`.
   *
   * @param {object} options - the options for chinese transform to pinyin
   */
  setOptions(options) {
    this.options = Object.assign({ checkPolyphone: false, charCase: 0 }, options || {});
  }

  /**
   * Initialize data: bind the lookup tables exported by PinyinDict.
   */
  initialize() {
    this.char_dict = PinyinDict.char_dict;
    this.full_dict = PinyinDict.full_dict;
    this.polyphone = PinyinDict.polyphone;
  }

  /**
   * Get the initials of pinyin.
   *
   * Every character of `str` is mapped through getChar() and the pieces are
   * combined by getResult(), then converted according to `options.charCase`.
   *
   * @param {string} str - The input Chinese string
   * @return {string|string[]|undefined} A string when `checkPolyphone` is
   *   off, an array with one string per reading combination when it is on,
   *   or undefined (after logging an error) when `str` is not a string.
   */
  getCamelChars(str) {
    if (typeof str !== 'string') {
      Log.showError(TAG, 'getCamelChars need string param!');
      return undefined;
    }
    const chars = [];
    for (let i = 0; i < str.length; i++) {
      chars.push(this.getChar(str.charAt(i)));
    }
    // getResult() yields an array in polyphone mode; applyCase() handles both
    // shapes, so charCase 1/2 no longer throws on the array result.
    return this.applyCase(this.getResult(chars));
  }

  /**
   * Get Pinyin.
   *
   * Characters inside the dictionary range are replaced by their capitalized
   * full_dict key; in-range characters without an entry are dropped.
   * Characters outside the range are copied through unchanged. The result is
   * then converted according to `options.charCase`.
   *
   * @param {string} str - The input Chinese string.
   * @return {string} The converted text; '' (after logging an error) when
   *   `str` is not a string.
   */
  getFullChars(str) {
    if (typeof str !== 'string') {
      Log.showError(TAG, 'getFullChars need string param!');
      return '';
    }
    let result = '';
    for (let i = 0; i < str.length; i++) {
      const ch = str.charAt(i);
      if (!isDictionaryChar(ch.charCodeAt(0))) {
        result += ch;
        continue;
      }
      const name = this.getFullChar(ch);
      // getFullChar() yields false on a miss and capitalize() yields
      // undefined for an empty key; only real strings are appended.
      if (typeof name === 'string') {
        result += name;
      }
    }
    return this.applyCase(result);
  }

  /**
   * Find the full pinyin of one character.
   *
   * @param {string} ch - A single character.
   * @return {string|boolean|undefined} The capitalized key of the first
   *   full_dict entry whose value contains `ch`, or false when no entry
   *   contains it.
   */
  getFullChar(ch) {
    for (const key of Object.keys(this.full_dict)) {
      if (this.full_dict[key].indexOf(ch) !== -1) {
        return this.capitalize(key);
      }
    }
    return false;
  }

  /**
   * Upper-case the first character of a string.
   *
   * @param {string} str - The text to capitalize.
   * @return {string|undefined} The capitalized text, or undefined (after
   *   logging an error) when `str` is empty.
   */
  capitalize(str) {
    if (str.length <= 0) {
      Log.showError(TAG, 'The length of str should be greater than 0!');
      return undefined;
    }
    return str.charAt(0).toUpperCase() + str.slice(1);
  }

  /**
   * Map one character to its pinyin initial(s).
   *
   * @param {string} ch - A single character.
   * @return {string} `ch` itself when it is outside the dictionary range;
   *   otherwise the char_dict letter, or - when `checkPolyphone` is on and
   *   the polyphone table has a truthy entry for the code - that entry.
   */
  getChar(ch) {
    const unicode = ch.charCodeAt(0);
    if (!isDictionaryChar(unicode)) {
      return ch;
    }
    const initial = this.char_dict.charAt(unicode - CJK_FIRST);
    if (!this.options.checkPolyphone) {
      return initial;
    }
    return this.polyphone[unicode] ? this.polyphone[unicode] : initial;
  }

  /**
   * Combine the per-character pieces produced by getChar().
   *
   * @param {string[]} chars - One entry per input character; an entry longer
   *   than one letter lists the alternative initials of that character.
   * @return {string|string[]} The joined string when `checkPolyphone` is
   *   off; otherwise every combination obtained by picking one letter from
   *   each entry (all prefixes with the first alternative, then all prefixes
   *   with the second, and so on).
   */
  getResult(chars) {
    if (!this.options.checkPolyphone) {
      return chars.join('');
    }
    let result = [''];
    for (const piece of chars) {
      if (piece.length <= 1) {
        // Zero or one letter: extend every combination. An empty piece used
        // to fall into the branch below and wipe all combinations.
        result = result.map((prefix) => prefix + piece);
        continue;
      }
      const expanded = [];
      for (let j = 0; j < piece.length; j++) {
        const letter = piece.charAt(j);
        for (const prefix of result) {
          expanded.push(prefix + letter);
        }
      }
      result = expanded;
    }
    return result;
  }

  /**
   * Apply `options.charCase` (1 - lowercase, 2 - uppercase, anything else -
   * unchanged) to a string or to every string of an array.
   *
   * @param {string|string[]} value - Output of getResult() or getFullChars().
   * @return {string|string[]} The converted value, same shape as the input.
   */
  private applyCase(value) {
    const convert = (text) => {
      switch (this.options.charCase) {
        case 1:
          return text.toLowerCase();
        case 2:
          return text.toUpperCase();
        default:
          return text;
      }
    };
    return Array.isArray(value) ? value.map(convert) : convert(value);
  }
}

export default Pinyin;