• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16
/**
 * Options accepted by the Pinyin converter.
 *
 * @typedef {Object} Option
 * @property {Boolean} [checkPolyphone=false] Whether to check for polyphonic characters.
 * @property {Number} [charCase=0] Output pinyin case mode: 0 - capitalize the first letter; 1 - all lowercase; 2 - all uppercase.
 */
25import { PinyinDict } from './PinyinDict';
26
/**
 * Converts Chinese characters to pinyin, either as initials
 * ({@link Pinyin#getCamelChars}) or as full syllables
 * ({@link Pinyin#getFullChars}), using the tables provided by PinyinDict.
 */
class Pinyin {
  // First and last code units handled by the lookup tables (inclusive).
  // char_dict is indexed by `unicode - HANZI_FIRST`, so HANZI_FIRST itself
  // maps to index 0 and must be part of the accepted range.
  private static readonly HANZI_FIRST = 19968; // U+4E00
  private static readonly HANZI_LAST = 40869; // U+9FA5

  private options;
  private char_dict;
  private full_dict;
  private polyphone;

  /**
   * Constructor.
   *
   * @param {object} options - the options for chinese transform to pinyin
   */
  constructor(options) {
    this.setOptions(options);
    this.initialize();
  }

  /**
   * Store the conversion options, filling in defaults for missing keys.
   *
   * @param {object} [options] - the options for chinese transform to pinyin;
   *   a falsy value selects the defaults
   *   (`checkPolyphone: false`, `charCase: 0`).
   */
  setOptions(options) {
    this.options = Object.assign({ checkPolyphone: false, charCase: 0 }, options || {});
  }

  /**
   * Bind the lookup tables exported by PinyinDict to this instance.
   */
  initialize() {
    this.char_dict = PinyinDict.char_dict;
    this.full_dict = PinyinDict.full_dict;
    this.polyphone = PinyinDict.polyphone;
  }

  /**
   * Get the pinyin initials of a string.
   *
   * Characters outside the handled Chinese range are passed through as-is.
   *
   * @param {string} str - The input Chinese string
   * @return {string|string[]} - A single string when `checkPolyphone` is
   *   off; otherwise an array with every combination of candidate initials.
   *   The configured `charCase` is applied to the string or to each element.
   * @throws {Error} If `str` is not a string.
   */
  getCamelChars(str) {
    if (typeof str !== 'string') {
      throw new Error('getCamelChars need string param!');
    }
    const chars = [];
    for (let i = 0; i < str.length; i++) {
      chars.push(this.getChar(str.charAt(i)));
    }
    return this.applyCase(this.getResult(chars));
  }

  /**
   * Get the full pinyin of a string.
   *
   * Characters outside the handled Chinese range are copied unchanged;
   * characters inside the range that have no entry in the full dictionary
   * are omitted from the output.
   *
   * @param {string} str - The input Chinese string.
   * @return {string} The pinyin text with the configured `charCase` applied.
   */
  getFullChars(str) {
    let result = '';
    for (let i = 0; i < str.length; i++) {
      const ch = str.charAt(i);
      if (!this.isHanzi(ch.charCodeAt(0))) {
        result += ch;
        continue;
      }
      const name = this.getFullChar(ch);
      if (name !== false) {
        result += name;
      }
    }
    return this.applyCase(result);
  }

  /**
   * Look up the full pinyin syllable of a single character.
   *
   * @param {string} ch - A single character.
   * @return {string|boolean} The capitalized syllable of the first dictionary
   *   entry that contains `ch`, or `false` when no entry contains it.
   */
  getFullChar(ch) {
    const key = Object.keys(this.full_dict).find((name) => this.full_dict[name].includes(ch));
    return key === undefined ? false : this.capitalize(key);
  }

  /**
   * Upper-case the first character of a string.
   *
   * @param {string} str - A non-empty string.
   * @return {string} `str` with its first character upper-cased.
   * @throws {Error} If `str` is empty.
   */
  capitalize(str) {
    if (str.length <= 0) {
      throw new Error('The length of str should be greater than 0');
    }
    return str.charAt(0).toUpperCase() + str.slice(1);
  }

  /**
   * Get the pinyin initial(s) of a single character.
   *
   * @param {string} ch - A single character.
   * @return {string} `ch` itself when it is outside the handled range.
   *   Otherwise the initial from `char_dict`, or - when `checkPolyphone` is
   *   on and the character has a polyphone entry - that entry (one candidate
   *   initial per character of the returned string).
   */
  getChar(ch) {
    const unicode = ch.charCodeAt(0);
    if (!this.isHanzi(unicode)) {
      return ch;
    }
    // Fall back to the character itself if the table has no entry, so an
    // empty lookup can neither drop the character nor (in polyphone mode)
    // wipe every combination collected so far in getResult().
    const initial = this.char_dict.charAt(unicode - Pinyin.HANZI_FIRST) || ch;
    if (!this.options.checkPolyphone) {
      return initial;
    }
    return this.polyphone[unicode] || initial;
  }

  /**
   * Combine per-character initials into the final result.
   *
   * @param {string[]} chars - One entry per input character; an entry with
   *   several characters lists the candidate initials of a polyphone.
   * @return {string|string[]} The entries joined into one string when
   *   `checkPolyphone` is off; otherwise every combination obtained by
   *   picking one candidate per entry. An empty entry yields no combinations.
   */
  getResult(chars) {
    if (!this.options.checkPolyphone) {
      return chars.join('');
    }
    let result = [''];
    for (let i = 0; i < chars.length; i++) {
      const candidates = chars[i];
      const expanded = [];
      // Candidate-major order: all prefixes with the first candidate, then
      // all prefixes with the second one, and so on.
      for (let j = 0; j < candidates.length; j++) {
        const letter = candidates.charAt(j);
        for (let k = 0; k < result.length; k++) {
          expanded.push(result[k] + letter);
        }
      }
      result = expanded;
    }
    return result;
  }

  /**
   * Tell whether a UTF-16 code unit is in the range covered by the tables.
   *
   * @param {number} unicode - Code unit as returned by `charCodeAt`.
   * @return {boolean} True for HANZI_FIRST..HANZI_LAST, both inclusive.
   */
  private isHanzi(unicode) {
    return unicode >= Pinyin.HANZI_FIRST && unicode <= Pinyin.HANZI_LAST;
  }

  /**
   * Apply the configured `charCase` to a conversion result.
   *
   * @param {string|string[]} value - A result string, or the array of
   *   combinations produced in polyphone mode.
   * @return {string|string[]} `value` lower-cased (charCase 1), upper-cased
   *   (charCase 2) or unchanged (any other charCase); arrays are converted
   *   element by element.
   */
  private applyCase(value) {
    let convert;
    switch (this.options.charCase) {
      case 1:
        convert = (text) => text.toLowerCase();
        break;
      case 2:
        convert = (text) => text.toUpperCase();
        break;
      default:
        return value;
    }
    return Array.isArray(value) ? value.map(convert) : convert(value);
  }
}
196
197export default Pinyin;