• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
/**
 * Pinyin conversion options.
 *
 * @typedef {Object} Option
 * @property {Boolean} [checkPolyphone=false] Whether to check for polyphonic characters.
 * @property {Number} [charCase=0] Output pinyin case mode: 0 - first letter capitalized; 1 - all lowercase; 2 - all uppercase.
 */
24import { PinyinDict } from './PinyinDict';
25import { Log } from './Log';
26
27const TAG = 'Pinyin';
28
29class Pinyin {
30  private options;
31  private char_dict;
32  private full_dict;
33  private polyphone;
34
35  /**
36   * Constructor.
37   *
38   * @param {object} options - the options for chinese transform to pinyin
39   */
40  constructor(options) {
41    this.setOptions(options);
42    this.initialize();
43  }
44
45  /**
46   * set params.
47   *
48   * @param {object} options - the options for chinese transform to pinyin
49   */
50  setOptions(options) {
51    options = options || {};
52    this.options = Object.assign({ checkPolyphone: false, charCase: 0 }, options);
53  }
54
55  /**
56   * initialize data.
57   *
58   */
59  initialize() {
60    this.char_dict = PinyinDict.char_dict;
61    this.full_dict = PinyinDict.full_dict;
62    this.polyphone = PinyinDict.polyphone;
63  }
64
65  /**
66   * Get the initials of pinyin.
67   *
68   * @param {string} str - The input Chinese string
69   * @return {object} - result for CamelChars.
70   */
71  getCamelChars(str) {
72    if (typeof (str) != 'string') {
73      Log.showError(TAG, 'getCamelChars need string param!');
74      return;
75    }
76    const chars = [];
77    let i = 0;
78    while (i< str.length) {
79      //get unicode
80      const ch = str.charAt(i);
81      //Check whether the Unicode code is within the range of processing, if it returns the pinyin first letter of the Chinese character reflected by the code, if it is not, call other functions to process
82      chars.push(this.getChar(ch));
83      i++;
84    }
85
86    let result = this.getResult(chars);
87
88    switch (this.options.charCase) {
89      case 1:
90        result = result.toLowerCase();
91        break;
92      case 2:
93        result = result.toUpperCase();
94        break;
95      default: {};
96        break;
97    }
98    return result;
99  }
100
101  /**
102   * Get Pinyin.
103   *
104   * @param {string} str - The input Chinese string.
105   * @return {object} result for FullChars.
106   */
107  getFullChars(str) {
108    let result = '';
109    const reg = new RegExp('[a-zA-Z0-9\- ]');
110    let i = 0;
111    while (i < str.length) {
112      const ch = str.substr(i, 1);
113      const unicode = ch.charCodeAt(0);
114      if (unicode > 19968 && unicode < 40869) {
115        const name = this.getFullChar(ch);
116        if (name !== false) {
117          result += name;
118        }
119      }else {
120        result += ch;
121      }
122      i++;
123    }
124
125    switch (this.options.charCase) {
126      case 1:
127        result = result.toLowerCase();
128        break;
129      case 2:
130        result = result.toUpperCase();
131        break;
132      default: {};
133        break;
134    }
135    return result;
136  }
137
138  getFullChar(ch) {
139    for (const key in this.full_dict) {
140      if (this.full_dict[key].indexOf(ch) != -1) {
141        return this.capitalize(key);
142      }
143    }
144    return false;
145  }
146
147  capitalize(str) {
148    if (str.length <= 0) {
149      Log.showError(TAG, 'The length of str should be greater than 0!');
150      return;
151    }
152    const first = str.substr(0, 1).toUpperCase();
153    const spare = str.substr(1, str.length);
154    return first + spare;
155  }
156
157  getChar(ch) {
158    const unicode = ch.charCodeAt(0);
159    // Determine whether it is within the range of Chinese character processing
160    if (unicode > 19968 && unicode < 40869) {
161      //To check if it is polyphonic, it is polyphonic rather than looking for the corresponding letter in the string strChineseFirstPY
162      if (!this.options.checkPolyphone) {
163        return this.char_dict.charAt(unicode - 19968);
164      }
165      return this.polyphone[unicode] ? this.polyphone[unicode] : this.char_dict.charAt(unicode - 19968);
166    } else {
167      // If it is not a kanji, return an atomic string
168      return ch;
169    }
170  }
171
172  getResult(chars) {
173    if (!this.options.checkPolyphone) {
174      return chars.join('');
175    }
176    let result = [''];
177    let i= 0;
178    let len = chars.length;
179    while (i < len) {
180      const str = chars[i];
181      const strlen = str.length;
182      if (strlen == 1) {
183        for (let j = 0; j < result.length; j++) {
184          result[j] += str;
185        }
186      } else {
187        const swap1 = result.slice(0);
188        result = [];
189        for (let j = 0; j < strlen; j++) {
190          const swap2 = swap1.slice(0);
191          for (let k = 0; k < swap2.length; k++) {
192            swap2[k] += str.charAt(j);
193          }
194          result = result.concat(swap2);
195        }
196      }
197      i++;
198    }
199    return result;
200  }
201}
202
203export default Pinyin;