1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "ui/base/ime/chromeos/character_composer.h"
6
7 #include <algorithm>
8 #include <iterator>
9
10 #include "base/strings/utf_string_conversions.h"
11 #include "base/third_party/icu/icu_utf.h"
12 // Note for Gtk removal: gdkkeysyms.h only contains a set of
13 // '#define GDK_KeyName 0xNNNN' macros and does not #include any Gtk headers.
14 #include "third_party/gtk+/gdk/gdkkeysyms.h"
15
16 #include "ui/base/glib/glib_integers.h"
17 #include "ui/events/event.h"
18 #include "ui/events/keycodes/keyboard_codes.h"
19
20 // Note for Gtk removal: gtkimcontextsimpleseqs.h does not #include any Gtk
21 // headers and only contains one big guint16 array |gtk_compose_seqs_compact|
22 // which defines the main compose table. The table has internal linkage.
23 // The order of header inclusion is out of order because
24 // gtkimcontextsimpleseqs.h depends on guint16, which is defined in
25 // "ui/base/glib/glib_integers.h".
26 #include "third_party/gtk+/gtk/gtkimcontextsimpleseqs.h"
27
28 namespace {
29
30 // A black list for not composing dead keys. Once the key combination is listed
31 // below, the dead key won't work even when this is listed in
32 // gtkimcontextsimpleseqs.h. This only supports two keyevent sequenses.
33 // TODO(nona): Remove this hack.
34 const struct BlackListedDeadKey {
35 uint32 first_key; // target first key event.
36 uint32 second_key; // target second key event.
37 uint32 output_char; // the character to be inserted if the filter is matched.
38 bool consume; // true if the original key event will be consumed.
39 } kBlackListedDeadKeys[] = {
40 { GDK_KEY_dead_acute, GDK_KEY_m, GDK_KEY_apostrophe, false },
41 { GDK_KEY_dead_acute, GDK_KEY_s, GDK_KEY_apostrophe, false },
42 { GDK_KEY_dead_acute, GDK_KEY_t, GDK_KEY_apostrophe, false },
43 { GDK_KEY_dead_acute, GDK_KEY_v, GDK_KEY_apostrophe, false },
44 { GDK_KEY_dead_acute, GDK_KEY_dead_acute, GDK_KEY_apostrophe, true },
45 };
46
// The sequence of key values typed so far; this is the lookup key passed to
// the compose-table checkers below.
typedef std::vector<unsigned int> ComposeBufferType;
48
49 // An iterator class to apply std::lower_bound for composition table.
50 class SequenceIterator
51 : public std::iterator<std::random_access_iterator_tag, const uint16*> {
52 public:
SequenceIterator()53 SequenceIterator() : ptr_(NULL), stride_(0) {}
SequenceIterator(const uint16 * ptr,int stride)54 SequenceIterator(const uint16* ptr, int stride)
55 : ptr_(ptr), stride_(stride) {}
56
ptr() const57 const uint16* ptr() const {return ptr_;}
stride() const58 int stride() const {return stride_;}
59
operator ++()60 SequenceIterator& operator++() {
61 ptr_ += stride_;
62 return *this;
63 }
operator +=(int n)64 SequenceIterator& operator+=(int n) {
65 ptr_ += stride_*n;
66 return *this;
67 }
68
operator *() const69 const uint16* operator*() const {return ptr_;}
70
71 private:
72 const uint16* ptr_;
73 int stride_;
74 };
75
// Returns a copy of |l| advanced by |r| rows.
inline SequenceIterator operator+(const SequenceIterator& l, int r) {
  SequenceIterator advanced(l);
  advanced += r;
  return advanced;
}
79
operator -(const SequenceIterator & l,const SequenceIterator & r)80 inline int operator-(const SequenceIterator& l, const SequenceIterator& r) {
81 const int d = l.ptr() - r.ptr();
82 DCHECK(l.stride() == r.stride() && l.stride() > 0 && d%l.stride() == 0);
83 return d/l.stride();
84 }
85
operator ==(const SequenceIterator & l,const SequenceIterator & r)86 inline bool operator==(const SequenceIterator& l, const SequenceIterator& r) {
87 DCHECK(l.stride() == r.stride());
88 return l.ptr() == r.ptr();
89 }
90
operator !=(const SequenceIterator & l,const SequenceIterator & r)91 inline bool operator!=(const SequenceIterator& l, const SequenceIterator& r) {
92 return !(l == r);
93 }
94
// Three-way comparison of two key values.
// Returns -1 if |l| < |r|, 1 if |l| > |r| and 0 if they are equal.
inline int CompareSequenceValue(unsigned int l, unsigned int r) {
  if (l < r)
    return -1;
  if (l > r)
    return 1;
  return 0;
}
99
100 // A template to make |CompareFunc| work like operator<.
101 // |CompareFunc| is required to implement a member function,
102 // int operator()(const ComposeBufferType& l, const uint16* r) const.
103 template<typename CompareFunc>
104 struct ComparatorAdoptor {
operator ()__anon94d2f1470111::ComparatorAdoptor105 bool operator()(const ComposeBufferType& l, const uint16* r) const {
106 return CompareFunc()(l, r) == -1;
107 }
operator ()__anon94d2f1470111::ComparatorAdoptor108 bool operator()(const uint16* l, const ComposeBufferType& r) const {
109 return CompareFunc()(r, l) == 1;
110 }
111 };
112
// Looks up a key sequence in a fixed-stride composition table.
class ComposeChecker {
 public:
  // This class does not take the ownership of |data|, |data| should be alive
  // for the lifetime of the object.
  // |data| is a pointer to the head of an array of
  // length (|max_sequence_length| + 2)*|n_sequences|.
  // Every (|max_sequence_length| + 2) elements of |data| represent an entry.
  // The first |max_sequence_length| elements of an entry are the sequence
  // which composes the character represented by the last two elements of the
  // entry (high 16 bits first, then low 16 bits).
  ComposeChecker(const uint16* data, int max_sequence_length, int n_sequences);

  // Returns true if |sequence| matches an entry or is a prefix of one.
  // |*composed_character| is written only when |sequence| fully matches an
  // entry and the following entry does not also start with |sequence|.
  bool CheckSequence(const ComposeBufferType& sequence,
                     uint32* composed_character) const;

 private:
  // Three-way comparison of a typed sequence against the leading elements of
  // a table entry.
  struct CompareSequence {
    int operator()(const ComposeBufferType& l, const uint16* r) const;
  };

  // This class does not take the ownership of |data_|,
  // the dtor does not delete |data_|.
  const uint16* data_;
  int max_sequence_length_;
  int n_sequences_;
  int row_stride_;  // |max_sequence_length_| + 2, the length of one entry.

  DISALLOW_COPY_AND_ASSIGN(ComposeChecker);
};
140
// Stores the table description. Each row holds |max_sequence_length| key
// values followed by two elements for the composed character, hence the
// row stride of |max_sequence_length| + 2.
ComposeChecker::ComposeChecker(const uint16* data,
                               int max_sequence_length,
                               int n_sequences)
    : data_(data),
      max_sequence_length_(max_sequence_length),
      n_sequences_(n_sequences),
      row_stride_(max_sequence_length + 2) {
}
149
CheckSequence(const ComposeBufferType & sequence,uint32 * composed_character) const150 bool ComposeChecker::CheckSequence(const ComposeBufferType& sequence,
151 uint32* composed_character) const {
152 const int sequence_length = sequence.size();
153 if (sequence_length > max_sequence_length_)
154 return false;
155 // Find sequence in the table.
156 const SequenceIterator begin(data_, row_stride_);
157 const SequenceIterator end = begin + n_sequences_;
158 const SequenceIterator found = std::lower_bound(
159 begin, end, sequence, ComparatorAdoptor<CompareSequence>());
160 if (found == end || CompareSequence()(sequence, *found) != 0)
161 return false;
162
163 if (sequence_length == max_sequence_length_ ||
164 (*found)[sequence_length] == 0) {
165 // |found| is not partially matching. It's fully matching.
166 if (found + 1 == end ||
167 CompareSequence()(sequence, *(found + 1)) != 0) {
168 // There is no composition longer than |found| which matches to
169 // |sequence|.
170 const uint32 value = ((*found)[max_sequence_length_] << 16) |
171 (*found)[max_sequence_length_ + 1];
172 *composed_character = value;
173 }
174 }
175 return true;
176 }
177
operator ()(const ComposeBufferType & l,const uint16 * r) const178 int ComposeChecker::CompareSequence::operator()(const ComposeBufferType& l,
179 const uint16* r) const {
180 for(size_t i = 0; i < l.size(); ++i) {
181 const int compare_result = CompareSequenceValue(l[i], r[i]);
182 if(compare_result)
183 return compare_result;
184 }
185 return 0;
186 }
187
188
189 class ComposeCheckerWithCompactTable {
190 public:
191 // This class does not take the ownership of |data|, |data| should be alive
192 // for the lifetime of the object.
193 // First |index_size|*|index_stride| elements of |data| are an index table.
194 // Every |index_stride| elements of an index table are an index entry.
195 // If you are checking with a sequence of length N beginning with character C,
196 // you have to find an index entry whose first element is C, then get the N-th
197 // element of the index entry as the index.
198 // The index is pointing the element of |data| where the composition table for
199 // sequences of length N beginning with C is placed.
200
201 ComposeCheckerWithCompactTable(const uint16* data,
202 int max_sequence_length,
203 int index_size,
204 int index_stride);
205 bool CheckSequence(const ComposeBufferType& sequence,
206 uint32* composed_character) const;
207
208 private:
209 struct CompareSequenceFront {
210 int operator()(const ComposeBufferType& l, const uint16* r) const;
211 };
212 struct CompareSequenceSkipFront {
213 int operator()(const ComposeBufferType& l, const uint16* r) const;
214 };
215
216 // This class does not take the ownership of |data_|,
217 // the dtor does not delete |data_|.
218 const uint16* data_;
219 int max_sequence_length_;
220 int index_size_;
221 int index_stride_;
222 };
223
// Stores the description of the compact table; |data| is borrowed, not
// copied.
ComposeCheckerWithCompactTable::ComposeCheckerWithCompactTable(
    const uint16* data,
    int max_sequence_length,
    int index_size,
    int index_stride)
    : data_(data),
      max_sequence_length_(max_sequence_length),
      index_size_(index_size),
      index_stride_(index_stride) {
}
234
CheckSequence(const ComposeBufferType & sequence,uint32 * composed_character) const235 bool ComposeCheckerWithCompactTable::CheckSequence(
236 const ComposeBufferType& sequence,
237 uint32* composed_character) const {
238 const int compose_length = sequence.size();
239 if (compose_length > max_sequence_length_)
240 return false;
241 // Find corresponding index for the first keypress.
242 const SequenceIterator index_begin(data_, index_stride_);
243 const SequenceIterator index_end = index_begin + index_size_;
244 const SequenceIterator index =
245 std::lower_bound(index_begin, index_end, sequence,
246 ComparatorAdoptor<CompareSequenceFront>());
247 if (index == index_end || CompareSequenceFront()(sequence, *index) != 0)
248 return false;
249 if (compose_length == 1)
250 return true;
251 // Check for composition sequences.
252 for (int length = compose_length - 1; length < max_sequence_length_;
253 ++length) {
254 const uint16* table = data_ + (*index)[length];
255 const uint16* table_next = data_ + (*index)[length + 1];
256 if (table_next > table) {
257 // There are composition sequences for this |length|.
258 const int row_stride = length + 1;
259 const int n_sequences = (table_next - table)/row_stride;
260 const SequenceIterator table_begin(table, row_stride);
261 const SequenceIterator table_end = table_begin + n_sequences;
262 const SequenceIterator found =
263 std::lower_bound(table_begin, table_end, sequence,
264 ComparatorAdoptor<CompareSequenceSkipFront>());
265 if (found != table_end &&
266 CompareSequenceSkipFront()(sequence, *found) == 0) {
267 if (length == compose_length - 1) // Exact match.
268 *composed_character = (*found)[length];
269 return true;
270 }
271 }
272 }
273 return false;
274 }
275
operator ()(const ComposeBufferType & l,const uint16 * r) const276 int ComposeCheckerWithCompactTable::CompareSequenceFront::operator()(
277 const ComposeBufferType& l, const uint16* r) const {
278 return CompareSequenceValue(l[0], r[0]);
279 }
280
operator ()(const ComposeBufferType & l,const uint16 * r) const281 int ComposeCheckerWithCompactTable::CompareSequenceSkipFront::operator()(
282 const ComposeBufferType& l, const uint16* r) const {
283 for(size_t i = 1; i < l.size(); ++i) {
284 const int compare_result = CompareSequenceValue(l[i], r[i - 1]);
285 if(compare_result)
286 return compare_result;
287 }
288 return 0;
289 }
290
291
// Additional table.

// The difference between this and the default input method is the handling
// of C+acute - this method produces C WITH CEDILLA rather than C WITH ACUTE.
// For languages that use CCedilla and not acute, this is the preferred mapping,
// and is particularly important for pt_BR, where the us-intl keyboard is
// used extensively.
//
// Layout: each row is four key values (padded with 0) followed by the
// composed character as two 16-bit halves, high half first. The table is
// searched with std::lower_bound by ComposeChecker, so rows must be kept
// sorted by key sequence.

const uint16 cedilla_compose_seqs[] = {
  // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
  GDK_KEY_dead_acute, GDK_KEY_C, 0, 0, 0, 0x00C7,
  // LATIN_SMALL_LETTER_C_WITH_CEDILLA
  GDK_KEY_dead_acute, GDK_KEY_c, 0, 0, 0, 0x00E7,
  // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
  GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_C, 0, 0, 0x00C7,
  // LATIN_SMALL_LETTER_C_WITH_CEDILLA
  GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_c, 0, 0, 0x00E7,
  // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
  GDK_KEY_Multi_key, GDK_KEY_C, GDK_KEY_apostrophe, 0, 0, 0x00C7,
  // LATIN_SMALL_LETTER_C_WITH_CEDILLA
  GDK_KEY_Multi_key, GDK_KEY_c, GDK_KEY_apostrophe, 0, 0, 0x00E7,
};
314
KeypressShouldBeIgnored(unsigned int keyval)315 bool KeypressShouldBeIgnored(unsigned int keyval) {
316 switch(keyval) {
317 case GDK_KEY_Shift_L:
318 case GDK_KEY_Shift_R:
319 case GDK_KEY_Control_L:
320 case GDK_KEY_Control_R:
321 case GDK_KEY_Caps_Lock:
322 case GDK_KEY_Shift_Lock:
323 case GDK_KEY_Meta_L:
324 case GDK_KEY_Meta_R:
325 case GDK_KEY_Alt_L:
326 case GDK_KEY_Alt_R:
327 case GDK_KEY_Super_L:
328 case GDK_KEY_Super_R:
329 case GDK_KEY_Hyper_L:
330 case GDK_KEY_Hyper_R:
331 case GDK_KEY_Mode_switch:
332 case GDK_KEY_ISO_Level3_Shift:
333 return true;
334 default:
335 return false;
336 }
337 }
338
CheckCharacterComposeTable(const ComposeBufferType & sequence,uint32 * composed_character)339 bool CheckCharacterComposeTable(const ComposeBufferType& sequence,
340 uint32* composed_character) {
341 // Check cedilla compose table.
342 const ComposeChecker kCedillaComposeChecker(
343 cedilla_compose_seqs, 4, arraysize(cedilla_compose_seqs)/(4 + 2));
344 if (kCedillaComposeChecker.CheckSequence(sequence, composed_character))
345 return true;
346
347 // Check main compose table.
348 const ComposeCheckerWithCompactTable kMainComposeChecker(
349 gtk_compose_seqs_compact, 5, 24, 6);
350 if (kMainComposeChecker.CheckSequence(sequence, composed_character))
351 return true;
352
353 return false;
354 }
355
356 // Converts |character| to UTF16 string.
357 // Returns false when |character| is not a valid character.
UTF32CharacterToUTF16(uint32 character,base::string16 * output)358 bool UTF32CharacterToUTF16(uint32 character, base::string16* output) {
359 output->clear();
360 // Reject invalid character. (e.g. codepoint greater than 0x10ffff)
361 if (!CBU_IS_UNICODE_CHAR(character))
362 return false;
363 if (character) {
364 output->resize(CBU16_LENGTH(character));
365 size_t i = 0;
366 CBU16_APPEND_UNSAFE(&(*output)[0], i, character);
367 }
368 return true;
369 }
370
371 // Returns an hexadecimal digit integer (0 to 15) corresponding to |keyval|.
372 // -1 is returned when |keyval| cannot be a hexadecimal digit.
KeyvalToHexDigit(unsigned int keyval)373 int KeyvalToHexDigit(unsigned int keyval) {
374 if (GDK_KEY_0 <= keyval && keyval <= GDK_KEY_9)
375 return keyval - GDK_KEY_0;
376 if (GDK_KEY_a <= keyval && keyval <= GDK_KEY_f)
377 return keyval - GDK_KEY_a + 10;
378 if (GDK_KEY_A <= keyval && keyval <= GDK_KEY_F)
379 return keyval - GDK_KEY_A + 10;
380 return -1; // |keyval| cannot be a hexadecimal digit.
381 }
382
383 // Returns an hexadecimal digit integer (0 to 15) corresponding to |keycode|.
384 // -1 is returned when |keycode| cannot be a hexadecimal digit.
KeycodeToHexDigit(unsigned int keycode)385 int KeycodeToHexDigit(unsigned int keycode) {
386 if (ui::VKEY_0 <= keycode && keycode <= ui::VKEY_9)
387 return keycode - ui::VKEY_0;
388 if (ui::VKEY_A <= keycode && keycode <= ui::VKEY_F)
389 return keycode - ui::VKEY_A + 10;
390 return -1; // |keycode| cannot be a hexadecimal digit.
391 }
392
393 } // namespace
394
395 namespace ui {
396
// A new composer starts in key sequence mode.
CharacterComposer::CharacterComposer() : composition_mode_(KEY_SEQUENCE_MODE) {}

CharacterComposer::~CharacterComposer() {}
400
Reset()401 void CharacterComposer::Reset() {
402 compose_buffer_.clear();
403 composed_character_.clear();
404 preedit_string_.clear();
405 composition_mode_ = KEY_SEQUENCE_MODE;
406 }
407
FilterKeyPress(const ui::KeyEvent & event)408 bool CharacterComposer::FilterKeyPress(const ui::KeyEvent& event) {
409 uint32 keyval = event.platform_keycode();
410 if (!keyval ||
411 (event.type() != ET_KEY_PRESSED && event.type() != ET_KEY_RELEASED))
412 return false;
413
414 return FilterKeyPressInternal(keyval, event.key_code(), event.flags());
415 }
416
417
FilterKeyPressInternal(unsigned int keyval,unsigned int keycode,int flags)418 bool CharacterComposer::FilterKeyPressInternal(unsigned int keyval,
419 unsigned int keycode,
420 int flags) {
421 composed_character_.clear();
422 preedit_string_.clear();
423
424 // We don't care about modifier key presses.
425 if(KeypressShouldBeIgnored(keyval))
426 return false;
427
428 // When the user presses Ctrl+Shift+U, maybe switch to HEX_MODE.
429 // We don't care about other modifiers like Alt. When CapsLock is down, we
430 // do nothing because what we receive is Ctrl+Shift+u (not U).
431 if (keyval == GDK_KEY_U && (flags & EF_SHIFT_DOWN) &&
432 (flags & EF_CONTROL_DOWN)) {
433 if (composition_mode_ == KEY_SEQUENCE_MODE && compose_buffer_.empty()) {
434 // There is no ongoing composition. Let's switch to HEX_MODE.
435 composition_mode_ = HEX_MODE;
436 UpdatePreeditStringHexMode();
437 return true;
438 }
439 }
440
441 // Filter key press in an appropriate manner.
442 switch (composition_mode_) {
443 case KEY_SEQUENCE_MODE:
444 return FilterKeyPressSequenceMode(keyval, flags);
445 case HEX_MODE:
446 return FilterKeyPressHexMode(keyval, keycode, flags);
447 default:
448 NOTREACHED();
449 return false;
450 }
451 }
452
FilterKeyPressSequenceMode(unsigned int keyval,int flags)453 bool CharacterComposer::FilterKeyPressSequenceMode(unsigned int keyval,
454 int flags) {
455 DCHECK(composition_mode_ == KEY_SEQUENCE_MODE);
456 compose_buffer_.push_back(keyval);
457
458 if (compose_buffer_.size() == 2U) {
459 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kBlackListedDeadKeys); ++i) {
460 if (compose_buffer_[0] == kBlackListedDeadKeys[i].first_key &&
461 compose_buffer_[1] == kBlackListedDeadKeys[i].second_key ) {
462 Reset();
463 composed_character_.push_back(kBlackListedDeadKeys[i].output_char);
464 return kBlackListedDeadKeys[i].consume;
465 }
466 }
467 }
468
469 // Check compose table.
470 uint32 composed_character_utf32 = 0;
471 if (CheckCharacterComposeTable(compose_buffer_, &composed_character_utf32)) {
472 // Key press is recognized as a part of composition.
473 if (composed_character_utf32 != 0) {
474 // We get a composed character.
475 compose_buffer_.clear();
476 UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
477 }
478 return true;
479 }
480 // Key press is not a part of composition.
481 compose_buffer_.pop_back(); // Remove the keypress added this time.
482 if (!compose_buffer_.empty()) {
483 compose_buffer_.clear();
484 return true;
485 }
486 return false;
487 }
488
FilterKeyPressHexMode(unsigned int keyval,unsigned int keycode,int flags)489 bool CharacterComposer::FilterKeyPressHexMode(unsigned int keyval,
490 unsigned int keycode,
491 int flags) {
492 DCHECK(composition_mode_ == HEX_MODE);
493 const size_t kMaxHexSequenceLength = 8;
494 int hex_digit = KeyvalToHexDigit(keyval);
495 if (hex_digit < 0) {
496 // With 101 keyboard, control + shift + 3 produces '#', but a user may
497 // have intended to type '3'. So, if a hexadecimal character was not found,
498 // suppose a user is holding shift key (and possibly control key, too) and
499 // try a character with modifier keys removed.
500 hex_digit = KeycodeToHexDigit(keycode);
501 }
502
503 if (keyval == GDK_KEY_Escape) {
504 // Cancel composition when ESC is pressed.
505 Reset();
506 } else if (keyval == GDK_KEY_Return || keyval == GDK_KEY_KP_Enter ||
507 keyval == GDK_KEY_ISO_Enter ||
508 keyval == GDK_KEY_space || keyval == GDK_KEY_KP_Space) {
509 // Commit the composed character when Enter or space is pressed.
510 CommitHex();
511 } else if (keyval == GDK_KEY_BackSpace) {
512 // Pop back the buffer when Backspace is pressed.
513 if (!compose_buffer_.empty()) {
514 compose_buffer_.pop_back();
515 } else {
516 // If there is no character in |compose_buffer_|, cancel composition.
517 Reset();
518 }
519 } else if (hex_digit >= 0 &&
520 compose_buffer_.size() < kMaxHexSequenceLength) {
521 // Add the key to the buffer if it is a hex digit.
522 compose_buffer_.push_back(hex_digit);
523 }
524
525 UpdatePreeditStringHexMode();
526
527 return true;
528 }
529
CommitHex()530 void CharacterComposer::CommitHex() {
531 DCHECK(composition_mode_ == HEX_MODE);
532 uint32 composed_character_utf32 = 0;
533 for (size_t i = 0; i != compose_buffer_.size(); ++i) {
534 const uint32 digit = compose_buffer_[i];
535 DCHECK(0 <= digit && digit < 16);
536 composed_character_utf32 <<= 4;
537 composed_character_utf32 |= digit;
538 }
539 Reset();
540 UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
541 }
542
UpdatePreeditStringHexMode()543 void CharacterComposer::UpdatePreeditStringHexMode() {
544 if (composition_mode_ != HEX_MODE) {
545 preedit_string_.clear();
546 return;
547 }
548 std::string preedit_string_ascii("u");
549 for (size_t i = 0; i != compose_buffer_.size(); ++i) {
550 const int digit = compose_buffer_[i];
551 DCHECK(0 <= digit && digit < 16);
552 preedit_string_ascii += digit <= 9 ? ('0' + digit) : ('a' + (digit - 10));
553 }
554 preedit_string_ = base::ASCIIToUTF16(preedit_string_ascii);
555 }
556
557 } // namespace ui
558