1 // icu.h
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: roubert@google.com (Fredrik Roubert)
17
18 // Wrapper class for UErrorCode, with conversion operators for direct use in
19 // ICU C and C++ APIs.
20 //
21 // Features:
22 // - The constructor initializes the internal UErrorCode to U_ZERO_ERROR,
23 // removing one common source of errors.
24 // - Same use in C APIs taking a UErrorCode* (pointer) and C++ taking
25 // UErrorCode& (reference), via conversion operators.
26 // - Automatic checking for success when it goes out of scope. On failure,
27 // the destructor will FSTERROR() an error message.
28 //
29 // Most of ICU will handle errors gracefully and provide sensible fallbacks.
30 // Using IcuErrorCode, it is therefore possible to write very compact code
31 // that does sensible things on failure and provides logging for debugging.
32 //
33 // Example:
34 //
35 // IcuErrorCode icuerrorcode;
36 // return collator.compareUTF8(a, b, icuerrorcode) == UCOL_EQUAL;
37
38 #ifndef FST_LIB_ICU_H_
39 #define FST_LIB_ICU_H_
40
41 #include <unicode/errorcode.h>
42 #include <unicode/unistr.h>
43 #include <unicode/ustring.h>
44 #include <unicode/utf8.h>
45
46 class IcuErrorCode : public icu::ErrorCode {
47 public:
IcuErrorCode()48 IcuErrorCode() {}
~IcuErrorCode()49 virtual ~IcuErrorCode() { if (isFailure()) handleFailure(); }
50
51 // Redefine 'errorName()' in order to be compatible with ICU version 4.2
errorName()52 const char* errorName() const {
53 return u_errorName(errorCode);
54 }
55
56 protected:
handleFailure()57 virtual void handleFailure() const {
58 FSTERROR() << errorName();
59 }
60
61 private:
62 DISALLOW_COPY_AND_ASSIGN(IcuErrorCode);
63 };
64
65 namespace fst {
66
67 template <class Label>
UTF8StringToLabels(const string & str,vector<Label> * labels)68 bool UTF8StringToLabels(const string &str, vector<Label> *labels) {
69 const char *c_str = str.c_str();
70 int32_t length = str.size();
71 UChar32 c;
72 for (int32_t i = 0; i < length; /* no update */) {
73 U8_NEXT(c_str, i, length, c);
74 if (c < 0) {
75 LOG(ERROR) << "UTF8StringToLabels: Invalid character found: " << c;
76 return false;
77 }
78 labels->push_back(c);
79 }
80 return true;
81 }
82
83 template <class Label>
LabelsToUTF8String(const vector<Label> & labels,string * str)84 bool LabelsToUTF8String(const vector<Label> &labels, string *str) {
85 icu::UnicodeString u_str;
86 char c_str[5];
87 for (size_t i = 0; i < labels.size(); ++i) {
88 u_str.setTo(labels[i]);
89 IcuErrorCode error;
90 u_strToUTF8(c_str, 5, NULL, u_str.getTerminatedBuffer(), -1, error);
91 if (error.isFailure()) {
92 LOG(ERROR) << "LabelsToUTF8String: Bad encoding: "
93 << error.errorName();
94 return false;
95 }
96 *str += c_str;
97 }
98 return true;
99 }
100
101 } // namespace fst
102
103 #endif // FST_LIB_ICU_H_
104