• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // icu.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: roubert@google.com (Fredrik Roubert)
17 
18 // Wrapper class for UErrorCode, with conversion operators for direct use in
19 // ICU C and C++ APIs.
20 //
21 // Features:
22 // - The constructor initializes the internal UErrorCode to U_ZERO_ERROR,
23 //   removing one common source of errors.
24 // - Same use in C APIs taking a UErrorCode* (pointer) and C++ taking
25 //   UErrorCode& (reference), via conversion operators.
26 // - Automatic checking for success when it goes out of scope. On failure,
27 //   the destructor will FSTERROR() an error message.
28 //
29 // Most of ICU will handle errors gracefully and provide sensible fallbacks.
30 // Using IcuErrorCode, it is therefore possible to write very compact code
31 // that does sensible things on failure and provides logging for debugging.
32 //
33 // Example:
34 //
35 // IcuErrorCode icuerrorcode;
36 // return collator.compareUTF8(a, b, icuerrorcode) == UCOL_EQUAL;
37 
38 #ifndef FST_LIB_ICU_H_
39 #define FST_LIB_ICU_H_
40 
41 #include <unicode/errorcode.h>
42 #include <unicode/unistr.h>
43 #include <unicode/ustring.h>
44 #include <unicode/utf8.h>
45 
46 class IcuErrorCode : public icu::ErrorCode {
47  public:
IcuErrorCode()48   IcuErrorCode() {}
~IcuErrorCode()49   virtual ~IcuErrorCode() { if (isFailure()) handleFailure(); }
50 
51   // Redefine 'errorName()' in order to be compatible with ICU version 4.2
errorName()52   const char* errorName() const {
53     return u_errorName(errorCode);
54   }
55 
56  protected:
handleFailure()57   virtual void handleFailure() const {
58     FSTERROR() << errorName();
59 }
60 
61  private:
62   DISALLOW_COPY_AND_ASSIGN(IcuErrorCode);
63 };
64 
65 namespace fst {
66 
67 template <class Label>
UTF8StringToLabels(const string & str,vector<Label> * labels)68 bool UTF8StringToLabels(const string &str, vector<Label> *labels) {
69   const char *c_str = str.c_str();
70   int32_t length = str.size();
71   UChar32 c;
72   for (int32_t i = 0; i < length; /* no update */) {
73     U8_NEXT(c_str, i, length, c);
74     if (c < 0) {
75       LOG(ERROR) << "UTF8StringToLabels: Invalid character found: " << c;
76       return false;
77     }
78     labels->push_back(c);
79   }
80   return true;
81 }
82 
83 template <class Label>
LabelsToUTF8String(const vector<Label> & labels,string * str)84 bool LabelsToUTF8String(const vector<Label> &labels, string *str) {
85   icu::UnicodeString u_str;
86   char c_str[5];
87   for (size_t i = 0; i < labels.size(); ++i) {
88     u_str.setTo(labels[i]);
89     IcuErrorCode error;
90     u_strToUTF8(c_str, 5, NULL, u_str.getTerminatedBuffer(), -1, error);
91     if (error.isFailure()) {
92       LOG(ERROR) << "LabelsToUTF8String: Bad encoding: "
93                  << error.errorName();
94       return false;
95     }
96     *str += c_str;
97   }
98   return true;
99 }
100 
101 }  // namespace fst
102 
103 #endif  // FST_LIB_ICU_H_
104