• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 //  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See
5 //  accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #define BOOST_LOCALE_SOURCE
9 #include <boost/locale/encoding.hpp>
10 #include <boost/shared_ptr.hpp>
11 #include <boost/locale/hold_ptr.hpp>
12 #include "../encoding/conv.hpp"
13 #include <boost/locale/util.hpp>
14 #include "all_generator.hpp"
15 
16 #include <errno.h>
17 #include <algorithm>
18 #include <stdexcept>
19 #include <vector>
20 #include "codecvt.hpp"
21 
22 #ifdef BOOST_LOCALE_WITH_ICONV
23 #include "../util/iconv.hpp"
24 #endif
25 
26 namespace boost {
27 namespace locale {
28 namespace impl_posix {
29 
30 #ifdef BOOST_LOCALE_WITH_ICONV
31     class mb2_iconv_converter : public util::base_converter {
32     public:
33 
mb2_iconv_converter(std::string const & encoding)34         mb2_iconv_converter(std::string const &encoding) :
35             encoding_(encoding),
36             to_utf_((iconv_t)(-1)),
37             from_utf_((iconv_t)(-1))
38         {
39             iconv_t d = (iconv_t)(-1);
40             std::vector<uint32_t> first_byte_table;
41             try {
42                 d = iconv_open(utf32_encoding(),encoding.c_str());
43                 if(d == (iconv_t)(-1)) {
44                     throw std::runtime_error("Unsupported encoding" + encoding);
45                 }
46                 for(unsigned c=0;c<256;c++) {
47                     char ibuf[2] = { char(c) , 0 };
48                     char *in = ibuf;
49                     size_t insize =2;
50                     uint32_t obuf[2] = {illegal,illegal};
51                     char *out = reinterpret_cast<char *>(obuf);
52                     size_t outsize = 8;
53                     // Basic sigle codepoint conversion
54                     call_iconv(d,&in,&insize,&out,&outsize);
55                     if(insize == 0 && outsize == 0 && obuf[1] == 0) {
56                         first_byte_table.push_back(obuf[0]);
57                         continue;
58                     }
59 
60                     // Test if this is illegal first byte or incomplete
61                     in = ibuf;
62                     insize = 1;
63                     out = reinterpret_cast<char *>(obuf);
64                     outsize = 8;
65                     call_iconv(d,0,0,0,0);
66                     size_t res = call_iconv(d,&in,&insize,&out,&outsize);
67 
68                     // Now if this single byte starts a sequence we add incomplete
69                     // to know to ask that we need two bytes, othewise it may only be
70                     // illegal
71 
72                     uint32_t point;
73                     if(res == (size_t)(-1) && errno == EINVAL)
74                         point = incomplete;
75                     else
76                         point = illegal;
77                     first_byte_table.push_back(point);
78 
79                 }
80             }
81             catch(...) {
82                 if(d!=(iconv_t)(-1))
83                     iconv_close(d);
84                 throw;
85             }
86             iconv_close(d);
87             first_byte_table_.reset(new std::vector<uint32_t>());
88             first_byte_table_->swap(first_byte_table);
89         }
90 
mb2_iconv_converter(mb2_iconv_converter const & other)91         mb2_iconv_converter(mb2_iconv_converter const &other) :
92             first_byte_table_(other.first_byte_table_),
93             encoding_(other.encoding_),
94             to_utf_((iconv_t)(-1)),
95             from_utf_((iconv_t)(-1))
96         {
97         }
98 
~mb2_iconv_converter()99         virtual ~mb2_iconv_converter()
100         {
101             if(to_utf_ != (iconv_t)(-1))
102                 iconv_close(to_utf_);
103             if(from_utf_ != (iconv_t)(-1))
104                 iconv_close(from_utf_);
105 
106         }
107 
is_thread_safe() const108         virtual bool is_thread_safe() const
109         {
110             return false;
111         }
112 
clone() const113         virtual mb2_iconv_converter *clone() const
114         {
115             return new mb2_iconv_converter(*this);
116         }
117 
to_unicode(char const * & begin,char const * end)118         uint32_t to_unicode(char const *&begin,char const *end)
119         {
120             if(begin == end)
121                 return incomplete;
122 
123             unsigned char seq0 = *begin;
124             uint32_t index = (*first_byte_table_)[seq0];
125             if(index == illegal)
126                 return illegal;
127             if(index != incomplete) {
128                 begin++;
129                 return index;
130             }
131             else if(begin+1 == end)
132                 return incomplete;
133 
134             open(to_utf_,utf32_encoding(),encoding_.c_str());
135 
136             // maybe illegal or may be double byte
137 
138             char inseq[3] = { static_cast<char>(seq0) , begin[1], 0};
139             char *inbuf = inseq;
140             size_t insize = 3;
141             uint32_t result[2] = { illegal, illegal };
142             size_t outsize = 8;
143             char *outbuf = reinterpret_cast<char*>(result);
144             call_iconv(to_utf_,&inbuf,&insize,&outbuf,&outsize);
145             if(outsize == 0 && insize == 0 && result[1]==0 ) {
146                 begin+=2;
147                 return result[0];
148             }
149             return illegal;
150         }
151 
from_unicode(uint32_t cp,char * begin,char const * end)152         uint32_t from_unicode(uint32_t cp,char *begin,char const *end)
153         {
154             if(cp == 0) {
155                 if(begin!=end) {
156                     *begin = 0;
157                     return 1;
158                 }
159                 else {
160                     return incomplete;
161                 }
162             }
163 
164             open(from_utf_,encoding_.c_str(),utf32_encoding());
165 
166             uint32_t codepoints[2] = {cp,0};
167             char *inbuf = reinterpret_cast<char *>(codepoints);
168             size_t insize = sizeof(codepoints);
169             char outseq[3] = {0};
170             char *outbuf = outseq;
171             size_t outsize = 3;
172 
173             call_iconv(from_utf_,&inbuf,&insize,&outbuf,&outsize);
174 
175             if(insize != 0 || outsize > 1)
176                 return illegal;
177             size_t len = 2 - outsize ;
178             size_t reminder = end - begin;
179             if(reminder < len)
180                 return incomplete;
181             for(unsigned i=0;i<len;i++)
182                 *begin++ = outseq[i];
183             return len;
184         }
185 
open(iconv_t & d,char const * to,char const * from)186         void open(iconv_t &d,char const *to,char const *from)
187         {
188             if(d!=(iconv_t)(-1))
189                 return;
190             d=iconv_open(to,from);
191         }
192 
utf32_encoding()193         static char const *utf32_encoding()
194         {
195             union { char one; uint32_t value; } test;
196             test.value = 1;
197             if(test.one == 1)
198                 return "UTF-32LE";
199             else
200                 return "UTF-32BE";
201         }
202 
max_len() const203         virtual int max_len() const
204         {
205             return 2;
206         }
207 
208     private:
209         boost::shared_ptr<std::vector<uint32_t> > first_byte_table_;
210         std::string encoding_;
211         iconv_t to_utf_;
212         iconv_t from_utf_;
213     };
214 
create_iconv_converter(std::string const & encoding)215     util::base_converter *create_iconv_converter(std::string const &encoding)
216     {
217         hold_ptr<util::base_converter> cvt;
218         try {
219             cvt.reset(new mb2_iconv_converter(encoding));
220         }
221         catch(std::exception const &e) {
222             // Nothing to do, just retrun empty cvt
223         }
224         return cvt.release();
225     }
226 
227 #else // no iconv
228     util::base_converter *create_iconv_converter(std::string const &/*encoding*/)
229     {
230         return 0;
231     }
232 #endif
233 
create_codecvt(std::locale const & in,std::string const & encoding,character_facet_type type)234     std::locale create_codecvt(std::locale const &in,std::string const &encoding,character_facet_type type)
235     {
236         if(conv::impl::normalize_encoding(encoding.c_str())=="utf8")
237             return util::create_utf8_codecvt(in,type);
238 
239         try {
240             return util::create_simple_codecvt(in,encoding,type);
241         }
242         catch(conv::invalid_charset_error const &) {
243             util::base_converter *cvt = create_iconv_converter(encoding);
244             return util::create_codecvt_from_pointer(in,cvt,type);
245         }
246     }
247 
248 } // impl_posix
249 } // locale
250 } // boost
251 
252 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
253