1 /* Output stream that converts the output to another encoding.
2 Copyright (C) 2006-2007, 2010, 2019 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2006.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #include <config.h>
19
20 /* Specification. */
21 #include "iconv-ostream.h"
22
23 #if HAVE_ICONV
24
25 #include <errno.h>
26 #include <iconv.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "c-strcase.h"
31 #include "error.h"
32 #include "xalloc.h"
33 #include "gettext.h"
34
35 #define _(str) gettext (str)
36
37 #endif /* HAVE_ICONV */
38
39 struct iconv_ostream : struct ostream
40 {
41 fields:
42 #if HAVE_ICONV
43 /* The destination stream. */
44 ostream_t destination;
45 /* The from and to encodings. */
46 char *from_encoding;
47 char *to_encoding;
48 /* The converter. */
49 iconv_t cd;
50 /* Last few bytes that could not yet be converted. */
51 #define BUFSIZE 64
52 char buf[BUFSIZE];
53 size_t buflen;
54 #endif /* HAVE_ICONV */
55 };
56
57 #if HAVE_ICONV
58
59 /* Implementation of ostream_t methods. */
60
61 static void
write_mem(iconv_ostream_t stream,const void * data,size_t len)62 iconv_ostream::write_mem (iconv_ostream_t stream, const void *data, size_t len)
63 {
64 if (len > 0)
65 {
66 #define BUFFERSIZE 256
67 char inbuffer[BUFFERSIZE];
68 size_t inbufcount;
69
70 inbufcount = stream->buflen;
71 if (inbufcount > 0)
72 memcpy (inbuffer, stream->buf, inbufcount);
73 for (;;)
74 {
75 /* At this point, inbuffer[0..inbufcount-1] is filled. */
76 {
77 /* Combine the previous rest with a chunk of new input. */
78 size_t n =
79 (len <= BUFFERSIZE - inbufcount ? len : BUFFERSIZE - inbufcount);
80
81 if (n > 0)
82 {
83 memcpy (inbuffer + inbufcount, data, n);
84 data = (char *) data + n;
85 inbufcount += n;
86 len -= n;
87 }
88 }
89 {
90 /* Attempt to convert the combined input. */
91 char outbuffer[8*BUFFERSIZE];
92
93 const char *inptr = inbuffer;
94 size_t insize = inbufcount;
95 char *outptr = outbuffer;
96 size_t outsize = sizeof (outbuffer);
97
98 size_t res = iconv (stream->cd,
99 (ICONV_CONST char **) &inptr, &insize,
100 &outptr, &outsize);
101 #if !defined _LIBICONV_VERSION \
102 && !(defined __GLIBC__ && !defined __UCLIBC__)
103 /* Irix iconv() inserts a NUL byte if it cannot convert.
104 NetBSD iconv() inserts a question mark if it cannot convert.
105 Only GNU libiconv and GNU libc are known to prefer to fail rather
106 than doing a lossy conversion. */
107 if (res > 0)
108 {
109 errno = EILSEQ;
110 res = -1;
111 }
112 #endif
113 if (res == (size_t)(-1) && errno != EINVAL)
114 error (EXIT_FAILURE, 0, _("%s: cannot convert from %s to %s"),
115 "iconv_ostream",
116 stream->from_encoding, stream->to_encoding);
117 /* Output the converted part. */
118 if (sizeof (outbuffer) - outsize > 0)
119 ostream_write_mem (stream->destination,
120 outbuffer, sizeof (outbuffer) - outsize);
121 /* Put back the unconverted part. */
122 if (insize > BUFSIZE)
123 error (EXIT_FAILURE, 0, _("%s: shift sequence too long"),
124 "iconv_ostream");
125 if (len == 0)
126 {
127 if (insize > 0)
128 memcpy (stream->buf, inptr, insize);
129 stream->buflen = insize;
130 break;
131 }
132 if (insize > 0)
133 memmove (inbuffer, inptr, insize);
134 inbufcount = insize;
135 }
136 }
137 #undef BUFFERSIZE
138 }
139 }
140
141 static void
flush(iconv_ostream_t stream,ostream_flush_scope_t scope)142 iconv_ostream::flush (iconv_ostream_t stream, ostream_flush_scope_t scope)
143 {
144 /* For scope == FLUSH_THIS_STREAM, there's nothing we can do here, since
145 stream->buf[] contains only a few bytes that don't correspond to a
146 character. */
147 if (scope != FLUSH_THIS_STREAM)
148 ostream_flush (stream->destination, scope);
149 }
150
151 static void
free(iconv_ostream_t stream)152 iconv_ostream::free (iconv_ostream_t stream)
153 {
154 /* Silently ignore the few bytes in stream->buf[] that don't correspond to a
155 character. */
156
157 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */
158 #if defined _LIBICONV_VERSION \
159 || !(((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) \
160 && !defined __UCLIBC__) \
161 || defined __sun)
162 {
163 char outbuffer[2048];
164 char *outptr = outbuffer;
165 size_t outsize = sizeof (outbuffer);
166 size_t res = iconv (stream->cd, NULL, NULL, &outptr, &outsize);
167 if (res == (size_t)(-1))
168 error (EXIT_FAILURE, 0, _("%s: cannot convert from %s to %s"),
169 "iconv_ostream", stream->from_encoding, stream->to_encoding);
170 /* Output the converted part. */
171 if (sizeof (outbuffer) - outsize > 0)
172 ostream_write_mem (stream->destination,
173 outbuffer, sizeof (outbuffer) - outsize);
174 }
175 #endif
176
177 iconv_close (stream->cd);
178 free (stream->from_encoding);
179 free (stream->to_encoding);
180 free (stream);
181 }
182
183 /* Constructor. */
184
185 iconv_ostream_t
iconv_ostream_create(const char * from_encoding,const char * to_encoding,ostream_t destination)186 iconv_ostream_create (const char *from_encoding, const char *to_encoding,
187 ostream_t destination)
188 {
189 iconv_ostream_t stream = XMALLOC (struct iconv_ostream_representation);
190
191 stream->base.vtable = &iconv_ostream_vtable;
192 stream->destination = destination;
193 stream->from_encoding = xstrdup (from_encoding);
194 stream->to_encoding = xstrdup (to_encoding);
195
196 /* Avoid glibc-2.1 bug with EUC-KR. */
197 #if ((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) \
198 && !defined __UCLIBC__) \
199 && !defined _LIBICONV_VERSION
200 if (c_strcasecmp (from_encoding, "EUC-KR") == 0
201 || c_strcasecmp (to_encoding, "EUC-KR") == 0)
202 stream->cd = (iconv_t)(-1):
203 else
204 #endif
205 stream->cd = iconv_open (to_encoding, from_encoding);
206 if (stream->cd == (iconv_t)(-1))
207 {
208 if (iconv_open ("UTF-8", from_encoding) == (iconv_t)(-1))
209 error (EXIT_FAILURE, 0, _("%s does not support conversion from %s"),
210 "iconv", from_encoding);
211 else if (iconv_open (to_encoding, "UTF-8") == (iconv_t)(-1))
212 error (EXIT_FAILURE, 0, _("%s does not support conversion to %s"),
213 "iconv", to_encoding);
214 else
215 error (EXIT_FAILURE, 0,
216 _("%s does not support conversion from %s to %s"),
217 "iconv", from_encoding, to_encoding);
218 }
219
220 stream->buflen = 0;
221
222 return stream;
223 }
224
225 #else
226
227 static void
write_mem(iconv_ostream_t stream,const void * data,size_t len)228 iconv_ostream::write_mem (iconv_ostream_t stream, const void *data, size_t len)
229 {
230 abort ();
231 }
232
233 static void
flush(iconv_ostream_t stream)234 iconv_ostream::flush (iconv_ostream_t stream)
235 {
236 abort ();
237 }
238
239 static void
free(iconv_ostream_t stream)240 iconv_ostream::free (iconv_ostream_t stream)
241 {
242 abort ();
243 }
244
245 #endif /* HAVE_ICONV */
246