1 /* Recode Serbian text from Cyrillic to Latin script.
2 Copyright (C) 2006-2007, 2010, 2012, 2018-2020 Free Software Foundation,
3 Inc.
4 Written by Bruno Haible <bruno@clisp.org>, 2006.
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
22
23 #include <errno.h>
24 #include <getopt.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <locale.h>
29
30 #if HAVE_ICONV
31 #include <iconv.h>
32 #endif
33
34 #include "noreturn.h"
35 #include "closeout.h"
36 #include "error.h"
37 #include "progname.h"
38 #include "relocatable.h"
39 #include "basename-lgpl.h"
40 #include "xalloc.h"
41 #include "localcharset.h"
42 #include "c-strcase.h"
43 #include "xstriconv.h"
44 #include "filters.h"
45 #include "propername.h"
46 #include "gettext.h"
47
48 #define _(str) gettext (str)
49
50
/* Long options.
   Both long options are plain aliases of a short option: the flag pointer is
   NULL, and the last field holds the corresponding short option letter.
   The all-zero entry terminates the table.  */
static const struct option long_options[] =
{
  { .name = "help",    .has_arg = no_argument, .flag = NULL, .val = 'h' },
  { .name = "version", .has_arg = no_argument, .flag = NULL, .val = 'V' },
  { .name = NULL,      .has_arg = 0,           .flag = NULL, .val = 0 }
};
58
/* Forward declarations of local functions.  */
/* Prints usage information and exits with STATUS; marked as not returning.  */
_GL_NORETURN_FUNC static void usage (int status);
/* Recodes the text read from STREAM and writes the result to stdout.  */
static void process (FILE *stream);
62
63 int
main(int argc,char * argv[])64 main (int argc, char *argv[])
65 {
66 /* Default values for command line options. */
67 bool do_help = false;
68 bool do_version = false;
69
70 int opt;
71
72 /* Set program name for message texts. */
73 set_program_name (argv[0]);
74
75 /* Set locale via LC_ALL. */
76 setlocale (LC_ALL, "");
77
78 /* Set the text message domain. */
79 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
80 textdomain (PACKAGE);
81
82 /* Ensure that write errors on stdout are detected. */
83 atexit (close_stdout);
84
85 /* Parse command line options. */
86 while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
87 switch (opt)
88 {
89 case '\0': /* Long option. */
90 break;
91 case 'h':
92 do_help = true;
93 break;
94 case 'V':
95 do_version = true;
96 break;
97 default:
98 usage (EXIT_FAILURE);
99 }
100
101 /* Version information is requested. */
102 if (do_version)
103 {
104 printf ("%s (GNU %s) %s\n", last_component (program_name),
105 PACKAGE, VERSION);
106 /* xgettext: no-wrap */
107 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
108 License GPLv3+: GNU GPL version 3 or later <%s>\n\
109 This is free software: you are free to change and redistribute it.\n\
110 There is NO WARRANTY, to the extent permitted by law.\n\
111 "),
112 "2006-2020", "https://gnu.org/licenses/gpl.html");
113 printf (_("Written by %s and %s.\n"),
114 /* TRANSLATORS: This is a proper name. The last name is
115 (with Unicode escapes) "\u0160egan" or (with HTML entities)
116 "Šegan". */
117 proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"),
118 proper_name ("Bruno Haible"));
119 exit (EXIT_SUCCESS);
120 }
121
122 /* Help is requested. */
123 if (do_help)
124 usage (EXIT_SUCCESS);
125
126 if (argc - optind > 0)
127 error (EXIT_FAILURE, 0, _("too many arguments"));
128
129 process (stdin);
130
131 exit (EXIT_SUCCESS);
132 }
133
134
135 /* Display usage information and exit. */
136 static void
usage(int status)137 usage (int status)
138 {
139 if (status != EXIT_SUCCESS)
140 fprintf (stderr, _("Try '%s --help' for more information.\n"),
141 program_name);
142 else
143 {
144 /* xgettext: no-wrap */
145 printf (_("\
146 Usage: %s [OPTION]\n\
147 "), program_name);
148 printf ("\n");
149 /* xgettext: no-wrap */
150 printf (_("\
151 Recode Serbian text from Cyrillic to Latin script.\n"));
152 /* xgettext: no-wrap */
153 printf (_("\
154 The input text is read from standard input. The converted text is output to\n\
155 standard output.\n"));
156 printf ("\n");
157 /* xgettext: no-wrap */
158 printf (_("\
159 Informative output:\n"));
160 /* xgettext: no-wrap */
161 printf (_("\
162 -h, --help display this help and exit\n"));
163 /* xgettext: no-wrap */
164 printf (_("\
165 -V, --version output version information and exit\n"));
166 printf ("\n");
167 /* TRANSLATORS: The first placeholder is the web address of the Savannah
168 project of this package. The second placeholder is the bug-reporting
169 email address for this package. Please add _another line_ saying
170 "Report translation bugs to <...>\n" with the address for translation
171 bugs (typically your translation team's web or email address). */
172 printf(_("\
173 Report bugs in the bug tracker at <%s>\n\
174 or by email to <%s>.\n"),
175 "https://savannah.gnu.org/projects/gettext",
176 "bug-gettext@gnu.org");
177 }
178
179 exit (status);
180 }
181
182
183 /* Routines for reading a line.
184 Don't use routines that drop NUL bytes. Don't use getline(), because it
185 doesn't provide a good error message in case of memory allocation failure.
186 The gnulib module 'linebuffer' is nearly the right thing, except that we
187 don't want an extra newline at the end of file. */
188
/* A 'struct linebuffer' holds a line of text in a growable byte buffer.
   The text is not NUL terminated; its extent is given by LENGTH.  */

struct linebuffer
{
  /* Number of bytes allocated for BUFFER.  */
  size_t size;
  /* Number of bytes of BUFFER that are in use.  */
  size_t length;
  /* The storage itself; NULL as long as nothing has been allocated.  */
  char *buffer;
};
197
198 /* Initialize linebuffer LINEBUFFER for use. */
199 static inline void
init_linebuffer(struct linebuffer * lb)200 init_linebuffer (struct linebuffer *lb)
201 {
202 lb->size = 0;
203 lb->length = 0;
204 lb->buffer = NULL;
205 }
206
207 /* Read an arbitrarily long line of text from STREAM into linebuffer LB.
208 Keep the newline. Do not NUL terminate.
209 Return LINEBUFFER, except at end of file return NULL. */
210 static struct linebuffer *
read_linebuffer(struct linebuffer * lb,FILE * stream)211 read_linebuffer (struct linebuffer *lb, FILE *stream)
212 {
213 if (feof (stream))
214 return NULL;
215 else
216 {
217 char *p = lb->buffer;
218 char *end = lb->buffer + lb->size;
219
220 for (;;)
221 {
222 int c = getc (stream);
223 if (c == EOF)
224 {
225 if (p == lb->buffer || ferror (stream))
226 return NULL;
227 break;
228 }
229 if (p == end)
230 {
231 size_t oldsize = lb->size; /* = p - lb->buffer */
232 size_t newsize = 2 * oldsize + 40;
233 lb->buffer = (char *) xrealloc (lb->buffer, newsize);
234 lb->size = newsize;
235 p = lb->buffer + oldsize;
236 end = lb->buffer + newsize;
237 }
238 *p++ = c;
239 if (c == '\n')
240 break;
241 }
242
243 lb->length = p - lb->buffer;
244 return lb;
245 }
246 }
247
248 /* Free linebuffer LB and its data, all allocated with malloc. */
249 static inline void
destroy_linebuffer(struct linebuffer * lb)250 destroy_linebuffer (struct linebuffer *lb)
251 {
252 if (lb->buffer != NULL)
253 free (lb->buffer);
254 }
255
256
257 /* Process the input and produce the output. */
258 static void
process(FILE * stream)259 process (FILE *stream)
260 {
261 struct linebuffer lb;
262 const char *locale_code = locale_charset ();
263 bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0);
264 #if HAVE_ICONV
265 iconv_t conv_to_utf8 = (iconv_t)(-1);
266 iconv_t conv_from_utf8 = (iconv_t)(-1);
267 char *last_utf8_line;
268 size_t last_utf8_line_len;
269 char *last_backconv_line;
270 size_t last_backconv_line_len;
271 #endif
272
273 init_linebuffer (&lb);
274
275 /* Initialize the conversion descriptors. */
276 if (need_code_conversion)
277 {
278 #if HAVE_ICONV
279 /* Avoid glibc-2.1 bug with EUC-KR. */
280 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
281 && !defined _LIBICONV_VERSION
282 if (strcmp (locale_code, "EUC-KR") != 0)
283 # endif
284 {
285 conv_to_utf8 = iconv_open ("UTF-8", locale_code);
286 /* TODO: Maybe append //TRANSLIT here? */
287 conv_from_utf8 = iconv_open (locale_code, "UTF-8");
288 }
289 if (conv_to_utf8 == (iconv_t)(-1))
290 error (EXIT_FAILURE, 0,
291 _("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), and iconv() does not support this conversion."),
292 locale_code, "UTF-8", last_component (program_name));
293 if (conv_from_utf8 == (iconv_t)(-1))
294 error (EXIT_FAILURE, 0,
295 _("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), and iconv() does not support this conversion."),
296 "UTF-8", locale_code, last_component (program_name));
297 last_utf8_line = NULL;
298 last_utf8_line_len = 0;
299 last_backconv_line = NULL;
300 last_backconv_line_len = 0;
301 #else
302 error (EXIT_FAILURE, 0,
303 _("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). This version was built without iconv()."),
304 locale_code, "UTF-8", last_component (program_name));
305 #endif
306 }
307
308 /* Read the input line by line.
309 Processing it character by character is not possible, because some
310 filters need to look at adjacent characters. Processing the entire file
311 in a whole chunk would take an excessive amount of memory. */
312 for (;;)
313 {
314 char *line;
315 size_t line_len;
316 char *filtered_line;
317 size_t filtered_line_len;
318
319 /* Read a line. */
320 if (read_linebuffer (&lb, stream) == NULL)
321 break;
322 line = lb.buffer;
323 line_len = lb.length;
324 /* read_linebuffer always returns a non-void result. */
325 if (line_len == 0)
326 abort ();
327
328 #if HAVE_ICONV
329 /* Convert it to UTF-8. */
330 if (need_code_conversion)
331 {
332 char *utf8_line = last_utf8_line;
333 size_t utf8_line_len = last_utf8_line_len;
334
335 if (xmem_cd_iconv (line, line_len, conv_to_utf8,
336 &utf8_line, &utf8_line_len) != 0)
337 error (EXIT_FAILURE, errno,
338 _("input is not valid in \"%s\" encoding"),
339 locale_code);
340 if (utf8_line != last_utf8_line)
341 {
342 if (last_utf8_line != NULL)
343 free (last_utf8_line);
344 last_utf8_line = utf8_line;
345 last_utf8_line_len = utf8_line_len;
346 }
347
348 line = utf8_line;
349 line_len = utf8_line_len;
350 }
351 #endif
352
353 /* Apply the filter. */
354 serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len);
355
356 #if HAVE_ICONV
357 /* Convert it back to the original encoding. */
358 if (need_code_conversion)
359 {
360 char *backconv_line = last_backconv_line;
361 size_t backconv_line_len = last_backconv_line_len;
362
363 if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8,
364 &backconv_line, &backconv_line_len) != 0)
365 error (EXIT_FAILURE, errno,
366 _("error while converting from \"%s\" encoding to \"%s\" encoding"),
367 "UTF-8", locale_code);
368 if (backconv_line != last_backconv_line)
369 {
370 if (last_backconv_line != NULL)
371 free (last_backconv_line);
372 last_backconv_line = backconv_line;
373 last_backconv_line_len = backconv_line_len;
374 }
375
376 fwrite (backconv_line, 1, backconv_line_len, stdout);
377 }
378 else
379 #endif
380 fwrite (filtered_line, 1, filtered_line_len, stdout);
381
382 free (filtered_line);
383 }
384
385 #if HAVE_ICONV
386 if (need_code_conversion)
387 {
388 iconv_close (conv_from_utf8);
389 iconv_close (conv_to_utf8);
390 }
391 #endif
392
393 destroy_linebuffer (&lb);
394 }
395