• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Recode Serbian text from Cyrillic to Latin script.
2    Copyright (C) 2006-2007, 2010, 2012, 2018-2020 Free Software Foundation,
3    Inc.
4    Written by Bruno Haible <bruno@clisp.org>, 2006.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
22 
23 #include <errno.h>
24 #include <getopt.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <locale.h>
29 
30 #if HAVE_ICONV
31 #include <iconv.h>
32 #endif
33 
34 #include "noreturn.h"
35 #include "closeout.h"
36 #include "error.h"
37 #include "progname.h"
38 #include "relocatable.h"
39 #include "basename-lgpl.h"
40 #include "xalloc.h"
41 #include "localcharset.h"
42 #include "c-strcase.h"
43 #include "xstriconv.h"
44 #include "filters.h"
45 #include "propername.h"
46 #include "gettext.h"
47 
48 #define _(str) gettext (str)
49 
50 
/* Table of long options understood by getopt_long.  Each entry maps a long
   option name to the value of its short-option equivalent; the all-zero
   entry terminates the table.  */
static const struct option long_options[] =
{
  { .name = "help",    .has_arg = no_argument, .flag = NULL, .val = 'h' },
  { .name = "version", .has_arg = no_argument, .flag = NULL, .val = 'V' },
  { .name = NULL,      .has_arg = 0,           .flag = NULL, .val = 0 }
};
58 
59 /* Forward declaration of local functions.  */
60 _GL_NORETURN_FUNC static void usage (int status);
61 static void process (FILE *stream);
62 
63 int
main(int argc,char * argv[])64 main (int argc, char *argv[])
65 {
66   /* Default values for command line options.  */
67   bool do_help = false;
68   bool do_version = false;
69 
70   int opt;
71 
72   /* Set program name for message texts.  */
73   set_program_name (argv[0]);
74 
75   /* Set locale via LC_ALL.  */
76   setlocale (LC_ALL, "");
77 
78   /* Set the text message domain.  */
79   bindtextdomain (PACKAGE, relocate (LOCALEDIR));
80   textdomain (PACKAGE);
81 
82   /* Ensure that write errors on stdout are detected.  */
83   atexit (close_stdout);
84 
85   /* Parse command line options.  */
86   while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
87     switch (opt)
88     {
89     case '\0':          /* Long option.  */
90       break;
91     case 'h':
92       do_help = true;
93       break;
94     case 'V':
95       do_version = true;
96       break;
97     default:
98       usage (EXIT_FAILURE);
99     }
100 
101   /* Version information is requested.  */
102   if (do_version)
103     {
104       printf ("%s (GNU %s) %s\n", last_component (program_name),
105               PACKAGE, VERSION);
106       /* xgettext: no-wrap */
107       printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
108 License GPLv3+: GNU GPL version 3 or later <%s>\n\
109 This is free software: you are free to change and redistribute it.\n\
110 There is NO WARRANTY, to the extent permitted by law.\n\
111 "),
112               "2006-2020", "https://gnu.org/licenses/gpl.html");
113       printf (_("Written by %s and %s.\n"),
114               /* TRANSLATORS: This is a proper name. The last name is
115                  (with Unicode escapes) "\u0160egan" or (with HTML entities)
116                  "&Scaron;egan".  */
117               proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"),
118               proper_name ("Bruno Haible"));
119       exit (EXIT_SUCCESS);
120     }
121 
122   /* Help is requested.  */
123   if (do_help)
124     usage (EXIT_SUCCESS);
125 
126   if (argc - optind > 0)
127     error (EXIT_FAILURE, 0, _("too many arguments"));
128 
129   process (stdin);
130 
131   exit (EXIT_SUCCESS);
132 }
133 
134 
135 /* Display usage information and exit.  */
136 static void
usage(int status)137 usage (int status)
138 {
139   if (status != EXIT_SUCCESS)
140     fprintf (stderr, _("Try '%s --help' for more information.\n"),
141              program_name);
142   else
143     {
144       /* xgettext: no-wrap */
145       printf (_("\
146 Usage: %s [OPTION]\n\
147 "), program_name);
148       printf ("\n");
149       /* xgettext: no-wrap */
150       printf (_("\
151 Recode Serbian text from Cyrillic to Latin script.\n"));
152       /* xgettext: no-wrap */
153       printf (_("\
154 The input text is read from standard input.  The converted text is output to\n\
155 standard output.\n"));
156       printf ("\n");
157       /* xgettext: no-wrap */
158       printf (_("\
159 Informative output:\n"));
160       /* xgettext: no-wrap */
161       printf (_("\
162   -h, --help                  display this help and exit\n"));
163       /* xgettext: no-wrap */
164       printf (_("\
165   -V, --version               output version information and exit\n"));
166       printf ("\n");
167       /* TRANSLATORS: The first placeholder is the web address of the Savannah
168          project of this package.  The second placeholder is the bug-reporting
169          email address for this package.  Please add _another line_ saying
170          "Report translation bugs to <...>\n" with the address for translation
171          bugs (typically your translation team's web or email address).  */
172       printf(_("\
173 Report bugs in the bug tracker at <%s>\n\
174 or by email to <%s>.\n"),
175              "https://savannah.gnu.org/projects/gettext",
176              "bug-gettext@gnu.org");
177     }
178 
179   exit (status);
180 }
181 
182 
183 /* Routines for reading a line.
184    Don't use routines that drop NUL bytes.  Don't use getline(), because it
185    doesn't provide a good error message in case of memory allocation failure.
186    The gnulib module 'linebuffer' is nearly the right thing, except that we
187    don't want an extra newline at the end of file.  */
188 
/* A 'struct linebuffer' is a growable byte buffer that holds one line of
   text.  The contents are not NUL terminated; LENGTH tells how many bytes
   are valid. */

struct linebuffer
{
  size_t size;                  /* Number of bytes allocated for BUFFER. */
  size_t length;                /* Number of bytes of BUFFER in use. */
  char *buffer;                 /* The storage; NULL before any allocation. */
};
197 
198 /* Initialize linebuffer LINEBUFFER for use. */
199 static inline void
init_linebuffer(struct linebuffer * lb)200 init_linebuffer (struct linebuffer *lb)
201 {
202   lb->size = 0;
203   lb->length = 0;
204   lb->buffer = NULL;
205 }
206 
207 /* Read an arbitrarily long line of text from STREAM into linebuffer LB.
208    Keep the newline.  Do not NUL terminate.
209    Return LINEBUFFER, except at end of file return NULL.  */
210 static struct linebuffer *
read_linebuffer(struct linebuffer * lb,FILE * stream)211 read_linebuffer (struct linebuffer *lb, FILE *stream)
212 {
213   if (feof (stream))
214     return NULL;
215   else
216     {
217       char *p = lb->buffer;
218       char *end = lb->buffer + lb->size;
219 
220       for (;;)
221         {
222           int c = getc (stream);
223           if (c == EOF)
224             {
225               if (p == lb->buffer || ferror (stream))
226                 return NULL;
227               break;
228             }
229           if (p == end)
230             {
231               size_t oldsize = lb->size; /* = p - lb->buffer */
232               size_t newsize = 2 * oldsize + 40;
233               lb->buffer = (char *) xrealloc (lb->buffer, newsize);
234               lb->size = newsize;
235               p = lb->buffer + oldsize;
236               end = lb->buffer + newsize;
237             }
238           *p++ = c;
239           if (c == '\n')
240             break;
241         }
242 
243       lb->length = p - lb->buffer;
244       return lb;
245     }
246 }
247 
248 /* Free linebuffer LB and its data, all allocated with malloc. */
249 static inline void
destroy_linebuffer(struct linebuffer * lb)250 destroy_linebuffer (struct linebuffer *lb)
251 {
252   if (lb->buffer != NULL)
253     free (lb->buffer);
254 }
255 
256 
257 /* Process the input and produce the output.  */
258 static void
process(FILE * stream)259 process (FILE *stream)
260 {
261   struct linebuffer lb;
262   const char *locale_code = locale_charset ();
263   bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0);
264 #if HAVE_ICONV
265   iconv_t conv_to_utf8 = (iconv_t)(-1);
266   iconv_t conv_from_utf8 = (iconv_t)(-1);
267   char *last_utf8_line;
268   size_t last_utf8_line_len;
269   char *last_backconv_line;
270   size_t last_backconv_line_len;
271 #endif
272 
273   init_linebuffer (&lb);
274 
275   /* Initialize the conversion descriptors.  */
276   if (need_code_conversion)
277     {
278 #if HAVE_ICONV
279       /* Avoid glibc-2.1 bug with EUC-KR.  */
280 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
281      && !defined _LIBICONV_VERSION
282       if (strcmp (locale_code, "EUC-KR") != 0)
283 # endif
284         {
285           conv_to_utf8 = iconv_open ("UTF-8", locale_code);
286           /* TODO:  Maybe append //TRANSLIT here?  */
287           conv_from_utf8 = iconv_open (locale_code, "UTF-8");
288         }
289       if (conv_to_utf8 == (iconv_t)(-1))
290         error (EXIT_FAILURE, 0,
291                _("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), and iconv() does not support this conversion."),
292                locale_code, "UTF-8", last_component (program_name));
293       if (conv_from_utf8 == (iconv_t)(-1))
294         error (EXIT_FAILURE, 0,
295                _("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), and iconv() does not support this conversion."),
296                "UTF-8", locale_code, last_component (program_name));
297       last_utf8_line = NULL;
298       last_utf8_line_len = 0;
299       last_backconv_line = NULL;
300       last_backconv_line_len = 0;
301 #else
302       error (EXIT_FAILURE, 0,
303              _("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). This version was built without iconv()."),
304              locale_code, "UTF-8", last_component (program_name));
305 #endif
306     }
307 
308   /* Read the input line by line.
309      Processing it character by character is not possible, because some
310      filters need to look at adjacent characters.  Processing the entire file
311      in a whole chunk would take an excessive amount of memory.  */
312   for (;;)
313     {
314       char *line;
315       size_t line_len;
316       char *filtered_line;
317       size_t filtered_line_len;
318 
319       /* Read a line.  */
320       if (read_linebuffer (&lb, stream) == NULL)
321         break;
322       line = lb.buffer;
323       line_len = lb.length;
324       /* read_linebuffer always returns a non-void result.  */
325       if (line_len == 0)
326         abort ();
327 
328 #if HAVE_ICONV
329       /* Convert it to UTF-8.  */
330       if (need_code_conversion)
331         {
332           char *utf8_line = last_utf8_line;
333           size_t utf8_line_len = last_utf8_line_len;
334 
335           if (xmem_cd_iconv (line, line_len, conv_to_utf8,
336                              &utf8_line, &utf8_line_len) != 0)
337             error (EXIT_FAILURE, errno,
338                    _("input is not valid in \"%s\" encoding"),
339                    locale_code);
340           if (utf8_line != last_utf8_line)
341             {
342               if (last_utf8_line != NULL)
343                 free (last_utf8_line);
344               last_utf8_line = utf8_line;
345               last_utf8_line_len = utf8_line_len;
346             }
347 
348           line = utf8_line;
349           line_len = utf8_line_len;
350         }
351 #endif
352 
353       /* Apply the filter.  */
354       serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len);
355 
356 #if HAVE_ICONV
357       /* Convert it back to the original encoding.  */
358       if (need_code_conversion)
359         {
360           char *backconv_line = last_backconv_line;
361           size_t backconv_line_len = last_backconv_line_len;
362 
363           if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8,
364                              &backconv_line, &backconv_line_len) != 0)
365             error (EXIT_FAILURE, errno,
366                    _("error while converting from \"%s\" encoding to \"%s\" encoding"),
367                    "UTF-8", locale_code);
368           if (backconv_line != last_backconv_line)
369             {
370               if (last_backconv_line != NULL)
371                 free (last_backconv_line);
372               last_backconv_line = backconv_line;
373               last_backconv_line_len = backconv_line_len;
374             }
375 
376           fwrite (backconv_line, 1, backconv_line_len, stdout);
377         }
378       else
379 #endif
380         fwrite (filtered_line, 1, filtered_line_len, stdout);
381 
382       free (filtered_line);
383     }
384 
385 #if HAVE_ICONV
386   if (need_code_conversion)
387     {
388       iconv_close (conv_from_utf8);
389       iconv_close (conv_to_utf8);
390     }
391 #endif
392 
393   destroy_linebuffer (&lb);
394 }
395