1 /* KUIT (KDE User Interface Text) format strings.
2 Copyright (C) 2015, 2018-2019 Free Software Foundation, Inc.
3 Written by Daiki Ueno <ueno@gnu.org>, 2015.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

#include "format.h"
#include "unistr.h"
#include "xalloc.h"
#include "xvasprintf.h"
#include "gettext.h"
31
/* Select the XML parser used to validate the markup: the bundled
   fallback parser when building libgettextpo, libxml2 otherwise.  */
#if IN_LIBGETTEXTPO
/* Use included markup parser to avoid extra dependency from
   libgettextpo to libxml2.  */
/* NOTE(review): the guard tests FORMAT_KDE_KUIT_FALLBACK_MARKUP but the
   macro defined below is FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP -- confirm
   whether the differing names are intentional.  */
# ifndef FORMAT_KDE_KUIT_FALLBACK_MARKUP
# define FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP 1
# endif
#else
# define FORMAT_KDE_KUIT_USE_LIBXML2 1
#endif

/* Pull in the header of whichever parser was selected above.  */
#if FORMAT_KDE_KUIT_USE_LIBXML2
# include <libxml/parser.h>
#elif FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP
# include "markup.h"
#endif
47
48
/* Shorthand for looking up the translation of the message STR.  */
#define _(str) gettext (str)

/* Number of elements of the array A.  A must be a real array, not a
   pointer.  The argument is parenthesized before subscripting so that
   an expression argument is indexed as a whole.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
52
53
54 /* KUIT (KDE User Interface Text) is an XML-like markup which augments
55 translatable strings with semantic information:
56 https://api.kde.org/frameworks/ki18n/html/prg_guide.html#kuit_markup
57 KUIT can be seen as a fragment of a well-formed XML document,
58 except that it allows '&' as a Qt accelerator marker and '%' as a
59 format directive. */
60
/* Descriptor for a parsed KUIT format string.  It only wraps the
   descriptor of the underlying KDE format string parser.  */
struct spec
{
  /* A format string descriptor returned from formatstring_kde.parse.  */
  void *base;
};
66
/* Namespace URI bound to the "gt" prefix of the <gt:kuit> wrapper
   element that format_parse puts around the format string.  */
#define XML_NS "https://www.gnu.org/s/gettext/kde"
68
/* A range of Unicode code points.  Both bounds are inclusive: users of
   this type test membership with start <= uc && uc <= end.  */
struct char_range
{
  ucs4_t start;   /* first code point of the range */
  ucs4_t end;     /* last code point of the range */
};
74
/* Character ranges for NameStartChar defined in:
   https://www.w3.org/TR/REC-xml/#NT-NameStartChar
   Each entry is an inclusive range of code points.  */
static const struct char_range name_chars1[] =
  {
    { ':', ':' },
    { 'A', 'Z' },
    { '_', '_' },
    { 'a', 'z' },
    { 0xC0, 0xD6 },
    { 0xD8, 0xF6 },
    { 0xF8, 0x2FF },
    { 0x370, 0x37D },
    { 0x37F, 0x1FFF },
    { 0x200C, 0x200D },
    { 0x2070, 0x218F },
    { 0x2C00, 0x2FEF },
    { 0x3001, 0xD7FF },
    { 0xF900, 0xFDCF },
    { 0xFDF0, 0xFFFD },
    { 0x10000, 0xEFFFF }
  };
96
/* Character ranges for NameChar, excluding NameStartChar:
   https://www.w3.org/TR/REC-xml/#NT-NameChar
   Each entry is an inclusive range of code points.  A character is a
   NameChar if it falls in name_chars1 or in this table.  */
static const struct char_range name_chars2[] =
  {
    { '-', '-' },
    { '.', '.' },
    { '0', '9' },
    { 0xB7, 0xB7 },
    { 0x0300, 0x036F },
    { 0x203F, 0x2040 }
  };
108
109 /* Return true if INPUT is an XML reference. */
110 static bool
is_reference(const char * input)111 is_reference (const char *input)
112 {
113 const char *str = input;
114 const char *str_limit = str + strlen (input);
115 ucs4_t uc;
116 int i;
117
118 str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
119 assert (uc == '&');
120
121 str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
122
123 /* CharRef */
124 if (uc == '#')
125 {
126 str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
127 if (uc == 'x')
128 {
129 while (str < str_limit)
130 {
131 str += u8_mbtouc (&uc, (const unsigned char *) str,
132 str_limit - str);
133 if (!(('0' <= uc && uc <= '9')
134 || ('A' <= uc && uc <= 'F')
135 || ('a' <= uc && uc <= 'f')))
136 break;
137 }
138 return uc == ';';
139 }
140 else if ('0' <= uc && uc <= '9')
141 {
142 while (str < str_limit)
143 {
144 str += u8_mbtouc (&uc, (const unsigned char *) str,
145 str_limit - str);
146 if (!('0' <= uc && uc <= '9'))
147 break;
148 }
149 return uc == ';';
150 }
151 }
152 else
153 {
154 /* EntityRef */
155 for (i = 0; i < SIZEOF (name_chars1); i++)
156 if (name_chars1[i].start <= uc && uc <= name_chars1[i].end)
157 break;
158
159 if (i == SIZEOF (name_chars1))
160 return false;
161
162 while (str < str_limit)
163 {
164 str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
165 for (i = 0; i < SIZEOF (name_chars1); i++)
166 if (name_chars1[i].start <= uc && uc <= name_chars1[i].end)
167 break;
168 if (i == SIZEOF (name_chars1))
169 {
170 for (i = 0; i < SIZEOF (name_chars2); i++)
171 if (name_chars2[i].start <= uc && uc <= name_chars2[i].end)
172 break;
173 if (i == SIZEOF (name_chars2))
174 return false;
175 }
176 }
177 return uc == ';';
178 }
179
180 return false;
181 }
182
183
184 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)185 format_parse (const char *format, bool translated, char *fdi,
186 char **invalid_reason)
187 {
188 struct spec spec;
189 struct spec *result;
190 const char *str;
191 const char *str_limit;
192 size_t amp_count;
193 char *buffer, *bp;
194
195 spec.base = NULL;
196
197 /* Preprocess the input, putting the content in a <gt:kuit> element. */
198 str = format;
199 str_limit = str + strlen (format);
200
201 for (amp_count = 0; str < str_limit; amp_count++)
202 {
203 const char *amp = strchrnul (str, '&');
204 if (*amp != '&')
205 break;
206 str = amp + 1;
207 }
208
209 buffer = xmalloc (amp_count * 4
210 + strlen (format)
211 + strlen ("<gt:kuit xmlns:gt=\"" XML_NS "\"></gt:kuit>")
212 + 1);
213 *buffer = '\0';
214
215 bp = buffer;
216 bp = stpcpy (bp, "<gt:kuit xmlns:gt=\"" XML_NS "\">");
217 str = format;
218 while (str < str_limit)
219 {
220 const char *amp = strchrnul (str, '&');
221
222 bp = stpncpy (bp, str, amp - str);
223 if (*amp != '&')
224 break;
225
226 bp = stpcpy (bp, is_reference (amp) ? "&" : "&");
227 str = amp + 1;
228 }
229 stpcpy (bp, "</gt:kuit>");
230
231 #if FORMAT_KDE_KUIT_USE_LIBXML2
232 {
233 xmlDocPtr doc;
234
235 doc = xmlReadMemory (buffer, strlen (buffer), "", NULL,
236 XML_PARSE_NONET
237 | XML_PARSE_NOWARNING
238 | XML_PARSE_NOERROR
239 | XML_PARSE_NOBLANKS);
240 if (doc == NULL)
241 {
242 xmlError *err = xmlGetLastError ();
243 *invalid_reason =
244 xasprintf (_("error while parsing: %s"),
245 err->message);
246 free (buffer);
247 xmlFreeDoc (doc);
248 return NULL;
249 }
250
251 free (buffer);
252 xmlFreeDoc (doc);
253 }
254 #elif FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP
255 {
256 markup_parser_ty parser;
257 markup_parse_context_ty *context;
258
259 memset (&parser, 0, sizeof (markup_parser_ty));
260 context = markup_parse_context_new (&parser, 0, NULL);
261 if (!markup_parse_context_parse (context, buffer, strlen (buffer)))
262 {
263 *invalid_reason =
264 xasprintf (_("error while parsing: %s"),
265 markup_parse_context_get_error (context));
266 free (buffer);
267 markup_parse_context_free (context);
268 return NULL;
269 }
270
271 if (!markup_parse_context_end_parse (context))
272 {
273 *invalid_reason =
274 xasprintf (_("error while parsing: %s"),
275 markup_parse_context_get_error (context));
276 free (buffer);
277 markup_parse_context_free (context);
278 return NULL;
279 }
280
281 free (buffer);
282 markup_parse_context_free (context);
283 }
284 #else
285 /* No support for XML. */
286 free (buffer);
287 #endif
288
289 spec.base = formatstring_kde.parse (format, translated, fdi, invalid_reason);
290 if (spec.base == NULL)
291 return NULL;
292
293 result = XMALLOC (struct spec);
294 *result = spec;
295 return result;
296 }
297
298 static void
format_free(void * descr)299 format_free (void *descr)
300 {
301 struct spec *spec = descr;
302 formatstring_kde.free (spec->base);
303 free (spec);
304 }
305
306 static int
format_get_number_of_directives(void * descr)307 format_get_number_of_directives (void *descr)
308 {
309 struct spec *spec = descr;
310 return formatstring_kde.get_number_of_directives (spec->base);
311 }
312
313 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)314 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
315 formatstring_error_logger_t error_logger,
316 const char *pretty_msgid, const char *pretty_msgstr)
317 {
318 struct spec *msgid_spec = msgid_descr;
319 struct spec *msgstr_spec = msgstr_descr;
320
321 return formatstring_kde.check (msgid_spec->base, msgstr_spec->base, equality,
322 error_logger,
323 pretty_msgid, pretty_msgstr);
324 }
325
/* The KUIT format string parser.  The initializer is positional; the
   member layout comes from the declaration of struct formatstring_parser,
   which is not part of this file.  */
struct formatstring_parser formatstring_kde_kuit =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  NULL,                 /* no function is provided for the fourth slot */
  format_check
};
334
335
336 #ifdef TEST
337
338 /* Test program: Print the argument list specification returned by
339 format_parse for strings read from standard input. */
340
341 #include <stdio.h>
342
/* Print a summary of DESCR on standard output, without a trailing
   newline: "INVALID" if DESCR is NULL, otherwise the number of format
   directives enclosed in parentheses.

   The descriptor of this parser only holds an opaque pointer to the
   descriptor of the KDE parser, so the individual argument numbers are
   not accessible from here; the directive count is what the wrapped
   parser exposes.  */
static void
format_print (void *descr)
{
  if (descr == NULL)
    {
      printf ("INVALID");
      return;
    }

  printf ("(%d)", format_get_number_of_directives (descr));
}
372
/* Read format strings from standard input, one per line.  For each
   line, print the result of format_print, followed on a separate line
   by the reason for invalidity if the string could not be parsed.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated the buffer even though it
             failed; release it before leaving the loop.  */
          free (line);
          break;
        }
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* The descriptor is owned by the caller of format_parse.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
404
405 /*
406 * For Emacs M-x compile
407 * Local Variables:
408 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-kde-kuit.c ../gnulib-lib/libgettextlib.la"
409 * End:
410 */
411
412 #endif /* TEST */
413