1 /* Shell format strings.
2 Copyright (C) 2003-2004, 2006-2007, 2009, 2019-2020 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2003.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 #include <stdbool.h>
23 #include <stdlib.h>
24 #include <string.h>
25
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "format-invalid.h"
30 #include "gettext.h"
31
/* Shorthand for looking up the translation of a user-visible message.  */
#define _(str) gettext (str)
33
/* Shell format strings are simply strings subject to variable substitution.
   A variable substitution starts with '$' and is finished by either
   - a nonempty sequence of alphanumeric ASCII characters or underscores,
     the first being not a digit, or
   - an opening brace '{', a nonempty sequence of alphanumeric ASCII
     characters or underscores, the first being not a digit, and a closing
     brace '}'.
   We don't support variable references like $1, $$ or $? since they make
   no sense when 'envsubst' is invoked.
   We don't support non-ASCII variable names, to avoid dependencies w.r.t. the
   current encoding: While "${\xe0}" looks like a variable access in ISO-8859-1
   encoding, it doesn't look like one in the BIG5, BIG5-HKSCS, GBK, GB18030,
   SHIFT_JIS, JOHAB encodings, because \xe0\x7d is a single character in these
   encodings.
   We don't support the POSIX syntax for default or alternate values:
     ${variable-default}        ${variable:-default}
     ${variable=default}        ${variable:=default}
     ${variable+replacement}    ${variable:+replacement}
     ${variable?ignored}        ${variable:?ignored}
   because the translator might be tempted to change the default value; if
   we allow it we have a security problem; if we don't allow it the translator
   will be surprised.
 */
56
/* One shell variable referenced by a format string.  */
struct named_arg
{
  char *name;   /* NUL-terminated variable name; allocated by format_parse
                   and released by format_free */
};
61
/* Description of a parsed shell format string, as built by format_parse.  */
struct spec
{
  /* Number of '$' substitutions seen in the string (duplicates counted).  */
  unsigned int directives;
  /* Number of valid elements in 'named'.  */
  unsigned int named_arg_count;
  /* Referenced variable names; format_parse leaves this array sorted by
     name with duplicates removed.  NULL when there are none.  */
  struct named_arg *named;
};
68
69
70 static int
named_arg_compare(const void * p1,const void * p2)71 named_arg_compare (const void *p1, const void *p2)
72 {
73 return strcmp (((const struct named_arg *) p1)->name,
74 ((const struct named_arg *) p2)->name);
75 }
76
/* Explanations that format_parse stores in *invalid_reason when it rejects
   a string.  Each macro expands to an xstrdup'ed copy of the translated
   message, so the result is owned by whoever receives it.  */
#define INVALID_NON_ASCII_VARIABLE() \
  xstrdup (_("The string refers to a shell variable with a non-ASCII name."))
#define INVALID_SHELL_SYNTAX() \
  xstrdup (_("The string refers to a shell variable with complex shell brace syntax. This syntax is unsupported here due to security reasons."))
#define INVALID_CONTEXT_DEPENDENT_VARIABLE() \
  xstrdup (_("The string refers to a shell variable whose value may be different inside shell functions."))
#define INVALID_EMPTY_VARIABLE() \
  xstrdup (_("The string refers to a shell variable with an empty name."))
85
86 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)87 format_parse (const char *format, bool translated, char *fdi,
88 char **invalid_reason)
89 {
90 const char *const format_start = format;
91 struct spec spec;
92 unsigned int named_allocated;
93 struct spec *result;
94
95 spec.directives = 0;
96 spec.named_arg_count = 0;
97 spec.named = NULL;
98 named_allocated = 0;
99
100 for (; *format != '\0';)
101 if (*format++ == '$')
102 {
103 /* A variable substitution. */
104 char *name;
105
106 FDI_SET (format - 1, FMTDIR_START);
107 spec.directives++;
108
109 if (*format == '{')
110 {
111 const char *name_start;
112 const char *name_end;
113 size_t n;
114
115 name_start = ++format;
116 for (; *format != '\0'; format++)
117 {
118 if (*format == '}')
119 break;
120 if (!c_isascii (*format))
121 {
122 *invalid_reason = INVALID_NON_ASCII_VARIABLE ();
123 FDI_SET (format, FMTDIR_ERROR);
124 goto bad_format;
125 }
126 if (format > name_start
127 && (*format == '-' || *format == '=' || *format == '+'
128 || *format == '?' || *format == ':'))
129 {
130 *invalid_reason = INVALID_SHELL_SYNTAX ();
131 FDI_SET (format, FMTDIR_ERROR);
132 goto bad_format;
133 }
134 if (!(c_isalnum (*format) || *format == '_')
135 || (format == name_start && c_isdigit (*format)))
136 {
137 *invalid_reason = INVALID_CONTEXT_DEPENDENT_VARIABLE ();
138 FDI_SET (format, FMTDIR_ERROR);
139 goto bad_format;
140 }
141 }
142 if (*format == '\0')
143 {
144 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
145 FDI_SET (format - 1, FMTDIR_ERROR);
146 goto bad_format;
147 }
148 name_end = format++;
149
150 n = name_end - name_start;
151 if (n == 0)
152 {
153 *invalid_reason = INVALID_EMPTY_VARIABLE ();
154 FDI_SET (format - 1, FMTDIR_ERROR);
155 goto bad_format;
156 }
157 name = XNMALLOC (n + 1, char);
158 memcpy (name, name_start, n);
159 name[n] = '\0';
160 }
161 else if (c_isalpha (*format) || *format == '_')
162 {
163 const char *name_start;
164 const char *name_end;
165 size_t n;
166
167 name_start = format;
168 do
169 format++;
170 while (*format != '\0' && (c_isalnum (*format) || *format == '_'));
171 name_end = format;
172
173 n = name_end - name_start;
174 name = XNMALLOC (n + 1, char);
175 memcpy (name, name_start, n);
176 name[n] = '\0';
177 }
178 else if (*format != '\0')
179 {
180 if (!c_isascii (*format))
181 {
182 *invalid_reason = INVALID_NON_ASCII_VARIABLE ();
183 FDI_SET (format, FMTDIR_ERROR);
184 goto bad_format;
185 }
186 else
187 {
188 *invalid_reason = INVALID_CONTEXT_DEPENDENT_VARIABLE ();
189 FDI_SET (format, FMTDIR_ERROR);
190 goto bad_format;
191 }
192 }
193 else
194 {
195 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
196 FDI_SET (format - 1, FMTDIR_ERROR);
197 goto bad_format;
198 }
199
200 /* Named argument. */
201 if (named_allocated == spec.named_arg_count)
202 {
203 named_allocated = 2 * named_allocated + 1;
204 spec.named = (struct named_arg *) xrealloc (spec.named, named_allocated * sizeof (struct named_arg));
205 }
206 spec.named[spec.named_arg_count].name = name;
207 spec.named_arg_count++;
208
209 FDI_SET (format - 1, FMTDIR_END);
210 }
211
212 /* Sort the named argument array, and eliminate duplicates. */
213 if (spec.named_arg_count > 1)
214 {
215 unsigned int i, j;
216
217 qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
218 named_arg_compare);
219
220 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
221 for (i = j = 0; i < spec.named_arg_count; i++)
222 if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
223 free (spec.named[i].name);
224 else
225 {
226 if (j < i)
227 spec.named[j].name = spec.named[i].name;
228 j++;
229 }
230 spec.named_arg_count = j;
231 }
232
233 result = XMALLOC (struct spec);
234 *result = spec;
235 return result;
236
237 bad_format:
238 if (spec.named != NULL)
239 {
240 unsigned int i;
241 for (i = 0; i < spec.named_arg_count; i++)
242 free (spec.named[i].name);
243 free (spec.named);
244 }
245 return NULL;
246 }
247
248 static void
format_free(void * descr)249 format_free (void *descr)
250 {
251 struct spec *spec = (struct spec *) descr;
252
253 if (spec->named != NULL)
254 {
255 unsigned int i;
256 for (i = 0; i < spec->named_arg_count; i++)
257 free (spec->named[i].name);
258 free (spec->named);
259 }
260 free (spec);
261 }
262
263 static int
format_get_number_of_directives(void * descr)264 format_get_number_of_directives (void *descr)
265 {
266 struct spec *spec = (struct spec *) descr;
267
268 return spec->directives;
269 }
270
271 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)272 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
273 formatstring_error_logger_t error_logger,
274 const char *pretty_msgid, const char *pretty_msgstr)
275 {
276 struct spec *spec1 = (struct spec *) msgid_descr;
277 struct spec *spec2 = (struct spec *) msgstr_descr;
278 bool err = false;
279
280 if (spec1->named_arg_count + spec2->named_arg_count > 0)
281 {
282 unsigned int i, j;
283 unsigned int n1 = spec1->named_arg_count;
284 unsigned int n2 = spec2->named_arg_count;
285
286 /* Check the argument names are the same.
287 Both arrays are sorted. We search for the first difference. */
288 for (i = 0, j = 0; i < n1 || j < n2; )
289 {
290 int cmp = (i >= n1 ? 1 :
291 j >= n2 ? -1 :
292 strcmp (spec1->named[i].name, spec2->named[j].name));
293
294 if (cmp > 0)
295 {
296 if (error_logger)
297 error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in '%s'"),
298 spec2->named[j].name, pretty_msgstr,
299 pretty_msgid);
300 err = true;
301 break;
302 }
303 else if (cmp < 0)
304 {
305 if (equality)
306 {
307 if (error_logger)
308 error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
309 spec1->named[i].name, pretty_msgstr);
310 err = true;
311 break;
312 }
313 else
314 i++;
315 }
316 else
317 j++, i++;
318 }
319 }
320
321 return err;
322 }
323
324
325 struct formatstring_parser formatstring_sh =
326 {
327 format_parse,
328 format_free,
329 format_get_number_of_directives,
330 NULL,
331 format_check
332 };
333
334
335 #ifdef TEST
336
337 /* Test program: Print the argument list specification returned by
338 format_parse for strings read from standard input. */
339
340 #include <stdio.h>
341
342 static void
format_print(void * descr)343 format_print (void *descr)
344 {
345 struct spec *spec = (struct spec *) descr;
346 unsigned int i;
347
348 if (spec == NULL)
349 {
350 printf ("INVALID");
351 return;
352 }
353
354 printf ("{");
355 for (i = 0; i < spec->named_arg_count; i++)
356 {
357 if (i > 0)
358 printf (", ");
359 printf ("'%s'", spec->named[i].name);
360 }
361 printf ("}");
362 }
363
/* Test driver: read lines from standard input until end of file or a read
   error, parse each one with format_parse, and print the resulting
   descriptor, followed by the reason on the next line if the string was
   rejected.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* Release the descriptor; it is not needed after printing.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
395
396 /*
397 * For Emacs M-x compile
398 * Local Variables:
399 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-sh.c ../gnulib-lib/libgettextlib.la"
400 * End:
401 */
402
403 #endif /* TEST */
404