1 /* Writing Java .properties files.
2 Copyright (C) 2003, 2005-2009, 2019 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2003.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 /* Specification. */
23 #include "write-properties.h"
24
25 #include <errno.h>
26 #include <stdbool.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include <textstyle.h>
32
33 #include "error.h"
34 #include "message.h"
35 #include "msgl-ascii.h"
36 #include "msgl-iconv.h"
37 #include "po-charset.h"
38 #include "unistr.h"
39 #include "write-po.h"
40 #include "xalloc.h"
41
42 /* The format of the Java .properties files is documented in the JDK
43 documentation for class java.util.Properties. In the case of .properties
44 files for PropertyResourceBundle, for each message, the msgid becomes the
45 key (left-hand side) and the msgstr becomes the value (right-hand side)
46 of a "key=value" line. Messages with plurals are not supported in this
47 format. */
48
49 /* Handling of comments: We copy all comments from the PO file to the
50 .properties file. This is not really needed; it's a service for translators
51 who don't like PO files and prefer to maintain the .properties file. */
52
53 /* Converts a string to JAVA encoding (with \uxxxx sequences for non-ASCII
54 characters). */
55 static const char *
conv_to_java(const char * string)56 conv_to_java (const char *string)
57 {
58 /* We cannot use iconv to "JAVA" because not all iconv() implementations
59 know about the "JAVA" encoding. */
60 static const char hexdigit[] = "0123456789abcdef";
61 size_t length;
62 char *result;
63
64 if (is_ascii_string (string))
65 return string;
66
67 length = 0;
68 {
69 const char *str = string;
70 const char *str_limit = str + strlen (str);
71
72 while (str < str_limit)
73 {
74 ucs4_t uc;
75 str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
76 length += (uc <= 0x007f ? 1 : uc < 0x10000 ? 6 : 12);
77 }
78 }
79
80 result = XNMALLOC (length + 1, char);
81
82 {
83 char *newstr = result;
84 const char *str = string;
85 const char *str_limit = str + strlen (str);
86
87 while (str < str_limit)
88 {
89 ucs4_t uc;
90 str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
91 if (uc <= 0x007f)
92 /* ASCII characters can be output literally.
93 We could treat non-ASCII ISO-8859-1 characters (0x0080..0x00FF)
94 the same way, but there is no point in doing this; Sun's
95 nativetoascii doesn't do it either. */
96 *newstr++ = uc;
97 else if (uc < 0x10000)
98 {
99 /* Single UCS-2 'char' */
100 sprintf (newstr, "\\u%c%c%c%c",
101 hexdigit[(uc >> 12) & 0x0f], hexdigit[(uc >> 8) & 0x0f],
102 hexdigit[(uc >> 4) & 0x0f], hexdigit[uc & 0x0f]);
103 newstr += 6;
104 }
105 else
106 {
107 /* UTF-16 surrogate: two 'char's. */
108 ucs4_t uc1 = 0xd800 + ((uc - 0x10000) >> 10);
109 ucs4_t uc2 = 0xdc00 + ((uc - 0x10000) & 0x3ff);
110 sprintf (newstr, "\\u%c%c%c%c",
111 hexdigit[(uc1 >> 12) & 0x0f], hexdigit[(uc1 >> 8) & 0x0f],
112 hexdigit[(uc1 >> 4) & 0x0f], hexdigit[uc1 & 0x0f]);
113 newstr += 6;
114 sprintf (newstr, "\\u%c%c%c%c",
115 hexdigit[(uc2 >> 12) & 0x0f], hexdigit[(uc2 >> 8) & 0x0f],
116 hexdigit[(uc2 >> 4) & 0x0f], hexdigit[uc2 & 0x0f]);
117 newstr += 6;
118 }
119 }
120 *newstr = '\0';
121 }
122
123 return result;
124 }
125
126 /* Writes a key or value to the stream, without newline. */
127 static void
write_escaped_string(ostream_t stream,const char * str,bool in_key)128 write_escaped_string (ostream_t stream, const char *str, bool in_key)
129 {
130 static const char hexdigit[] = "0123456789abcdef";
131 const char *str_limit = str + strlen (str);
132 bool first = true;
133
134 while (str < str_limit)
135 {
136 ucs4_t uc;
137 str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
138 /* Whitespace must be escaped. */
139 if (uc == 0x0020 && (first || in_key))
140 ostream_write_str (stream, "\\ ");
141 else if (uc == 0x0009)
142 ostream_write_str (stream, "\\t");
143 else if (uc == 0x000a)
144 ostream_write_str (stream, "\\n");
145 else if (uc == 0x000d)
146 ostream_write_str (stream, "\\r");
147 else if (uc == 0x000c)
148 ostream_write_str (stream, "\\f");
149 else if (/* Backslash must be escaped. */
150 uc == '\\'
151 /* Possible comment introducers must be escaped. */
152 || uc == '#' || uc == '!'
153 /* Key terminators must be escaped. */
154 || uc == '=' || uc == ':')
155 {
156 char seq[2];
157 seq[0] = '\\';
158 seq[1] = uc;
159 ostream_write_mem (stream, seq, 2);
160 }
161 else if (uc >= 0x0020 && uc <= 0x007e)
162 {
163 /* ASCII characters can be output literally.
164 We could treat non-ASCII ISO-8859-1 characters (0x0080..0x00FF)
165 the same way, but there is no point in doing this; Sun's
166 nativetoascii doesn't do it either. */
167 char seq[1];
168 seq[0] = uc;
169 ostream_write_mem (stream, seq, 1);
170 }
171 else if (uc < 0x10000)
172 {
173 /* Single UCS-2 'char' */
174 char seq[6];
175 seq[0] = '\\';
176 seq[1] = 'u';
177 seq[2] = hexdigit[(uc >> 12) & 0x0f];
178 seq[3] = hexdigit[(uc >> 8) & 0x0f];
179 seq[4] = hexdigit[(uc >> 4) & 0x0f];
180 seq[5] = hexdigit[uc & 0x0f];
181 ostream_write_mem (stream, seq, 6);
182 }
183 else
184 {
185 /* UTF-16 surrogate: two 'char's. */
186 ucs4_t uc1 = 0xd800 + ((uc - 0x10000) >> 10);
187 ucs4_t uc2 = 0xdc00 + ((uc - 0x10000) & 0x3ff);
188 char seq[6];
189 seq[0] = '\\';
190 seq[1] = 'u';
191 seq[2] = hexdigit[(uc1 >> 12) & 0x0f];
192 seq[3] = hexdigit[(uc1 >> 8) & 0x0f];
193 seq[4] = hexdigit[(uc1 >> 4) & 0x0f];
194 seq[5] = hexdigit[uc1 & 0x0f];
195 ostream_write_mem (stream, seq, 6);
196 seq[0] = '\\';
197 seq[1] = 'u';
198 seq[2] = hexdigit[(uc2 >> 12) & 0x0f];
199 seq[3] = hexdigit[(uc2 >> 8) & 0x0f];
200 seq[4] = hexdigit[(uc2 >> 4) & 0x0f];
201 seq[5] = hexdigit[uc2 & 0x0f];
202 ostream_write_mem (stream, seq, 6);
203 }
204 first = false;
205 }
206 }
207
208 /* Writes a message to the stream. */
209 static void
write_message(ostream_t stream,const message_ty * mp,size_t page_width,bool debug)210 write_message (ostream_t stream, const message_ty *mp,
211 size_t page_width, bool debug)
212 {
213 /* Print translator comment if available. */
214 message_print_comment (mp, stream);
215
216 /* Print xgettext extracted comments. */
217 message_print_comment_dot (mp, stream);
218
219 /* Print the file position comments. */
220 message_print_comment_filepos (mp, stream, false, page_width);
221
222 /* Print flag information in special comment. */
223 message_print_comment_flags (mp, stream, debug);
224
225 /* Put a comment mark if the message is the header or untranslated or
226 fuzzy. */
227 if (is_header (mp)
228 || mp->msgstr[0] == '\0'
229 || (mp->is_fuzzy && !is_header (mp)))
230 ostream_write_str (stream, "!");
231
232 /* Now write the untranslated string and the translated string. */
233 write_escaped_string (stream, mp->msgid, true);
234 ostream_write_str (stream, "=");
235 write_escaped_string (stream, mp->msgstr, false);
236
237 ostream_write_str (stream, "\n");
238 }
239
240 /* Writes an entire message list to the stream. */
241 static void
write_properties(ostream_t stream,message_list_ty * mlp,const char * canon_encoding,size_t page_width,bool debug)242 write_properties (ostream_t stream, message_list_ty *mlp,
243 const char *canon_encoding, size_t page_width, bool debug)
244 {
245 bool blank_line;
246 size_t j, i;
247
248 /* Convert the messages to Unicode. */
249 iconv_message_list (mlp, canon_encoding, po_charset_utf8, NULL);
250 for (j = 0; j < mlp->nitems; ++j)
251 {
252 message_ty *mp = mlp->item[j];
253
254 if (mp->comment != NULL)
255 for (i = 0; i < mp->comment->nitems; ++i)
256 mp->comment->item[i] = conv_to_java (mp->comment->item[i]);
257 if (mp->comment_dot != NULL)
258 for (i = 0; i < mp->comment_dot->nitems; ++i)
259 mp->comment_dot->item[i] = conv_to_java (mp->comment_dot->item[i]);
260 }
261
262 /* Loop through the messages. */
263 blank_line = false;
264 for (j = 0; j < mlp->nitems; ++j)
265 {
266 const message_ty *mp = mlp->item[j];
267
268 if (mp->msgid_plural == NULL && !mp->obsolete)
269 {
270 if (blank_line)
271 ostream_write_str (stream, "\n");
272
273 write_message (stream, mp, page_width, debug);
274
275 blank_line = true;
276 }
277 }
278 }
279
280 /* Output the contents of a PO file in Java .properties syntax. */
281 static void
msgdomain_list_print_properties(msgdomain_list_ty * mdlp,ostream_t stream,size_t page_width,bool debug)282 msgdomain_list_print_properties (msgdomain_list_ty *mdlp, ostream_t stream,
283 size_t page_width, bool debug)
284 {
285 message_list_ty *mlp;
286
287 if (mdlp->nitems == 1)
288 mlp = mdlp->item[0]->messages;
289 else
290 mlp = message_list_alloc (false);
291 write_properties (stream, mlp, mdlp->encoding, page_width, debug);
292 }
293
294 /* Describes a PO file in Java .properties syntax. */
295 const struct catalog_output_format output_format_properties =
296 {
297 msgdomain_list_print_properties, /* print */
298 true, /* requires_utf8 */
299 false, /* supports_color */
300 false, /* supports_multiple_domains */
301 false, /* supports_contexts */
302 false, /* supports_plurals */
303 false, /* sorts_obsoletes_to_end */
304 true, /* alternative_is_po */
305 true /* alternative_is_java_class */
306 };
307