1 /* Internationalization Tag Set (ITS) handling
2 Copyright (C) 2015, 2018-2020 Free Software Foundation, Inc.
3
4 This file was written by Daiki Ueno <ueno@gnu.org>, 2015.
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19 #ifdef HAVE_CONFIG_H
20 #include <config.h>
21 #endif
22
23 /* Specification. */
24 #include "its.h"
25
26 #include <assert.h>
27 #include <errno.h>
28 #include "error.h"
29 #include "gettext.h"
30 #include "mem-hash-map.h"
31 #include <stdint.h>
32 #include <libxml/tree.h>
33 #include <libxml/parser.h>
34 #include <libxml/xmlwriter.h>
35 #include <libxml/xpath.h>
36 #include <libxml/xpathInternals.h>
37 #include <stdlib.h>
38 #include "trim.h"
39 #include "xalloc.h"
40 #include "xvasprintf.h"
41
42 #define _(str) gettext (str)
43
44 /* The Internationalization Tag Set (ITS) 2.0 standard is available at:
45 https://www.w3.org/TR/its20/
46
47 This implementation supports only a few data categories, useful for
48 gettext-based projects. Other data categories can be added by
49 extending the its_rule_class_ty class and registering it in
50 init_classes().
51
52 The message extraction is performed in three steps. In the first
53 step, its_rule_list_apply() assigns values to nodes in an XML
54 document. In the second step, its_rule_list_extract_nodes() marks
55 translatable nodes. In the final step,
56 its_rule_list_extract_text() extracts text contents from the marked
57 nodes.
58
   The values assigned to a node are represented as an array of
   key-value pairs, where both keys and values are strings.  The array
   is kept in a pool owned by the rule list; node->_private holds the
   1-based index of the array in that pool (0 means that no values
   have been assigned).  To retrieve the values for a node, use
   its_rule_list_eval().  */
63
64 #define ITS_NS "http://www.w3.org/2005/11/its"
65 #define XML_NS "http://www.w3.org/XML/1998/namespace"
66 #define GT_NS "https://www.gnu.org/s/gettext/ns/its/extensions/1.0"
67
/* A single name/value pair.  Both strings are heap-allocated copies
   made by its_value_list_append() and released by
   its_value_list_destroy().  */
struct its_value_ty
{
  char *name;   /* Key of the pair, e.g. "translate".  */
  char *value;  /* Value associated with NAME.  */
};
73
/* A growable array of name/value pairs.  A zero-filled instance is a
   valid empty list.  */
struct its_value_list_ty
{
  struct its_value_ty *items;  /* The pairs; NULL while nothing is allocated.  */
  size_t nitems;               /* Number of pairs in use.  */
  size_t nitems_max;           /* Number of pairs ITEMS has room for.  */
};
80
81 static void
its_value_list_append(struct its_value_list_ty * values,const char * name,const char * value)82 its_value_list_append (struct its_value_list_ty *values,
83 const char *name,
84 const char *value)
85 {
86 struct its_value_ty _value;
87
88 _value.name = xstrdup (name);
89 _value.value = xstrdup (value);
90
91 if (values->nitems == values->nitems_max)
92 {
93 values->nitems_max = 2 * values->nitems_max + 1;
94 values->items =
95 xrealloc (values->items,
96 sizeof (struct its_value_ty) * values->nitems_max);
97 }
98 memcpy (&values->items[values->nitems++], &_value,
99 sizeof (struct its_value_ty));
100 }
101
102 static const char *
its_value_list_get_value(struct its_value_list_ty * values,const char * name)103 its_value_list_get_value (struct its_value_list_ty *values,
104 const char *name)
105 {
106 size_t i;
107
108 for (i = 0; i < values->nitems; i++)
109 {
110 struct its_value_ty *value = &values->items[i];
111 if (strcmp (value->name, name) == 0)
112 return value->value;
113 }
114 return NULL;
115 }
116
117 static void
its_value_list_set_value(struct its_value_list_ty * values,const char * name,const char * value)118 its_value_list_set_value (struct its_value_list_ty *values,
119 const char *name,
120 const char *value)
121 {
122 size_t i;
123
124 for (i = 0; i < values->nitems; i++)
125 {
126 struct its_value_ty *_value = &values->items[i];
127 if (strcmp (_value->name, name) == 0)
128 {
129 free (_value->value);
130 _value->value = xstrdup (value);
131 break;
132 }
133 }
134
135 if (i == values->nitems)
136 its_value_list_append (values, name, value);
137 }
138
139 static void
its_value_list_merge(struct its_value_list_ty * values,struct its_value_list_ty * other)140 its_value_list_merge (struct its_value_list_ty *values,
141 struct its_value_list_ty *other)
142 {
143 size_t i;
144
145 for (i = 0; i < other->nitems; i++)
146 {
147 struct its_value_ty *other_value = &other->items[i];
148 size_t j;
149
150 for (j = 0; j < values->nitems; j++)
151 {
152 struct its_value_ty *value = &values->items[j];
153
154 if (strcmp (value->name, other_value->name) == 0
155 && strcmp (value->value, other_value->value) != 0)
156 {
157 free (value->value);
158 value->value = xstrdup (other_value->value);
159 break;
160 }
161 }
162
163 if (j == values->nitems)
164 its_value_list_append (values, other_value->name, other_value->value);
165 }
166 }
167
168 static void
its_value_list_destroy(struct its_value_list_ty * values)169 its_value_list_destroy (struct its_value_list_ty *values)
170 {
171 size_t i;
172
173 for (i = 0; i < values->nitems; i++)
174 {
175 free (values->items[i].name);
176 free (values->items[i].value);
177 }
178 free (values->items);
179 }
180
/* A growable array of value lists.  Nodes refer to their value list
   through a 1-based index into ITEMS kept in node->_private, because
   the address of an element can change when ITEMS is realloc()ed.  */
struct its_pool_ty
{
  struct its_value_list_ty *items;  /* The value lists.  */
  size_t nitems;                    /* Number of value lists in use.  */
  size_t nitems_max;                /* Number of lists ITEMS has room for.  */
};
187
188 static struct its_value_list_ty *
its_pool_alloc_value_list(struct its_pool_ty * pool)189 its_pool_alloc_value_list (struct its_pool_ty *pool)
190 {
191 struct its_value_list_ty *values;
192
193 if (pool->nitems == pool->nitems_max)
194 {
195 pool->nitems_max = 2 * pool->nitems_max + 1;
196 pool->items =
197 xrealloc (pool->items,
198 sizeof (struct its_value_list_ty) * pool->nitems_max);
199 }
200
201 values = &pool->items[pool->nitems++];
202 memset (values, 0, sizeof (struct its_value_list_ty));
203 return values;
204 }
205
206 static const char *
its_pool_get_value_for_node(struct its_pool_ty * pool,xmlNode * node,const char * name)207 its_pool_get_value_for_node (struct its_pool_ty *pool, xmlNode *node,
208 const char *name)
209 {
210 intptr_t index = (intptr_t) node->_private;
211 if (index > 0)
212 {
213 struct its_value_list_ty *values;
214
215 assert (index <= pool->nitems);
216 values = &pool->items[index - 1];
217
218 return its_value_list_get_value (values, name);
219 }
220 return NULL;
221 }
222
223 static void
its_pool_destroy(struct its_pool_ty * pool)224 its_pool_destroy (struct its_pool_ty *pool)
225 {
226 size_t i;
227
228 for (i = 0; i < pool->nitems; i++)
229 its_value_list_destroy (&pool->items[i]);
230 free (pool->items);
231 }
232
/* A growable array of rules, together with the pool of value lists
   that the rules assign to document nodes.  */
struct its_rule_list_ty
{
  struct its_rule_ty **items;  /* Pointers to the rules.  */
  size_t nitems;               /* Number of rules in use.  */
  size_t nitems_max;           /* Number of pointers ITEMS has room for.  */

  struct its_pool_ty pool;     /* Value lists referenced from node->_private.  */
};
241
/* A growable array of pointers to XML nodes.  */
struct its_node_list_ty
{
  xmlNode **items;    /* The node pointers.  */
  size_t nitems;      /* Number of pointers in use.  */
  size_t nitems_max;  /* Number of pointers ITEMS has room for.  */
};
248
249 static void
its_node_list_append(struct its_node_list_ty * nodes,xmlNode * node)250 its_node_list_append (struct its_node_list_ty *nodes,
251 xmlNode *node)
252 {
253 if (nodes->nitems == nodes->nitems_max)
254 {
255 nodes->nitems_max = 2 * nodes->nitems_max + 1;
256 nodes->items =
257 xrealloc (nodes->items, sizeof (xmlNode *) * nodes->nitems_max);
258 }
259 nodes->items[nodes->nitems++] = node;
260 }
261
/* Base class representing an ITS rule in global definition.
   Each data category provides one instance of this structure, filled
   with its own constructor and eval functions.  */
struct its_rule_class_ty
{
  /* How many bytes to malloc for an instance of this class.  */
  size_t size;

  /* What to do immediately after the instance is malloc()ed.
     NODE is the XML element that defines the rule; the constructors
     in this file read the "selector" attribute and the category
     specific attributes from it.  */
  void (*constructor) (struct its_rule_ty *pop, xmlNode *node);

  /* What to do immediately before the instance is free()ed.  */
  void (*destructor) (struct its_rule_ty *pop);

  /* How to apply the rule to all elements in DOC.  The values are
     recorded in POOL.  */
  void (* apply) (struct its_rule_ty *pop, struct its_pool_ty *pool,
                  xmlDoc *doc);

  /* How to evaluate the value of NODE according to the rule.
     The result is a freshly allocated list; the recursive callers in
     this file release it with its_value_list_destroy() followed by
     free().  */
  struct its_value_list_ty *(* eval) (struct its_rule_ty *pop,
                                      struct its_pool_ty *pool, xmlNode *node);
};
282
/* Members common to every rule instance:
     methods     the class the rule belongs to,
     selector    the XPath expression selecting the nodes the rule
                 applies to (evaluated in its_rule_apply()),
     values      the name/value pairs merged into each selected node,
     namespaces  NULL, or a NULL-terminated array of namespaces that
                 are registered with the XPath context before SELECTOR
                 is evaluated; the entries are released with
                 xmlFreeNs() in its_rule_destructor().  */
#define ITS_RULE_TY                             \
  struct its_rule_class_ty *methods;            \
  char *selector;                               \
  struct its_value_list_ty values;              \
  xmlNs **namespaces;

/* An instance of a rule.  */
struct its_rule_ty
{
  ITS_RULE_TY
};
293
/* Table of the known rule classes; the overview comment at the top of
   this file mentions init_classes() as the place where classes are
   registered.  NOTE(review): presumably keyed by the rule element
   name -- confirm against init_classes().  */
static hash_table classes;
295
296 static void
its_rule_destructor(struct its_rule_ty * pop)297 its_rule_destructor (struct its_rule_ty *pop)
298 {
299 free (pop->selector);
300 its_value_list_destroy (&pop->values);
301 if (pop->namespaces)
302 {
303 size_t i;
304 for (i = 0; pop->namespaces[i] != NULL; i++)
305 xmlFreeNs (pop->namespaces[i]);
306 free (pop->namespaces);
307 }
308 }
309
310 static void
its_rule_apply(struct its_rule_ty * rule,struct its_pool_ty * pool,xmlDoc * doc)311 its_rule_apply (struct its_rule_ty *rule, struct its_pool_ty *pool, xmlDoc *doc)
312 {
313 xmlXPathContext *context;
314 xmlXPathObject *object;
315 size_t i;
316
317 if (!rule->selector)
318 {
319 error (0, 0, _("selector is not specified"));
320 return;
321 }
322
323 context = xmlXPathNewContext (doc);
324 if (!context)
325 {
326 error (0, 0, _("cannot create XPath context"));
327 return;
328 }
329
330 if (rule->namespaces)
331 {
332 size_t i;
333 for (i = 0; rule->namespaces[i] != NULL; i++)
334 {
335 xmlNs *ns = rule->namespaces[i];
336 xmlXPathRegisterNs (context, ns->prefix, ns->href);
337 }
338 }
339
340 object = xmlXPathEval (BAD_CAST rule->selector, context);
341 if (!object)
342 {
343 xmlXPathFreeContext (context);
344 error (0, 0, _("cannot evaluate XPath expression: %s"), rule->selector);
345 return;
346 }
347
348 if (object->nodesetval)
349 {
350 xmlNodeSet *nodes = object->nodesetval;
351 for (i = 0; i < nodes->nodeNr; i++)
352 {
353 xmlNode *node = nodes->nodeTab[i];
354 struct its_value_list_ty *values;
355
356 /* We can't store VALUES in NODE, since the address can
357 change when realloc()ed. */
358 intptr_t index = (intptr_t) node->_private;
359
360 assert (index <= pool->nitems);
361 if (index > 0)
362 values = &pool->items[index - 1];
363 else
364 {
365 values = its_pool_alloc_value_list (pool);
366 node->_private = (void *) pool->nitems;
367 }
368
369 its_value_list_merge (values, &rule->values);
370 }
371 }
372
373 xmlXPathFreeObject (object);
374 xmlXPathFreeContext (context);
375 }
376
377 static char *
_its_get_attribute(xmlNode * node,const char * attr,const char * namespace)378 _its_get_attribute (xmlNode *node, const char *attr, const char *namespace)
379 {
380 xmlChar *value;
381 char *result;
382
383 value = xmlGetNsProp (node, BAD_CAST attr, BAD_CAST namespace);
384
385 result = xstrdup ((const char *) value);
386 xmlFree (value);
387
388 return result;
389 }
390
/* Return a copy of TEXT with its whitespace treated according to
   WHITESPACE:
     ITS_WHITESPACE_PRESERVE
       plain copy of TEXT;
     ITS_WHITESPACE_TRIM
       whatever trim (TEXT) returns (trim() is declared in "trim.h");
     ITS_WHITESPACE_NORMALIZE_PARAGRAPH
       TEXT is split into paragraphs at every newline that is followed
       by a run of spaces, tabs and newlines containing at least one
       more newline.  Inside each paragraph, leading and trailing
       whitespace is removed and every other run of whitespace becomes
       a single space.  Paragraphs are separated by "\n\n" in the
       result; a boundary at the very end of TEXT produces no
       separator;
     any other value
       every run of spaces, tabs and newlines becomes a single space,
       including runs at the beginning and at the end of TEXT.
   The two normalizing modes work in place on an xstrdup()ed copy;
   this is safe because the result is never longer than TEXT.  */
static char *
normalize_whitespace (const char *text, enum its_whitespace_type_ty whitespace)
{
  switch (whitespace)
    {
    case ITS_WHITESPACE_PRESERVE:
      return xstrdup (text);

    case ITS_WHITESPACE_TRIM:
      return trim (text);

    case ITS_WHITESPACE_NORMALIZE_PARAGRAPH:
      /* Normalize whitespaces within the text, keeping paragraph
         boundaries.  */
      {
        char *result = xstrdup (text);
        /* Go through the string, shrinking it, reading from *p++
           and writing to *out++.  (result <= out <= p.)  */
        const char *start_of_paragraph;
        char *out;

        out = result;
        for (start_of_paragraph = result; *start_of_paragraph != '\0';)
          {
            const char *end_of_paragraph;
            const char *next_paragraph;

            /* Find the next paragraph boundary.  */
            {
              const char *p;

              for (p = start_of_paragraph;;)
                {
                  const char *nl = strchrnul (p, '\n');
                  if (*nl == '\0')
                    {
                      /* No more newlines: the paragraph extends to the
                         end of the text.  */
                      end_of_paragraph = nl;
                      next_paragraph = end_of_paragraph;
                      break;
                    }
                  p = nl + 1;
                  {
                    /* A second newline within the whitespace that
                       follows NL makes this a paragraph boundary.  */
                    const char *past_whitespace = p + strspn (p, " \t\n");
                    if (memchr (p, '\n', past_whitespace - p) != NULL)
                      {
                        end_of_paragraph = nl;
                        next_paragraph = past_whitespace;
                        break;
                      }
                    p = past_whitespace;
                  }
                }
            }

            /* Normalize whitespaces in the paragraph.  */
            {
              const char *p;

              /* Remove whitespace at the beginning of the paragraph.  */
              for (p = start_of_paragraph; p < end_of_paragraph; p++)
                if (!(*p == ' ' || *p == '\t' || *p == '\n'))
                  break;

              for (; p < end_of_paragraph;)
                {
                  if (*p == ' ' || *p == '\t' || *p == '\n')
                    {
                      /* Normalize whitespace inside the paragraph, and
                         remove whitespace at the end of the paragraph.  */
                      do
                        p++;
                      while (p < end_of_paragraph
                             && (*p == ' ' || *p == '\t' || *p == '\n'));
                      if (p < end_of_paragraph)
                        *out++ = ' ';
                    }
                  else
                    *out++ = *p++;
                }
            }

            /* The boundary consumed at least two characters, so there
               is room for the two-character separator.  */
            if (*next_paragraph != '\0')
              {
                memcpy (out, "\n\n", 2);
                out += 2;
              }
            start_of_paragraph = next_paragraph;
          }
        *out = '\0';
        return result;
      }
    default:
      /* Normalize whitespaces within the text, but do not eliminate whitespace
         at the beginning nor the end of the text.  */
      {
        char *result = xstrdup (text);
        char *out;
        const char *p;

        out = result;
        for (p = result; *p != '\0';)
          {
            if (*p == ' ' || *p == '\t' || *p == '\n')
              {
                do
                  p++;
                while (*p == ' ' || *p == '\t' || *p == '\n');
                *out++ = ' ';
              }
            else
              *out++ = *p++;
          }
        *out = '\0';
        return result;
      }
    }
}
508
509 static char *
_its_encode_special_chars(const char * content,bool is_attribute)510 _its_encode_special_chars (const char *content, bool is_attribute)
511 {
512 const char *str;
513 size_t amount = 0;
514 char *result, *p;
515
516 for (str = content; *str != '\0'; str++)
517 {
518 switch (*str)
519 {
520 case '&':
521 amount += sizeof ("&");
522 break;
523 case '<':
524 amount += sizeof ("<");
525 break;
526 case '>':
527 amount += sizeof (">");
528 break;
529 case '"':
530 if (is_attribute)
531 amount += sizeof (""");
532 else
533 amount += 1;
534 break;
535 default:
536 amount += 1;
537 break;
538 }
539 }
540
541 result = XNMALLOC (amount + 1, char);
542 *result = '\0';
543 p = result;
544 for (str = content; *str != '\0'; str++)
545 {
546 switch (*str)
547 {
548 case '&':
549 p = stpcpy (p, "&");
550 break;
551 case '<':
552 p = stpcpy (p, "<");
553 break;
554 case '>':
555 p = stpcpy (p, ">");
556 break;
557 case '"':
558 if (is_attribute)
559 p = stpcpy (p, """);
560 else
561 *p++ = '"';
562 break;
563 default:
564 *p++ = *str;
565 break;
566 }
567 }
568 *p = '\0';
569 return result;
570 }
571
572 static char *
_its_collect_text_content(xmlNode * node,enum its_whitespace_type_ty whitespace,bool no_escape)573 _its_collect_text_content (xmlNode *node,
574 enum its_whitespace_type_ty whitespace,
575 bool no_escape)
576 {
577 char *buffer = NULL;
578 size_t bufmax = 0;
579 size_t bufpos = 0;
580 xmlNode *n;
581
582 for (n = node->children; n; n = n->next)
583 {
584 char *content = NULL;
585
586 switch (n->type)
587 {
588 case XML_TEXT_NODE:
589 case XML_CDATA_SECTION_NODE:
590 {
591 xmlChar *xcontent = xmlNodeGetContent (n);
592 char *econtent;
593 const char *ccontent;
594
595 /* We can't expect xmlTextWriterWriteString() encode
596 special characters as we write text outside of the
597 element. */
598 if (no_escape)
599 econtent = xstrdup ((const char *) xcontent);
600 else
601 econtent =
602 _its_encode_special_chars ((const char *) xcontent,
603 node->type == XML_ATTRIBUTE_NODE);
604 xmlFree (xcontent);
605
606 /* Skip whitespaces at the beginning of the text, if this
607 is the first node. */
608 ccontent = econtent;
609 if (whitespace == ITS_WHITESPACE_NORMALIZE && !n->prev)
610 ccontent = ccontent + strspn (ccontent, " \t\n");
611 content =
612 normalize_whitespace (ccontent, whitespace);
613 free (econtent);
614
615 /* Skip whitespaces at the end of the text, if this
616 is the last node. */
617 if (whitespace == ITS_WHITESPACE_NORMALIZE && !n->next)
618 {
619 char *p = content + strlen (content);
620 for (; p > content; p--)
621 {
622 int c = *(p - 1);
623 if (!(c == ' ' || c == '\t' || c == '\n'))
624 {
625 *p = '\0';
626 break;
627 }
628 }
629 }
630 }
631 break;
632
633 case XML_ELEMENT_NODE:
634 {
635 xmlOutputBuffer *buffer = xmlAllocOutputBuffer (NULL);
636 xmlTextWriter *writer = xmlNewTextWriter (buffer);
637 char *p = _its_collect_text_content (n, whitespace,
638 no_escape);
639 const char *ccontent;
640
641 xmlTextWriterStartElement (writer, BAD_CAST n->name);
642 if (n->properties)
643 {
644 xmlAttr *attr = n->properties;
645 for (; attr; attr = attr->next)
646 {
647 xmlChar *prop = xmlGetProp (n, attr->name);
648 xmlTextWriterWriteAttribute (writer,
649 attr->name,
650 prop);
651 xmlFree (prop);
652 }
653 }
654 if (*p != '\0')
655 xmlTextWriterWriteRaw (writer, BAD_CAST p);
656 xmlTextWriterEndElement (writer);
657 ccontent = (const char *) xmlOutputBufferGetContent (buffer);
658 content = normalize_whitespace (ccontent, whitespace);
659 xmlFreeTextWriter (writer);
660 free (p);
661 }
662 break;
663
664 case XML_ENTITY_REF_NODE:
665 content = xasprintf ("&%s;", (const char *) n->name);
666 break;
667
668 default:
669 break;
670 }
671
672 if (content != NULL)
673 {
674 size_t length = strlen (content);
675
676 if (bufpos + length + 1 >= bufmax)
677 {
678 bufmax = 2 * bufmax + length + 1;
679 buffer = xrealloc (buffer, bufmax);
680 }
681 strcpy (&buffer[bufpos], content);
682 bufpos += length;
683 }
684 free (content);
685 }
686
687 if (buffer == NULL)
688 buffer = xstrdup ("");
689 return buffer;
690 }
691
692 static void
_its_error_missing_attribute(xmlNode * node,const char * attribute)693 _its_error_missing_attribute (xmlNode *node, const char *attribute)
694 {
695 error (0, 0, _("\"%s\" node does not contain \"%s\""),
696 node->name, attribute);
697 }
698
699 /* Implementation of Translate data category. */
700 static void
its_translate_rule_constructor(struct its_rule_ty * pop,xmlNode * node)701 its_translate_rule_constructor (struct its_rule_ty *pop, xmlNode *node)
702 {
703 char *prop;
704
705 if (!xmlHasProp (node, BAD_CAST "selector"))
706 {
707 _its_error_missing_attribute (node, "selector");
708 return;
709 }
710
711 if (!xmlHasProp (node, BAD_CAST "translate"))
712 {
713 _its_error_missing_attribute (node, "translate");
714 return;
715 }
716
717 prop = _its_get_attribute (node, "selector", NULL);
718 if (prop)
719 pop->selector = prop;
720
721 prop = _its_get_attribute (node, "translate", NULL);
722 its_value_list_append (&pop->values, "translate", prop);
723 free (prop);
724 }
725
726 struct its_value_list_ty *
its_translate_rule_eval(struct its_rule_ty * pop,struct its_pool_ty * pool,xmlNode * node)727 its_translate_rule_eval (struct its_rule_ty *pop, struct its_pool_ty *pool,
728 xmlNode *node)
729 {
730 struct its_value_list_ty *result;
731
732 result = XCALLOC (1, struct its_value_list_ty);
733
734 switch (node->type)
735 {
736 case XML_ATTRIBUTE_NODE:
737 /* Attribute nodes don't inherit from the parent elements. */
738 {
739 const char *value =
740 its_pool_get_value_for_node (pool, node, "translate");
741 if (value != NULL)
742 {
743 its_value_list_set_value (result, "translate", value);
744 return result;
745 }
746
747 /* The default value is translate="no". */
748 its_value_list_append (result, "translate", "no");
749 }
750 break;
751
752 case XML_ELEMENT_NODE:
753 /* Inherit from the parent elements. */
754 {
755 const char *value;
756
757 /* A local attribute overrides the global rule. */
758 if (xmlHasNsProp (node, BAD_CAST "translate", BAD_CAST ITS_NS))
759 {
760 char *prop;
761
762 prop = _its_get_attribute (node, "translate", ITS_NS);
763 its_value_list_append (result, "translate", prop);
764 free (prop);
765 return result;
766 }
767
768 /* Check value for the current node. */
769 value = its_pool_get_value_for_node (pool, node, "translate");
770 if (value != NULL)
771 {
772 its_value_list_set_value (result, "translate", value);
773 return result;
774 }
775
776 /* Recursively check value for the parent node. */
777 if (node->parent == NULL
778 || node->parent->type != XML_ELEMENT_NODE)
779 /* The default value is translate="yes". */
780 its_value_list_append (result, "translate", "yes");
781 else
782 {
783 struct its_value_list_ty *values;
784
785 values = its_translate_rule_eval (pop, pool, node->parent);
786 its_value_list_merge (result, values);
787 its_value_list_destroy (values);
788 free (values);
789 }
790 }
791 break;
792
793 default:
794 break;
795 }
796
797 return result;
798 }
799
800 static struct its_rule_class_ty its_translate_rule_class =
801 {
802 sizeof (struct its_rule_ty),
803 its_translate_rule_constructor,
804 its_rule_destructor,
805 its_rule_apply,
806 its_translate_rule_eval,
807 };
808
809 /* Implementation of Localization Note data category. */
810 static void
its_localization_note_rule_constructor(struct its_rule_ty * pop,xmlNode * node)811 its_localization_note_rule_constructor (struct its_rule_ty *pop, xmlNode *node)
812 {
813 char *prop;
814 xmlNode *n;
815
816 if (!xmlHasProp (node, BAD_CAST "selector"))
817 {
818 _its_error_missing_attribute (node, "selector");
819 return;
820 }
821
822 if (!xmlHasProp (node, BAD_CAST "locNoteType"))
823 {
824 _its_error_missing_attribute (node, "locNoteType");
825 return;
826 }
827
828 prop = _its_get_attribute (node, "selector", NULL);
829 if (prop)
830 pop->selector = prop;
831
832 for (n = node->children; n; n = n->next)
833 {
834 if (n->type == XML_ELEMENT_NODE
835 && xmlStrEqual (n->name, BAD_CAST "locNote")
836 && xmlStrEqual (n->ns->href, BAD_CAST ITS_NS))
837 break;
838 }
839
840 prop = _its_get_attribute (node, "locNoteType", NULL);
841 if (prop)
842 its_value_list_append (&pop->values, "locNoteType", prop);
843 free (prop);
844
845 if (n)
846 {
847 /* FIXME: Respect space attribute. */
848 char *content = _its_collect_text_content (n, ITS_WHITESPACE_NORMALIZE,
849 false);
850 its_value_list_append (&pop->values, "locNote", content);
851 free (content);
852 }
853 else if (xmlHasProp (node, BAD_CAST "locNotePointer"))
854 {
855 prop = _its_get_attribute (node, "locNotePointer", NULL);
856 its_value_list_append (&pop->values, "locNotePointer", prop);
857 free (prop);
858 }
859 /* FIXME: locNoteRef and locNoteRefPointer */
860 }
861
862 struct its_value_list_ty *
its_localization_note_rule_eval(struct its_rule_ty * pop,struct its_pool_ty * pool,xmlNode * node)863 its_localization_note_rule_eval (struct its_rule_ty *pop,
864 struct its_pool_ty *pool,
865 xmlNode *node)
866 {
867 struct its_value_list_ty *result;
868
869 result = XCALLOC (1, struct its_value_list_ty);
870
871 switch (node->type)
872 {
873 case XML_ATTRIBUTE_NODE:
874 /* Attribute nodes don't inherit from the parent elements. */
875 {
876 const char *value;
877
878 value = its_pool_get_value_for_node (pool, node, "locNoteType");
879 if (value != NULL)
880 its_value_list_set_value (result, "locNoteType", value);
881
882 value = its_pool_get_value_for_node (pool, node, "locNote");
883 if (value != NULL)
884 {
885 its_value_list_set_value (result, "locNote", value);
886 return result;
887 }
888
889 value = its_pool_get_value_for_node (pool, node, "locNotePointer");
890 if (value != NULL)
891 {
892 its_value_list_set_value (result, "locNotePointer", value);
893 return result;
894 }
895 }
896 break;
897
898 case XML_ELEMENT_NODE:
899 /* Inherit from the parent elements. */
900 {
901 const char *value;
902
903 /* Local attributes overrides the global rule. */
904 if (xmlHasNsProp (node, BAD_CAST "locNote", BAD_CAST ITS_NS)
905 || xmlHasNsProp (node, BAD_CAST "locNoteRef", BAD_CAST ITS_NS)
906 || xmlHasNsProp (node, BAD_CAST "locNoteType", BAD_CAST ITS_NS))
907 {
908 char *prop;
909
910 if (xmlHasNsProp (node, BAD_CAST "locNote", BAD_CAST ITS_NS))
911 {
912 prop = _its_get_attribute (node, "locNote", ITS_NS);
913 its_value_list_append (result, "locNote", prop);
914 free (prop);
915 }
916
917 /* FIXME: locNoteRef */
918
919 if (xmlHasNsProp (node, BAD_CAST "locNoteType", BAD_CAST ITS_NS))
920 {
921 prop = _its_get_attribute (node, "locNoteType", ITS_NS);
922 its_value_list_append (result, "locNoteType", prop);
923 free (prop);
924 }
925
926 return result;
927 }
928
929 /* Check value for the current node. */
930 value = its_pool_get_value_for_node (pool, node, "locNoteType");
931 if (value != NULL)
932 its_value_list_set_value (result, "locNoteType", value);
933
934 value = its_pool_get_value_for_node (pool, node, "locNote");
935 if (value != NULL)
936 {
937 its_value_list_set_value (result, "locNote", value);
938 return result;
939 }
940
941 value = its_pool_get_value_for_node (pool, node, "locNotePointer");
942 if (value != NULL)
943 {
944 its_value_list_set_value (result, "locNotePointer", value);
945 return result;
946 }
947
948 /* Recursively check value for the parent node. */
949 if (node->parent == NULL
950 || node->parent->type != XML_ELEMENT_NODE)
951 return result;
952 else
953 {
954 struct its_value_list_ty *values;
955
956 values = its_localization_note_rule_eval (pop, pool, node->parent);
957 its_value_list_merge (result, values);
958 its_value_list_destroy (values);
959 free (values);
960 }
961 }
962 break;
963
964 default:
965 break;
966 }
967
968 /* The default value is None. */
969 return result;
970 }
971
972 static struct its_rule_class_ty its_localization_note_rule_class =
973 {
974 sizeof (struct its_rule_ty),
975 its_localization_note_rule_constructor,
976 its_rule_destructor,
977 its_rule_apply,
978 its_localization_note_rule_eval,
979 };
980
981 /* Implementation of Element Within Text data category. */
982 static void
its_element_within_text_rule_constructor(struct its_rule_ty * pop,xmlNode * node)983 its_element_within_text_rule_constructor (struct its_rule_ty *pop,
984 xmlNode *node)
985 {
986 char *prop;
987
988 if (!xmlHasProp (node, BAD_CAST "selector"))
989 {
990 _its_error_missing_attribute (node, "selector");
991 return;
992 }
993
994 if (!xmlHasProp (node, BAD_CAST "withinText"))
995 {
996 _its_error_missing_attribute (node, "withinText");
997 return;
998 }
999
1000 prop = _its_get_attribute (node, "selector", NULL);
1001 if (prop)
1002 pop->selector = prop;
1003
1004 prop = _its_get_attribute (node, "withinText", NULL);
1005 its_value_list_append (&pop->values, "withinText", prop);
1006 free (prop);
1007 }
1008
1009 struct its_value_list_ty *
its_element_within_text_rule_eval(struct its_rule_ty * pop,struct its_pool_ty * pool,xmlNode * node)1010 its_element_within_text_rule_eval (struct its_rule_ty *pop,
1011 struct its_pool_ty *pool,
1012 xmlNode *node)
1013 {
1014 struct its_value_list_ty *result;
1015 const char *value;
1016
1017 result = XCALLOC (1, struct its_value_list_ty);
1018
1019 if (node->type != XML_ELEMENT_NODE)
1020 return result;
1021
1022 /* A local attribute overrides the global rule. */
1023 if (xmlHasNsProp (node, BAD_CAST "withinText", BAD_CAST ITS_NS))
1024 {
1025 char *prop;
1026
1027 prop = _its_get_attribute (node, "withinText", ITS_NS);
1028 its_value_list_append (result, "withinText", prop);
1029 free (prop);
1030 return result;
1031 }
1032
1033 /* Doesn't inherit from the parent elements, and the default value
1034 is None. */
1035 value = its_pool_get_value_for_node (pool, node, "withinText");
1036 if (value != NULL)
1037 its_value_list_set_value (result, "withinText", value);
1038
1039 return result;
1040 }
1041
1042 static struct its_rule_class_ty its_element_within_text_rule_class =
1043 {
1044 sizeof (struct its_rule_ty),
1045 its_element_within_text_rule_constructor,
1046 its_rule_destructor,
1047 its_rule_apply,
1048 its_element_within_text_rule_eval,
1049 };
1050
1051 /* Implementation of Preserve Space data category. */
1052 static void
its_preserve_space_rule_constructor(struct its_rule_ty * pop,xmlNode * node)1053 its_preserve_space_rule_constructor (struct its_rule_ty *pop,
1054 xmlNode *node)
1055 {
1056 char *prop;
1057
1058 if (!xmlHasProp (node, BAD_CAST "selector"))
1059 {
1060 _its_error_missing_attribute (node, "selector");
1061 return;
1062 }
1063
1064 if (!xmlHasProp (node, BAD_CAST "space"))
1065 {
1066 _its_error_missing_attribute (node, "space");
1067 return;
1068 }
1069
1070 prop = _its_get_attribute (node, "selector", NULL);
1071 if (prop)
1072 pop->selector = prop;
1073
1074 prop = _its_get_attribute (node, "space", NULL);
1075 if (prop
1076 && !(strcmp (prop, "preserve") ==0
1077 || strcmp (prop, "default") == 0
1078 /* gettext extension: remove leading/trailing whitespaces only. */
1079 || (node->ns && xmlStrEqual (node->ns->href, BAD_CAST GT_NS)
1080 && strcmp (prop, "trim") == 0)
1081 /* gettext extension: same as default except keeping
1082 paragraph boundaries. */
1083 || (node->ns && xmlStrEqual (node->ns->href, BAD_CAST GT_NS)
1084 && strcmp (prop, "paragraph") == 0)))
1085 {
1086 error (0, 0, _("invalid attribute value \"%s\" for \"%s\""),
1087 prop, "space");
1088 free (prop);
1089 return;
1090 }
1091
1092 its_value_list_append (&pop->values, "space", prop);
1093 free (prop);
1094 }
1095
1096 struct its_value_list_ty *
its_preserve_space_rule_eval(struct its_rule_ty * pop,struct its_pool_ty * pool,xmlNode * node)1097 its_preserve_space_rule_eval (struct its_rule_ty *pop,
1098 struct its_pool_ty *pool,
1099 xmlNode *node)
1100 {
1101 struct its_value_list_ty *result;
1102 struct its_value_list_ty *values;
1103 const char *value;
1104
1105 result = XCALLOC (1, struct its_value_list_ty);
1106
1107 if (node->type != XML_ELEMENT_NODE)
1108 return result;
1109
1110 /* A local attribute overrides the global rule. */
1111 if (xmlHasNsProp (node, BAD_CAST "space", BAD_CAST XML_NS))
1112 {
1113 char *prop;
1114
1115 prop = _its_get_attribute (node, "space", XML_NS);
1116 its_value_list_append (result, "space", prop);
1117 free (prop);
1118 return result;
1119 }
1120
1121 /* Check value for the current node. */
1122 value = its_pool_get_value_for_node (pool, node, "space");
1123 if (value != NULL)
1124 {
1125 its_value_list_set_value (result, "space", value);
1126 return result;
1127 }
1128
1129 if (node->parent == NULL
1130 || node->parent->type != XML_ELEMENT_NODE)
1131 {
1132 /* The default value is space="default". */
1133 its_value_list_append (result, "space", "default");
1134 return result;
1135 }
1136
1137 /* Recursively check value for the parent node. */
1138 values = its_preserve_space_rule_eval (pop, pool, node->parent);
1139 its_value_list_merge (result, values);
1140 its_value_list_destroy (values);
1141 free (values);
1142
1143 return result;
1144 }
1145
1146 static struct its_rule_class_ty its_preserve_space_rule_class =
1147 {
1148 sizeof (struct its_rule_ty),
1149 its_preserve_space_rule_constructor,
1150 its_rule_destructor,
1151 its_rule_apply,
1152 its_preserve_space_rule_eval,
1153 };
1154
1155 /* Implementation of Context data category. */
1156 static void
its_extension_context_rule_constructor(struct its_rule_ty * pop,xmlNode * node)1157 its_extension_context_rule_constructor (struct its_rule_ty *pop, xmlNode *node)
1158 {
1159 char *prop;
1160
1161 if (!xmlHasProp (node, BAD_CAST "selector"))
1162 {
1163 _its_error_missing_attribute (node, "selector");
1164 return;
1165 }
1166
1167 if (!xmlHasProp (node, BAD_CAST "contextPointer"))
1168 {
1169 _its_error_missing_attribute (node, "contextPointer");
1170 return;
1171 }
1172
1173 prop = _its_get_attribute (node, "selector", NULL);
1174 if (prop)
1175 pop->selector = prop;
1176
1177 prop = _its_get_attribute (node, "contextPointer", NULL);
1178 its_value_list_append (&pop->values, "contextPointer", prop);
1179 free (prop);
1180
1181 if (xmlHasProp (node, BAD_CAST "textPointer"))
1182 {
1183 prop = _its_get_attribute (node, "textPointer", NULL);
1184 its_value_list_append (&pop->values, "textPointer", prop);
1185 free (prop);
1186 }
1187 }
1188
1189 struct its_value_list_ty *
its_extension_context_rule_eval(struct its_rule_ty * pop,struct its_pool_ty * pool,xmlNode * node)1190 its_extension_context_rule_eval (struct its_rule_ty *pop,
1191 struct its_pool_ty *pool,
1192 xmlNode *node)
1193 {
1194 struct its_value_list_ty *result;
1195 const char *value;
1196
1197 result = XCALLOC (1, struct its_value_list_ty);
1198
1199 /* Doesn't inherit from the parent elements, and the default value
1200 is None. */
1201 value = its_pool_get_value_for_node (pool, node, "contextPointer");
1202 if (value != NULL)
1203 its_value_list_set_value (result, "contextPointer", value);
1204
1205 value = its_pool_get_value_for_node (pool, node, "textPointer");
1206 if (value != NULL)
1207 its_value_list_set_value (result, "textPointer", value);
1208
1209 return result;
1210 }
1211
1212 static struct its_rule_class_ty its_extension_context_rule_class =
1213 {
1214 sizeof (struct its_rule_ty),
1215 its_extension_context_rule_constructor,
1216 its_rule_destructor,
1217 its_rule_apply,
1218 its_extension_context_rule_eval,
1219 };
1220
1221 /* Implementation of Escape Special Characters data category. */
1222 static void
its_extension_escape_rule_constructor(struct its_rule_ty * pop,xmlNode * node)1223 its_extension_escape_rule_constructor (struct its_rule_ty *pop, xmlNode *node)
1224 {
1225 char *prop;
1226
1227 if (!xmlHasProp (node, BAD_CAST "selector"))
1228 {
1229 _its_error_missing_attribute (node, "selector");
1230 return;
1231 }
1232
1233 if (!xmlHasProp (node, BAD_CAST "escape"))
1234 {
1235 _its_error_missing_attribute (node, "escape");
1236 return;
1237 }
1238
1239 prop = _its_get_attribute (node, "selector", NULL);
1240 if (prop)
1241 pop->selector = prop;
1242
1243 prop = _its_get_attribute (node, "escape", NULL);
1244 its_value_list_append (&pop->values, "escape", prop);
1245 free (prop);
1246 }
1247
1248 struct its_value_list_ty *
its_extension_escape_rule_eval(struct its_rule_ty * pop,struct its_pool_ty * pool,xmlNode * node)1249 its_extension_escape_rule_eval (struct its_rule_ty *pop,
1250 struct its_pool_ty *pool,
1251 xmlNode *node)
1252 {
1253 struct its_value_list_ty *result;
1254
1255 result = XCALLOC (1, struct its_value_list_ty);
1256
1257 switch (node->type)
1258 {
1259 case XML_ATTRIBUTE_NODE:
1260 /* Attribute nodes don't inherit from the parent elements. */
1261 {
1262 const char *value =
1263 its_pool_get_value_for_node (pool, node, "escape");
1264 if (value != NULL)
1265 {
1266 its_value_list_set_value (result, "escape", value);
1267 return result;
1268 }
1269 }
1270 break;
1271
1272 case XML_ELEMENT_NODE:
1273 /* Inherit from the parent elements. */
1274 {
1275 const char *value;
1276
1277 /* Check value for the current node. */
1278 value = its_pool_get_value_for_node (pool, node, "escape");
1279 if (value != NULL)
1280 {
1281 its_value_list_set_value (result, "escape", value);
1282 return result;
1283 }
1284
1285 /* Recursively check value for the parent node. */
1286 if (node->parent != NULL
1287 && node->parent->type == XML_ELEMENT_NODE)
1288 {
1289 struct its_value_list_ty *values;
1290
1291 values = its_extension_escape_rule_eval (pop, pool, node->parent);
1292 its_value_list_merge (result, values);
1293 its_value_list_destroy (values);
1294 free (values);
1295 }
1296 }
1297 break;
1298
1299 default:
1300 break;
1301 }
1302
1303 return result;
1304 }
1305
1306 static struct its_rule_class_ty its_extension_escape_rule_class =
1307 {
1308 sizeof (struct its_rule_ty),
1309 its_extension_escape_rule_constructor,
1310 its_rule_destructor,
1311 its_rule_apply,
1312 its_extension_escape_rule_eval,
1313 };
1314
1315 static struct its_rule_ty *
its_rule_alloc(struct its_rule_class_ty * method_table,xmlNode * node)1316 its_rule_alloc (struct its_rule_class_ty *method_table, xmlNode *node)
1317 {
1318 struct its_rule_ty *pop;
1319
1320 pop = (struct its_rule_ty *) xcalloc (1, method_table->size);
1321 pop->methods = method_table;
1322 if (method_table->constructor)
1323 method_table->constructor (pop, node);
1324 return pop;
1325 }
1326
1327 static struct its_rule_ty *
its_rule_parse(xmlDoc * doc,xmlNode * node)1328 its_rule_parse (xmlDoc *doc, xmlNode *node)
1329 {
1330 const char *name = (const char *) node->name;
1331 void *value;
1332
1333 if (hash_find_entry (&classes, name, strlen (name), &value) == 0)
1334 {
1335 struct its_rule_ty *result;
1336 xmlNs **namespaces;
1337
1338 result = its_rule_alloc ((struct its_rule_class_ty *) value, node);
1339 namespaces = xmlGetNsList (doc, node);
1340 if (namespaces)
1341 {
1342 size_t i;
1343 for (i = 0; namespaces[i] != NULL; i++)
1344 ;
1345 result->namespaces = XCALLOC (i + 1, xmlNs *);
1346 for (i = 0; namespaces[i] != NULL; i++)
1347 result->namespaces[i] = xmlCopyNamespace (namespaces[i]);
1348 }
1349 xmlFree (namespaces);
1350 return result;
1351 }
1352
1353 return NULL;
1354 }
1355
1356 static void
its_rule_destroy(struct its_rule_ty * pop)1357 its_rule_destroy (struct its_rule_ty *pop)
1358 {
1359 if (pop->methods->destructor)
1360 pop->methods->destructor (pop);
1361 }
1362
1363 static void
init_classes(void)1364 init_classes (void)
1365 {
1366 #define ADD_RULE_CLASS(n, c) \
1367 hash_insert_entry (&classes, n, strlen (n), &c);
1368
1369 ADD_RULE_CLASS ("translateRule", its_translate_rule_class);
1370 ADD_RULE_CLASS ("locNoteRule", its_localization_note_rule_class);
1371 ADD_RULE_CLASS ("withinTextRule", its_element_within_text_rule_class);
1372 ADD_RULE_CLASS ("preserveSpaceRule", its_preserve_space_rule_class);
1373 ADD_RULE_CLASS ("contextRule", its_extension_context_rule_class);
1374 ADD_RULE_CLASS ("escapeRule", its_extension_escape_rule_class);
1375
1376 #undef ADD_RULE_CLASS
1377 }
1378
1379 struct its_rule_list_ty *
its_rule_list_alloc(void)1380 its_rule_list_alloc (void)
1381 {
1382 struct its_rule_list_ty *result;
1383
1384 if (classes.table == NULL)
1385 {
1386 hash_init (&classes, 10);
1387 init_classes ();
1388 }
1389
1390 result = XCALLOC (1, struct its_rule_list_ty);
1391 return result;
1392 }
1393
1394 void
its_rule_list_free(struct its_rule_list_ty * rules)1395 its_rule_list_free (struct its_rule_list_ty *rules)
1396 {
1397 size_t i;
1398
1399 for (i = 0; i < rules->nitems; i++)
1400 {
1401 its_rule_destroy (rules->items[i]);
1402 free (rules->items[i]);
1403 }
1404 free (rules->items);
1405 its_pool_destroy (&rules->pool);
1406 }
1407
1408 static bool
its_rule_list_add_from_doc(struct its_rule_list_ty * rules,xmlDoc * doc)1409 its_rule_list_add_from_doc (struct its_rule_list_ty *rules,
1410 xmlDoc *doc)
1411 {
1412 xmlNode *root, *node;
1413
1414 root = xmlDocGetRootElement (doc);
1415 if (!(xmlStrEqual (root->name, BAD_CAST "rules")
1416 && xmlStrEqual (root->ns->href, BAD_CAST ITS_NS)))
1417 {
1418 error (0, 0, _("the root element is not \"rules\""
1419 " under namespace %s"),
1420 ITS_NS);
1421 xmlFreeDoc (doc);
1422 return false;
1423 }
1424
1425 for (node = root->children; node; node = node->next)
1426 {
1427 struct its_rule_ty *rule;
1428
1429 rule = its_rule_parse (doc, node);
1430 if (rule != NULL)
1431 {
1432 if (rules->nitems == rules->nitems_max)
1433 {
1434 rules->nitems_max = 2 * rules->nitems_max + 1;
1435 rules->items =
1436 xrealloc (rules->items,
1437 sizeof (struct its_rule_ty *) * rules->nitems_max);
1438 }
1439 rules->items[rules->nitems++] = rule;
1440 }
1441 }
1442
1443 return true;
1444 }
1445
1446 bool
its_rule_list_add_from_file(struct its_rule_list_ty * rules,const char * filename)1447 its_rule_list_add_from_file (struct its_rule_list_ty *rules,
1448 const char *filename)
1449 {
1450 xmlDoc *doc;
1451 bool result;
1452
1453 doc = xmlReadFile (filename, "utf-8",
1454 XML_PARSE_NONET
1455 | XML_PARSE_NOWARNING
1456 | XML_PARSE_NOBLANKS
1457 | XML_PARSE_NOERROR);
1458 if (doc == NULL)
1459 {
1460 xmlError *err = xmlGetLastError ();
1461 error (0, 0, _("cannot read %s: %s"), filename, err->message);
1462 return false;
1463 }
1464
1465 result = its_rule_list_add_from_doc (rules, doc);
1466 xmlFreeDoc (doc);
1467 return result;
1468 }
1469
1470 bool
its_rule_list_add_from_string(struct its_rule_list_ty * rules,const char * rule)1471 its_rule_list_add_from_string (struct its_rule_list_ty *rules,
1472 const char *rule)
1473 {
1474 xmlDoc *doc;
1475 bool result;
1476
1477 doc = xmlReadMemory (rule, strlen (rule),
1478 "(internal)",
1479 NULL,
1480 XML_PARSE_NONET
1481 | XML_PARSE_NOWARNING
1482 | XML_PARSE_NOBLANKS
1483 | XML_PARSE_NOERROR);
1484 if (doc == NULL)
1485 {
1486 xmlError *err = xmlGetLastError ();
1487 error (0, 0, _("cannot read %s: %s"), "(internal)", err->message);
1488 return false;
1489 }
1490
1491 result = its_rule_list_add_from_doc (rules, doc);
1492 xmlFreeDoc (doc);
1493 return result;
1494 }
1495
1496 static void
its_rule_list_apply(struct its_rule_list_ty * rules,xmlDoc * doc)1497 its_rule_list_apply (struct its_rule_list_ty *rules, xmlDoc *doc)
1498 {
1499 size_t i;
1500
1501 for (i = 0; i < rules->nitems; i++)
1502 {
1503 struct its_rule_ty *rule = rules->items[i];
1504 rule->methods->apply (rule, &rules->pool, doc);
1505 }
1506 }
1507
1508 static struct its_value_list_ty *
its_rule_list_eval(its_rule_list_ty * rules,xmlNode * node)1509 its_rule_list_eval (its_rule_list_ty *rules, xmlNode *node)
1510 {
1511 struct its_value_list_ty *result;
1512 size_t i;
1513
1514 result = XCALLOC (1, struct its_value_list_ty);
1515 for (i = 0; i < rules->nitems; i++)
1516 {
1517 struct its_rule_ty *rule = rules->items[i];
1518 struct its_value_list_ty *values;
1519
1520 values = rule->methods->eval (rule, &rules->pool, node);
1521 its_value_list_merge (result, values);
1522 its_value_list_destroy (values);
1523 free (values);
1524 }
1525
1526 return result;
1527 }
1528
1529 static bool
its_rule_list_is_translatable(its_rule_list_ty * rules,xmlNode * node,int depth)1530 its_rule_list_is_translatable (its_rule_list_ty *rules,
1531 xmlNode *node,
1532 int depth)
1533 {
1534 struct its_value_list_ty *values;
1535 const char *value;
1536 xmlNode *n;
1537
1538 if (node->type != XML_ELEMENT_NODE
1539 && node->type != XML_ATTRIBUTE_NODE)
1540 return false;
1541
1542 values = its_rule_list_eval (rules, node);
1543
1544 /* Check if NODE has translate="yes". */
1545 value = its_value_list_get_value (values, "translate");
1546 if (!(value && strcmp (value, "yes") == 0))
1547 {
1548 its_value_list_destroy (values);
1549 free (values);
1550 return false;
1551 }
1552
1553 /* Check if NODE has withinText="yes", if NODE is not top-level. */
1554 if (depth > 0)
1555 {
1556 value = its_value_list_get_value (values, "withinText");
1557 if (!(value && strcmp (value, "yes") == 0))
1558 {
1559 its_value_list_destroy (values);
1560 free (values);
1561 return false;
1562 }
1563 }
1564
1565 its_value_list_destroy (values);
1566 free (values);
1567
1568 for (n = node->children; n; n = n->next)
1569 {
1570 switch (n->type)
1571 {
1572 case XML_ELEMENT_NODE:
1573 if (!its_rule_list_is_translatable (rules, n, depth + 1))
1574 return false;
1575 break;
1576
1577 case XML_TEXT_NODE:
1578 case XML_CDATA_SECTION_NODE:
1579 case XML_ENTITY_REF_NODE:
1580 case XML_COMMENT_NODE:
1581 break;
1582
1583 default:
1584 return false;
1585 }
1586 }
1587
1588 return true;
1589 }
1590
1591 static void
its_rule_list_extract_nodes(its_rule_list_ty * rules,struct its_node_list_ty * nodes,xmlNode * node)1592 its_rule_list_extract_nodes (its_rule_list_ty *rules,
1593 struct its_node_list_ty *nodes,
1594 xmlNode *node)
1595 {
1596 if (node->type == XML_ELEMENT_NODE)
1597 {
1598 xmlNode *n;
1599
1600 if (node->properties)
1601 {
1602 xmlAttr *attr = node->properties;
1603 for (; attr; attr = attr->next)
1604 {
1605 xmlNode *n = (xmlNode *) attr;
1606 if (its_rule_list_is_translatable (rules, n, 0))
1607 its_node_list_append (nodes, n);
1608 }
1609 }
1610
1611 if (its_rule_list_is_translatable (rules, node, 0))
1612 its_node_list_append (nodes, node);
1613 else
1614 {
1615 for (n = node->children; n; n = n->next)
1616 its_rule_list_extract_nodes (rules, nodes, n);
1617 }
1618 }
1619 }
1620
1621 static char *
_its_get_content(struct its_rule_list_ty * rules,xmlNode * node,const char * pointer,enum its_whitespace_type_ty whitespace,bool no_escape)1622 _its_get_content (struct its_rule_list_ty *rules, xmlNode *node,
1623 const char *pointer,
1624 enum its_whitespace_type_ty whitespace,
1625 bool no_escape)
1626 {
1627 xmlXPathContext *context;
1628 xmlXPathObject *object;
1629 size_t i;
1630 char *result = NULL;
1631
1632 context = xmlXPathNewContext (node->doc);
1633 if (!context)
1634 {
1635 error (0, 0, _("cannot create XPath context"));
1636 return NULL;
1637 }
1638
1639 for (i = 0; i < rules->nitems; i++)
1640 {
1641 struct its_rule_ty *rule = rules->items[i];
1642 if (rule->namespaces)
1643 {
1644 size_t i;
1645 for (i = 0; rule->namespaces[i] != NULL; i++)
1646 {
1647 xmlNs *ns = rule->namespaces[i];
1648 xmlXPathRegisterNs (context, ns->prefix, ns->href);
1649 }
1650 }
1651 }
1652
1653 xmlXPathSetContextNode (node, context);
1654 object = xmlXPathEvalExpression (BAD_CAST pointer, context);
1655 if (!object)
1656 {
1657 xmlXPathFreeContext (context);
1658 error (0, 0, _("cannot evaluate XPath location path: %s"),
1659 pointer);
1660 return NULL;
1661 }
1662
1663 switch (object->type)
1664 {
1665 case XPATH_NODESET:
1666 {
1667 xmlNodeSet *nodes = object->nodesetval;
1668 string_list_ty sl;
1669 size_t i;
1670
1671 string_list_init (&sl);
1672 for (i = 0; i < nodes->nodeNr; i++)
1673 {
1674 char *content = _its_collect_text_content (nodes->nodeTab[i],
1675 whitespace,
1676 no_escape);
1677 string_list_append (&sl, content);
1678 free (content);
1679 }
1680 result = string_list_concat (&sl);
1681 string_list_destroy (&sl);
1682 }
1683 break;
1684
1685 case XPATH_STRING:
1686 result = xstrdup ((const char *) object->stringval);
1687 break;
1688
1689 default:
1690 break;
1691 }
1692
1693 xmlXPathFreeObject (object);
1694 xmlXPathFreeContext (context);
1695
1696 return result;
1697 }
1698
1699 static void
_its_comment_append(string_list_ty * comments,const char * data)1700 _its_comment_append (string_list_ty *comments, const char *data)
1701 {
1702 /* Split multiline comment into lines, and remove leading and trailing
1703 whitespace. */
1704 char *copy = xstrdup (data);
1705 char *p;
1706 char *q;
1707
1708 for (p = copy; (q = strchr (p, '\n')) != NULL; p = q + 1)
1709 {
1710 while (p[0] == ' ' || p[0] == '\t')
1711 p++;
1712 while (q > p && (q[-1] == ' ' || q[-1] == '\t'))
1713 q--;
1714 *q = '\0';
1715 string_list_append (comments, p);
1716 }
1717 q = p + strlen (p);
1718 while (p[0] == ' ' || p[0] == '\t')
1719 p++;
1720 while (q > p && (q[-1] == ' ' || q[-1] == '\t'))
1721 q--;
1722 *q = '\0';
1723 string_list_append (comments, p);
1724 free (copy);
1725 }
1726
1727 static void
its_rule_list_extract_text(its_rule_list_ty * rules,xmlNode * node,const char * logical_filename,flag_context_list_table_ty * flag_table,message_list_ty * mlp,its_extract_callback_ty callback)1728 its_rule_list_extract_text (its_rule_list_ty *rules,
1729 xmlNode *node,
1730 const char *logical_filename,
1731 flag_context_list_table_ty *flag_table,
1732 message_list_ty *mlp,
1733 its_extract_callback_ty callback)
1734 {
1735 if (node->type == XML_ELEMENT_NODE
1736 || node->type == XML_ATTRIBUTE_NODE)
1737 {
1738 struct its_value_list_ty *values;
1739 const char *value;
1740 char *msgid = NULL, *msgctxt = NULL, *comment = NULL;
1741 enum its_whitespace_type_ty whitespace;
1742 bool no_escape;
1743
1744 values = its_rule_list_eval (rules, node);
1745
1746 value = its_value_list_get_value (values, "locNote");
1747 if (value)
1748 comment = xstrdup (value);
1749 else
1750 {
1751 value = its_value_list_get_value (values, "escape");
1752 no_escape = value != NULL && strcmp (value, "no") == 0;
1753
1754 value = its_value_list_get_value (values, "locNotePointer");
1755 if (value)
1756 comment = _its_get_content (rules, node, value, ITS_WHITESPACE_TRIM,
1757 no_escape);
1758 }
1759
1760 if (comment != NULL && *comment != '\0')
1761 {
1762 string_list_ty comments;
1763 char *tmp;
1764
1765 string_list_init (&comments);
1766 _its_comment_append (&comments, comment);
1767 tmp = string_list_join (&comments, "\n", '\0', false);
1768 free (comment);
1769 comment = tmp;
1770 }
1771 else
1772 /* Extract comments preceding the node. */
1773 {
1774 xmlNode *sibling;
1775 string_list_ty comments;
1776
1777 string_list_init (&comments);
1778 for (sibling = node->prev; sibling; sibling = sibling->prev)
1779 if (sibling->type != XML_COMMENT_NODE || sibling->prev == NULL)
1780 break;
1781 if (sibling)
1782 {
1783 if (sibling->type != XML_COMMENT_NODE)
1784 sibling = sibling->next;
1785 for (; sibling && sibling->type == XML_COMMENT_NODE;
1786 sibling = sibling->next)
1787 {
1788 xmlChar *content = xmlNodeGetContent (sibling);
1789 _its_comment_append (&comments, (const char *) content);
1790 xmlFree (content);
1791 }
1792 free (comment);
1793 comment = string_list_join (&comments, "\n", '\0', false);
1794 string_list_destroy (&comments);
1795 }
1796 }
1797
1798 value = its_value_list_get_value (values, "space");
1799 if (value && strcmp (value, "preserve") == 0)
1800 whitespace = ITS_WHITESPACE_PRESERVE;
1801 else if (value && strcmp (value, "trim") == 0)
1802 whitespace = ITS_WHITESPACE_TRIM;
1803 else if (value && strcmp (value, "paragraph") == 0)
1804 whitespace = ITS_WHITESPACE_NORMALIZE_PARAGRAPH;
1805 else
1806 whitespace = ITS_WHITESPACE_NORMALIZE;
1807
1808 value = its_value_list_get_value (values, "escape");
1809 no_escape = value != NULL && strcmp (value, "no") == 0;
1810
1811 value = its_value_list_get_value (values, "contextPointer");
1812 if (value)
1813 msgctxt = _its_get_content (rules, node, value, ITS_WHITESPACE_PRESERVE,
1814 no_escape);
1815
1816 value = its_value_list_get_value (values, "textPointer");
1817 if (value)
1818 msgid = _its_get_content (rules, node, value, ITS_WHITESPACE_PRESERVE,
1819 no_escape);
1820 its_value_list_destroy (values);
1821 free (values);
1822
1823 if (msgid == NULL)
1824 msgid = _its_collect_text_content (node, whitespace, no_escape);
1825 if (*msgid != '\0')
1826 {
1827 lex_pos_ty pos;
1828 char *marker;
1829
1830 pos.file_name = xstrdup (logical_filename);
1831 pos.line_number = xmlGetLineNo (node);
1832
1833 if (node->type == XML_ELEMENT_NODE)
1834 {
1835 assert (node->parent);
1836 marker = xasprintf ("%s/%s", node->parent->name, node->name);
1837 }
1838 else
1839 {
1840 assert (node->parent && node->parent->parent);
1841 marker = xasprintf ("%s/%s@%s",
1842 node->parent->parent->name,
1843 node->parent->name,
1844 node->name);
1845 }
1846
1847 if (msgctxt != NULL && *msgctxt == '\0')
1848 {
1849 free (msgctxt);
1850 msgctxt = NULL;
1851 }
1852
1853 callback (mlp, msgctxt, msgid, &pos, comment, marker, whitespace);
1854 free (marker);
1855 }
1856 free (msgctxt);
1857 free (msgid);
1858 free (comment);
1859 }
1860 }
1861
1862 void
its_rule_list_extract(its_rule_list_ty * rules,FILE * fp,const char * real_filename,const char * logical_filename,flag_context_list_table_ty * flag_table,msgdomain_list_ty * mdlp,its_extract_callback_ty callback)1863 its_rule_list_extract (its_rule_list_ty *rules,
1864 FILE *fp, const char *real_filename,
1865 const char *logical_filename,
1866 flag_context_list_table_ty *flag_table,
1867 msgdomain_list_ty *mdlp,
1868 its_extract_callback_ty callback)
1869 {
1870 xmlDoc *doc;
1871 struct its_node_list_ty nodes;
1872 size_t i;
1873
1874 doc = xmlReadFd (fileno (fp), logical_filename, NULL,
1875 XML_PARSE_NONET
1876 | XML_PARSE_NOWARNING
1877 | XML_PARSE_NOBLANKS
1878 | XML_PARSE_NOERROR);
1879 if (doc == NULL)
1880 {
1881 xmlError *err = xmlGetLastError ();
1882 error (0, 0, _("cannot read %s: %s"), logical_filename, err->message);
1883 return;
1884 }
1885
1886 its_rule_list_apply (rules, doc);
1887
1888 memset (&nodes, 0, sizeof (struct its_node_list_ty));
1889 its_rule_list_extract_nodes (rules,
1890 &nodes,
1891 xmlDocGetRootElement (doc));
1892
1893 for (i = 0; i < nodes.nitems; i++)
1894 its_rule_list_extract_text (rules, nodes.items[i],
1895 logical_filename,
1896 flag_table,
1897 mdlp->item[0]->messages,
1898 callback);
1899
1900 free (nodes.items);
1901 xmlFreeDoc (doc);
1902 }
1903
1904 struct its_merge_context_ty
1905 {
1906 its_rule_list_ty *rules;
1907 xmlDoc *doc;
1908 struct its_node_list_ty nodes;
1909 };
1910
1911 static void
its_merge_context_merge_node(struct its_merge_context_ty * context,xmlNode * node,const char * language,message_list_ty * mlp)1912 its_merge_context_merge_node (struct its_merge_context_ty *context,
1913 xmlNode *node,
1914 const char *language,
1915 message_list_ty *mlp)
1916 {
1917 if (node->type == XML_ELEMENT_NODE)
1918 {
1919 struct its_value_list_ty *values;
1920 const char *value;
1921 char *msgid = NULL, *msgctxt = NULL;
1922 enum its_whitespace_type_ty whitespace;
1923 bool no_escape;
1924
1925 values = its_rule_list_eval (context->rules, node);
1926
1927 value = its_value_list_get_value (values, "space");
1928 if (value && strcmp (value, "preserve") == 0)
1929 whitespace = ITS_WHITESPACE_PRESERVE;
1930 else if (value && strcmp (value, "trim") == 0)
1931 whitespace = ITS_WHITESPACE_TRIM;
1932 else if (value && strcmp (value, "paragraph") == 0)
1933 whitespace = ITS_WHITESPACE_NORMALIZE_PARAGRAPH;
1934 else
1935 whitespace = ITS_WHITESPACE_NORMALIZE;
1936
1937 value = its_value_list_get_value (values, "escape");
1938 no_escape = value != NULL && strcmp (value, "no") == 0;
1939
1940 value = its_value_list_get_value (values, "contextPointer");
1941 if (value)
1942 msgctxt = _its_get_content (context->rules, node, value,
1943 ITS_WHITESPACE_PRESERVE, no_escape);
1944
1945 value = its_value_list_get_value (values, "textPointer");
1946 if (value)
1947 msgid = _its_get_content (context->rules, node, value,
1948 ITS_WHITESPACE_PRESERVE, no_escape);
1949 its_value_list_destroy (values);
1950 free (values);
1951
1952 if (msgid == NULL)
1953 msgid = _its_collect_text_content (node, whitespace, no_escape);
1954 if (*msgid != '\0')
1955 {
1956 message_ty *mp;
1957
1958 mp = message_list_search (mlp, msgctxt, msgid);
1959 if (mp && *mp->msgstr != '\0')
1960 {
1961 xmlNode *translated;
1962
1963 translated = xmlNewNode (node->ns, node->name);
1964 xmlSetProp (translated, BAD_CAST "xml:lang", BAD_CAST language);
1965
1966 xmlNodeAddContent (translated, BAD_CAST mp->msgstr);
1967 xmlAddNextSibling (node, translated);
1968 }
1969 }
1970 free (msgctxt);
1971 free (msgid);
1972 }
1973 }
1974
1975 void
its_merge_context_merge(its_merge_context_ty * context,const char * language,message_list_ty * mlp)1976 its_merge_context_merge (its_merge_context_ty *context,
1977 const char *language,
1978 message_list_ty *mlp)
1979 {
1980 size_t i;
1981
1982 for (i = 0; i < context->nodes.nitems; i++)
1983 its_merge_context_merge_node (context, context->nodes.items[i],
1984 language,
1985 mlp);
1986 }
1987
1988 struct its_merge_context_ty *
its_merge_context_alloc(its_rule_list_ty * rules,const char * filename)1989 its_merge_context_alloc (its_rule_list_ty *rules,
1990 const char *filename)
1991 {
1992 xmlDoc *doc;
1993 struct its_merge_context_ty *result;
1994
1995 doc = xmlReadFile (filename, NULL,
1996 XML_PARSE_NONET
1997 | XML_PARSE_NOWARNING
1998 | XML_PARSE_NOBLANKS
1999 | XML_PARSE_NOERROR);
2000 if (doc == NULL)
2001 {
2002 xmlError *err = xmlGetLastError ();
2003 error (0, 0, _("cannot read %s: %s"), filename, err->message);
2004 return NULL;
2005 }
2006
2007 its_rule_list_apply (rules, doc);
2008
2009 result = XMALLOC (struct its_merge_context_ty);
2010 result->rules = rules;
2011 result->doc = doc;
2012
2013 /* Collect translatable nodes. */
2014 memset (&result->nodes, 0, sizeof (struct its_node_list_ty));
2015 its_rule_list_extract_nodes (result->rules,
2016 &result->nodes,
2017 xmlDocGetRootElement (result->doc));
2018
2019 return result;
2020 }
2021
2022 void
its_merge_context_write(struct its_merge_context_ty * context,FILE * fp)2023 its_merge_context_write (struct its_merge_context_ty *context,
2024 FILE *fp)
2025 {
2026 xmlDocFormatDump (fp, context->doc, 1);
2027 }
2028
2029 void
its_merge_context_free(struct its_merge_context_ty * context)2030 its_merge_context_free (struct its_merge_context_ty *context)
2031 {
2032 xmlFreeDoc (context->doc);
2033 free (context->nodes.items);
2034 free (context);
2035 }
2036