• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "XLIFFFile.h"
2 
3 #include <algorithm>
4 #include <sys/time.h>
5 #include <time.h>
6 #include <cstdio>
7 
8 const char* const XLIFF_XMLNS = "urn:oasis:names:tc:xliff:document:1.2";
9 
10 const char *const NS_MAP[] = {
11     "", XLIFF_XMLNS,
12     "xml", XMLNS_XMLNS,
13     NULL, NULL
14 };
15 
16 const XMLNamespaceMap XLIFF_NAMESPACES(NS_MAP);
17 
18 int
Compare(const XLIFFFile::File & that) const19 XLIFFFile::File::Compare(const XLIFFFile::File& that) const
20 {
21     if (filename != that.filename) {
22         return filename < that.filename ? -1 : 1;
23     }
24     return 0;
25 }
26 
27 // =====================================================================================
XLIFFFile()28 XLIFFFile::XLIFFFile()
29 {
30 }
31 
~XLIFFFile()32 XLIFFFile::~XLIFFFile()
33 {
34 }
35 
36 static XMLNode*
get_unique_node(const XMLNode * parent,const string & ns,const string & name,bool required)37 get_unique_node(const XMLNode* parent, const string& ns, const string& name, bool required)
38 {
39     size_t count = parent->CountElementsByName(ns, name);
40     if (count == 1) {
41         return parent->GetElementByNameAt(ns, name, 0);
42     } else {
43         if (required) {
44             SourcePos pos = count == 0
45                                 ? parent->Position()
46                                 : parent->GetElementByNameAt(XLIFF_XMLNS, name, 1)->Position();
47             pos.Error("<%s> elements must contain exactly one <%s> element",
48                                 parent->Name().c_str(), name.c_str());
49         }
50         return NULL;
51     }
52 }
53 
54 XLIFFFile*
Parse(const string & filename)55 XLIFFFile::Parse(const string& filename)
56 {
57     XLIFFFile* result = new XLIFFFile();
58 
59     XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY);
60     if (root == NULL) {
61         return NULL;
62     }
63 
64     // <file>
65     vector<XMLNode*> files = root->GetElementsByName(XLIFF_XMLNS, "file");
66     for (size_t i=0; i<files.size(); i++) {
67         XMLNode* file = files[i];
68 
69         string datatype = file->GetAttribute("", "datatype", "");
70         string originalFile = file->GetAttribute("", "original", "");
71 
72         Configuration sourceConfig;
73         sourceConfig.locale = file->GetAttribute("", "source-language", "");
74         result->m_sourceConfig = sourceConfig;
75 
76         Configuration targetConfig;
77         targetConfig.locale = file->GetAttribute("", "target-language", "");
78         result->m_targetConfig = targetConfig;
79 
80         result->m_currentVersion = file->GetAttribute("", "build-num", "");
81         result->m_oldVersion = "old";
82 
83         // <body>
84         XMLNode* body = get_unique_node(file, XLIFF_XMLNS, "body", true);
85         if (body == NULL) continue;
86 
87         // <trans-unit>
88         vector<XMLNode*> transUnits = body->GetElementsByName(XLIFF_XMLNS, "trans-unit");
89         for (size_t j=0; j<transUnits.size(); j++) {
90             XMLNode* transUnit = transUnits[j];
91 
92             string rawID = transUnit->GetAttribute("", "id", "");
93             if (rawID == "") {
94                 transUnit->Position().Error("<trans-unit> tag requires an id");
95                 continue;
96             }
97             string id;
98             int index;
99 
100             if (!StringResource::ParseTypedID(rawID, &id, &index)) {
101                 transUnit->Position().Error("<trans-unit> has invalid id '%s'\n", rawID.c_str());
102                 continue;
103             }
104 
105             // <source>
106             XMLNode* source = get_unique_node(transUnit, XLIFF_XMLNS, "source", false);
107             if (source != NULL) {
108                 XMLNode* node = source->Clone();
109                 node->SetPrettyRecursive(XMLNode::EXACT);
110                 result->AddStringResource(StringResource(source->Position(), originalFile,
111                             sourceConfig, id, index, node, CURRENT_VERSION,
112                             result->m_currentVersion));
113             }
114 
115             // <target>
116             XMLNode* target = get_unique_node(transUnit, XLIFF_XMLNS, "target", false);
117             if (target != NULL) {
118                 XMLNode* node = target->Clone();
119                 node->SetPrettyRecursive(XMLNode::EXACT);
120                 result->AddStringResource(StringResource(target->Position(), originalFile,
121                             targetConfig, id, index, node, CURRENT_VERSION,
122                             result->m_currentVersion));
123             }
124 
125             // <alt-trans>
126             XMLNode* altTrans = get_unique_node(transUnit, XLIFF_XMLNS, "alt-trans", false);
127             if (altTrans != NULL) {
128                 // <source>
129                 XMLNode* altSource = get_unique_node(altTrans, XLIFF_XMLNS, "source", false);
130                 if (altSource != NULL) {
131                     XMLNode* node = altSource->Clone();
132                     node->SetPrettyRecursive(XMLNode::EXACT);
133                     result->AddStringResource(StringResource(altSource->Position(),
134                                 originalFile, sourceConfig, id, index, node, OLD_VERSION,
135                                 result->m_oldVersion));
136                 }
137 
138                 // <target>
139                 XMLNode* altTarget = get_unique_node(altTrans, XLIFF_XMLNS, "target", false);
140                 if (altTarget != NULL) {
141                     XMLNode* node = altTarget->Clone();
142                     node->SetPrettyRecursive(XMLNode::EXACT);
143                     result->AddStringResource(StringResource(altTarget->Position(),
144                                 originalFile, targetConfig, id, index, node, OLD_VERSION,
145                                 result->m_oldVersion));
146                 }
147             }
148         }
149     }
150     delete root;
151     return result;
152 }
153 
154 XLIFFFile*
Create(const Configuration & sourceConfig,const Configuration & targetConfig,const string & currentVersion)155 XLIFFFile::Create(const Configuration& sourceConfig, const Configuration& targetConfig,
156                                 const string& currentVersion)
157 {
158     XLIFFFile* result = new XLIFFFile();
159         result->m_sourceConfig = sourceConfig;
160         result->m_targetConfig = targetConfig;
161         result->m_currentVersion = currentVersion;
162     return result;
163 }
164 
165 set<string>
Files() const166 XLIFFFile::Files() const
167 {
168     set<string> result;
169     for (vector<File>::const_iterator f = m_files.begin(); f != m_files.end(); f++) {
170         result.insert(f->filename);
171     }
172     return result;
173 }
174 
175 void
AddStringResource(const StringResource & str)176 XLIFFFile::AddStringResource(const StringResource& str)
177 {
178     string id = str.TypedID();
179 
180     File* f = NULL;
181     const size_t I = m_files.size();
182     for (size_t i=0; i<I; i++) {
183         if (m_files[i].filename == str.file) {
184             f = &m_files[i];
185             break;
186         }
187     }
188     if (f == NULL) {
189         File file;
190         file.filename = str.file;
191         m_files.push_back(file);
192         f = &m_files[I];
193     }
194 
195     const size_t J = f->transUnits.size();
196     TransUnit* g = NULL;
197     for (size_t j=0; j<J; j++) {
198         if (f->transUnits[j].id == id) {
199             g = &f->transUnits[j];
200         }
201     }
202     if (g == NULL) {
203         TransUnit group;
204         group.id = id;
205         f->transUnits.push_back(group);
206         g = &f->transUnits[J];
207     }
208 
209     StringResource* res = find_string_res(*g, str);
210     if (res == NULL) {
211         return ;
212     }
213     if (res->id != "") {
214         str.pos.Error("Duplicate string resource: %s", res->id.c_str());
215         res->pos.Error("Previous definition here");
216         return ;
217     }
218     *res = str;
219 
220     m_strings.insert(str);
221 }
222 
223 void
Filter(bool (* func)(const string &,const TransUnit &,void *),void * cookie)224 XLIFFFile::Filter(bool (*func)(const string&,const TransUnit&,void*), void* cookie)
225 {
226     const size_t I = m_files.size();
227     for (size_t ix=0, i=I-1; ix<I; ix++, i--) {
228         File& file = m_files[i];
229 
230         const size_t J = file.transUnits.size();
231         for (size_t jx=0, j=J-1; jx<J; jx++, j--) {
232             TransUnit& tu = file.transUnits[j];
233 
234             bool keep = func(file.filename, tu, cookie);
235             if (!keep) {
236                 if (tu.source.id != "") {
237                     m_strings.erase(tu.source);
238                 }
239                 if (tu.target.id != "") {
240                     m_strings.erase(tu.target);
241                 }
242                 if (tu.altSource.id != "") {
243                     m_strings.erase(tu.altSource);
244                 }
245                 if (tu.altTarget.id != "") {
246                     m_strings.erase(tu.altTarget);
247                 }
248                 file.transUnits.erase(file.transUnits.begin()+j);
249             }
250         }
251         if (file.transUnits.size() == 0) {
252             m_files.erase(m_files.begin()+i);
253         }
254     }
255 }
256 
257 void
Map(void (* func)(const string &,TransUnit *,void *),void * cookie)258 XLIFFFile::Map(void (*func)(const string&,TransUnit*,void*), void* cookie)
259 {
260     const size_t I = m_files.size();
261     for (size_t i=0; i<I; i++) {
262         File& file = m_files[i];
263 
264         const size_t J = file.transUnits.size();
265         for (size_t j=0; j<J; j++) {
266             func(file.filename, &(file.transUnits[j]), cookie);
267         }
268     }
269 }
270 
271 TransUnit*
EditTransUnit(const string & filename,const string & id)272 XLIFFFile::EditTransUnit(const string& filename, const string& id)
273 {
274     const size_t I = m_files.size();
275     for (size_t ix=0, i=I-1; ix<I; ix++, i--) {
276         File& file = m_files[i];
277         if (file.filename == filename) {
278             const size_t J = file.transUnits.size();
279             for (size_t jx=0, j=J-1; jx<J; jx++, j--) {
280                 TransUnit& tu = file.transUnits[j];
281                 if (tu.id == id) {
282                     return &tu;
283                 }
284             }
285         }
286     }
287     return NULL;
288 }
289 
290 StringResource*
find_string_res(TransUnit & g,const StringResource & str)291 XLIFFFile::find_string_res(TransUnit& g, const StringResource& str)
292 {
293     int index;
294     if (str.version == CURRENT_VERSION) {
295         index = 0;
296     }
297     else if (str.version == OLD_VERSION) {
298         index = 2;
299     }
300     else {
301         str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__);
302         return NULL;
303     }
304     if (str.config == m_sourceConfig) {
305         // index += 0;
306     }
307     else if (str.config == m_targetConfig) {
308         index += 1;
309     }
310     else {
311         str.pos.Error("unknown config for string %s: %s", str.id.c_str(),
312                             str.config.ToString().c_str());
313         return NULL;
314     }
315     switch (index) {
316         case 0:
317             return &g.source;
318         case 1:
319             return &g.target;
320         case 2:
321             return &g.altSource;
322         case 3:
323             return &g.altTarget;
324     }
325     str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__);
326     return NULL;
327 }
328 
329 int
convert_html_to_xliff(const XMLNode * original,const string & name,XMLNode * addTo,int * phID)330 convert_html_to_xliff(const XMLNode* original, const string& name, XMLNode* addTo, int* phID)
331 {
332     int err = 0;
333     if (original->Type() == XMLNode::TEXT) {
334         addTo->EditChildren().push_back(original->Clone());
335         return 0;
336     } else {
337         string ctype;
338         if (original->Namespace() == "") {
339             if (original->Name() == "b") {
340                 ctype = "bold";
341             }
342             else if (original->Name() == "i") {
343                 ctype = "italic";
344             }
345             else if (original->Name() == "u") {
346                 ctype = "underline";
347             }
348         }
349         if (ctype != "") {
350             vector<XMLAttribute> attrs;
351             attrs.push_back(XMLAttribute(XLIFF_XMLNS, "ctype", ctype));
352             XMLNode* copy = XMLNode::NewElement(original->Position(), XLIFF_XMLNS, "g",
353                                                 attrs, XMLNode::EXACT);
354 
355             const vector<XMLNode*>& children = original->Children();
356             size_t I = children.size();
357             for (size_t i=0; i<I; i++) {
358                 err |= convert_html_to_xliff(children[i], name, copy, phID);
359             }
360             return err;
361         }
362         else {
363             if (original->Namespace() == XLIFF_XMLNS) {
364                 addTo->EditChildren().push_back(original->Clone());
365                 return 0;
366             } else {
367                 if (original->Namespace() == "") {
368                     // flatten out the tag into ph tags -- but only if there is no namespace
369                     // that's still unsupported because propagating the xmlns attribute is hard.
370                     vector<XMLAttribute> attrs;
371                     char idStr[30];
372                     (*phID)++;
373                     sprintf(idStr, "id-%d", *phID);
374                     attrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", idStr));
375 
376                     if (original->Children().size() == 0) {
377                         XMLNode* ph = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
378                                 "ph", attrs, XMLNode::EXACT);
379                         ph->EditChildren().push_back(
380                                 XMLNode::NewText(original->Position(),
381                                     original->ToString(XLIFF_NAMESPACES),
382                                     XMLNode::EXACT));
383                         addTo->EditChildren().push_back(ph);
384                     } else {
385                         XMLNode* begin = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
386                                 "bpt", attrs, XMLNode::EXACT);
387                         begin->EditChildren().push_back(
388                                 XMLNode::NewText(original->Position(),
389                                     original->OpenTagToString(XLIFF_NAMESPACES, XMLNode::EXACT),
390                                     XMLNode::EXACT));
391                         XMLNode* end = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
392                                 "ept", attrs, XMLNode::EXACT);
393                         string endText = "</";
394                             endText += original->Name();
395                             endText += ">";
396                         end->EditChildren().push_back(XMLNode::NewText(original->Position(),
397                                 endText, XMLNode::EXACT));
398 
399                         addTo->EditChildren().push_back(begin);
400 
401                         const vector<XMLNode*>& children = original->Children();
402                         size_t I = children.size();
403                         for (size_t i=0; i<I; i++) {
404                             err |= convert_html_to_xliff(children[i], name, addTo, phID);
405                         }
406 
407                         addTo->EditChildren().push_back(end);
408                     }
409                     return err;
410                 } else {
411                     original->Position().Error("invalid <%s> element in <%s> tag\n",
412                                                 original->Name().c_str(), name.c_str());
413                     return 1;
414                 }
415             }
416         }
417     }
418 }
419 
420 XMLNode*
create_string_node(const StringResource & str,const string & name)421 create_string_node(const StringResource& str, const string& name)
422 {
423     vector<XMLAttribute> attrs;
424     attrs.push_back(XMLAttribute(XMLNS_XMLNS, "space", "preserve"));
425     XMLNode* node = XMLNode::NewElement(str.pos, XLIFF_XMLNS, name, attrs, XMLNode::EXACT);
426 
427     const vector<XMLNode*>& children = str.value->Children();
428     size_t I = children.size();
429     int err = 0;
430     for (size_t i=0; i<I; i++) {
431         int phID = 0;
432         err |= convert_html_to_xliff(children[i], name, node, &phID);
433     }
434 
435     if (err != 0) {
436         delete node;
437     }
438     return node;
439 }
440 
441 static bool
compare_id(const TransUnit & lhs,const TransUnit & rhs)442 compare_id(const TransUnit& lhs, const TransUnit& rhs)
443 {
444     string lid, rid;
445     int lindex, rindex;
446     StringResource::ParseTypedID(lhs.id, &lid, &lindex);
447     StringResource::ParseTypedID(rhs.id, &rid, &rindex);
448     if (lid < rid) return true;
449     if (lid == rid && lindex < rindex) return true;
450     return false;
451 }
452 
453 XMLNode*
ToXMLNode() const454 XLIFFFile::ToXMLNode() const
455 {
456     XMLNode* root;
457     size_t N;
458 
459     // <xliff>
460     {
461         vector<XMLAttribute> attrs;
462         XLIFF_NAMESPACES.AddToAttributes(&attrs);
463         attrs.push_back(XMLAttribute(XLIFF_XMLNS, "version", "1.2"));
464         root = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "xliff", attrs, XMLNode::PRETTY);
465     }
466 
467     vector<TransUnit> groups;
468 
469     // <file>
470     vector<File> files = m_files;
471     sort(files.begin(), files.end());
472     const size_t I = files.size();
473     for (size_t i=0; i<I; i++) {
474         const File& file = files[i];
475 
476         vector<XMLAttribute> fileAttrs;
477         fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "datatype", "x-android-res"));
478         fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "original", file.filename));
479 
480         struct timeval tv;
481         struct timezone tz;
482         gettimeofday(&tv, &tz);
483         fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "date", trim_string(ctime(&tv.tv_sec))));
484 
485         fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "source-language", m_sourceConfig.locale));
486         fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "target-language", m_targetConfig.locale));
487         fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "build-num", m_currentVersion));
488 
489         XMLNode* fileNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "file", fileAttrs,
490                                                 XMLNode::PRETTY);
491         root->EditChildren().push_back(fileNode);
492 
493         // <body>
494         XMLNode* bodyNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "body",
495                                                 vector<XMLAttribute>(), XMLNode::PRETTY);
496         fileNode->EditChildren().push_back(bodyNode);
497 
498         // <trans-unit>
499         vector<TransUnit> transUnits = file.transUnits;
500         sort(transUnits.begin(), transUnits.end(), compare_id);
501         const size_t J = transUnits.size();
502         for (size_t j=0; j<J; j++) {
503             const TransUnit& transUnit = transUnits[j];
504 
505             vector<XMLAttribute> tuAttrs;
506 
507             // strings start with string:
508             tuAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", transUnit.id));
509             XMLNode* transUnitNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "trans-unit",
510                                                          tuAttrs, XMLNode::PRETTY);
511             bodyNode->EditChildren().push_back(transUnitNode);
512 
513             // <extradata>
514             if (transUnit.source.comment != "") {
515                 vector<XMLAttribute> extradataAttrs;
516                 XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "extradata",
517                                                             extradataAttrs, XMLNode::EXACT);
518                 transUnitNode->EditChildren().push_back(extraNode);
519                 extraNode->EditChildren().push_back(
520                         XMLNode::NewText(GENERATED_POS, transUnit.source.comment,
521                                          XMLNode::PRETTY));
522             }
523 
524             // <source>
525             if (transUnit.source.id != "") {
526                 transUnitNode->EditChildren().push_back(
527                                     create_string_node(transUnit.source, "source"));
528             }
529 
530             // <target>
531             if (transUnit.target.id != "") {
532                 transUnitNode->EditChildren().push_back(
533                                     create_string_node(transUnit.target, "target"));
534             }
535 
536             // <alt-trans>
537             if (transUnit.altSource.id != "" || transUnit.altTarget.id != ""
538                     || transUnit.rejectComment != "") {
539                 vector<XMLAttribute> altTransAttrs;
540                 XMLNode* altTransNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "alt-trans",
541                                                             altTransAttrs, XMLNode::PRETTY);
542                 transUnitNode->EditChildren().push_back(altTransNode);
543 
544                 // <extradata>
545                 if (transUnit.rejectComment != "") {
546                     vector<XMLAttribute> extradataAttrs;
547                     XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS,
548                                                                 "extradata", extradataAttrs,
549                                                                 XMLNode::EXACT);
550                     altTransNode->EditChildren().push_back(extraNode);
551                     extraNode->EditChildren().push_back(
552                             XMLNode::NewText(GENERATED_POS, transUnit.rejectComment,
553                                              XMLNode::PRETTY));
554                 }
555 
556                 // <source>
557                 if (transUnit.altSource.id != "") {
558                     altTransNode->EditChildren().push_back(
559                                         create_string_node(transUnit.altSource, "source"));
560                 }
561 
562                 // <target>
563                 if (transUnit.altTarget.id != "") {
564                     altTransNode->EditChildren().push_back(
565                                         create_string_node(transUnit.altTarget, "target"));
566                 }
567             }
568 
569         }
570     }
571 
572     return root;
573 }
574 
575 
576 string
ToString() const577 XLIFFFile::ToString() const
578 {
579     XMLNode* xml = ToXMLNode();
580     string s = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
581     s += xml->ToString(XLIFF_NAMESPACES);
582     delete xml;
583     s += '\n';
584     return s;
585 }
586 
587 Stats
GetStats(const string & config) const588 XLIFFFile::GetStats(const string& config) const
589 {
590     Stats stat;
591     stat.config = config;
592     stat.files = m_files.size();
593     stat.toBeTranslated = 0;
594     stat.noComments = 0;
595 
596     for (vector<File>::const_iterator file=m_files.begin(); file!=m_files.end(); file++) {
597         stat.toBeTranslated += file->transUnits.size();
598 
599         for (vector<TransUnit>::const_iterator tu=file->transUnits.begin();
600                     tu!=file->transUnits.end(); tu++) {
601             if (tu->source.comment == "") {
602                 stat.noComments++;
603             }
604         }
605     }
606 
607     stat.totalStrings = stat.toBeTranslated;
608 
609     return stat;
610 }
611