1 #include "XLIFFFile.h"
2
3 #include <algorithm>
4 #include <sys/time.h>
5 #include <time.h>
6 #include <cstdio>
7
8 const char* const XLIFF_XMLNS = "urn:oasis:names:tc:xliff:document:1.2";
9
10 const char *const NS_MAP[] = {
11 "", XLIFF_XMLNS,
12 "xml", XMLNS_XMLNS,
13 NULL, NULL
14 };
15
16 const XMLNamespaceMap XLIFF_NAMESPACES(NS_MAP);
17
18 int
Compare(const XLIFFFile::File & that) const19 XLIFFFile::File::Compare(const XLIFFFile::File& that) const
20 {
21 if (filename != that.filename) {
22 return filename < that.filename ? -1 : 1;
23 }
24 return 0;
25 }
26
27 // =====================================================================================
XLIFFFile()28 XLIFFFile::XLIFFFile()
29 {
30 }
31
~XLIFFFile()32 XLIFFFile::~XLIFFFile()
33 {
34 }
35
36 static XMLNode*
get_unique_node(const XMLNode * parent,const string & ns,const string & name,bool required)37 get_unique_node(const XMLNode* parent, const string& ns, const string& name, bool required)
38 {
39 size_t count = parent->CountElementsByName(ns, name);
40 if (count == 1) {
41 return parent->GetElementByNameAt(ns, name, 0);
42 } else {
43 if (required) {
44 SourcePos pos = count == 0
45 ? parent->Position()
46 : parent->GetElementByNameAt(XLIFF_XMLNS, name, 1)->Position();
47 pos.Error("<%s> elements must contain exactly one <%s> element",
48 parent->Name().c_str(), name.c_str());
49 }
50 return NULL;
51 }
52 }
53
54 XLIFFFile*
Parse(const string & filename)55 XLIFFFile::Parse(const string& filename)
56 {
57 XLIFFFile* result = new XLIFFFile();
58
59 XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY);
60 if (root == NULL) {
61 return NULL;
62 }
63
64 // <file>
65 vector<XMLNode*> files = root->GetElementsByName(XLIFF_XMLNS, "file");
66 for (size_t i=0; i<files.size(); i++) {
67 XMLNode* file = files[i];
68
69 string datatype = file->GetAttribute("", "datatype", "");
70 string originalFile = file->GetAttribute("", "original", "");
71
72 Configuration sourceConfig;
73 sourceConfig.locale = file->GetAttribute("", "source-language", "");
74 result->m_sourceConfig = sourceConfig;
75
76 Configuration targetConfig;
77 targetConfig.locale = file->GetAttribute("", "target-language", "");
78 result->m_targetConfig = targetConfig;
79
80 result->m_currentVersion = file->GetAttribute("", "build-num", "");
81 result->m_oldVersion = "old";
82
83 // <body>
84 XMLNode* body = get_unique_node(file, XLIFF_XMLNS, "body", true);
85 if (body == NULL) continue;
86
87 // <trans-unit>
88 vector<XMLNode*> transUnits = body->GetElementsByName(XLIFF_XMLNS, "trans-unit");
89 for (size_t j=0; j<transUnits.size(); j++) {
90 XMLNode* transUnit = transUnits[j];
91
92 string rawID = transUnit->GetAttribute("", "id", "");
93 if (rawID == "") {
94 transUnit->Position().Error("<trans-unit> tag requires an id");
95 continue;
96 }
97 string id;
98 int index;
99
100 if (!StringResource::ParseTypedID(rawID, &id, &index)) {
101 transUnit->Position().Error("<trans-unit> has invalid id '%s'\n", rawID.c_str());
102 continue;
103 }
104
105 // <source>
106 XMLNode* source = get_unique_node(transUnit, XLIFF_XMLNS, "source", false);
107 if (source != NULL) {
108 XMLNode* node = source->Clone();
109 node->SetPrettyRecursive(XMLNode::EXACT);
110 result->AddStringResource(StringResource(source->Position(), originalFile,
111 sourceConfig, id, index, node, CURRENT_VERSION,
112 result->m_currentVersion));
113 }
114
115 // <target>
116 XMLNode* target = get_unique_node(transUnit, XLIFF_XMLNS, "target", false);
117 if (target != NULL) {
118 XMLNode* node = target->Clone();
119 node->SetPrettyRecursive(XMLNode::EXACT);
120 result->AddStringResource(StringResource(target->Position(), originalFile,
121 targetConfig, id, index, node, CURRENT_VERSION,
122 result->m_currentVersion));
123 }
124
125 // <alt-trans>
126 XMLNode* altTrans = get_unique_node(transUnit, XLIFF_XMLNS, "alt-trans", false);
127 if (altTrans != NULL) {
128 // <source>
129 XMLNode* altSource = get_unique_node(altTrans, XLIFF_XMLNS, "source", false);
130 if (altSource != NULL) {
131 XMLNode* node = altSource->Clone();
132 node->SetPrettyRecursive(XMLNode::EXACT);
133 result->AddStringResource(StringResource(altSource->Position(),
134 originalFile, sourceConfig, id, index, node, OLD_VERSION,
135 result->m_oldVersion));
136 }
137
138 // <target>
139 XMLNode* altTarget = get_unique_node(altTrans, XLIFF_XMLNS, "target", false);
140 if (altTarget != NULL) {
141 XMLNode* node = altTarget->Clone();
142 node->SetPrettyRecursive(XMLNode::EXACT);
143 result->AddStringResource(StringResource(altTarget->Position(),
144 originalFile, targetConfig, id, index, node, OLD_VERSION,
145 result->m_oldVersion));
146 }
147 }
148 }
149 }
150 delete root;
151 return result;
152 }
153
154 XLIFFFile*
Create(const Configuration & sourceConfig,const Configuration & targetConfig,const string & currentVersion)155 XLIFFFile::Create(const Configuration& sourceConfig, const Configuration& targetConfig,
156 const string& currentVersion)
157 {
158 XLIFFFile* result = new XLIFFFile();
159 result->m_sourceConfig = sourceConfig;
160 result->m_targetConfig = targetConfig;
161 result->m_currentVersion = currentVersion;
162 return result;
163 }
164
165 set<string>
Files() const166 XLIFFFile::Files() const
167 {
168 set<string> result;
169 for (vector<File>::const_iterator f = m_files.begin(); f != m_files.end(); f++) {
170 result.insert(f->filename);
171 }
172 return result;
173 }
174
175 void
AddStringResource(const StringResource & str)176 XLIFFFile::AddStringResource(const StringResource& str)
177 {
178 string id = str.TypedID();
179
180 File* f = NULL;
181 const size_t I = m_files.size();
182 for (size_t i=0; i<I; i++) {
183 if (m_files[i].filename == str.file) {
184 f = &m_files[i];
185 break;
186 }
187 }
188 if (f == NULL) {
189 File file;
190 file.filename = str.file;
191 m_files.push_back(file);
192 f = &m_files[I];
193 }
194
195 const size_t J = f->transUnits.size();
196 TransUnit* g = NULL;
197 for (size_t j=0; j<J; j++) {
198 if (f->transUnits[j].id == id) {
199 g = &f->transUnits[j];
200 }
201 }
202 if (g == NULL) {
203 TransUnit group;
204 group.id = id;
205 f->transUnits.push_back(group);
206 g = &f->transUnits[J];
207 }
208
209 StringResource* res = find_string_res(*g, str);
210 if (res == NULL) {
211 return ;
212 }
213 if (res->id != "") {
214 str.pos.Error("Duplicate string resource: %s", res->id.c_str());
215 res->pos.Error("Previous definition here");
216 return ;
217 }
218 *res = str;
219
220 m_strings.insert(str);
221 }
222
223 void
Filter(bool (* func)(const string &,const TransUnit &,void *),void * cookie)224 XLIFFFile::Filter(bool (*func)(const string&,const TransUnit&,void*), void* cookie)
225 {
226 const size_t I = m_files.size();
227 for (size_t ix=0, i=I-1; ix<I; ix++, i--) {
228 File& file = m_files[i];
229
230 const size_t J = file.transUnits.size();
231 for (size_t jx=0, j=J-1; jx<J; jx++, j--) {
232 TransUnit& tu = file.transUnits[j];
233
234 bool keep = func(file.filename, tu, cookie);
235 if (!keep) {
236 if (tu.source.id != "") {
237 m_strings.erase(tu.source);
238 }
239 if (tu.target.id != "") {
240 m_strings.erase(tu.target);
241 }
242 if (tu.altSource.id != "") {
243 m_strings.erase(tu.altSource);
244 }
245 if (tu.altTarget.id != "") {
246 m_strings.erase(tu.altTarget);
247 }
248 file.transUnits.erase(file.transUnits.begin()+j);
249 }
250 }
251 if (file.transUnits.size() == 0) {
252 m_files.erase(m_files.begin()+i);
253 }
254 }
255 }
256
257 void
Map(void (* func)(const string &,TransUnit *,void *),void * cookie)258 XLIFFFile::Map(void (*func)(const string&,TransUnit*,void*), void* cookie)
259 {
260 const size_t I = m_files.size();
261 for (size_t i=0; i<I; i++) {
262 File& file = m_files[i];
263
264 const size_t J = file.transUnits.size();
265 for (size_t j=0; j<J; j++) {
266 func(file.filename, &(file.transUnits[j]), cookie);
267 }
268 }
269 }
270
271 TransUnit*
EditTransUnit(const string & filename,const string & id)272 XLIFFFile::EditTransUnit(const string& filename, const string& id)
273 {
274 const size_t I = m_files.size();
275 for (size_t ix=0, i=I-1; ix<I; ix++, i--) {
276 File& file = m_files[i];
277 if (file.filename == filename) {
278 const size_t J = file.transUnits.size();
279 for (size_t jx=0, j=J-1; jx<J; jx++, j--) {
280 TransUnit& tu = file.transUnits[j];
281 if (tu.id == id) {
282 return &tu;
283 }
284 }
285 }
286 }
287 return NULL;
288 }
289
290 StringResource*
find_string_res(TransUnit & g,const StringResource & str)291 XLIFFFile::find_string_res(TransUnit& g, const StringResource& str)
292 {
293 int index;
294 if (str.version == CURRENT_VERSION) {
295 index = 0;
296 }
297 else if (str.version == OLD_VERSION) {
298 index = 2;
299 }
300 else {
301 str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__);
302 return NULL;
303 }
304 if (str.config == m_sourceConfig) {
305 // index += 0;
306 }
307 else if (str.config == m_targetConfig) {
308 index += 1;
309 }
310 else {
311 str.pos.Error("unknown config for string %s: %s", str.id.c_str(),
312 str.config.ToString().c_str());
313 return NULL;
314 }
315 switch (index) {
316 case 0:
317 return &g.source;
318 case 1:
319 return &g.target;
320 case 2:
321 return &g.altSource;
322 case 3:
323 return &g.altTarget;
324 }
325 str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__);
326 return NULL;
327 }
328
329 int
convert_html_to_xliff(const XMLNode * original,const string & name,XMLNode * addTo,int * phID)330 convert_html_to_xliff(const XMLNode* original, const string& name, XMLNode* addTo, int* phID)
331 {
332 int err = 0;
333 if (original->Type() == XMLNode::TEXT) {
334 addTo->EditChildren().push_back(original->Clone());
335 return 0;
336 } else {
337 string ctype;
338 if (original->Namespace() == "") {
339 if (original->Name() == "b") {
340 ctype = "bold";
341 }
342 else if (original->Name() == "i") {
343 ctype = "italic";
344 }
345 else if (original->Name() == "u") {
346 ctype = "underline";
347 }
348 }
349 if (ctype != "") {
350 vector<XMLAttribute> attrs;
351 attrs.push_back(XMLAttribute(XLIFF_XMLNS, "ctype", ctype));
352 XMLNode* copy = XMLNode::NewElement(original->Position(), XLIFF_XMLNS, "g",
353 attrs, XMLNode::EXACT);
354
355 const vector<XMLNode*>& children = original->Children();
356 size_t I = children.size();
357 for (size_t i=0; i<I; i++) {
358 err |= convert_html_to_xliff(children[i], name, copy, phID);
359 }
360 return err;
361 }
362 else {
363 if (original->Namespace() == XLIFF_XMLNS) {
364 addTo->EditChildren().push_back(original->Clone());
365 return 0;
366 } else {
367 if (original->Namespace() == "") {
368 // flatten out the tag into ph tags -- but only if there is no namespace
369 // that's still unsupported because propagating the xmlns attribute is hard.
370 vector<XMLAttribute> attrs;
371 char idStr[30];
372 (*phID)++;
373 sprintf(idStr, "id-%d", *phID);
374 attrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", idStr));
375
376 if (original->Children().size() == 0) {
377 XMLNode* ph = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
378 "ph", attrs, XMLNode::EXACT);
379 ph->EditChildren().push_back(
380 XMLNode::NewText(original->Position(),
381 original->ToString(XLIFF_NAMESPACES),
382 XMLNode::EXACT));
383 addTo->EditChildren().push_back(ph);
384 } else {
385 XMLNode* begin = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
386 "bpt", attrs, XMLNode::EXACT);
387 begin->EditChildren().push_back(
388 XMLNode::NewText(original->Position(),
389 original->OpenTagToString(XLIFF_NAMESPACES, XMLNode::EXACT),
390 XMLNode::EXACT));
391 XMLNode* end = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
392 "ept", attrs, XMLNode::EXACT);
393 string endText = "</";
394 endText += original->Name();
395 endText += ">";
396 end->EditChildren().push_back(XMLNode::NewText(original->Position(),
397 endText, XMLNode::EXACT));
398
399 addTo->EditChildren().push_back(begin);
400
401 const vector<XMLNode*>& children = original->Children();
402 size_t I = children.size();
403 for (size_t i=0; i<I; i++) {
404 err |= convert_html_to_xliff(children[i], name, addTo, phID);
405 }
406
407 addTo->EditChildren().push_back(end);
408 }
409 return err;
410 } else {
411 original->Position().Error("invalid <%s> element in <%s> tag\n",
412 original->Name().c_str(), name.c_str());
413 return 1;
414 }
415 }
416 }
417 }
418 }
419
420 XMLNode*
create_string_node(const StringResource & str,const string & name)421 create_string_node(const StringResource& str, const string& name)
422 {
423 vector<XMLAttribute> attrs;
424 attrs.push_back(XMLAttribute(XMLNS_XMLNS, "space", "preserve"));
425 XMLNode* node = XMLNode::NewElement(str.pos, XLIFF_XMLNS, name, attrs, XMLNode::EXACT);
426
427 const vector<XMLNode*>& children = str.value->Children();
428 size_t I = children.size();
429 int err = 0;
430 for (size_t i=0; i<I; i++) {
431 int phID = 0;
432 err |= convert_html_to_xliff(children[i], name, node, &phID);
433 }
434
435 if (err != 0) {
436 delete node;
437 }
438 return node;
439 }
440
441 static bool
compare_id(const TransUnit & lhs,const TransUnit & rhs)442 compare_id(const TransUnit& lhs, const TransUnit& rhs)
443 {
444 string lid, rid;
445 int lindex, rindex;
446 StringResource::ParseTypedID(lhs.id, &lid, &lindex);
447 StringResource::ParseTypedID(rhs.id, &rid, &rindex);
448 if (lid < rid) return true;
449 if (lid == rid && lindex < rindex) return true;
450 return false;
451 }
452
453 XMLNode*
ToXMLNode() const454 XLIFFFile::ToXMLNode() const
455 {
456 XMLNode* root;
457 size_t N;
458
459 // <xliff>
460 {
461 vector<XMLAttribute> attrs;
462 XLIFF_NAMESPACES.AddToAttributes(&attrs);
463 attrs.push_back(XMLAttribute(XLIFF_XMLNS, "version", "1.2"));
464 root = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "xliff", attrs, XMLNode::PRETTY);
465 }
466
467 vector<TransUnit> groups;
468
469 // <file>
470 vector<File> files = m_files;
471 sort(files.begin(), files.end());
472 const size_t I = files.size();
473 for (size_t i=0; i<I; i++) {
474 const File& file = files[i];
475
476 vector<XMLAttribute> fileAttrs;
477 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "datatype", "x-android-res"));
478 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "original", file.filename));
479
480 struct timeval tv;
481 struct timezone tz;
482 gettimeofday(&tv, &tz);
483 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "date", trim_string(ctime(&tv.tv_sec))));
484
485 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "source-language", m_sourceConfig.locale));
486 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "target-language", m_targetConfig.locale));
487 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "build-num", m_currentVersion));
488
489 XMLNode* fileNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "file", fileAttrs,
490 XMLNode::PRETTY);
491 root->EditChildren().push_back(fileNode);
492
493 // <body>
494 XMLNode* bodyNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "body",
495 vector<XMLAttribute>(), XMLNode::PRETTY);
496 fileNode->EditChildren().push_back(bodyNode);
497
498 // <trans-unit>
499 vector<TransUnit> transUnits = file.transUnits;
500 sort(transUnits.begin(), transUnits.end(), compare_id);
501 const size_t J = transUnits.size();
502 for (size_t j=0; j<J; j++) {
503 const TransUnit& transUnit = transUnits[j];
504
505 vector<XMLAttribute> tuAttrs;
506
507 // strings start with string:
508 tuAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", transUnit.id));
509 XMLNode* transUnitNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "trans-unit",
510 tuAttrs, XMLNode::PRETTY);
511 bodyNode->EditChildren().push_back(transUnitNode);
512
513 // <extradata>
514 if (transUnit.source.comment != "") {
515 vector<XMLAttribute> extradataAttrs;
516 XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "extradata",
517 extradataAttrs, XMLNode::EXACT);
518 transUnitNode->EditChildren().push_back(extraNode);
519 extraNode->EditChildren().push_back(
520 XMLNode::NewText(GENERATED_POS, transUnit.source.comment,
521 XMLNode::PRETTY));
522 }
523
524 // <source>
525 if (transUnit.source.id != "") {
526 transUnitNode->EditChildren().push_back(
527 create_string_node(transUnit.source, "source"));
528 }
529
530 // <target>
531 if (transUnit.target.id != "") {
532 transUnitNode->EditChildren().push_back(
533 create_string_node(transUnit.target, "target"));
534 }
535
536 // <alt-trans>
537 if (transUnit.altSource.id != "" || transUnit.altTarget.id != ""
538 || transUnit.rejectComment != "") {
539 vector<XMLAttribute> altTransAttrs;
540 XMLNode* altTransNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "alt-trans",
541 altTransAttrs, XMLNode::PRETTY);
542 transUnitNode->EditChildren().push_back(altTransNode);
543
544 // <extradata>
545 if (transUnit.rejectComment != "") {
546 vector<XMLAttribute> extradataAttrs;
547 XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS,
548 "extradata", extradataAttrs,
549 XMLNode::EXACT);
550 altTransNode->EditChildren().push_back(extraNode);
551 extraNode->EditChildren().push_back(
552 XMLNode::NewText(GENERATED_POS, transUnit.rejectComment,
553 XMLNode::PRETTY));
554 }
555
556 // <source>
557 if (transUnit.altSource.id != "") {
558 altTransNode->EditChildren().push_back(
559 create_string_node(transUnit.altSource, "source"));
560 }
561
562 // <target>
563 if (transUnit.altTarget.id != "") {
564 altTransNode->EditChildren().push_back(
565 create_string_node(transUnit.altTarget, "target"));
566 }
567 }
568
569 }
570 }
571
572 return root;
573 }
574
575
576 string
ToString() const577 XLIFFFile::ToString() const
578 {
579 XMLNode* xml = ToXMLNode();
580 string s = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
581 s += xml->ToString(XLIFF_NAMESPACES);
582 delete xml;
583 s += '\n';
584 return s;
585 }
586
587 Stats
GetStats(const string & config) const588 XLIFFFile::GetStats(const string& config) const
589 {
590 Stats stat;
591 stat.config = config;
592 stat.files = m_files.size();
593 stat.toBeTranslated = 0;
594 stat.noComments = 0;
595
596 for (vector<File>::const_iterator file=m_files.begin(); file!=m_files.end(); file++) {
597 stat.toBeTranslated += file->transUnits.size();
598
599 for (vector<TransUnit>::const_iterator tu=file->transUnits.begin();
600 tu!=file->transUnits.end(); tu++) {
601 if (tu->source.comment == "") {
602 stat.noComments++;
603 }
604 }
605 }
606
607 stat.totalStrings = stat.toBeTranslated;
608
609 return stat;
610 }
611