## @file
# Collect all defined strings in multiple uni files.
#
# Copyright (c) 2014 - 2016, Intel Corporation. All rights reserved.<BR>
#
# This program and the accompanying materials are licensed and made available
# under the terms and conditions of the BSD License which accompanies this
# distribution. The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
#
"""
Collect all defined strings in multiple uni files
"""

##
# Import Modules
#
import os
import codecs
import re
import distutils.util
from Logger import ToolError
from Logger import Log as EdkLogger
from Logger import StringTable as ST
from Library.String import GetLineNo
from Library.Misc import PathClass
from Library.Misc import GetCharIndexOutStr
from Library import DataType as DT
from Library.ParserValidate import CheckUTF16FileHeader

##
# Static definitions
#
# Escape sequences as they are written in a .uni file.
UNICODE_WIDE_CHAR = u'\\wide'
UNICODE_NARROW_CHAR = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
UNICODE_UNICODE_CR = '\r'
UNICODE_UNICODE_LF = '\n'

# Characters the escape sequences above are replaced with while parsing.
NARROW_CHAR = u'\uFFF0'
WIDE_CHAR = u'\uFFF1'
NON_BREAKING_CHAR = u'\uFFF2'
CR = u'\u000D'
LF = u'\u000A'
NULL = u'\u0000'
TAB = u'\t'
BACK_SPLASH = u'\\'

# Raw string so that \s and \S reach the regex engine without relying on
# Python passing unknown string escapes through unchanged.
gINCLUDE_PATTERN = re.compile(r"^!include[\s]+([\S]+)[\s]*$", re.MULTILINE | re.UNICODE)

# Three-letter language code -> two-letter language code.
gLANG_CONV_TABLE = {
    'eng':'en', 'fra':'fr',
    'aar':'aa', 'abk':'ab', 'ave':'ae', 'afr':'af', 'aka':'ak', 'amh':'am',
    'arg':'an', 'ara':'ar', 'asm':'as', 'ava':'av', 'aym':'ay', 'aze':'az',
    'bak':'ba', 'bel':'be', 'bul':'bg', 'bih':'bh', 'bis':'bi', 'bam':'bm',
    'ben':'bn', 'bod':'bo', 'bre':'br', 'bos':'bs', 'cat':'ca', 'che':'ce',
    'cha':'ch', 'cos':'co', 'cre':'cr', 'ces':'cs', 'chu':'cu', 'chv':'cv',
    'cym':'cy', 'dan':'da', 'deu':'de', 'div':'dv', 'dzo':'dz', 'ewe':'ee',
    'ell':'el', 'epo':'eo', 'spa':'es', 'est':'et', 'eus':'eu', 'fas':'fa',
    'ful':'ff', 'fin':'fi', 'fij':'fj', 'fao':'fo', 'fry':'fy', 'gle':'ga',
    'gla':'gd', 'glg':'gl', 'grn':'gn', 'guj':'gu', 'glv':'gv', 'hau':'ha',
    'heb':'he', 'hin':'hi', 'hmo':'ho', 'hrv':'hr', 'hat':'ht', 'hun':'hu',
    'hye':'hy', 'her':'hz', 'ina':'ia', 'ind':'id', 'ile':'ie', 'ibo':'ig',
    'iii':'ii', 'ipk':'ik', 'ido':'io', 'isl':'is', 'ita':'it', 'iku':'iu',
    'jpn':'ja', 'jav':'jv', 'kat':'ka', 'kon':'kg', 'kik':'ki', 'kua':'kj',
    'kaz':'kk', 'kal':'kl', 'khm':'km', 'kan':'kn', 'kor':'ko', 'kau':'kr',
    'kas':'ks', 'kur':'ku', 'kom':'kv', 'cor':'kw', 'kir':'ky', 'lat':'la',
    'ltz':'lb', 'lug':'lg', 'lim':'li', 'lin':'ln', 'lao':'lo', 'lit':'lt',
    'lub':'lu', 'lav':'lv', 'mlg':'mg', 'mah':'mh', 'mri':'mi', 'mkd':'mk',
    'mal':'ml', 'mon':'mn', 'mar':'mr', 'msa':'ms', 'mlt':'mt', 'mya':'my',
    'nau':'na', 'nob':'nb', 'nde':'nd', 'nep':'ne', 'ndo':'ng', 'nld':'nl',
    'nno':'nn', 'nor':'no', 'nbl':'nr', 'nav':'nv', 'nya':'ny', 'oci':'oc',
    'oji':'oj', 'orm':'om', 'ori':'or', 'oss':'os', 'pan':'pa', 'pli':'pi',
    'pol':'pl', 'pus':'ps', 'por':'pt', 'que':'qu', 'roh':'rm', 'run':'rn',
    'ron':'ro', 'rus':'ru', 'kin':'rw', 'san':'sa', 'srd':'sc', 'snd':'sd',
    'sme':'se', 'sag':'sg', 'sin':'si', 'slk':'sk', 'slv':'sl', 'smo':'sm',
    'sna':'sn', 'som':'so', 'sqi':'sq', 'srp':'sr', 'ssw':'ss', 'sot':'st',
    'sun':'su', 'swe':'sv', 'swa':'sw', 'tam':'ta', 'tel':'te', 'tgk':'tg',
    'tha':'th', 'tir':'ti', 'tuk':'tk', 'tgl':'tl', 'tsn':'tn', 'ton':'to',
    'tur':'tr', 'tso':'ts', 'tat':'tt', 'twi':'tw', 'tah':'ty', 'uig':'ug',
    'ukr':'uk', 'urd':'ur', 'uzb':'uz', 'ven':'ve', 'vie':'vi', 'vol':'vo',
    'wln':'wa', 'wol':'wo', 'xho':'xh', 'yid':'yi', 'yor':'yo', 'zha':'za',
    'zho':'zh', 'zul':'zu',
}

## Convert a python unicode string to a normal string
#
# Convert a python unicode string
# to a normal string
# UniToStr(u'I am a string') is 'I am a string'
#
# @param Uni:     The python unicode string
#
# @retval:        The formatted normal string
#
def UniToStr(Uni):
    # repr() yields u'...' on Python 2 but '...' on Python 3. Drop an optional
    # one-letter prefix first so the quote stripping is right on both.
    Text = repr(Uni)
    if Text[:1] not in ('"', "'"):
        Text = Text[1:]
    return Text[1:-1]

## Convert a unicode string to a Hex list
#
# Each character is emitted as two byte strings, low byte first.
# UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
#
# @param Uni:     The python unicode string
#
# @retval List:   The formatted hex list
#
def UniToHexList(Uni):
    List = []
    for Item in Uni:
        Temp = '%04X' % ord(Item)
        List.append('0x' + Temp[2:4])
        List.append('0x' + Temp[0:2])
    return List

## Convert special unicode characters
#
# Convert the copyright, registered and trademark signs to (c), (r) and (tm).
#
# @param Uni:     The python unicode string
#
# @retval NewUni:  The converted unicode string
#
def ConvertSpecialUnicodes(Uni):
    NewUni = Uni
    NewUni = NewUni.replace(u'\u00A9', '(c)')
    NewUni = NewUni.replace(u'\u00AE', '(r)')
    NewUni = NewUni.replace(u'\u2122', '(tm)')
    return NewUni

## GetLanguageCode1766
#
# Check the language code read from .UNI file and convert RFC 4646 codes to RFC 1766 codes
# RFC 1766 language codes supported in compatibility mode
# RFC 4646 language codes supported in native mode
#
# NOTE: the function currently returns its input unchanged; everything after the
# first return statement is unreachable and is kept only as the reference
# implementation of the conversion.
#
# @param LangName:   Language codes read from .UNI file
# @param File:       File reported in the error message (unused while the early return is in place)
#
# @retval LangName:  The language code, unchanged
#
def GetLanguageCode1766(LangName, File=None):
    return LangName

    length = len(LangName)
    if length == 2:
        if LangName.isalpha():
            for Key in gLANG_CONV_TABLE.keys():
                if gLANG_CONV_TABLE.get(Key) == LangName.lower():
                    return Key
    elif length == 3:
        if LangName.isalpha() and gLANG_CONV_TABLE.get(LangName.lower()):
            return LangName
        else:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FORMAT_INVALID,
                            "Invalid RFC 1766 language code : %s" % LangName,
                            File)
    elif length == 5:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            for Key in gLANG_CONV_TABLE.keys():
                if gLANG_CONV_TABLE.get(Key) == LangName[0:2].lower():
                    return Key
    elif length >= 6:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            for Key in gLANG_CONV_TABLE.keys():
                if gLANG_CONV_TABLE.get(Key) == LangName[0:2].lower():
                    return Key
        if LangName[0:3].isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None and LangName[3] == '-':
            for Key in gLANG_CONV_TABLE.keys():
                if Key == LangName[0:3].lower():
                    return Key

    EdkLogger.Error("Unicode File Parser",
                    ToolError.FORMAT_INVALID,
                    "Invalid RFC 4646 language code : %s" % LangName,
                    File)

## GetLanguageCode
#
# Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
# RFC 1766 language codes supported in compatibility mode
# RFC 4646 language codes supported in native mode
#
# @param LangName:          Language codes read from .UNI file
# @param IsCompatibleMode:  True to accept a three-letter code and map it through gLANG_CONV_TABLE
# @param File:              File reported in the error message
#
# @retval LangName:  Valid language code in RFC 4646 format; an error is reported
#                    through EdkLogger.Error when the code is not accepted
#
def GetLanguageCode(LangName, IsCompatibleMode, File):
    length = len(LangName)
    if IsCompatibleMode:
        if length == 3 and LangName.isalpha():
            TempLangName = gLANG_CONV_TABLE.get(LangName.lower())
            if TempLangName is not None:
                return TempLangName
            return LangName
        else:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FORMAT_INVALID,
                            "Invalid RFC 1766 language code : %s" % LangName,
                            File)
    # Private-use codes ("x-..."). Slicing instead of indexing keeps names shorter
    # than two characters from raising IndexError; they fall through to the
    # format error below instead.
    if LangName[:2].lower() == 'x-':
        return LangName
    if length == 2:
        if LangName.isalpha():
            return LangName
    elif length == 3:
        if LangName.isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None:
            return LangName
    elif length == 5:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
    elif length >= 6:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
        if LangName[0:3].isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None and LangName[3] == '-':
            return LangName

    EdkLogger.Error("Unicode File Parser",
                    ToolError.FORMAT_INVALID,
                    "Invalid RFC 4646 language code : %s" % LangName,
                    File)

## FormatUniEntry
#
# Format the entry in Uni file.
#
# @param StrTokenName    StrTokenName.
# @param TokenValueList  A list of (Lang, Value) pairs to be processed.
# @param ContainerFile   ContainerFile, passed to GetLanguageCode for error reporting.
#
# @return formatted entry, or '' when no pair produced any content
def FormatUniEntry(StrTokenName, TokenValueList, ContainerFile):
    SubContent = ''
    PreFormatLength = 40
    if len(StrTokenName) > PreFormatLength:
        PreFormatLength = len(StrTokenName) + 1
    # Column at which the quoted value text starts.
    ValueIndent = PreFormatLength + len('#language en-US ')
    for (Lang, Value) in TokenValueList:
        if not Value or Lang == DT.TAB_LANGUAGE_EN_X:
            continue
        if Lang == '':
            Lang = DT.TAB_LANGUAGE_EN_US
        if Lang == 'eng':
            Lang = DT.TAB_LANGUAGE_EN_US
        elif len(Lang.split('-')[0]) == 3:
            Lang = GetLanguageCode(Lang.split('-')[0], True, ContainerFile)
        else:
            Lang = GetLanguageCode(Lang, False, ContainerFile)
        ValueList = Value.split('\n')
        SubValueContent = ''
        for SubValue in ValueList:
            if SubValue.strip():
                SubValueContent += ' ' * ValueIndent + '\"%s\\n\"' % SubValue.strip() + '\r\n'
        # Drop the indent of the first value line (it follows the #language tag)
        # and the literal \n of the last value line, then close the quote again.
        SubValueContent = SubValueContent[ValueIndent:SubValueContent.rfind('\\n')] + '\"' + '\r\n'
        SubContent += ' '*PreFormatLength + '#language %-5s ' % Lang + SubValueContent
    if SubContent:
        SubContent = StrTokenName + ' '*(PreFormatLength - len(StrTokenName)) + SubContent[PreFormatLength:]
    return SubContent


## _ReadUniLines
#
# Read all lines of a file, trying utf_8, utf_16 and utf_16_le in that order.
# The file handle is always closed.
#
# @param Path:  Path of the file to read
#
# @retval list of decoded lines; raises the last UnicodeError when no encoding
#         fits, and lets IOError/OSError from opening the file propagate
#
def _ReadUniLines(Path):
    LastError = None
    for Encoding in ('utf_8', 'utf_16', 'utf_16_le'):
        try:
            FileObj = codecs.open(Path, mode='rb', encoding=Encoding)
            try:
                return FileObj.readlines()
            finally:
                FileObj.close()
        except UnicodeError as Xstr:
            LastError = Xstr
    raise LastError

## _RecordStringName
#
# Remember the token name of a '#string <NAME>' entry. A repeated name is an
# error only for the abstract/description tokens listed below.
#
# @param StrName:           '#string <NAME>' text, may be empty
# @param ExistStrNameList:  Names seen so far, updated in place
# @param File:              File object whose Path is reported on error
#
def _RecordStringName(StrName, ExistStrNameList, File):
    if not StrName:
        return
    TokenName = StrName.split()[1]
    if TokenName not in ExistStrNameList:
        ExistStrNameList.append(TokenName.strip())
    elif TokenName in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION,
                       DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION,
                       DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION,
                       DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                        Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % TokenName,
                        ExtraData=File.Path)


## StringDefClassObject
#
# A structure for language definition
#
class StringDefClassObject(object):
    def __init__(self, Name = None, Value = None, Referenced = False, Token = None, UseOtherLangDef = ''):
        self.StringName = ''
        self.StringNameByteList = []
        self.StringValue = ''
        self.StringValueByteList = ''
        self.Token = 0
        self.Referenced = Referenced
        self.UseOtherLangDef = UseOtherLangDef
        self.Length = 0

        if Name is not None:
            self.StringName = Name
            self.StringNameByteList = UniToHexList(Name)
        if Value is not None:
            self.StringValue = Value
            self.StringValueByteList = UniToHexList(self.StringValue)
            self.Length = len(self.StringValueByteList)
        if Token is not None:
            self.Token = Token

    def __str__(self):
        return repr(self.StringName) + ' ' + \
               repr(self.Token) + ' ' + \
               repr(self.Referenced) + ' ' + \
               repr(self.StringValue) + ' ' + \
               repr(self.UseOtherLangDef)

    #
    # Append Value to the current string value (joined with CR LF when a value
    # already exists) and refresh the byte list and length
    #
    def UpdateValue(self, Value = None):
        if Value is not None:
            if self.StringValue:
                self.StringValue = self.StringValue + '\r\n' + Value
            else:
                self.StringValue = Value
            self.StringValueByteList = UniToHexList(self.StringValue)
            self.Length = len(self.StringValueByteList)

## UniFileClassObject
#
# A structure for .uni file definition
#
class UniFileClassObject(object):
    def __init__(self, FileList = None, IsCompatibleMode = False, IncludePathList = None):
        self.FileList = FileList
        self.File = None
        self.IncFileList = FileList
        self.UniFileHeader = ''
        self.Token = 2
        self.LanguageDef = []                   #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
        self.OrderedStringList = {}             #{ u'LanguageIdentifier' : [StringDefClassObject] }
        self.OrderedStringDict = {}             #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
        self.OrderedStringListByToken = {}      #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
        self.IsCompatibleMode = IsCompatibleMode
        if not IncludePathList:
            self.IncludePathList = []
        else:
            self.IncludePathList = IncludePathList
        # Truthiness test: FileList defaults to None, which has no len().
        if self.FileList:
            self.LoadUniFiles(FileList)

    #
    # Get Language definition
    #
    # Line is expected to split into three quoted-aware fields, e.g.
    # '#langdef en-US "English"'. Registers the language and its two built-in
    # strings; returns True.
    #
    def GetLangDef(self, File, Line):
        Lang = distutils.util.split_quoted((Line.split(u"//")[0]))
        if len(Lang) != 3:
            # Re-read the file only to find the line number for the error report.
            FileIn = []
            try:
                FileIn = _ReadUniLines(File.Path)
            except (IOError, OSError, UnicodeError) as Xstr:
                EdkLogger.Error("Unicode File Parser",
                                ToolError.FILE_OPEN_FAILURE,
                                "File read failure: %s" % str(Xstr),
                                ExtraData=File)
            LineNo = GetLineNo(FileIn, Line, False)
            EdkLogger.Error("Unicode File Parser",
                            ToolError.PARSER_ERROR,
                            "Wrong language definition",
                            ExtraData="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line,
                            File = File, Line = LineNo)
        else:
            LangName = GetLanguageCode(Lang[1], self.IsCompatibleMode, self.File)
            LangPrintName = Lang[2]

        IsLangInDef = False
        for Item in self.LanguageDef:
            if Item[0] == LangName:
                IsLangInDef = True
                break

        if not IsLangInDef:
            self.LanguageDef.append([LangName, LangPrintName])

        #
        # Add language string
        #
        self.AddStringToList(u'$LANGUAGE_NAME', LangName, LangName, 0, True, Index=0)
        self.AddStringToList(u'$PRINTABLE_LANGUAGE_NAME', LangName, LangPrintName, 1, True, Index=1)

        if not IsLangInDef:
            #
            # The found STRING tokens will be added into new language string list
            # so that the unique STRING identifier is reserved for all languages in the package list.
            #
            FirstLangName = self.LanguageDef[0][0]
            if LangName != FirstLangName:
                for Index in range (2, len (self.OrderedStringList[FirstLangName])):
                    Item = self.OrderedStringList[FirstLangName][Index]
                    if Item.UseOtherLangDef != '':
                        OtherLang = Item.UseOtherLangDef
                    else:
                        OtherLang = FirstLangName
                    self.OrderedStringList[LangName].append (StringDefClassObject(Item.StringName,
                                                                                  '',
                                                                                  Item.Referenced,
                                                                                  Item.Token,
                                                                                  OtherLang))
                    self.OrderedStringDict[LangName][Item.StringName] = len(self.OrderedStringList[LangName]) - 1
        return True

    #
    # Get String name and value
    #
    def GetStringObject(self, Item):
        Language = ''
        Value = ''

        Name = Item.split()[1]
        # Check the string name is the upper character
        if Name != '':
            MatchString = re.match('[A-Z0-9_]+', Name, re.UNICODE)
            if MatchString is None or MatchString.end(0) != len(Name):
                EdkLogger.Error("Unicode File Parser",
                                ToolError.FORMAT_INVALID,
                                'The string token name %s in UNI file %s must be upper case character.'
                                % (Name, self.File))
        LanguageList = Item.split(u'#language ')
        # Element 0 is the text before the first '#language ', so skip it.
        for Entry in LanguageList[1:]:
            Language = Entry.split()[0]
            # The value is the text between the first and the last double quote.
            Value = Entry[Entry.find(u'\"') + len(u'\"') : Entry.rfind(u'\"')]
            Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
            self.AddStringToList(Name, Language, Value)

    #
    # Get include file list and load them
    #
    def GetIncludeFile(self, Item, Dir = None):
        if Dir:
            pass
        FileName = Item[Item.find(u'!include ') + len(u'!include ') :Item.find(u' ', len(u'!include '))][1:-1]
        self.LoadUniFile(FileName)

    #
    # Pre-process before parse .uni file
    #
    # Reads File, collects the leading comment header into self.UniFileHeader,
    # validates the layout of the #langdef / #string / #language / "..." lines
    # and returns them normalised to one item per line.
    #
    def PreProcess(self, File, IsIncludeFile=False):
        if not os.path.exists(File.Path) or not os.path.isfile(File.Path):
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FILE_NOT_FOUND,
                            ExtraData=File.Path)

        #
        # Check file header of the Uni file
        #
#         if not CheckUTF16FileHeader(File.Path):
#             EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
#                             ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)

        FileIn = []
        try:
            FileIn = _ReadUniLines(File.Path)
        except (IOError, OSError, UnicodeError):
            EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=File.Path)

        #
        # get the file header
        #
        Lines = []
        HeaderStart = False
        HeaderEnd = False
        if not self.UniFileHeader:
            FirstGenHeader = True
        else:
            FirstGenHeader = False
        for Line in FileIn:
            Line = Line.strip()
            if Line == u'':
                continue
            if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and (Line.find(DT.TAB_HEADER_COMMENT) > -1) \
                and not HeaderEnd and not HeaderStart:
                HeaderStart = True
            if not Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd:
                HeaderEnd = True
            if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd and FirstGenHeader:
                self.UniFileHeader += Line + '\r\n'
                continue

        #
        # Use unique identifier
        #
        FindFlag = -1
        LineCount = 0
        MultiLineFeedExits = False
        #
        # 0: initial value
        # 1: single String entry exist
        # 2: line feed exist under the same single String entry
        #
        StringEntryExistsFlag = 0
        # The loop variable only drives the iteration count; the line actually
        # processed is FileIn[LineCount], because FileIn is edited in place below.
        for Line in FileIn:
            Line = FileIn[LineCount]
            LineCount += 1
            Line = Line.strip()
            #
            # Ignore comment line and empty line
            #
            if Line == u'' or Line.startswith(u'//'):
                #
                # Change the single line String entry flag status
                #
                if StringEntryExistsFlag == 1:
                    StringEntryExistsFlag = 2
                #
                # If the '#string' line and the '#language' line are not in the same line,
                # there should be only one line feed character between them
                #
                if MultiLineFeedExits:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
                continue

            MultiLineFeedExits = False
            #
            # Process comment embedded in string define lines
            #
            FindFlag = Line.find(u'//')
            if FindFlag != -1 and Line.find(u'//') < Line.find(u'"'):
                Line = Line.replace(Line[FindFlag:], u' ')
                # Bounds check: on the last line there is no following line to merge.
                if LineCount < len(FileIn) and FileIn[LineCount].strip().startswith('#language'):
                    Line = Line + FileIn[LineCount]
                    FileIn[LineCount-1] = Line
                    FileIn[LineCount] = '\r\n'
                    LineCount -= 1
                    # Shift the following lines up by one; the merged line is
                    # processed again on the next iteration.
                    for Index in range (LineCount + 1, len (FileIn) - 1):
                        FileIn[Index] = FileIn[Index + 1]
                    continue
            CommIndex = GetCharIndexOutStr(u'/', Line)
            if CommIndex > -1:
                if (len(Line) - 1) > CommIndex:
                    if Line[CommIndex+1] == u'/':
                        Line = Line[:CommIndex].strip()
                    else:
                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
                else:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

            Line = Line.replace(UNICODE_WIDE_CHAR, WIDE_CHAR)
            Line = Line.replace(UNICODE_NARROW_CHAR, NARROW_CHAR)
            Line = Line.replace(UNICODE_NON_BREAKING_CHAR, NON_BREAKING_CHAR)

            # Park escaped backslashes on a placeholder so the escape rewrites
            # below cannot touch them, then restore them at the end.
            Line = Line.replace(u'\\\\', u'\u0006')
            Line = Line.replace(u'\\r\\n', CR + LF)
            Line = Line.replace(u'\\n', CR + LF)
            Line = Line.replace(u'\\r', CR)
            Line = Line.replace(u'\\t', u'\t')
            Line = Line.replace(u'''\"''', u'''"''')
            Line = Line.replace(u'\t', u' ')
            Line = Line.replace(u'\u0006', u'\\')

#             IncList = gINCLUDE_PATTERN.findall(Line)
            IncList = []
            if len(IncList) == 1:
                for Dir in [File.Dir] + self.IncludePathList:
                    IncFile = PathClass(str(IncList[0]), Dir)
                    self.IncFileList.append(IncFile)
                    if os.path.isfile(IncFile.Path):
                        Lines.extend(self.PreProcess(IncFile, True))
                        break
                else:
                    EdkLogger.Error("Unicode File Parser",
                                    ToolError.FILE_NOT_FOUND,
                                    Message="Cannot find include file",
                                    ExtraData=str(IncList[0]))
                continue

            #
            # Between Name entry and Language entry can not contain line feed
            #
            if Line.startswith(u'#string') and Line.find(u'#language') == -1:
                MultiLineFeedExits = True

            if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.find(u'"') < 0:
                MultiLineFeedExits = True

            #
            # Between Language entry and String entry can not contain line feed
            #
            if Line.startswith(u'#language') and len(Line.split()) == 2:
                MultiLineFeedExits = True

            #
            # Between two String entry, can not contain line feed
            #
            if Line.startswith(u'"'):
                if StringEntryExistsFlag == 2:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                    Message=ST.ERR_UNIPARSE_LINEFEED_UP_EXIST % Line, ExtraData=File.Path)

                StringEntryExistsFlag = 1
                if not Line.endswith('"'):
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                    ExtraData='''The line %s misses '"' at the end of it in file %s'''
                                              % (LineCount, File.Path))
            elif Line.startswith(u'#language'):
                if StringEntryExistsFlag == 2:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                    Message=ST.ERR_UNI_MISS_STRING_ENTRY % Line, ExtraData=File.Path)
                StringEntryExistsFlag = 0
            else:
                StringEntryExistsFlag = 0

            Lines.append(Line)

        #
        # Convert string def format as below
        #
        #     #string MY_STRING_1
        #     #language eng
        #     "My first English string line 1"
        #     "My first English string line 2"
        #     #string MY_STRING_1
        #     #language spa
        #     "Mi segunda secuencia 1"
        #     "Mi segunda secuencia 2"
        #

        if not IsIncludeFile and not Lines:
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                            Message=ST.ERR_UNIPARSE_NO_SECTION_EXIST,
                            ExtraData=File.Path)

        NewLines = []
        StrName = u''
        ExistStrNameList = []
        for Line in Lines:
            if StrName and not StrName.split()[1].startswith(DT.TAB_STR_TOKENCNAME + DT.TAB_UNDERLINE_SPLIT):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1],
                                ExtraData=File.Path)

            if StrName and len(StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)) == 4:
                StringTokenList = StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)
                if (StringTokenList[3].upper() in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP] and \
                    StringTokenList[3] not in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP]) or \
                    (StringTokenList[2].upper() == DT.TAB_STR_TOKENERR and StringTokenList[2] != DT.TAB_STR_TOKENERR):
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                    Message=ST.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR % StrName.split()[1],
                                    ExtraData=File.Path)

            if Line.count(u'#language') > 1:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNIPARSE_SEP_LANGENTRY_LINE % Line,
                                ExtraData=File.Path)

            if Line.startswith(u'//'):
                continue
            elif Line.startswith(u'#langdef'):
                if len(Line.split()) == 2:
                    NewLines.append(Line)
                    continue
                elif len(Line.split()) > 2 and Line.find(u'"') > 0:
                    NewLines.append(Line[:Line.find(u'"')].strip())
                    NewLines.append(Line[Line.find(u'"'):])
                else:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            elif Line.startswith(u'#string'):
                if len(Line.split()) == 2:
                    StrName = Line
                    _RecordStringName(StrName, ExistStrNameList, File)
                    continue
                elif len(Line.split()) == 4 and Line.find(u'#language') > 0:
                    if Line[Line.find(u'#language')-1] != ' ' or \
                       Line[Line.find(u'#language')+len(u'#language')] != u' ':
                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                    if Line.find(u'"') > 0:
                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                    StrName = Line.split()[0] + u' ' + Line.split()[1]
                    _RecordStringName(StrName, ExistStrNameList, File)
                    # For an include file the '#string' line is emitted only once.
                    if not IsIncludeFile or StrName not in NewLines:
                        NewLines.append((Line[:Line.find(u'#language')]).strip())
                    NewLines.append((Line[Line.find(u'#language'):]).strip())
                elif len(Line.split()) > 4 and Line.find(u'#language') > 0 and Line.find(u'"') > 0:
                    if Line[Line.find(u'#language')-1] != u' ' or \
                       Line[Line.find(u'#language')+len(u'#language')] != u' ':
                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                    if Line[Line.find(u'"')-1] != u' ':
                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                    StrName = Line.split()[0] + u' ' + Line.split()[1]
                    _RecordStringName(StrName, ExistStrNameList, File)
                    if not IsIncludeFile or StrName not in NewLines:
                        NewLines.append((Line[:Line.find(u'#language')]).strip())
                    NewLines.append((Line[Line.find(u'#language'):Line.find(u'"')]).strip())
                    NewLines.append((Line[Line.find(u'"'):]).strip())
                else:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            elif Line.startswith(u'#language'):
                if len(Line.split()) == 2:
                    if not IsIncludeFile or StrName not in NewLines:
                        NewLines.append(StrName)
                    NewLines.append(Line)
                elif len(Line.split()) > 2 and Line.find(u'"') > 0:
                    if not IsIncludeFile or StrName not in NewLines:
                        NewLines.append(StrName)
                    NewLines.append((Line[:Line.find(u'"')]).strip())
                    NewLines.append((Line[Line.find(u'"'):]).strip())
                else:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            elif Line.startswith(u'"'):
                if u'#string' in Line or u'#language' in Line:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
                NewLines.append(Line)
            else:
                print(Line)
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

        if StrName and not StrName.split()[1].startswith(u'STR_'):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                            Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1],
                            ExtraData=File.Path)

        if StrName and not NewLines:
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                            Message=ST.ERR_UNI_MISS_LANGENTRY % StrName,
                            ExtraData=File.Path)

        #
        # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
        # should be Abstract, Description, BinaryAbstract, BinaryDescription
        AbstractPosition = -1
        DescriptionPosition = -1
        BinaryAbstractPosition = -1
        BinaryDescriptionPosition = -1
        for StrName in ExistStrNameList:
            if DT.TAB_HEADER_ABSTRACT.upper() in StrName:
                if 'BINARY' in StrName:
                    BinaryAbstractPosition = ExistStrNameList.index(StrName)
                else:
                    AbstractPosition = ExistStrNameList.index(StrName)
            if DT.TAB_HEADER_DESCRIPTION.upper() in StrName:
                if 'BINARY' in StrName:
                    BinaryDescriptionPosition = ExistStrNameList.index(StrName)
                else:
                    DescriptionPosition = ExistStrNameList.index(StrName)

        OrderList = sorted([AbstractPosition, DescriptionPosition])
        BinaryOrderList = sorted([BinaryAbstractPosition, BinaryDescriptionPosition])
        Min = OrderList[0]
        Max = OrderList[1]
        BinaryMin = BinaryOrderList[0]
        BinaryMax = BinaryOrderList[1]
        if BinaryDescriptionPosition > -1:
            if not(BinaryDescriptionPosition == BinaryMax and BinaryAbstractPosition == BinaryMin and \
                   BinaryMax > Max):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG,
                                ExtraData=File.Path)
        elif BinaryAbstractPosition > -1:
            if not(BinaryAbstractPosition > Max):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG,
                                ExtraData=File.Path)

        if DescriptionPosition > -1:
            if not(DescriptionPosition == Max and AbstractPosition == Min and \
                   DescriptionPosition > AbstractPosition):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG,
                                ExtraData=File.Path)

        if not self.UniFileHeader:
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                            Message = ST.ERR_NO_SOURCE_HEADER,
                            ExtraData=File.Path)

        return NewLines

    #
    # Load a .uni file
    #
    def LoadUniFile(self, File = None):
        if File is None:
            # There is no File.Path to report here, so no ExtraData is passed.
            EdkLogger.Error("Unicode File Parser",
                            ToolError.PARSER_ERROR,
                            Message='No unicode file is given')
            return

        self.File = File

        #
        # Process special char in file
        #
        Lines = self.PreProcess(File)

        #
        # Get Unicode Information
        #
        # Pre-set so that files with fewer than three lines do not hit an unbound
        # name. Near the end of the list both keep their previous value.
        SecondLine = u''
        ThirdLine = u''
        for IndexI in range(len(Lines)):
            Line = Lines[IndexI]
            if (IndexI + 1) < len(Lines):
                SecondLine = Lines[IndexI + 1]
            if (IndexI + 2) < len(Lines):
                ThirdLine = Lines[IndexI + 2]

            #
            # Get Language def information
            #
            if Line.find(u'#langdef ') >= 0:
                self.GetLangDef(File, Line + u' ' + SecondLine)
                continue

            Name = ''
            Language = ''
            Value = ''
            CombineToken = False
            #
            # Get string def information format as below
            #
            #     #string MY_STRING_1
            #     #language eng
            #     "My first English string line 1"
            #     "My first English string line 2"
            #     #string MY_STRING_1
            #     #language spa
            #     "Mi segunda secuencia 1"
            #     "Mi segunda secuencia 2"
            #
            if Line.find(u'#string ') >= 0 and Line.find(u'#language ') < 0 and \
                SecondLine.find(u'#string ') < 0 and SecondLine.find(u'#language ') >= 0 and \
                ThirdLine.find(u'#string ') < 0 and ThirdLine.find(u'#language ') < 0:
                if Line.find('"') > 0 or SecondLine.find('"') > 0:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                    Message=ST.ERR_UNIPARSE_DBLQUOTE_UNMATCHED,
                                    ExtraData=File.Path)

                Name = Line[Line.find(u'#string ') + len(u'#string ') : ].strip(' ')
                Language = SecondLine[SecondLine.find(u'#language ') + len(u'#language ') : ].strip(' ')
                for IndexJ in range(IndexI + 2, len(Lines)):
                    if Lines[IndexJ].find(u'#string ') < 0 and Lines[IndexJ].find(u'#language ') < 0 and \
                        Lines[IndexJ].strip().startswith(u'"') and Lines[IndexJ].strip().endswith(u'"'):
                        # A space right before the closing quote joins this line
                        # to the next one instead of ending it with CR LF.
                        if Lines[IndexJ][-2] == ' ':
                            CombineToken = True
                        if CombineToken:
                            if Lines[IndexJ].strip()[1:-1].strip():
                                Value = Value + Lines[IndexJ].strip()[1:-1].rstrip() + ' '
                            else:
                                Value = Value + Lines[IndexJ].strip()[1:-1]
                            CombineToken = False
                        else:
                            Value = Value + Lines[IndexJ].strip()[1:-1] + '\r\n'
                    else:
                        break
                if Value.endswith('\r\n'):
                    Value = Value[: Value.rfind('\r\n')]
                Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
                self.AddStringToList(Name, Language, Value)
                continue

    #
    # Load multiple .uni files
    #
    def LoadUniFiles(self, FileList):
        for File in FileList or []:
            FilePath = File.Path.strip()
            if FilePath.endswith(('.uni', '.UNI', '.Uni')):
                self.LoadUniFile(File)

    #
    # Add a string to list
    #
    # When Name already exists for Language, Value is appended to the existing
    # entry. Otherwise a new entry is appended (Index == -1) or inserted at Index.
    #
    def AddStringToList(self, Name, Language, Value, Token = 0, Referenced = False, UseOtherLangDef = '', Index = -1):
        if Language not in self.OrderedStringList:
            self.OrderedStringList[Language] = []
            self.OrderedStringDict[Language] = {}

        IsAdded = True
        if Name in self.OrderedStringDict[Language]:
            IsAdded = False
            if Value is not None:
                ItemIndexInList = self.OrderedStringDict[Language][Name]
                Item = self.OrderedStringList[Language][ItemIndexInList]
                Item.UpdateValue(Value)
                Item.UseOtherLangDef = ''

        if IsAdded:
            # The Token argument is overridden by the current list length.
            Token = len(self.OrderedStringList[Language])
            if Index == -1:
                self.OrderedStringList[Language].append(StringDefClassObject(Name,
                                                                             Value,
                                                                             Referenced,
                                                                             Token,
                                                                             UseOtherLangDef))
                self.OrderedStringDict[Language][Name] = Token
                for LangName in self.LanguageDef:
                    #
                    # New STRING token will be added into all language string lists.
                    # so that the unique STRING identifier is reserved for all languages in the package list.
                    #
                    if LangName[0] != Language:
                        if UseOtherLangDef != '':
                            OtherLangDef = UseOtherLangDef
                        else:
                            OtherLangDef = Language
                        self.OrderedStringList[LangName[0]].append(StringDefClassObject(Name,
                                                                                        '',
                                                                                        Referenced,
                                                                                        Token,
                                                                                        OtherLangDef))
                        self.OrderedStringDict[LangName[0]][Name] = len(self.OrderedStringList[LangName[0]]) - 1
            else:
                self.OrderedStringList[Language].insert(Index, StringDefClassObject(Name,
                                                                                    Value,
                                                                                    Referenced,
                                                                                    Token,
                                                                                    UseOtherLangDef))
                self.OrderedStringDict[Language][Name] = Index

    #
    # Set the string as referenced
    #
    def SetStringReferenced(self, Name):
        #
        # String tokens are added in the same order in all language string lists.
        # So, only update the status of string token in first language string list.
        #
        if not self.LanguageDef:
            # No language defined yet, so there is nothing to mark.
            return
        Lang = self.LanguageDef[0][0]
        if Name in self.OrderedStringDict[Lang]:
            ItemIndexInList = self.OrderedStringDict[Lang][Name]
            Item = self.OrderedStringList[Lang][ItemIndexInList]
            Item.Referenced = True

    #
    # Search the string in language definition by Name
    #
    # Returns None when either the language or the name is unknown.
    #
    def FindStringValue(self, Name, Lang):
        NameToIndex = self.OrderedStringDict.get(Lang, {})
        if Name in NameToIndex:
            ItemIndexInList = NameToIndex[Name]
            return self.OrderedStringList[Lang][ItemIndexInList]

        return None

    #
    # Search the string in language definition by Token
    #
    # Returns None when either the language or the token is unknown.
    #
    def FindByToken(self, Token, Lang):
        for Item in self.OrderedStringList.get(Lang, []):
            if Item.Token == Token:
                return Item

        return None

    #
    # Re-order strings and re-generate tokens
    #
    def ReToken(self):
        if len(self.LanguageDef) == 0:
            return None
        #
        # Retoken all language strings according to the status of string stoken in the first language string.
        #
        FirstLangName = self.LanguageDef[0][0]

        # Convert the OrderedStringList to be OrderedStringListByToken in order to faciliate future search by token
        for LangNameItem in self.LanguageDef:
            self.OrderedStringListByToken[LangNameItem[0]] = {}

        #
        # Use small token for all referred string stoken.
        #
        RefToken = 0
        for Index in range (0, len (self.OrderedStringList[FirstLangName])):
            FirstLangItem = self.OrderedStringList[FirstLangName][Index]
            if FirstLangItem.Referenced == True:
                for LangNameItem in self.LanguageDef:
                    LangName = LangNameItem[0]
                    OtherLangItem = self.OrderedStringList[LangName][Index]
                    OtherLangItem.Referenced = True
                    OtherLangItem.Token = RefToken
                    self.OrderedStringListByToken[LangName][OtherLangItem.Token] = OtherLangItem
                RefToken = RefToken + 1

        #
        # Use big token for all unreferred string stoken.
1018 # 1019 UnRefToken = 0 1020 for Index in range (0, len (self.OrderedStringList[FirstLangName])): 1021 FirstLangItem = self.OrderedStringList[FirstLangName][Index] 1022 if FirstLangItem.Referenced == False: 1023 for LangNameItem in self.LanguageDef: 1024 LangName = LangNameItem[0] 1025 OtherLangItem = self.OrderedStringList[LangName][Index] 1026 OtherLangItem.Token = RefToken + UnRefToken 1027 self.OrderedStringListByToken[LangName][OtherLangItem.Token] = OtherLangItem 1028 UnRefToken = UnRefToken + 1 1029 1030 # 1031 # Show the instance itself 1032 # 1033 def ShowMe(self): 1034 print self.LanguageDef 1035 #print self.OrderedStringList 1036 for Item in self.OrderedStringList: 1037 print Item 1038 for Member in self.OrderedStringList[Item]: 1039 print str(Member) 1040 1041 # 1042 # Read content from '!include' UNI file 1043 # 1044 def ReadIncludeUNIfile(self, FilaPath): 1045 if self.File: 1046 pass 1047 1048 if not os.path.exists(FilaPath) or not os.path.isfile(FilaPath): 1049 EdkLogger.Error("Unicode File Parser", 1050 ToolError.FILE_NOT_FOUND, 1051 ExtraData=FilaPath) 1052 try: 1053 FileIn = codecs.open(FilaPath, mode='rb', encoding='utf_8').readlines() 1054 except UnicodeError, Xstr: 1055 FileIn = codecs.open(FilaPath, mode='rb', encoding='utf_16').readlines() 1056 except UnicodeError: 1057 FileIn = codecs.open(FilaPath, mode='rb', encoding='utf_16_le').readlines() 1058 except: 1059 EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=FilaPath) 1060 return FileIn 1061 1062