• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1## @file
2# Collect all defined strings in multiple uni files.
3#
4# Copyright (c) 2014 - 2016, Intel Corporation. All rights reserved.<BR>
5#
6# This program and the accompanying materials are licensed and made available
7# under the terms and conditions of the BSD License which accompanies this
8# distribution. The full text of the license may be found at
9# http://opensource.org/licenses/bsd-license.php
10#
11# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
13#
14"""
15Collect all defined strings in multiple uni files
16"""
17
18##
19# Import Modules
20#
21import os, codecs, re
22import distutils.util
23from Logger import ToolError
24from Logger import Log as EdkLogger
25from Logger import StringTable as ST
26from Library.String import GetLineNo
27from Library.Misc import PathClass
28from Library.Misc import GetCharIndexOutStr
29from Library import DataType as DT
30from Library.ParserValidate import CheckUTF16FileHeader
31
32##
33# Static definitions
34#
# Escape sequences as they are written in .uni text.
UNICODE_WIDE_CHAR = u'\\wide'
UNICODE_NARROW_CHAR = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
UNICODE_UNICODE_CR = '\r'
UNICODE_UNICODE_LF = '\n'

# Single-character markers that replace the escape sequences above
# while a line is being pre-processed.
NARROW_CHAR = u'\uFFF0'
WIDE_CHAR = u'\uFFF1'
NON_BREAKING_CHAR = u'\uFFF2'
CR = u'\u000D'
LF = u'\u000A'
NULL = u'\u0000'
TAB = u'\t'
BACK_SPLASH = u'\\'

# Matches a whole "!include <file>" line; group 1 is the file name.
# Raw string so that \s and \S reach the regex engine untouched instead of
# being parsed as (invalid) string escapes.
gINCLUDE_PATTERN = re.compile(r"^!include[\s]+([\S]+)[\s]*$", re.MULTILINE | re.UNICODE)
51
# Three-letter language code -> two-letter language code.
gLANG_CONV_TABLE = {
    'eng': 'en', 'fra': 'fr',
    'aar': 'aa', 'abk': 'ab', 'ave': 'ae', 'afr': 'af', 'aka': 'ak', 'amh': 'am',
    'arg': 'an', 'ara': 'ar', 'asm': 'as', 'ava': 'av', 'aym': 'ay', 'aze': 'az',
    'bak': 'ba', 'bel': 'be', 'bul': 'bg', 'bih': 'bh', 'bis': 'bi', 'bam': 'bm',
    'ben': 'bn', 'bod': 'bo', 'bre': 'br', 'bos': 'bs', 'cat': 'ca', 'che': 'ce',
    'cha': 'ch', 'cos': 'co', 'cre': 'cr', 'ces': 'cs', 'chu': 'cu', 'chv': 'cv',
    'cym': 'cy', 'dan': 'da', 'deu': 'de', 'div': 'dv', 'dzo': 'dz', 'ewe': 'ee',
    'ell': 'el', 'epo': 'eo', 'spa': 'es', 'est': 'et', 'eus': 'eu', 'fas': 'fa',
    'ful': 'ff', 'fin': 'fi', 'fij': 'fj', 'fao': 'fo', 'fry': 'fy', 'gle': 'ga',
    'gla': 'gd', 'glg': 'gl', 'grn': 'gn', 'guj': 'gu', 'glv': 'gv', 'hau': 'ha',
    'heb': 'he', 'hin': 'hi', 'hmo': 'ho', 'hrv': 'hr', 'hat': 'ht', 'hun': 'hu',
    'hye': 'hy', 'her': 'hz', 'ina': 'ia', 'ind': 'id', 'ile': 'ie', 'ibo': 'ig',
    'iii': 'ii', 'ipk': 'ik', 'ido': 'io', 'isl': 'is', 'ita': 'it', 'iku': 'iu',
    'jpn': 'ja', 'jav': 'jv', 'kat': 'ka', 'kon': 'kg', 'kik': 'ki', 'kua': 'kj',
    'kaz': 'kk', 'kal': 'kl', 'khm': 'km', 'kan': 'kn', 'kor': 'ko', 'kau': 'kr',
    'kas': 'ks', 'kur': 'ku', 'kom': 'kv', 'cor': 'kw', 'kir': 'ky', 'lat': 'la',
    'ltz': 'lb', 'lug': 'lg', 'lim': 'li', 'lin': 'ln', 'lao': 'lo', 'lit': 'lt',
    'lub': 'lu', 'lav': 'lv', 'mlg': 'mg', 'mah': 'mh', 'mri': 'mi', 'mkd': 'mk',
    'mal': 'ml', 'mon': 'mn', 'mar': 'mr', 'msa': 'ms', 'mlt': 'mt', 'mya': 'my',
    'nau': 'na', 'nob': 'nb', 'nde': 'nd', 'nep': 'ne', 'ndo': 'ng', 'nld': 'nl',
    'nno': 'nn', 'nor': 'no', 'nbl': 'nr', 'nav': 'nv', 'nya': 'ny', 'oci': 'oc',
    'oji': 'oj', 'orm': 'om', 'ori': 'or', 'oss': 'os', 'pan': 'pa', 'pli': 'pi',
    'pol': 'pl', 'pus': 'ps', 'por': 'pt', 'que': 'qu', 'roh': 'rm', 'run': 'rn',
    'ron': 'ro', 'rus': 'ru', 'kin': 'rw', 'san': 'sa', 'srd': 'sc', 'snd': 'sd',
    'sme': 'se', 'sag': 'sg', 'sin': 'si', 'slk': 'sk', 'slv': 'sl', 'smo': 'sm',
    'sna': 'sn', 'som': 'so', 'sqi': 'sq', 'srp': 'sr', 'ssw': 'ss', 'sot': 'st',
    'sun': 'su', 'swe': 'sv', 'swa': 'sw', 'tam': 'ta', 'tel': 'te', 'tgk': 'tg',
    'tha': 'th', 'tir': 'ti', 'tuk': 'tk', 'tgl': 'tl', 'tsn': 'tn', 'ton': 'to',
    'tur': 'tr', 'tso': 'ts', 'tat': 'tt', 'twi': 'tw', 'tah': 'ty', 'uig': 'ug',
    'ukr': 'uk', 'urd': 'ur', 'uzb': 'uz', 'ven': 've', 'vie': 'vi', 'vol': 'vo',
    'wln': 'wa', 'wol': 'wo', 'xho': 'xh', 'yid': 'yi', 'yor': 'yo', 'zha': 'za',
    'zho': 'zh', 'zul': 'zu',
}
84
## Convert a python unicode string to a normal string
#
# Returns the repr() text of the string without its quotes and without the
# optional one-letter literal prefix (e.g. the 'u' of u'...').
# UniToStr(u'I am a string') is 'I am a string'
#
# @param Uni:  The python unicode string
#
# @retval:     The formatted normal string
#
def UniToStr(Uni):
    Text = repr(Uni)
    # repr() only carries a prefix letter for some string types, so drop it
    # when present instead of blindly assuming two leading characters.
    if Text[0] not in ('"', "'"):
        Text = Text[1:]
    return Text[1:-1]
96
## Convert a unicode string to a Hex list
#
# Each 16-bit unit is emitted low byte first, then high byte.
# UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
# A character above U+FFFF is emitted as its two UTF-16 surrogate units.
#
# @param Uni:    The python unicode string
#
# @retval List:  The formatted hex list
#
def UniToHexList(Uni):
    List = []
    for Item in Uni:
        Code = ord(Item)
        if Code > 0xFFFF:
            # Does not fit in one 16-bit unit: split into a surrogate pair.
            Code -= 0x10000
            Units = (0xD800 | (Code >> 10), 0xDC00 | (Code & 0x3FF))
        else:
            Units = (Code,)
        for Unit in Units:
            List.append('0x%02X' % (Unit & 0xFF))
            List.append('0x%02X' % (Unit >> 8))
    return List
113
## Convert special unicode characters
#
# Replace the copyright, registered and trademark signs with the plain
# text forms (c), (r) and (tm).
#
# @param Uni:    The python unicode string
#
# @retval NewUni:  The converted unicode string
#
def ConvertSpecialUnicodes(Uni):
    Substitutions = (
        (u'\u00A9', '(c)'),
        (u'\u00AE', '(r)'),
        (u'\u2122', '(tm)'),
    )
    Converted = Uni
    for Symbol, PlainText in Substitutions:
        Converted = Converted.replace(Symbol, PlainText)
    return Converted
128
## GetLanguageCode1766
#
# Return the language code read from the .UNI file unchanged.
#
# No validation or RFC 4646 -> RFC 1766 conversion is performed: the function
# has always returned its argument immediately, and the conversion code that
# used to follow that return statement could never run, so it was removed.
#
# @param LangName:   Language code read from .UNI file
# @param File:       Unused; kept so existing callers keep working
#
# @retval LangName:  The language code exactly as passed in
#
def GetLanguageCode1766(LangName, File=None):
    return LangName
175
## GetLanguageCode
#
# Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
# RFC 1766 language codes supported in compatibility mode
# RFC 4646 language codes supported in native mode
#
# In compatibility mode only three-letter alphabetic codes are accepted; a
# code found in gLANG_CONV_TABLE is returned as its two-letter equivalent,
# any other three-letter code is returned unchanged.
# In native mode a code is accepted (and returned unchanged) when it starts
# with 'x-'/'X-', is two letters, is three letters not present in
# gLANG_CONV_TABLE, or starts with a two/three letter tag followed by '-'.
# Everything else is reported through EdkLogger.Error.
#
# @param LangName:          Language codes read from .UNI file
# @param IsCompatibleMode:  True to apply the RFC 1766 rules
# @param File:              File reference passed on to the error report
#
# @retval LangName:  Valid language code in RFC 4646 format or None
#
def GetLanguageCode(LangName, IsCompatibleMode, File):
    length = len(LangName)
    if IsCompatibleMode:
        if length == 3 and LangName.isalpha():
            TempLangName = gLANG_CONV_TABLE.get(LangName.lower())
            if TempLangName is not None:
                return TempLangName
            return LangName
        else:
            EdkLogger.Error("Unicode File Parser",
                             ToolError.FORMAT_INVALID,
                             "Invalid RFC 1766 language code : %s" % LangName,
                             File)
    # Slicing (rather than indexing [0] and [1]) keeps empty and
    # one-character names from raising IndexError; they fall through to the
    # format error at the bottom instead.
    if LangName[:2] in ('x-', 'X-'):
        return LangName
    if length == 2:
        if LangName.isalpha():
            return LangName
    elif length == 3:
        if LangName.isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None:
            return LangName
    elif length == 5:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
    elif length >= 6:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
        # NOTE(review): the table is looked up with the whole name, not with
        # LangName[0:3]; kept as-is because changing it would reject codes
        # that are accepted today - confirm the intent.
        if LangName[0:3].isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None and LangName[3] == '-':
            return LangName

    EdkLogger.Error("Unicode File Parser",
                             ToolError.FORMAT_INVALID,
                             "Invalid RFC 4646 language code : %s" % LangName,
                             File)
220
## FormatUniEntry
#
# Format one string token and its per-language values as .uni file text.
#
# The token name is padded to a column of at least 40 characters and always
# followed by at least one space. Each language produces a
# '#language <code> "<text>"' part; a multi-line value is written as one
# quoted string per non-blank line, every line but the last ending in \n,
# with continuation lines aligned under the first quote.
# Entries with an empty or blank value, or whose language equals
# DT.TAB_LANGUAGE_EN_X, are skipped. '' and 'eng' are written as
# DT.TAB_LANGUAGE_EN_US; other codes are normalised through GetLanguageCode.
#
# @param StrTokenName    StrTokenName.
# @param TokenValueList  A list of (Lang, Value) pairs to be processed.
# @param ContainerFile   ContainerFile, passed on to GetLanguageCode.
#
# @return formatted entry, or '' when no language produced any output
def FormatUniEntry(StrTokenName, TokenValueList, ContainerFile):
    # '+ 1' guarantees a separating space even when the name is exactly
    # 40 characters long (previously the name ran straight into '#language').
    PreFormatLength = max(40, len(StrTokenName) + 1)
    ContinuationIndent = ' ' * (PreFormatLength + len('#language en-US '))
    Entries = []
    for (Lang, Value) in TokenValueList:
        if not Value or Lang == DT.TAB_LANGUAGE_EN_X:
            continue
        if Lang == '':
            Lang = DT.TAB_LANGUAGE_EN_US
        if Lang == 'eng':
            Lang = DT.TAB_LANGUAGE_EN_US
        elif len(Lang.split('-')[0]) == 3:
            Lang = GetLanguageCode(Lang.split('-')[0], True, ContainerFile)
        else:
            Lang = GetLanguageCode(Lang, False, ContainerFile)
        TextLines = [SubValue.strip() for SubValue in Value.split('\n') if SubValue.strip()]
        if not TextLines:
            # A value made only of whitespace would otherwise be written as
            # a single unbalanced quote character.
            continue
        Quoted = ['\"%s\\n\"' % Text for Text in TextLines[:-1]]
        Quoted.append('\"%s\"' % TextLines[-1])
        SubValueContent = ('\r\n' + ContinuationIndent).join(Quoted) + '\r\n'
        Entries.append(' ' * PreFormatLength + '#language %-5s ' % Lang + SubValueContent)
    SubContent = ''.join(Entries)
    if SubContent:
        # Replace the first entry's leading padding with the token name.
        SubContent = StrTokenName + ' ' * (PreFormatLength - len(StrTokenName)) + SubContent[PreFormatLength:]
    return SubContent
258
259
## StringDefClassObject
#
# A structure for one string definition: its name, value, token number and
# the hex byte lists (see UniToHexList) derived from name and value.
#
class StringDefClassObject(object):
    ## Constructor
    #
    # @param Name:             String name; None leaves the name empty
    # @param Value:            String value; None leaves the value empty
    # @param Referenced:       Stored as given in self.Referenced
    # @param Token:            Token number; None leaves it at 0
    # @param UseOtherLangDef:  Stored as given in self.UseOtherLangDef
    #
    def __init__(self, Name = None, Value = None, Referenced = False, Token = None, UseOtherLangDef = ''):
        self.StringName = ''
        self.StringNameByteList = []
        self.StringValue = ''
        # Always a list, like StringNameByteList (it used to start as '').
        self.StringValueByteList = []
        self.Token = 0
        self.Referenced = Referenced
        self.UseOtherLangDef = UseOtherLangDef
        # Number of entries in StringValueByteList.
        self.Length = 0

        if Name is not None:
            self.StringName = Name
            self.StringNameByteList = UniToHexList(Name)
        if Value is not None:
            self.StringValue = Value
            self.StringValueByteList = UniToHexList(self.StringValue)
            self.Length = len(self.StringValueByteList)
        if Token is not None:
            self.Token = Token

    ## Debug text: repr of name, token, referenced flag, value and
    #  UseOtherLangDef separated by single spaces.
    #
    def __str__(self):
        return ' '.join([repr(self.StringName),
                         repr(self.Token),
                         repr(self.Referenced),
                         repr(self.StringValue),
                         repr(self.UseOtherLangDef)])

    ## Append Value to the string value
    #
    # A non-empty existing value and the new text are joined with '\r\n';
    # the byte list and Length are recomputed. Value None is a no-op.
    #
    # @param Value:  Text to append
    #
    def UpdateValue(self, Value = None):
        if Value is None:
            return
        if self.StringValue:
            self.StringValue = self.StringValue + '\r\n' + Value
        else:
            self.StringValue = Value
        self.StringValueByteList = UniToHexList(self.StringValue)
        self.Length = len(self.StringValueByteList)
300
301## UniFileClassObject
302#
303# A structure for .uni file definition
304#
305class UniFileClassObject(object):
306    def __init__(self, FileList = None, IsCompatibleMode = False, IncludePathList = None):
307        self.FileList = FileList
308        self.File = None
309        self.IncFileList = FileList
310        self.UniFileHeader = ''
311        self.Token = 2
312        self.LanguageDef = []                   #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
313        self.OrderedStringList = {}             #{ u'LanguageIdentifier' : [StringDefClassObject]  }
314        self.OrderedStringDict = {}             #{ u'LanguageIdentifier' : {StringName:(IndexInList)}  }
315        self.OrderedStringListByToken = {}      #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
316        self.IsCompatibleMode = IsCompatibleMode
317        if not IncludePathList:
318            self.IncludePathList = []
319        else:
320            self.IncludePathList = IncludePathList
321        if len(self.FileList) > 0:
322            self.LoadUniFiles(FileList)
323
324    #
325    # Get Language definition
326    #
327    def GetLangDef(self, File, Line):
328        Lang = distutils.util.split_quoted((Line.split(u"//")[0]))
329        if len(Lang) != 3:
330            try:
331                FileIn = codecs.open(File.Path, mode='rb', encoding='utf_8').readlines()
332            except UnicodeError, Xstr:
333                FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16').readlines()
334            except UnicodeError, Xstr:
335                FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16_le').readlines()
336            except:
337                EdkLogger.Error("Unicode File Parser",
338                                ToolError.FILE_OPEN_FAILURE,
339                                "File read failure: %s" % str(Xstr),
340                                ExtraData=File)
341            LineNo = GetLineNo(FileIn, Line, False)
342            EdkLogger.Error("Unicode File Parser",
343                             ToolError.PARSER_ERROR,
344                             "Wrong language definition",
345                             ExtraData="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line,
346                             File = File, Line = LineNo)
347        else:
348            LangName = GetLanguageCode(Lang[1], self.IsCompatibleMode, self.File)
349            LangPrintName = Lang[2]
350
351        IsLangInDef = False
352        for Item in self.LanguageDef:
353            if Item[0] == LangName:
354                IsLangInDef = True
355                break
356
357        if not IsLangInDef:
358            self.LanguageDef.append([LangName, LangPrintName])
359
360        #
361        # Add language string
362        #
363        self.AddStringToList(u'$LANGUAGE_NAME', LangName, LangName, 0, True, Index=0)
364        self.AddStringToList(u'$PRINTABLE_LANGUAGE_NAME', LangName, LangPrintName, 1, True, Index=1)
365
366        if not IsLangInDef:
367            #
368            # The found STRING tokens will be added into new language string list
369            # so that the unique STRING identifier is reserved for all languages in the package list.
370            #
371            FirstLangName = self.LanguageDef[0][0]
372            if LangName != FirstLangName:
373                for Index in range (2, len (self.OrderedStringList[FirstLangName])):
374                    Item = self.OrderedStringList[FirstLangName][Index]
375                    if Item.UseOtherLangDef != '':
376                        OtherLang = Item.UseOtherLangDef
377                    else:
378                        OtherLang = FirstLangName
379                    self.OrderedStringList[LangName].append (StringDefClassObject(Item.StringName,
380                                                                                  '',
381                                                                                  Item.Referenced,
382                                                                                  Item.Token,
383                                                                                  OtherLang))
384                    self.OrderedStringDict[LangName][Item.StringName] = len(self.OrderedStringList[LangName]) - 1
385        return True
386
387    #
388    # Get String name and value
389    #
390    def GetStringObject(self, Item):
391        Language = ''
392        Value = ''
393
394        Name = Item.split()[1]
395        # Check the string name is the upper character
396        if Name != '':
397            MatchString = re.match('[A-Z0-9_]+', Name, re.UNICODE)
398            if MatchString == None or MatchString.end(0) != len(Name):
399                EdkLogger.Error("Unicode File Parser",
400                             ToolError.FORMAT_INVALID,
401                             'The string token name %s in UNI file %s must be upper case character.' %(Name, self.File))
402        LanguageList = Item.split(u'#language ')
403        for IndexI in range(len(LanguageList)):
404            if IndexI == 0:
405                continue
406            else:
407                Language = LanguageList[IndexI].split()[0]
408                #.replace(u'\r\n', u'')
409                Value = \
410                LanguageList[IndexI][LanguageList[IndexI].find(u'\"') + len(u'\"') : LanguageList[IndexI].rfind(u'\"')]
411                Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
412                self.AddStringToList(Name, Language, Value)
413
414    #
415    # Get include file list and load them
416    #
417    def GetIncludeFile(self, Item, Dir = None):
418        if Dir:
419            pass
420        FileName = Item[Item.find(u'!include ') + len(u'!include ') :Item.find(u' ', len(u'!include '))][1:-1]
421        self.LoadUniFile(FileName)
422
423    #
424    # Pre-process before parse .uni file
425    #
426    def PreProcess(self, File, IsIncludeFile=False):
427        if not os.path.exists(File.Path) or not os.path.isfile(File.Path):
428            EdkLogger.Error("Unicode File Parser",
429                             ToolError.FILE_NOT_FOUND,
430                             ExtraData=File.Path)
431
432        #
433        # Check file header of the Uni file
434        #
435#         if not CheckUTF16FileHeader(File.Path):
436#             EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
437#                             ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)
438
439        try:
440            FileIn = codecs.open(File.Path, mode='rb', encoding='utf_8').readlines()
441        except UnicodeError, Xstr:
442            FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16').readlines()
443        except UnicodeError:
444            FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16_le').readlines()
445        except:
446            EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=File.Path)
447
448
449        #
450        # get the file header
451        #
452        Lines = []
453        HeaderStart = False
454        HeaderEnd = False
455        if not self.UniFileHeader:
456            FirstGenHeader = True
457        else:
458            FirstGenHeader = False
459        for Line in FileIn:
460            Line = Line.strip()
461            if Line == u'':
462                continue
463            if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and (Line.find(DT.TAB_HEADER_COMMENT) > -1) \
464                and not HeaderEnd and not HeaderStart:
465                HeaderStart = True
466            if not Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd:
467                HeaderEnd = True
468            if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd and FirstGenHeader:
469                self.UniFileHeader += Line + '\r\n'
470                continue
471
472        #
473        # Use unique identifier
474        #
475        FindFlag = -1
476        LineCount = 0
477        MultiLineFeedExits = False
478        #
479        # 0: initial value
480        # 1: signle String entry exist
481        # 2: line feed exist under the some signle String entry
482        #
483        StringEntryExistsFlag = 0
484        for Line in FileIn:
485            Line = FileIn[LineCount]
486            LineCount += 1
487            Line = Line.strip()
488            #
489            # Ignore comment line and empty line
490            #
491            if Line == u'' or Line.startswith(u'//'):
492                #
493                # Change the single line String entry flag status
494                #
495                if StringEntryExistsFlag == 1:
496                    StringEntryExistsFlag = 2
497                #
498                # If the '#string' line and the '#language' line are not in the same line,
499                # there should be only one line feed character betwwen them
500                #
501                if MultiLineFeedExits:
502                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
503                continue
504
505            MultiLineFeedExits = False
506            #
507            # Process comment embeded in string define lines
508            #
509            FindFlag = Line.find(u'//')
510            if FindFlag != -1 and Line.find(u'//') < Line.find(u'"'):
511                Line = Line.replace(Line[FindFlag:], u' ')
512                if FileIn[LineCount].strip().startswith('#language'):
513                    Line = Line + FileIn[LineCount]
514                    FileIn[LineCount-1] = Line
515                    FileIn[LineCount] = '\r\n'
516                    LineCount -= 1
517                    for Index in xrange (LineCount + 1, len (FileIn) - 1):
518                        if (Index == len(FileIn) -1):
519                            FileIn[Index] = '\r\n'
520                        else:
521                            FileIn[Index] = FileIn[Index + 1]
522                    continue
523            CommIndex = GetCharIndexOutStr(u'/', Line)
524            if CommIndex > -1:
525                if (len(Line) - 1) > CommIndex:
526                    if Line[CommIndex+1] == u'/':
527                        Line = Line[:CommIndex].strip()
528                    else:
529                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
530                else:
531                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
532
533            Line = Line.replace(UNICODE_WIDE_CHAR, WIDE_CHAR)
534            Line = Line.replace(UNICODE_NARROW_CHAR, NARROW_CHAR)
535            Line = Line.replace(UNICODE_NON_BREAKING_CHAR, NON_BREAKING_CHAR)
536
537            Line = Line.replace(u'\\\\', u'\u0006')
538            Line = Line.replace(u'\\r\\n', CR + LF)
539            Line = Line.replace(u'\\n', CR + LF)
540            Line = Line.replace(u'\\r', CR)
541            Line = Line.replace(u'\\t', u'\t')
542            Line = Line.replace(u'''\"''', u'''"''')
543            Line = Line.replace(u'\t', u' ')
544            Line = Line.replace(u'\u0006', u'\\')
545
546            # IncList = gINCLUDE_PATTERN.findall(Line)
547            IncList = []
548            if len(IncList) == 1:
549                for Dir in [File.Dir] + self.IncludePathList:
550                    IncFile = PathClass(str(IncList[0]), Dir)
551                    self.IncFileList.append(IncFile)
552                    if os.path.isfile(IncFile.Path):
553                        Lines.extend(self.PreProcess(IncFile, True))
554                        break
555                else:
556                    EdkLogger.Error("Unicode File Parser",
557                                    ToolError.FILE_NOT_FOUND,
558                                    Message="Cannot find include file",
559                                    ExtraData=str(IncList[0]))
560                continue
561
562            #
563            # Between Name entry and Language entry can not contain line feed
564            #
565            if Line.startswith(u'#string') and Line.find(u'#language') == -1:
566                MultiLineFeedExits = True
567
568            if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.find(u'"') < 0:
569                MultiLineFeedExits = True
570
571            #
572            # Between Language entry and String entry can not contain line feed
573            #
574            if Line.startswith(u'#language') and len(Line.split()) == 2:
575                MultiLineFeedExits = True
576
577            #
578            # Between two String entry, can not contain line feed
579            #
580            if Line.startswith(u'"'):
581                if StringEntryExistsFlag == 2:
582                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
583                                    Message=ST.ERR_UNIPARSE_LINEFEED_UP_EXIST % Line, ExtraData=File.Path)
584
585                StringEntryExistsFlag = 1
586                if not Line.endswith('"'):
587                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
588                                    ExtraData='''The line %s misses '"' at the end of it in file %s'''
589                                              % (LineCount, File.Path))
590            elif Line.startswith(u'#language'):
591                if StringEntryExistsFlag == 2:
592                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
593                                    Message=ST.ERR_UNI_MISS_STRING_ENTRY % Line, ExtraData=File.Path)
594                StringEntryExistsFlag = 0
595            else:
596                StringEntryExistsFlag = 0
597
598            Lines.append(Line)
599
600        #
601        # Convert string def format as below
602        #
603        #     #string MY_STRING_1
604        #     #language eng
605        #     "My first English string line 1"
606        #     "My first English string line 2"
607        #     #string MY_STRING_1
608        #     #language spa
609        #     "Mi segunda secuencia 1"
610        #     "Mi segunda secuencia 2"
611        #
612
613        if not IsIncludeFile and not Lines:
614            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
615                Message=ST.ERR_UNIPARSE_NO_SECTION_EXIST, \
616                ExtraData=File.Path)
617
618        NewLines = []
619        StrName = u''
620        ExistStrNameList = []
621        for Line in Lines:
622            if StrName and not StrName.split()[1].startswith(DT.TAB_STR_TOKENCNAME + DT.TAB_UNDERLINE_SPLIT):
623                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
624                                Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
625                                ExtraData=File.Path)
626
627            if StrName and len(StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)) == 4:
628                StringTokenList = StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)
629                if (StringTokenList[3].upper() in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP] and \
630                    StringTokenList[3] not in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP]) or \
631                    (StringTokenList[2].upper() == DT.TAB_STR_TOKENERR and StringTokenList[2] != DT.TAB_STR_TOKENERR):
632                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
633                                Message=ST.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR % StrName.split()[1], \
634                                ExtraData=File.Path)
635
636            if Line.count(u'#language') > 1:
637                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
638                                Message=ST.ERR_UNIPARSE_SEP_LANGENTRY_LINE % Line, \
639                                ExtraData=File.Path)
640
641            if Line.startswith(u'//'):
642                continue
643            elif Line.startswith(u'#langdef'):
644                if len(Line.split()) == 2:
645                    NewLines.append(Line)
646                    continue
647                elif len(Line.split()) > 2 and Line.find(u'"') > 0:
648                    NewLines.append(Line[:Line.find(u'"')].strip())
649                    NewLines.append(Line[Line.find(u'"'):])
650                else:
651                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
652            elif Line.startswith(u'#string'):
653                if len(Line.split()) == 2:
654                    StrName = Line
655                    if StrName:
656                        if StrName.split()[1] not in ExistStrNameList:
657                            ExistStrNameList.append(StrName.split()[1].strip())
658                        elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
659                                                    DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
660                                                    DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
661                                                    DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
662                            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
663                                            Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
664                                            ExtraData=File.Path)
665                    continue
666                elif len(Line.split()) == 4 and Line.find(u'#language') > 0:
667                    if Line[Line.find(u'#language')-1] != ' ' or \
668                       Line[Line.find(u'#language')+len(u'#language')] != u' ':
669                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
670
671                    if Line.find(u'"') > 0:
672                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
673
674                    StrName = Line.split()[0] + u' ' + Line.split()[1]
675                    if StrName:
676                        if StrName.split()[1] not in ExistStrNameList:
677                            ExistStrNameList.append(StrName.split()[1].strip())
678                        elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
679                                                    DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
680                                                    DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
681                                                    DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
682                            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
683                                            Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
684                                            ExtraData=File.Path)
685                    if IsIncludeFile:
686                        if StrName not in NewLines:
687                            NewLines.append((Line[:Line.find(u'#language')]).strip())
688                    else:
689                        NewLines.append((Line[:Line.find(u'#language')]).strip())
690                    NewLines.append((Line[Line.find(u'#language'):]).strip())
691                elif len(Line.split()) > 4 and Line.find(u'#language') > 0 and Line.find(u'"') > 0:
692                    if Line[Line.find(u'#language')-1] != u' ' or \
693                       Line[Line.find(u'#language')+len(u'#language')] != u' ':
694                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
695
696                    if Line[Line.find(u'"')-1] != u' ':
697                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
698
699                    StrName = Line.split()[0] + u' ' + Line.split()[1]
700                    if StrName:
701                        if StrName.split()[1] not in ExistStrNameList:
702                            ExistStrNameList.append(StrName.split()[1].strip())
703                        elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
704                                                    DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
705                                                    DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
706                                                    DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
707                            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
708                                            Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
709                                            ExtraData=File.Path)
710                    if IsIncludeFile:
711                        if StrName not in NewLines:
712                            NewLines.append((Line[:Line.find(u'#language')]).strip())
713                    else:
714                        NewLines.append((Line[:Line.find(u'#language')]).strip())
715                    NewLines.append((Line[Line.find(u'#language'):Line.find(u'"')]).strip())
716                    NewLines.append((Line[Line.find(u'"'):]).strip())
717                else:
718                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
719            elif Line.startswith(u'#language'):
720                if len(Line.split()) == 2:
721                    if IsIncludeFile:
722                        if StrName not in NewLines:
723                            NewLines.append(StrName)
724                    else:
725                        NewLines.append(StrName)
726                    NewLines.append(Line)
727                elif len(Line.split()) > 2 and Line.find(u'"') > 0:
728                    if IsIncludeFile:
729                        if StrName not in NewLines:
730                            NewLines.append(StrName)
731                    else:
732                        NewLines.append(StrName)
733                    NewLines.append((Line[:Line.find(u'"')]).strip())
734                    NewLines.append((Line[Line.find(u'"'):]).strip())
735                else:
736                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
737            elif Line.startswith(u'"'):
738                if u'#string' in Line  or u'#language' in Line:
739                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
740                NewLines.append(Line)
741            else:
742                print Line
743                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
744
745        if StrName and not StrName.split()[1].startswith(u'STR_'):
746            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
747                                Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
748                                ExtraData=File.Path)
749
750        if StrName and not NewLines:
751            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
752                            Message=ST.ERR_UNI_MISS_LANGENTRY % StrName, \
753                            ExtraData=File.Path)
754
755        #
        # Check the order of the Abstract, Description, BinaryAbstract and BinaryDescription entries;
        # the expected order is Abstract, Description, BinaryAbstract, BinaryDescription.
758        AbstractPosition = -1
759        DescriptionPosition = -1
760        BinaryAbstractPosition = -1
761        BinaryDescriptionPosition = -1
762        for StrName in ExistStrNameList:
763            if DT.TAB_HEADER_ABSTRACT.upper() in StrName:
764                if 'BINARY' in StrName:
765                    BinaryAbstractPosition = ExistStrNameList.index(StrName)
766                else:
767                    AbstractPosition = ExistStrNameList.index(StrName)
768            if DT.TAB_HEADER_DESCRIPTION.upper() in StrName:
769                if 'BINARY' in StrName:
770                    BinaryDescriptionPosition = ExistStrNameList.index(StrName)
771                else:
772                    DescriptionPosition = ExistStrNameList.index(StrName)
773
774        OrderList = sorted([AbstractPosition, DescriptionPosition])
775        BinaryOrderList = sorted([BinaryAbstractPosition, BinaryDescriptionPosition])
776        Min = OrderList[0]
777        Max = OrderList[1]
778        BinaryMin = BinaryOrderList[0]
779        BinaryMax = BinaryOrderList[1]
780        if BinaryDescriptionPosition > -1:
781            if not(BinaryDescriptionPosition == BinaryMax and BinaryAbstractPosition == BinaryMin and \
782                   BinaryMax > Max):
783                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
784                                Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
785                                ExtraData=File.Path)
786        elif BinaryAbstractPosition > -1:
787            if not(BinaryAbstractPosition > Max):
788                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
789                                Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
790                                ExtraData=File.Path)
791
792        if  DescriptionPosition > -1:
793            if not(DescriptionPosition == Max and AbstractPosition == Min and \
794                   DescriptionPosition > AbstractPosition):
795                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
796                                Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
797                                ExtraData=File.Path)
798
799        if not self.UniFileHeader:
800            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
801                            Message = ST.ERR_NO_SOURCE_HEADER,
802                            ExtraData=File.Path)
803
804        return NewLines
805
806    #
807    # Load a .uni file
808    #
809    def LoadUniFile(self, File = None):
810        if File == None:
811            EdkLogger.Error("Unicode File Parser",
812                            ToolError.PARSER_ERROR,
813                            Message='No unicode file is given',
814                            ExtraData=File.Path)
815
816        self.File = File
817
818        #
819        # Process special char in file
820        #
821        Lines = self.PreProcess(File)
822
823        #
824        # Get Unicode Information
825        #
826        for IndexI in range(len(Lines)):
827            Line = Lines[IndexI]
828            if (IndexI + 1) < len(Lines):
829                SecondLine = Lines[IndexI + 1]
830            if (IndexI + 2) < len(Lines):
831                ThirdLine = Lines[IndexI + 2]
832
833            #
834            # Get Language def information
835            #
836            if Line.find(u'#langdef ') >= 0:
837                self.GetLangDef(File, Line + u' ' + SecondLine)
838                continue
839
840            Name = ''
841            Language = ''
842            Value = ''
843            CombineToken = False
844            #
845            # Get string def information format as below
846            #
847            #     #string MY_STRING_1
848            #     #language eng
849            #     "My first English string line 1"
850            #     "My first English string line 2"
851            #     #string MY_STRING_1
852            #     #language spa
853            #     "Mi segunda secuencia 1"
854            #     "Mi segunda secuencia 2"
855            #
856            if Line.find(u'#string ') >= 0 and Line.find(u'#language ') < 0 and \
857                SecondLine.find(u'#string ') < 0 and SecondLine.find(u'#language ') >= 0 and \
858                ThirdLine.find(u'#string ') < 0 and ThirdLine.find(u'#language ') < 0:
859                if Line.find('"') > 0 or SecondLine.find('"') > 0:
860                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
861                                Message=ST.ERR_UNIPARSE_DBLQUOTE_UNMATCHED,
862                                ExtraData=File.Path)
863
864                Name = Line[Line.find(u'#string ') + len(u'#string ') : ].strip(' ')
865                Language = SecondLine[SecondLine.find(u'#language ') + len(u'#language ') : ].strip(' ')
866                for IndexJ in range(IndexI + 2, len(Lines)):
867                    if Lines[IndexJ].find(u'#string ') < 0 and Lines[IndexJ].find(u'#language ') < 0 and \
868                    Lines[IndexJ].strip().startswith(u'"') and Lines[IndexJ].strip().endswith(u'"'):
869                        if Lines[IndexJ][-2] == ' ':
870                            CombineToken = True
871                        if CombineToken:
872                            if Lines[IndexJ].strip()[1:-1].strip():
873                                Value = Value + Lines[IndexJ].strip()[1:-1].rstrip() + ' '
874                            else:
875                                Value = Value + Lines[IndexJ].strip()[1:-1]
876                            CombineToken = False
877                        else:
878                            Value = Value + Lines[IndexJ].strip()[1:-1] + '\r\n'
879                    else:
880                        IndexI = IndexJ
881                        break
882                if Value.endswith('\r\n'):
883                    Value = Value[: Value.rfind('\r\n')]
884                Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
885                self.AddStringToList(Name, Language, Value)
886                continue
887
888    #
889    # Load multiple .uni files
890    #
891    def LoadUniFiles(self, FileList):
892        if len(FileList) > 0:
893            for File in FileList:
894                FilePath = File.Path.strip()
895                if FilePath.endswith('.uni') or FilePath.endswith('.UNI') or FilePath.endswith('.Uni'):
896                    self.LoadUniFile(File)
897
898    #
899    # Add a string to list
900    #
901    def AddStringToList(self, Name, Language, Value, Token = 0, Referenced = False, UseOtherLangDef = '', Index = -1):
902        for LangNameItem in self.LanguageDef:
903            if Language == LangNameItem[0]:
904                break
905
906        if Language not in self.OrderedStringList:
907            self.OrderedStringList[Language] = []
908            self.OrderedStringDict[Language] = {}
909
910        IsAdded = True
911        if Name in self.OrderedStringDict[Language]:
912            IsAdded = False
913            if Value != None:
914                ItemIndexInList = self.OrderedStringDict[Language][Name]
915                Item = self.OrderedStringList[Language][ItemIndexInList]
916                Item.UpdateValue(Value)
917                Item.UseOtherLangDef = ''
918
919        if IsAdded:
920            Token = len(self.OrderedStringList[Language])
921            if Index == -1:
922                self.OrderedStringList[Language].append(StringDefClassObject(Name,
923                                                                             Value,
924                                                                             Referenced,
925                                                                             Token,
926                                                                             UseOtherLangDef))
927                self.OrderedStringDict[Language][Name] = Token
928                for LangName in self.LanguageDef:
929                    #
930                    # New STRING token will be added into all language string lists.
931                    # so that the unique STRING identifier is reserved for all languages in the package list.
932                    #
933                    if LangName[0] != Language:
934                        if UseOtherLangDef != '':
935                            OtherLangDef = UseOtherLangDef
936                        else:
937                            OtherLangDef = Language
938                        self.OrderedStringList[LangName[0]].append(StringDefClassObject(Name,
939                                                                                        '',
940                                                                                        Referenced,
941                                                                                        Token,
942                                                                                        OtherLangDef))
943                        self.OrderedStringDict[LangName[0]][Name] = len(self.OrderedStringList[LangName[0]]) - 1
944            else:
945                self.OrderedStringList[Language].insert(Index, StringDefClassObject(Name,
946                                                                                    Value,
947                                                                                    Referenced,
948                                                                                    Token,
949                                                                                    UseOtherLangDef))
950                self.OrderedStringDict[Language][Name] = Index
951
952    #
953    # Set the string as referenced
954    #
955    def SetStringReferenced(self, Name):
956        #
957        # String stoken are added in the same order in all language string lists.
958        # So, only update the status of string stoken in first language string list.
959        #
960        Lang = self.LanguageDef[0][0]
961        if Name in self.OrderedStringDict[Lang]:
962            ItemIndexInList = self.OrderedStringDict[Lang][Name]
963            Item = self.OrderedStringList[Lang][ItemIndexInList]
964            Item.Referenced = True
965
966    #
967    # Search the string in language definition by Name
968    #
969    def FindStringValue(self, Name, Lang):
970        if Name in self.OrderedStringDict[Lang]:
971            ItemIndexInList = self.OrderedStringDict[Lang][Name]
972            return self.OrderedStringList[Lang][ItemIndexInList]
973
974        return None
975
976    #
977    # Search the string in language definition by Token
978    #
979    def FindByToken(self, Token, Lang):
980        for Item in self.OrderedStringList[Lang]:
981            if Item.Token == Token:
982                return Item
983
984        return None
985
986    #
987    # Re-order strings and re-generate tokens
988    #
989    def ReToken(self):
990        if len(self.LanguageDef) == 0:
991            return None
992        #
993        # Retoken all language strings according to the status of string stoken in the first language string.
994        #
995        FirstLangName = self.LanguageDef[0][0]
996
997        # Convert the OrderedStringList to be OrderedStringListByToken in order to faciliate future search by token
998        for LangNameItem in self.LanguageDef:
999            self.OrderedStringListByToken[LangNameItem[0]] = {}
1000
1001        #
1002        # Use small token for all referred string stoken.
1003        #
1004        RefToken = 0
1005        for Index in range (0, len (self.OrderedStringList[FirstLangName])):
1006            FirstLangItem = self.OrderedStringList[FirstLangName][Index]
1007            if FirstLangItem.Referenced == True:
1008                for LangNameItem in self.LanguageDef:
1009                    LangName = LangNameItem[0]
1010                    OtherLangItem = self.OrderedStringList[LangName][Index]
1011                    OtherLangItem.Referenced = True
1012                    OtherLangItem.Token = RefToken
1013                    self.OrderedStringListByToken[LangName][OtherLangItem.Token] = OtherLangItem
1014                RefToken = RefToken + 1
1015
1016        #
1017        # Use big token for all unreferred string stoken.
1018        #
1019        UnRefToken = 0
1020        for Index in range (0, len (self.OrderedStringList[FirstLangName])):
1021            FirstLangItem = self.OrderedStringList[FirstLangName][Index]
1022            if FirstLangItem.Referenced == False:
1023                for LangNameItem in self.LanguageDef:
1024                    LangName = LangNameItem[0]
1025                    OtherLangItem = self.OrderedStringList[LangName][Index]
1026                    OtherLangItem.Token = RefToken + UnRefToken
1027                    self.OrderedStringListByToken[LangName][OtherLangItem.Token] = OtherLangItem
1028                UnRefToken = UnRefToken + 1
1029
1030    #
1031    # Show the instance itself
1032    #
1033    def ShowMe(self):
1034        print self.LanguageDef
1035        #print self.OrderedStringList
1036        for Item in self.OrderedStringList:
1037            print Item
1038            for Member in self.OrderedStringList[Item]:
1039                print str(Member)
1040
1041    #
1042    # Read content from '!include' UNI file
1043    #
1044    def ReadIncludeUNIfile(self, FilaPath):
1045        if self.File:
1046            pass
1047
1048        if not os.path.exists(FilaPath) or not os.path.isfile(FilaPath):
1049            EdkLogger.Error("Unicode File Parser",
1050                             ToolError.FILE_NOT_FOUND,
1051                             ExtraData=FilaPath)
1052        try:
1053            FileIn = codecs.open(FilaPath, mode='rb', encoding='utf_8').readlines()
1054        except UnicodeError, Xstr:
1055            FileIn = codecs.open(FilaPath, mode='rb', encoding='utf_16').readlines()
1056        except UnicodeError:
1057            FileIn = codecs.open(FilaPath, mode='rb', encoding='utf_16_le').readlines()
1058        except:
1059            EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=FilaPath)
1060        return FileIn
1061
1062