#!/usr/bin/python2.4
#
# Copyright (C) 2010 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Creates the list of search engines

The created list is placed in the res/values-<locale> directory. Also updates
res/values/all_search_engines.xml if required with new data.

Usage: get_search_engines.py

Copyright (C) 2010 The Android Open Source Project
"""

import os
import re
import sys
import urllib
from xml.dom import minidom

# Locales to generate search engine lists for
locales = ["cs-CZ", "da-DK", "de-AT", "de-CH", "de-DE", "el-GR", "en-AU",
           "en-GB", "en-IE", "en-NZ", "en-SG", "en-ZA", "es-ES", "fr-BE",
           "fr-FR", "it-IT", "ja-JP", "ko-KR", "nb-NO", "nl-BE", "nl-NL",
           "pl-PL", "pt-PT", "pt-BR", "ru-RU", "sv-SE", "tr-TR", "zh-CN",
           "zh-HK", "zh-MO", "zh-TW"]

# Hard-coded record returned by getEngineData("google") instead of parsing the
# Chrome data. Element 0 is the internal engine name; writeAllEngines() emits
# elements 1..6 as the <item> entries of the engine's <string-array>.
google_data = ["google", "Google", "google.com",
               "http://www.google.com/favicon.ico",
               "http://www.google.com/search?ie={inputEncoding}&source=android-browser&q={searchTerms}",
               "UTF-8",
               "http://www.google.com/complete/search?client=android&q={searchTerms}"]


class SearchEngineManager(object):
    """Manages list of search engines and creates locale specific lists.

    The main method useful for the caller is generateListForLocale(), which
    creates a locale specific donottranslate-search_engines.xml file.
    """

    def __init__(self):
        """Inits SearchEngineManager with relevant search engine data.

        The search engine data is downloaded from the Chrome source repository.
        The process exits with status 2 if the response contains the text
        'repository not found'.
        """
        url = ('http://src.chromium.org/viewvc/chrome/trunk/src/chrome/'
               'browser/search_engines/template_url_prepopulate_data.cc')
        try:
            urlopen = urllib.urlopen  # Python 2
        except AttributeError:
            from urllib.request import urlopen  # Python 3
        data = urlopen(url).read()
        if not isinstance(data, str):
            # Python 3 returns bytes; the parsing below works on text.
            data = data.decode('utf-8', 'replace')
        self.chrome_data = data
        if 'repository not found' in self.chrome_data.lower():
            print('Unable to get Chrome source data for search engine list.\nExiting.')
            sys.exit(2)

        # Resource directory, resolved relative to the script's directory.
        self.resdir = os.path.normpath(os.path.join(sys.path[0], '../res'))

        # Names of every engine referenced by any list written so far.
        self.all_engines = set()

    def getXmlString(self, str):
        """Returns an XML-safe string for the given string.

        Given a string from the search engine data structure, convert it to a
        string suitable to write to our XML data file by stripping away NULLs,
        unwanted quotes, wide-string declarations (L"") and replacing C-style
        unicode characters with XML equivalents.

        Args:
          str: One raw field from a struct initializer. (The parameter name
              shadows the builtin; it is kept for caller compatibility.)

        Returns:
          The cleaned, XML-escaped text; '' when the field is NULL.
        """
        text = str.strip()
        if text.upper() == 'NULL':
            return ''

        if text.startswith('L"'):
            text = text[2:]
        # A leading '@' or '?' is prefixed with a backslash. This check runs
        # before the surrounding quotes are stripped, so it only fires once an
        # L" prefix has been removed (or for unquoted fields).
        if text.startswith('@') or text.startswith('?'):
            text = '\\' + text

        text = text.strip('"')
        # '&' must be escaped first so the entities added below are not
        # escaped a second time.
        text = text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
        text = text.replace('"', '&quot;').replace('\'', '&apos;')
        # C-style \xNNNN escapes become XML numeric character references.
        text = re.sub(r'\\x([a-fA-F0-9]{1,4})', r'&#x\1;', text)

        return text

    def getEngineData(self, name):
        """Returns an array of strings describing the specified search engine.

        The returned strings are in the same order as in the Chrome source data
        file except that the internal name of the search engine is inserted at
        the beginning of the list.

        Args:
          name: Internal engine name as it appears in the Chrome data file.

        Returns:
          A list of XML-safe strings, the module-level google_data list for
          "google", or None (after printing a message) when the engine is not
          found in the Chrome data.
        """
        if name == "google":
            return google_data

        # Find the first occurrence of this search engine name in the form
        # " <name> =" in the chrome data file. The name is escaped so that it
        # is always matched literally.
        search_obj = re.search(r'\s' + re.escape(name) + r'\s*=',
                               self.chrome_data)
        if not search_obj:
            print('Unable to find data for search engine ' + name +
                  '. Please check the chrome data file for format changes.')
            return None

        # Extract the struct declaration between the curly braces.
        start_pos = self.chrome_data.find('{', search_obj.start()) + 1
        end_pos = self.chrome_data.find('};', start_pos)
        engine_data_str = self.chrome_data[start_pos:end_pos]

        # Remove c++ style '//' comments at the ends of each line. Only ' // '
        # (with surrounding spaces) is treated as a comment so that the '//'
        # in URLs is left alone.
        stripped_lines = []
        for line in engine_data_str.split('\n'):
            comment_pos = line.find(' // ')
            if comment_pos != -1:
                line = line[:comment_pos]
            stripped_lines.append(line + '\n')
        engine_data_str = ''.join(stripped_lines)

        # Join multiple line strings into a single string.
        engine_data_str = re.sub(r'"\s+"', '', engine_data_str)
        engine_data_str = re.sub(r'"\s+L"', '', engine_data_str)
        engine_data_str = engine_data_str.replace('"L"', '')

        engine_data = [self.getXmlString(field)
                       for field in engine_data_str.split(',')]

        # If the last element was an empty string (due to an extra comma at the
        # end), ignore it.
        if not engine_data[-1]:
            engine_data.pop()

        engine_data.insert(0, name)

        return engine_data

    def getSearchEnginesForCountry(self, country):
        """Returns the list of search engine names for the given country.

        The data comes from the Chrome data file.

        Args:
          country: Suffix of the 'engines_XX' array to look up.

        Returns:
          A list of engine names, or None (after printing a message) when no
          array for that country exists.
        """
        # The Chrome data file has an array defined with the name 'engines_XX'
        # where XX = country.
        pos = self.chrome_data.find('engines_' + country)
        if pos == -1:
            print('Unable to find search engine data for country ' + country + '.')
            return None

        # Extract the text between the curly braces for this array declaration
        engines_start = self.chrome_data.find('{', pos) + 1
        engines_end = self.chrome_data.find('}', engines_start)
        engines_str = self.chrome_data[engines_start:engines_end]

        # Remove embedded /**/ style comments, white spaces, address-of
        # operators and the trailing comma if any. The comment pattern is
        # non-greedy so that an entry sitting between two comments on the same
        # line is not removed along with them.
        engines_str = re.sub(r'/\*.*?\*/', '', engines_str)
        engines_str = re.sub(r'\s+', '', engines_str)
        engines_str = engines_str.replace('&', '')
        engines_str = engines_str.rstrip(',')

        # Split the array into its elements
        return engines_str.split(',')

    def writeAllEngines(self):
        """Writes all search engines to the all_search_engines.xml file.

        Engines are written in sorted name order so that the generated file is
        stable from run to run. Engines whose data cannot be found, or whose
        record has fewer than seven fields, are skipped.
        """
        all_search_engines_path = os.path.join(self.resdir,
                                               'values/all_search_engines.xml')

        text = []

        for engine_name in sorted(self.all_engines):
            engine_data = self.getEngineData(engine_name)
            if engine_data is None:
                # getEngineData() has already reported the problem.
                continue
            if len(engine_data) < 7:
                print('Incomplete data for search engine ' + engine_name +
                      '; skipping it.')
                continue
            text.append(' <string-array name="%s" translatable="false">\n' % (engine_data[0]))
            for i in range(1, 7):
                text.append(' <item>%s</item>\n' % (engine_data[i]))
            text.append(' </string-array>\n')
            print(engine_data[1] + " added to all_search_engines.xml")

        self.generateXmlFromTemplate(
            os.path.join(sys.path[0], 'all_search_engines.template.xml'),
            all_search_engines_path, text)

    def generateDefaultList(self):
        """Writes the engine list for the 'default' country to res/values."""
        self.writeEngineList(os.path.join(self.resdir, 'values'), "default")

    def generateListForLocale(self, locale):
        """Creates a new locale specific donottranslate-search_engines.xml file.

        The new file contains search engines specific to that country. Engines
        used by the locale are recorded in self.all_engines so that a later
        writeAllEngines() call includes their data.

        Args:
          locale: Locale in the form <language>-<country>, e.g. "en-GB".
        """
        separator_pos = locale.find('-')
        if separator_pos == -1:
            print('Locale must be of format <language>-<country>. For e.g.'
                  ' "es-US" or "en-GB"')
            return

        language = locale[0:separator_pos]
        country = locale[separator_pos + 1:].upper()
        dir_path = os.path.join(self.resdir, 'values-' + language + '-r' + country)

        self.writeEngineList(dir_path, country)

    def writeEngineList(self, dir_path, country):
        """Writes donottranslate-search_engines.xml for one country.

        Args:
          dir_path: Directory that receives the generated file.
          country: Country code used to look up the 'engines_XX' array.
        """
        if os.path.exists(dir_path) and not os.path.isdir(dir_path):
            print("File exists in output directory path " + dir_path +
                  ". Please remove it and try again.")
            return

        engines = self.getSearchEnginesForCountry(country)
        if not engines:
            return
        for engine in engines:
            self.all_engines.add(engine)

        # Create the locale specific search_engines.xml file: a single
        # string-array holding the internal name of each engine available for
        # this country. Engines with no data in the Chrome file are left out.
        text = []
        text.append(' <string-array name="search_engines" translatable="false">\n')
        for engine in engines:
            engine_data = self.getEngineData(engine)
            if engine_data is None:
                # getEngineData() has already reported the problem.
                continue
            text.append(' <item>%s</item>\n' % (engine_data[0]))
        text.append(' </string-array>\n')

        self.generateXmlFromTemplate(
            os.path.join(sys.path[0], 'search_engines.template.xml'),
            os.path.join(dir_path, 'donottranslate-search_engines.xml'),
            text)

    def generateXmlFromTemplate(self, template_path, out_path, text):
        """Inserts text into a template and writes the result to out_path.

        Args:
          template_path: Path of the XML template file.
          out_path: Path of the file to write; its directory is created if
              it does not exist.
          text: List of strings inserted before the template's last line.

        Raises:
          An XML parsing exception from minidom if the combined text is not
          well-formed XML; nothing is written in that case.
        """
        # Load the template file and insert the new contents before the last
        # line.
        template_file = open(template_path)
        try:
            template_text = template_file.read()
        finally:
            template_file.close()
        pos = template_text.rfind('\n', 0, -2) + 1
        contents = template_text[0:pos] + ''.join(text) + template_text[pos:]

        # Make sure what we have created is valid XML :) No need to check for
        # errors as the script will terminate with an exception if the XML was
        # malformed.
        minidom.parseString(contents)

        dir_path = os.path.dirname(out_path)
        # dir_path is '' when out_path has no directory component.
        if dir_path and not os.path.exists(dir_path):
            os.makedirs(dir_path)
            print('Created directory ' + dir_path)
        out_file = open(out_path, 'w')
        try:
            out_file.write(contents)
        finally:
            out_file.close()
        print('Wrote ' + out_path)


if __name__ == "__main__":
    manager = SearchEngineManager()
    manager.generateDefaultList()
    for locale in locales:
        manager.generateListForLocale(locale)
    manager.writeAllEngines()