"""Tests to cover the Tools/i18n package"""

import os
import re
import sys
import unittest
from textwrap import dedent
from pathlib import Path

from test.support.script_helper import assert_python_ok
from test.test_tools import skip_if_missing, toolsdir
from test.support.os_helper import temp_cwd, temp_dir


skip_if_missing()

# Directory holding the snapshot inputs (*.py) and expected outputs (*.pot).
DATA_DIR = Path(__file__).resolve().parent / 'i18n_data'

# Patterns used by normalize_POT_file(); compiled once at import time.
_DATE_PATTERN = re.compile(r'"POT-Creation-Date: .+?\\n"')
_CHARSET_PATTERN = re.compile(r'"Content-Type: text/plain; charset=.+?\\n"')
_FILELOC_PATTERN = re.compile(r'#:.+')


def normalize_POT_file(pot):
    """Normalize the POT creation timestamp, charset and
    file locations to make the POT file easier to compare.

    *pot* is the text of a POT file; the normalized text is returned.
    """
    # Normalize the creation date.
    header = r'"POT-Creation-Date: 2000-01-01 00:00+0000\\n"'
    pot = _DATE_PATTERN.sub(header, pot)

    # Normalize charset to UTF-8 (currently there's no way to specify the output charset).
    charset = r'"Content-Type: text/plain; charset=UTF-8\\n"'
    pot = _CHARSET_PATTERN.sub(charset, pot)

    # Normalize file location path separators in case this test is
    # running on Windows (which uses '\').
    def replace(match):
        return match[0].replace(os.sep, "/")

    return _FILELOC_PATTERN.sub(replace, pot)


def _extract_snapshot(input_file):
    """Run pygettext with --docstrings on a copy of *input_file*.

    The input is copied byte-for-byte into a temporary working directory so
    that the copy does not depend on the locale encoding.  Returns the text
    of the generated 'messages.pot'.
    """
    contents = input_file.read_bytes()
    with temp_cwd(None):
        Path(input_file.name).write_bytes(contents)
        assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', input_file.name)
        return Path('messages.pot').read_text(encoding='utf-8')


class Test_pygettext(unittest.TestCase):
    """Tests for the pygettext.py tool"""

    script = Path(toolsdir, 'i18n', 'pygettext.py')

    def get_header(self, data):
        """ utility: return the header of a .po file as a dictionary """
        headers = {}
        for line in data.split('\n'):
            if not line or line.startswith(('#', 'msgid', 'msgstr')):
                continue
            line = line.strip('"')
            key, sep, val = line.partition(':')
            if not sep:
                # Not a "Key: value" header line (e.g. a msgid continuation).
                continue
            headers[key] = val.strip()
        return headers

    def get_msgids(self, data):
        """ utility: return all msgids in .po file as a list of strings """
        msgids = []
        reading_msgid = False
        cur_msgid = []
        for line in data.split('\n'):
            if reading_msgid:
                if line.startswith('"'):
                    # Continuation line of the msgid currently being read.
                    cur_msgid.append(line.strip('"'))
                else:
                    # Any other line terminates the current msgid.
                    msgids.append('\n'.join(cur_msgid))
                    cur_msgid = []
                    reading_msgid = False
                    continue
            if line.startswith('msgid '):
                line = line[len('msgid '):]
                cur_msgid.append(line.strip('"'))
                reading_msgid = True

        # Flush a msgid that runs up to the very end of the data.
        if reading_msgid:
            msgids.append('\n'.join(cur_msgid))

        return msgids

    def assert_POT_equal(self, expected, actual):
        """Check if two POT files are equal"""
        self.maxDiff = None
        self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))

    def extract_from_str(self, module_content, *, args=(), strict=True):
        """Return all msgids extracted from module_content.

        module_content is written to 'test.py' in a temporary working
        directory and pygettext is run on it with the extra command line
        arguments *args*.  If *strict* is true, the run must not write
        anything to stderr.
        """
        filename = 'test.py'
        with temp_cwd(None):
            with open(filename, 'w', encoding='utf-8') as fp:
                fp.write(module_content)
            res = assert_python_ok('-Xutf8', self.script, *args, filename)
            if strict:
                self.assertEqual(res.err, b'')
            with open('messages.pot', encoding='utf-8') as fp:
                data = fp.read()
        return self.get_msgids(data)

    def extract_docstrings_from_str(self, module_content):
        """Return all docstrings extracted from module_content."""
        return self.extract_from_str(module_content, args=('--docstrings',), strict=False)

    def test_header(self):
        """Make sure the required fields are in the header, according to:
           http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
        """
        with temp_cwd(None):
            assert_python_ok('-Xutf8', self.script)
            with open('messages.pot', encoding='utf-8') as fp:
                data = fp.read()
            header = self.get_header(data)

            self.assertIn("Project-Id-Version", header)
            self.assertIn("POT-Creation-Date", header)
            self.assertIn("PO-Revision-Date", header)
            self.assertIn("Last-Translator", header)
            self.assertIn("Language-Team", header)
            self.assertIn("MIME-Version", header)
            self.assertIn("Content-Type", header)
            self.assertIn("Content-Transfer-Encoding", header)
            self.assertIn("Generated-By", header)

            # not clear if these should be required in POT (template) files
            #self.assertIn("Report-Msgid-Bugs-To", header)
            #self.assertIn("Language", header)

            #"Plural-Forms" is optional

    @unittest.skipIf(sys.platform.startswith('aix'),
                     'bpo-29972: broken test on AIX')
    def test_POT_Creation_Date(self):
        """ Match the date format from xgettext for POT-Creation-Date """
        from datetime import datetime
        with temp_cwd(None):
            assert_python_ok('-Xutf8', self.script)
            with open('messages.pot', encoding='utf-8') as fp:
                data = fp.read()
            header = self.get_header(data)
            creationDate = header['POT-Creation-Date']

            # peel off the escaped newline at the end of string
            if creationDate.endswith('\\n'):
                creationDate = creationDate[:-len('\\n')]

            # This will raise if the date format does not exactly match.
            datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')

    def test_funcdocstring(self):
        """Function docstrings with various str prefixes are extracted."""
        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
            with self.subTest(doc):
                msgids = self.extract_docstrings_from_str(dedent('''\
                def foo(bar):
                    %s
                ''' % doc))
                self.assertIn('doc', msgids)

    def test_funcdocstring_bytes(self):
        """A bytes literal in docstring position of a function is not extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar):
            b"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_funcdocstring_fstring(self):
        """An f-string in docstring position of a function is not extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar):
            f"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_classdocstring(self):
        """Class docstrings with various str prefixes are extracted."""
        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
            with self.subTest(doc):
                msgids = self.extract_docstrings_from_str(dedent('''\
                class C:
                    %s
                ''' % doc))
                self.assertIn('doc', msgids)

    def test_classdocstring_bytes(self):
        """A bytes literal in docstring position of a class is not extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        class C:
            b"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_classdocstring_fstring(self):
        """An f-string in docstring position of a class is not extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        class C:
            f"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_moduledocstring(self):
        """Module docstrings with various str prefixes are extracted."""
        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
            with self.subTest(doc):
                msgids = self.extract_docstrings_from_str(dedent('''\
                %s
                ''' % doc))
                self.assertIn('doc', msgids)

    def test_moduledocstring_bytes(self):
        """A bytes literal in docstring position of a module is not extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        b"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_moduledocstring_fstring(self):
        """An f-string in docstring position of a module is not extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_msgid(self):
        """Adjacent string literals passed to _() form a single msgid."""
        msgids = self.extract_docstrings_from_str(
                '''_("""doc""" r'str' u"ing")''')
        self.assertIn('docstring', msgids)

    def test_msgid_bytes(self):
        """A bytes literal passed to _() is not extracted."""
        msgids = self.extract_docstrings_from_str('_(b"""doc""")')
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_msgid_fstring(self):
        """An f-string passed to _() is not extracted."""
        msgids = self.extract_docstrings_from_str('_(f"""doc""")')
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_funcdocstring_annotated_args(self):
        """ Test docstrings for functions with annotated args """
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar: str):
            """doc"""
        '''))
        self.assertIn('doc', msgids)

    def test_funcdocstring_annotated_return(self):
        """ Test docstrings for functions with annotated return type """
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar) -> str:
            """doc"""
        '''))
        self.assertIn('doc', msgids)

    def test_funcdocstring_defvalue_args(self):
        """ Test docstring for functions with default arg values """
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar=()):
            """doc"""
        '''))
        self.assertIn('doc', msgids)

    def test_funcdocstring_multiple_funcs(self):
        """ Test docstring extraction for multiple functions combining
        annotated args, annotated return types and default arg values
        """
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo1(bar: tuple=()) -> str:
            """doc1"""

        def foo2(bar: List[1:2]) -> (lambda x: x):
            """doc2"""

        def foo3(bar: 'func'=lambda x: x) -> {1: 2}:
            """doc3"""
        '''))
        self.assertIn('doc1', msgids)
        self.assertIn('doc2', msgids)
        self.assertIn('doc3', msgids)

    def test_classdocstring_early_colon(self):
        """ Test docstring extraction for a class with colons occurring within
        the parentheses.
        """
        msgids = self.extract_docstrings_from_str(dedent('''\
        class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
            """doc"""
        '''))
        self.assertIn('doc', msgids)

    def test_calls_in_fstrings(self):
        """A _() call inside an f-string replacement field is extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_('foo bar')}"
        '''))
        self.assertIn('foo bar', msgids)

    def test_calls_in_fstrings_raw(self):
        """A _() call inside a raw f-string is extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        rf"{_('foo bar')}"
        '''))
        self.assertIn('foo bar', msgids)

    def test_calls_in_fstrings_nested(self):
        """A _() call inside a nested f-string is extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"""{f'{_("foo bar")}'}"""
        '''))
        self.assertIn('foo bar', msgids)

    def test_calls_in_fstrings_attribute(self):
        """An attribute call obj._() inside an f-string is extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{obj._('foo bar')}"
        '''))
        self.assertIn('foo bar', msgids)

    def test_calls_in_fstrings_with_call_on_call(self):
        """Calling the result of another call inside an f-string is not extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{type(str)('foo bar')}"
        '''))
        self.assertNotIn('foo bar', msgids)

    def test_calls_in_fstrings_with_format(self):
        """A _() call followed by .format() inside an f-string is extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_('foo {bar}').format(bar='baz')}"
        '''))
        self.assertIn('foo {bar}', msgids)

    def test_calls_in_fstrings_with_wrong_input_1(self):
        """An f-string argument to _() inside an f-string is not extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_(f'foo {bar}')}"
        '''))
        self.assertFalse([msgid for msgid in msgids if 'foo {bar}' in msgid])

    def test_calls_in_fstrings_with_wrong_input_2(self):
        """A non-string argument to _() inside an f-string is not extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_(1)}"
        '''))
        self.assertNotIn(1, msgids)
        # msgids only ever contains strings, so also check the string form;
        # the integer check above cannot fail on its own.
        self.assertNotIn('1', msgids)

    def test_calls_in_fstring_with_multiple_args(self):
        """A _() call with several positional args in an f-string is not extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_('foo', 'bar')}"
        '''))
        self.assertNotIn('foo', msgids)
        self.assertNotIn('bar', msgids)

    def test_calls_in_fstring_with_keyword_args(self):
        """A _() call with keyword args in an f-string is not extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_('foo', bar='baz')}"
        '''))
        self.assertNotIn('foo', msgids)
        self.assertNotIn('bar', msgids)
        self.assertNotIn('baz', msgids)

    def test_calls_in_fstring_with_partially_wrong_expression(self):
        """Only the valid _() call of a mixed expression is extracted."""
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_(f'foo') + _('bar')}"
        '''))
        self.assertNotIn('foo', msgids)
        self.assertIn('bar', msgids)

    def test_function_and_class_names(self):
        """Test that function and class names are not mistakenly extracted."""
        msgids = self.extract_from_str(dedent('''\
        def _(x):
            pass

        def _(x="foo"):
            pass

        async def _(x):
            pass

        class _(object):
            pass
        '''))
        self.assertEqual(msgids, [''])

    def test_pygettext_output(self):
        """Test that the pygettext output exactly matches snapshots."""
        for input_file in sorted(DATA_DIR.glob('*.py')):
            output_file = input_file.with_suffix('.pot')
            with self.subTest(input_file=f'i18n_data/{input_file.name}'):
                # The input is copied as bytes (same as when the snapshots
                # are generated) so the result is locale-independent.
                output = _extract_snapshot(input_file)
                expected = output_file.read_text(encoding='utf-8')
                self.assert_POT_equal(expected, output)

    def test_files_list(self):
        """Make sure the directories are inspected for source files
           bpo-31920
        """
        text1 = 'Text to translate1'
        text2 = 'Text to translate2'
        text3 = 'Text to ignore'
        with temp_cwd(None), temp_dir(None) as sdir:
            pymod = Path(sdir, 'pypkg', 'pymod.py')
            pymod.parent.mkdir()
            pymod.write_text(f'_({text1!r})', encoding='utf-8')

            # A directory whose name looks like a Python file.
            pymod2 = Path(sdir, 'pkg.py', 'pymod2.py')
            pymod2.parent.mkdir()
            pymod2.write_text(f'_({text2!r})', encoding='utf-8')

            # Messages below a 'CVS' directory are expected to be left out.
            pymod3 = Path(sdir, 'CVS', 'pymod3.py')
            pymod3.parent.mkdir()
            pymod3.write_text(f'_({text3!r})', encoding='utf-8')

            assert_python_ok('-Xutf8', self.script, sdir)
            data = Path('messages.pot').read_text(encoding='utf-8')
            self.assertIn(f'msgid "{text1}"', data)
            self.assertIn(f'msgid "{text2}"', data)
            self.assertNotIn(text3, data)


def update_POT_snapshots():
    """Regenerate the expected .pot snapshot next to every DATA_DIR/*.py file."""
    for input_file in sorted(DATA_DIR.glob('*.py')):
        output_file = input_file.with_suffix('.pot')
        output = normalize_POT_file(_extract_snapshot(input_file))
        output_file.write_text(output, encoding='utf-8')


if __name__ == '__main__':
    # To regenerate POT files
    if len(sys.argv) > 1 and sys.argv[1] == '--snapshot-update':
        update_POT_snapshots()
        sys.exit(0)
    unittest.main()