• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Tests to cover the Tools/i18n package"""
2
3import os
4import re
5import sys
6import unittest
7from textwrap import dedent
8from pathlib import Path
9
10from test.support.script_helper import assert_python_ok
11from test.test_tools import skip_if_missing, toolsdir
12from test.support.os_helper import temp_cwd, temp_dir
13
14
# Module-level guard imported from test.test_tools.
# NOTE(review): presumably skips this whole test module when the Tools/
# directory is not available -- confirm against test.test_tools.
skip_if_missing()

# Directory next to this file holding the '*.py' inputs and their matching
# '.pot' snapshots used by the snapshot test and update_POT_snapshots().
DATA_DIR = Path(__file__).resolve().parent / 'i18n_data'
18
19
def normalize_POT_file(pot):
    """Return *pot* with its environment-dependent parts made constant.

    Three things are rewritten so that two POT files can be compared
    textually: the POT-Creation-Date header, the charset in the
    Content-Type header, and the path separators in '#:' location lines.

    """
    # Pin the creation timestamp to a fixed value.
    pot = re.sub(
        r'"POT-Creation-Date: .+?\\n"',
        r'"POT-Creation-Date: 2000-01-01 00:00+0000\\n"',
        pot,
    )

    # Pin the charset to UTF-8 (the output charset cannot currently be
    # chosen by the caller).
    pot = re.sub(
        r'"Content-Type: text/plain; charset=.+?\\n"',
        r'"Content-Type: text/plain; charset=UTF-8\\n"',
        pot,
    )

    # Use '/' in file locations even when the platform separator differs
    # (e.g. '\' on Windows).  Only '#:' lines are touched.
    return re.sub(r'#:.+', lambda found: found[0].replace(os.sep, "/"), pot)
43
44
class Test_pygettext(unittest.TestCase):
    """Tests for the pygettext.py tool"""

    # Path of the script under test, run in a subprocess by every test.
    script = Path(toolsdir, 'i18n', 'pygettext.py')

    def get_header(self, data):
        """Return the header of a .po file as a dictionary.

        Empty lines, comment lines and the ``msgid``/``msgstr`` keyword
        lines are skipped.  Every other line has its surrounding double
        quotes removed and is split at the first colon into key and value.
        Values are whitespace-stripped but still end with the escaped
        newline (a literal backslash followed by ``n``).

        Raises ValueError if a remaining line contains no colon.
        """
        headers = {}
        for line in data.split('\n'):
            if not line or line.startswith(('#', 'msgid', 'msgstr')):
                continue
            key, val = line.strip('"').split(':', 1)
            headers[key] = val.strip()
        return headers

    def get_msgids(self, data):
        """Return all msgids in a .po file as a list of strings.

        A msgid starts on a ``msgid `` line and continues over the
        following lines that begin with a double quote.  The pieces have
        their surrounding quotes stripped and are joined with a newline.
        """
        msgids = []
        cur_msgid = []
        reading_msgid = False
        for line in data.split('\n'):
            if reading_msgid:
                if line.startswith('"'):
                    cur_msgid.append(line.strip('"'))
                else:
                    # Any other line (normally 'msgstr') ends the msgid.
                    msgids.append('\n'.join(cur_msgid))
                    cur_msgid = []
                    reading_msgid = False
                    continue
            if line.startswith('msgid '):
                cur_msgid.append(line[len('msgid '):].strip('"'))
                reading_msgid = True

        # Flush a msgid that runs up to the very end of the data.
        if reading_msgid:
            msgids.append('\n'.join(cur_msgid))

        return msgids

    def assert_POT_equal(self, expected, actual):
        """Check if two POT files are equal after normalization."""
        # Show the complete diff on failure; POT files are long.
        self.maxDiff = None
        self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))

    def extract_from_str(self, module_content, *, args=(), strict=True):
        """Return all msgids extracted from module_content.

        The content is written to 'test.py' in a temporary working
        directory, pygettext is run on it with the extra command line
        *args*, and the msgids of the resulting 'messages.pot' are
        returned.  When *strict* is true the run must not write anything
        to stderr.
        """
        filename = 'test.py'
        with temp_cwd(None):
            with open(filename, 'w', encoding='utf-8') as fp:
                fp.write(module_content)
            res = assert_python_ok('-Xutf8', self.script, *args, filename)
            if strict:
                self.assertEqual(res.err, b'')
            with open('messages.pot', encoding='utf-8') as fp:
                data = fp.read()
        return self.get_msgids(data)

    def extract_docstrings_from_str(self, module_content):
        """Return all docstrings extracted from module_content.

        Runs pygettext with '--docstrings'; stderr output is not checked.
        """
        return self.extract_from_str(module_content, args=('--docstrings',), strict=False)

    def _generate_header(self):
        """Run pygettext without input files and return the parsed header."""
        with temp_cwd(None):
            assert_python_ok('-Xutf8', self.script)
            with open('messages.pot', encoding='utf-8') as fp:
                data = fp.read()
        return self.get_header(data)

    def test_header(self):
        """Make sure the required fields are in the header, according to:
           http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
        """
        header = self._generate_header()

        self.assertIn("Project-Id-Version", header)
        self.assertIn("POT-Creation-Date", header)
        self.assertIn("PO-Revision-Date", header)
        self.assertIn("Last-Translator", header)
        self.assertIn("Language-Team", header)
        self.assertIn("MIME-Version", header)
        self.assertIn("Content-Type", header)
        self.assertIn("Content-Transfer-Encoding", header)
        self.assertIn("Generated-By", header)

        # Deliberately not checked: it is not clear whether
        # "Report-Msgid-Bugs-To" and "Language" should be required in POT
        # (template) files, and "Plural-Forms" is optional.

    @unittest.skipIf(sys.platform.startswith('aix'),
                     'bpo-29972: broken test on AIX')
    def test_POT_Creation_Date(self):
        """ Match the date format from xgettext for POT-Creation-Date """
        from datetime import datetime
        creationDate = self._generate_header()['POT-Creation-Date']

        # peel off the escaped newline at the end of string
        creationDate = creationDate.removesuffix('\\n')

        # This will raise if the date format does not exactly match.
        datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')

    def test_funcdocstring(self):
        # Plain, raw and u-prefixed function docstrings are extracted.
        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
            with self.subTest(doc):
                msgids = self.extract_docstrings_from_str(dedent('''\
                def foo(bar):
                    %s
                ''' % doc))
                self.assertIn('doc', msgids)

    def test_funcdocstring_bytes(self):
        # A bytes literal in docstring position is not extracted.
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar):
            b"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_funcdocstring_fstring(self):
        # An f-string in docstring position is not extracted.
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar):
            f"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_classdocstring(self):
        # Plain, raw and u-prefixed class docstrings are extracted.
        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
            with self.subTest(doc):
                msgids = self.extract_docstrings_from_str(dedent('''\
                class C:
                    %s
                ''' % doc))
                self.assertIn('doc', msgids)

    def test_classdocstring_bytes(self):
        # A bytes literal in class docstring position is not extracted.
        msgids = self.extract_docstrings_from_str(dedent('''\
        class C:
            b"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_classdocstring_fstring(self):
        # An f-string in class docstring position is not extracted.
        msgids = self.extract_docstrings_from_str(dedent('''\
        class C:
            f"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_moduledocstring(self):
        # Plain, raw and u-prefixed module docstrings are extracted.
        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
            with self.subTest(doc):
                msgids = self.extract_docstrings_from_str(dedent('''\
                %s
                ''' % doc))
                self.assertIn('doc', msgids)

    def test_moduledocstring_bytes(self):
        # A bytes literal in module docstring position is not extracted.
        msgids = self.extract_docstrings_from_str(dedent('''\
        b"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_moduledocstring_fstring(self):
        # An f-string in module docstring position is not extracted.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_msgid(self):
        # Adjacent string literals inside _() form a single msgid.
        msgids = self.extract_docstrings_from_str(
                '''_("""doc""" r'str' u"ing")''')
        self.assertIn('docstring', msgids)

    def test_msgid_bytes(self):
        # A bytes argument to _() is not extracted.
        msgids = self.extract_docstrings_from_str('_(b"""doc""")')
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_msgid_fstring(self):
        # An f-string argument to _() is not extracted.
        msgids = self.extract_docstrings_from_str('_(f"""doc""")')
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_funcdocstring_annotated_args(self):
        """ Test docstrings for functions with annotated args """
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar: str):
            """doc"""
        '''))
        self.assertIn('doc', msgids)

    def test_funcdocstring_annotated_return(self):
        """ Test docstrings for functions with annotated return type """
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar) -> str:
            """doc"""
        '''))
        self.assertIn('doc', msgids)

    def test_funcdocstring_defvalue_args(self):
        """ Test docstring for functions with default arg values """
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar=()):
            """doc"""
        '''))
        self.assertIn('doc', msgids)

    def test_funcdocstring_multiple_funcs(self):
        """ Test docstring extraction for multiple functions combining
        annotated args, annotated return types and default arg values
        """
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo1(bar: tuple=()) -> str:
            """doc1"""

        def foo2(bar: List[1:2]) -> (lambda x: x):
            """doc2"""

        def foo3(bar: 'func'=lambda x: x) -> {1: 2}:
            """doc3"""
        '''))
        self.assertIn('doc1', msgids)
        self.assertIn('doc2', msgids)
        self.assertIn('doc3', msgids)

    def test_classdocstring_early_colon(self):
        """ Test docstring extraction for a class with colons occurring within
        the parentheses.
        """
        msgids = self.extract_docstrings_from_str(dedent('''\
        class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
            """doc"""
        '''))
        self.assertIn('doc', msgids)

    def test_calls_in_fstrings(self):
        # A _() call inside an f-string replacement field is extracted.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_('foo bar')}"
        '''))
        self.assertIn('foo bar', msgids)

    def test_calls_in_fstrings_raw(self):
        # Same as above, for a raw f-string.
        msgids = self.extract_docstrings_from_str(dedent('''\
        rf"{_('foo bar')}"
        '''))
        self.assertIn('foo bar', msgids)

    def test_calls_in_fstrings_nested(self):
        # The call is found inside an f-string nested in another f-string.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"""{f'{_("foo bar")}'}"""
        '''))
        self.assertIn('foo bar', msgids)

    def test_calls_in_fstrings_attribute(self):
        # A call through an attribute (obj._) is extracted as well.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{obj._('foo bar')}"
        '''))
        self.assertIn('foo bar', msgids)

    def test_calls_in_fstrings_with_call_on_call(self):
        # Calling the result of another call is not a gettext call.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{type(str)('foo bar')}"
        '''))
        self.assertNotIn('foo bar', msgids)

    def test_calls_in_fstrings_with_format(self):
        # A method call on the result of _() does not prevent extraction.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_('foo {bar}').format(bar='baz')}"
        '''))
        self.assertIn('foo {bar}', msgids)

    def test_calls_in_fstrings_with_wrong_input_1(self):
        # An f-string argument to _() is not extracted.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_(f'foo {bar}')}"
        '''))
        self.assertFalse([msgid for msgid in msgids if 'foo {bar}' in msgid])

    def test_calls_in_fstrings_with_wrong_input_2(self):
        # A non-string argument to _() is not extracted.  msgids only
        # contains strings, so the check must use the textual form '1';
        # comparing against the integer 1 could never fail.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_(1)}"
        '''))
        self.assertNotIn('1', msgids)

    def test_calls_in_fstring_with_multiple_args(self):
        # A _() call with more than one positional argument is ignored.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_('foo', 'bar')}"
        '''))
        self.assertNotIn('foo', msgids)
        self.assertNotIn('bar', msgids)

    def test_calls_in_fstring_with_keyword_args(self):
        # A _() call with keyword arguments is ignored.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_('foo', bar='baz')}"
        '''))
        self.assertNotIn('foo', msgids)
        self.assertNotIn('bar', msgids)
        self.assertNotIn('baz', msgids)

    def test_calls_in_fstring_with_partially_wrong_expression(self):
        # The invalid call is skipped, the valid one is still extracted.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_(f'foo') + _('bar')}"
        '''))
        self.assertNotIn('foo', msgids)
        self.assertIn('bar', msgids)

    def test_function_and_class_names(self):
        """Test that function and class names are not mistakenly extracted."""
        msgids = self.extract_from_str(dedent('''\
        def _(x):
            pass

        def _(x="foo"):
            pass

        async def _(x):
            pass

        class _(object):
            pass
        '''))
        # Only the empty msgid of the POT header is expected.
        self.assertEqual(msgids, [''])

    def test_pygettext_output(self):
        """Test that the pygettext output exactly matches snapshots."""
        for input_file in DATA_DIR.glob('*.py'):
            output_file = input_file.with_suffix('.pot')
            with self.subTest(input_file=f'i18n_data/{input_file.name}'):
                # Copy the input byte for byte, exactly as
                # update_POT_snapshots() does, so that neither the locale
                # encoding nor newline translation can alter the file
                # pygettext reads.
                contents = input_file.read_bytes()
                with temp_cwd(None):
                    Path(input_file.name).write_bytes(contents)
                    assert_python_ok('-Xutf8', self.script, '--docstrings', input_file.name)
                    output = Path('messages.pot').read_text(encoding='utf-8')

                expected = output_file.read_text(encoding='utf-8')
                self.assert_POT_equal(expected, output)

    def test_files_list(self):
        """Make sure the directories are inspected for source files
           bpo-31920
        """
        text1 = 'Text to translate1'
        text2 = 'Text to translate2'
        text3 = 'Text to ignore'
        with temp_cwd(None), temp_dir(None) as sdir:
            # A module inside an ordinary package directory.
            pymod = Path(sdir, 'pypkg', 'pymod.py')
            pymod.parent.mkdir()
            pymod.write_text(f'_({text1!r})', encoding='utf-8')

            # A module inside a directory whose own name ends in '.py'.
            pymod2 = Path(sdir, 'pkg.py', 'pymod2.py')
            pymod2.parent.mkdir()
            pymod2.write_text(f'_({text2!r})', encoding='utf-8')

            # A module inside a 'CVS' directory; its text must not show up.
            pymod3 = Path(sdir, 'CVS', 'pymod3.py')
            pymod3.parent.mkdir()
            pymod3.write_text(f'_({text3!r})', encoding='utf-8')

            assert_python_ok('-Xutf8', self.script, sdir)
            data = Path('messages.pot').read_text(encoding='utf-8')
            self.assertIn(f'msgid "{text1}"', data)
            self.assertIn(f'msgid "{text2}"', data)
            self.assertNotIn(text3, data)
409
410
def update_POT_snapshots():
    """Regenerate the '.pot' snapshot next to every '*.py' file in DATA_DIR.

    Each input is copied byte for byte into a temporary working directory,
    pygettext is run on it with '--docstrings', and the normalized output
    is written as UTF-8 to the sibling file with the '.pot' suffix.
    """
    for source in DATA_DIR.glob('*.py'):
        raw = source.read_bytes()
        with temp_cwd(None):
            Path(source.name).write_bytes(raw)
            assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', source.name)
            generated = Path('messages.pot').read_text(encoding='utf-8')

        # Written after leaving the temporary directory; the snapshot path
        # is derived from the DATA_DIR entry, not from the temporary copy.
        snapshot = source.with_suffix('.pot')
        snapshot.write_text(normalize_POT_file(generated), encoding='utf-8')
422
423
if __name__ == '__main__':
    # Passing '--snapshot-update' as the first argument regenerates the
    # POT files and exits instead of running the tests.
    if sys.argv[1:2] == ['--snapshot-update']:
        update_POT_snapshots()
        sys.exit(0)
    unittest.main()
430