# Tests the attempted automatic coercion of the C locale to a UTF-8 locale

import locale
import os
import subprocess
import sys
import sysconfig
import unittest
from collections import namedtuple

from test import support
from test.support.script_helper import run_python_until_end


# Set the list of ways we expect to be able to ask for the "C" locale
EXPECTED_C_LOCALE_EQUIVALENTS = ["C", "invalid.ascii"]

# Set our expectation for the default encoding used in the C locale
# for the filesystem encoding and the standard streams
EXPECTED_C_LOCALE_STREAM_ENCODING = "ascii"
EXPECTED_C_LOCALE_FS_ENCODING = "ascii"

# Set our expectation for the default locale used when none is specified
EXPECT_COERCION_IN_DEFAULT_LOCALE = True

# Candidate coercion targets, in the order they are tried.  This list is the
# single source of truth: _C_UTF8_LOCALES below is derived from it.
TARGET_LOCALES = ["C.UTF-8", "C.utf8", "UTF-8"]

# Apply some platform dependent overrides
if sys.platform.startswith("linux"):
    if support.is_android:
        # Android defaults to using UTF-8 for all system interfaces
        EXPECTED_C_LOCALE_STREAM_ENCODING = "utf-8"
        EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
    else:
        # Linux distros typically alias the POSIX locale directly to the C
        # locale.
        # TODO: Once https://bugs.python.org/issue30672 is addressed, we'll be
        #       able to check this case unconditionally
        EXPECTED_C_LOCALE_EQUIVALENTS.append("POSIX")
elif sys.platform.startswith("aix"):
    # AIX uses iso8859-1 in the C locale, other *nix platforms use ASCII
    EXPECTED_C_LOCALE_STREAM_ENCODING = "iso8859-1"
    EXPECTED_C_LOCALE_FS_ENCODING = "iso8859-1"
elif sys.platform == "darwin":
    # FS encoding is UTF-8 on macOS
    EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
elif sys.platform == "cygwin":
    # Cygwin defaults to using C.UTF-8
    # TODO: Work out a robust dynamic test for this that doesn't rely on
    #       CPython's own locale handling machinery
    EXPECT_COERCION_IN_DEFAULT_LOCALE = False
elif sys.platform == "vxworks":
    # VxWorks defaults to using UTF-8 for all system interfaces
    EXPECTED_C_LOCALE_STREAM_ENCODING = "utf-8"
    EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"

# Note that the above expectations are still wrong in some cases, such as:
# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
# * Any platform other than AIX that uses latin-1 in the C locale
# * Any Linux distro where POSIX isn't a simple alias for the C locale
# * Any Linux distro where the default locale is something other than "C"
#
# Options for dealing with this:
# * Don't set the PY_COERCE_C_LOCALE preprocessor definition on
#   such platforms (e.g. it isn't set on Windows)
# * Fix the test expectations to match the actual platform behaviour

# In order to get the warning messages to match up as expected, the candidate
# order here must match the target locale order in Python/pylifecycle.c
# Derived from TARGET_LOCALES so the two sequences can never drift apart.
_C_UTF8_LOCALES = tuple(TARGET_LOCALES)

# There's no reliable cross-platform way of checking locale alias
# lists, so the only way of knowing which of these locales will work
# is to try them with locale.setlocale(). We do that in a subprocess
# in setUpModule() below to avoid altering the locale of the test runner.
#
# If the relevant locale module attributes exist, and we're not on a platform
# where we expect it to always succeed, we also check that
# `locale.nl_langinfo(locale.CODESET)` works, as if it fails, the interpreter
# will skip locale coercion for that particular target locale
_check_nl_langinfo_CODESET = bool(
    sys.platform not in ("darwin", "linux") and
    hasattr(locale, "nl_langinfo") and
    hasattr(locale, "CODESET")
)


def _set_locale_in_subprocess(locale_name):
    """Return True if a child interpreter can set LC_CTYPE to *locale_name*.

    The check runs in a subprocess so the locale of the current process is
    left untouched.  When ``_check_nl_langinfo_CODESET`` is true, the child
    additionally exits with a non-zero status if
    ``locale.nl_langinfo(locale.CODESET)`` returns an empty value.
    """
    cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))"
    if _check_nl_langinfo_CODESET:
        # If there's no valid CODESET, we expect coercion to be skipped
        cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))"
    cmd = cmd_fmt.format(locale_name)
    # Only the exit status matters here; the command line is not needed.
    result, _ = run_python_until_end("-c", cmd, PYTHONCOERCECLOCALE='')
    return result.rc == 0


_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
_EncodingDetails = namedtuple("EncodingDetails", _fields)


class EncodingDetails(_EncodingDetails):
    """Encoding and locale settings reported by (or expected of) a child.

    Each field corresponds to one line printed by ``CHILD_PROCESS_SCRIPT``,
    in the same order as ``_fields``.
    """

    # XXX (ncoghlan): Using JSON for child state reporting may be less fragile
    CHILD_PROCESS_SCRIPT = ";".join([
        "import sys, os",
        "print(sys.getfilesystemencoding())",
        "print(sys.stdin.encoding + ':' + sys.stdin.errors)",
        "print(sys.stdout.encoding + ':' + sys.stdout.errors)",
        "print(sys.stderr.encoding + ':' + sys.stderr.errors)",
        "print(os.environ.get('LANG', 'not set'))",
        "print(os.environ.get('LC_CTYPE', 'not set'))",
        "print(os.environ.get('LC_ALL', 'not set'))",
    ])

    @classmethod
    def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars):
        """Return the expected child process details as a dict.

        Parameters:
            coercion_expected: if true, LC_CTYPE is expected to have been
                replaced with CLI_COERCION_TARGET; otherwise the value from
                *env_vars* (or "not set") is expected
            fs_encoding: expected sys.getfilesystemencoding() result
            stream_encoding: expected encoding of the standard streams
            env_vars: mapping of the environment variables given to the child
        """
        _stream = stream_encoding + ":{}"
        # stdin and stdout should use surrogateescape either because the
        # coercion triggered, or because the C locale was detected
        stream_info = 2*[_stream.format("surrogateescape")]
        # stderr should always use backslashreplace
        stream_info.append(_stream.format("backslashreplace"))
        expected_lang = env_vars.get("LANG", "not set")
        if coercion_expected:
            expected_lc_ctype = CLI_COERCION_TARGET
        else:
            expected_lc_ctype = env_vars.get("LC_CTYPE", "not set")
        expected_lc_all = env_vars.get("LC_ALL", "not set")
        env_info = expected_lang, expected_lc_ctype, expected_lc_all
        return dict(cls(fs_encoding, *stream_info, *env_info)._asdict())

    @classmethod
    def get_child_details(cls, env_vars):
        """Retrieves fsencoding and standard stream details from a child process

        Returns (encoding_details, stderr_lines):

        - encoding_details: dict mapping each EncodingDetails field name to
          the value reported by the child
        - stderr_lines: result of calling splitlines() on the stderr output

        The child is run with ``-X utf8=0`` and with *env_vars* passed through
        to ``run_python_until_end()`` as keyword arguments.  If the child exits
        with a non-zero status, the failure is reported via ``result.fail()``.
        """
        result, py_cmd = run_python_until_end(
            "-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT,
            **env_vars
        )
        if result.rc != 0:
            result.fail(py_cmd)
        # All subprocess outputs in this test case should be pure ASCII
        stdout_lines = result.out.decode("ascii").splitlines()
        child_encoding_details = dict(cls(*stdout_lines)._asdict())
        stderr_lines = result.err.decode("ascii").rstrip().splitlines()
        return child_encoding_details, stderr_lines


# Details of the shared library warning emitted at runtime
LEGACY_LOCALE_WARNING = (
    "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
    "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
    "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
    "locales is recommended."
)

# Details of the CLI locale coercion warning emitted at runtime
CLI_COERCION_WARNING_FMT = (
    "Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "
    "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior)."
)


# Populated by setUpModule(); None means "not initialised yet"
AVAILABLE_TARGETS = None
CLI_COERCION_TARGET = None
CLI_COERCION_WARNING = None


def setUpModule():
    """Probe which target locales work and derive the expected warning.

    Fills in AVAILABLE_TARGETS, and (when at least one target is usable)
    CLI_COERCION_TARGET and CLI_COERCION_WARNING.  Calling it again after
    the first run is a no-op.
    """
    global AVAILABLE_TARGETS
    global CLI_COERCION_TARGET
    global CLI_COERCION_WARNING

    if AVAILABLE_TARGETS is not None:
        # initialization already done
        return
    AVAILABLE_TARGETS = []

    # Find the target locales available in the current system
    for target_locale in _C_UTF8_LOCALES:
        if _set_locale_in_subprocess(target_locale):
            AVAILABLE_TARGETS.append(target_locale)

    if AVAILABLE_TARGETS:
        # Coercion is expected to use the first available target locale
        CLI_COERCION_TARGET = AVAILABLE_TARGETS[0]
        CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET)

    if support.verbose:
        print(f"AVAILABLE_TARGETS = {AVAILABLE_TARGETS!r}")
        print(f"EXPECTED_C_LOCALE_EQUIVALENTS = {EXPECTED_C_LOCALE_EQUIVALENTS!r}")
        print(f"EXPECTED_C_LOCALE_STREAM_ENCODING = {EXPECTED_C_LOCALE_STREAM_ENCODING!r}")
        print(f"EXPECTED_C_LOCALE_FS_ENCODING = {EXPECTED_C_LOCALE_FS_ENCODING!r}")
        print(f"EXPECT_COERCION_IN_DEFAULT_LOCALE = {EXPECT_COERCION_IN_DEFAULT_LOCALE!r}")
        print(f"_C_UTF8_LOCALES = {_C_UTF8_LOCALES!r}")
        print(f"_check_nl_langinfo_CODESET = {_check_nl_langinfo_CODESET!r}")


class _LocaleHandlingTestCase(unittest.TestCase):
    # Base class to check expected locale handling behaviour

    def _check_child_encoding_details(self,
                                      env_vars,
                                      expected_fs_encoding,
                                      expected_stream_encoding,
                                      expected_warnings,
                                      coercion_expected):
        """Check the C locale handling for the given process environment

        Parameters:
            env_vars: environment variables to set in the child process
            expected_fs_encoding: expected sys.getfilesystemencoding() result
            expected_stream_encoding: expected encoding for standard streams
            expected_warnings: list of stderr lines to expect; None is
                treated as "no output expected"
            coercion_expected: whether LC_CTYPE is expected to have been
                coerced to CLI_COERCION_TARGET in the child
        """
        result = EncodingDetails.get_child_details(env_vars)
        encoding_details, stderr_lines = result
        expected_details = EncodingDetails.get_expected_details(
            coercion_expected,
            expected_fs_encoding,
            expected_stream_encoding,
            env_vars
        )
        self.assertEqual(encoding_details, expected_details)
        if expected_warnings is None:
            expected_warnings = []
        self.assertEqual(stderr_lines, expected_warnings)


class LocaleConfigurationTests(_LocaleHandlingTestCase):
    # Test explicit external configuration via the process environment

    @classmethod
    def setUpClass(cls):
        # This relies on setUpModule() having been run, so it can't be
        # handled via the @unittest.skipUnless decorator
        if not AVAILABLE_TARGETS:
            raise unittest.SkipTest("No C-with-UTF-8 locale available")

    def test_external_target_locale_configuration(self):

        # Explicitly setting a target locale should give the same behaviour as
        # is seen when implicitly coercing to that target locale
        self.maxDiff = None

        expected_fs_encoding = "utf-8"
        expected_stream_encoding = "utf-8"

        base_var_dict = {
            "LANG": "",
            "LC_CTYPE": "",
            "LC_ALL": "",
            "PYTHONCOERCECLOCALE": "",
        }
        for env_var in ("LANG", "LC_CTYPE"):
            for locale_to_set in AVAILABLE_TARGETS:
                # XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as
                #                 expected, so skip that combination for now
                # See https://bugs.python.org/issue30672 for discussion
                if env_var == "LANG" and locale_to_set == "UTF-8":
                    continue

                with self.subTest(env_var=env_var,
                                  configured_locale=locale_to_set):
                    var_dict = base_var_dict.copy()
                    var_dict[env_var] = locale_to_set
                    self._check_child_encoding_details(var_dict,
                                                       expected_fs_encoding,
                                                       expected_stream_encoding,
                                                       expected_warnings=None,
                                                       coercion_expected=False)


@support.cpython_only
@unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"),
                     "C locale coercion disabled at build time")
class LocaleCoercionTests(_LocaleHandlingTestCase):
    # Test implicit reconfiguration of the environment during CLI startup

    def _check_c_locale_coercion(self,
                                 fs_encoding, stream_encoding,
                                 coerce_c_locale,
                                 expected_warnings=None,
                                 coercion_expected=True,
                                 **extra_vars):
        """Check the C locale handling for various configurations

        Parameters:
            fs_encoding: expected sys.getfilesystemencoding() result
            stream_encoding: expected encoding for standard streams
            coerce_c_locale: setting to use for PYTHONCOERCECLOCALE
              None: don't set the variable at all
              str: the value set in the child's environment
            expected_warnings: expected warning lines on stderr
            coercion_expected: whether LC_CTYPE is expected to be coerced
              to CLI_COERCION_TARGET in the child
            extra_vars: additional environment variables to set in subprocess
        """
        self.maxDiff = None

        if not AVAILABLE_TARGETS:
            # Locale coercion is disabled when there aren't any target locales
            fs_encoding = EXPECTED_C_LOCALE_FS_ENCODING
            stream_encoding = EXPECTED_C_LOCALE_STREAM_ENCODING
            coercion_expected = False
            if expected_warnings:
                expected_warnings = [LEGACY_LOCALE_WARNING]

        base_var_dict = {
            "LANG": "",
            "LC_CTYPE": "",
            "LC_ALL": "",
            "PYTHONCOERCECLOCALE": "",
        }
        base_var_dict.update(extra_vars)
        if coerce_c_locale is not None:
            base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale

        # Check behaviour for the default locale
        with self.subTest(default_locale=True,
                          PYTHONCOERCECLOCALE=coerce_c_locale):
            if EXPECT_COERCION_IN_DEFAULT_LOCALE:
                _expected_warnings = expected_warnings
                _coercion_expected = coercion_expected
            else:
                _expected_warnings = None
                _coercion_expected = False
            # On Android CLI_COERCION_WARNING is not printed when all the
            # locale environment variables are undefined or empty. When
            # this code path is run with environ['LC_ALL'] == 'C', then
            # LEGACY_LOCALE_WARNING is printed.
            if (support.is_android and
                    _expected_warnings == [CLI_COERCION_WARNING]):
                _expected_warnings = None
            self._check_child_encoding_details(base_var_dict,
                                               fs_encoding,
                                               stream_encoding,
                                               _expected_warnings,
                                               _coercion_expected)

        # Check behaviour for explicitly configured locales
        for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS:
            for env_var in ("LANG", "LC_CTYPE"):
                with self.subTest(env_var=env_var,
                                  nominal_locale=locale_to_set,
                                  PYTHONCOERCECLOCALE=coerce_c_locale):
                    var_dict = base_var_dict.copy()
                    var_dict[env_var] = locale_to_set
                    # Check behaviour on successful coercion
                    self._check_child_encoding_details(var_dict,
                                                       fs_encoding,
                                                       stream_encoding,
                                                       expected_warnings,
                                                       coercion_expected)

    def test_PYTHONCOERCECLOCALE_not_set(self):
        # This should coerce to the first available target locale by default
        self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)

    def test_PYTHONCOERCECLOCALE_not_zero(self):
        # *Any* string other than "0" is considered "set" for our purposes
        # and hence should result in the locale coercion being enabled
        for setting in ("", "1", "true", "false"):
            self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting)

    def test_PYTHONCOERCECLOCALE_set_to_warn(self):
        # PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales
        self._check_c_locale_coercion("utf-8", "utf-8",
                                      coerce_c_locale="warn",
                                      expected_warnings=[CLI_COERCION_WARNING])

    def test_PYTHONCOERCECLOCALE_set_to_zero(self):
        # The setting "0" should result in the locale coercion being disabled
        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                      coerce_c_locale="0",
                                      coercion_expected=False)
        # Setting LC_ALL=C shouldn't make any difference to the behaviour
        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                      coerce_c_locale="0",
                                      LC_ALL="C",
                                      coercion_expected=False)

    def test_LC_ALL_set_to_C(self):
        # Setting LC_ALL should render the locale coercion ineffective
        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                      coerce_c_locale=None,
                                      LC_ALL="C",
                                      coercion_expected=False)
        # And result in a warning about a lack of locale compatibility
        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                      coerce_c_locale="warn",
                                      LC_ALL="C",
                                      expected_warnings=[LEGACY_LOCALE_WARNING],
                                      coercion_expected=False)

    def test_PYTHONCOERCECLOCALE_set_to_one(self):
        # skip the test if the LC_CTYPE locale is C or coerced
        old_loc = locale.setlocale(locale.LC_CTYPE, None)
        self.addCleanup(locale.setlocale, locale.LC_CTYPE, old_loc)
        try:
            loc = locale.setlocale(locale.LC_CTYPE, "")
        except locale.Error as e:
            self.skipTest(str(e))
        if loc == "C":
            self.skipTest("test requires LC_CTYPE locale different than C")
        if loc in TARGET_LOCALES:
            self.skipTest("coerced LC_CTYPE locale: %s" % loc)

        # bpo-35336: PYTHONCOERCECLOCALE=1 must not coerce the LC_CTYPE locale
        # if it's not equal to "C"
        code = 'import locale; print(locale.setlocale(locale.LC_CTYPE, None))'
        env = dict(os.environ, PYTHONCOERCECLOCALE='1')
        cmd = subprocess.run([sys.executable, '-c', code],
                             stdout=subprocess.PIPE,
                             env=env,
                             text=True)
        self.assertEqual(cmd.stdout.rstrip(), loc)


def test_main():
    """Run both test classes, then reap any leftover child processes."""
    support.run_unittest(
        LocaleConfigurationTests,
        LocaleCoercionTests
    )
    support.reap_children()


if __name__ == "__main__":
    test_main()