From f54773fa78268e636d0add21650c1ae41f80362b Mon Sep 17 00:00:00 2001 From: rajkripal Date: Mon, 20 Apr 2026 01:20:21 -0700 Subject: [PATCH 1/2] gh-148798: fix crash in _interpreters.create on surrogate str in config _config_dict_copy_str passed the result of PyUnicode_AsUTF8 straight to strncpy. When the string contained an unpaired surrogate, PyUnicode_AsUTF8 returned NULL and set UnicodeEncodeError, and strncpy dereferenced NULL. Check the return value and propagate the error, mirroring the pattern used at Modules/_interpretersmodule.c:425. Add a regression test alongside the existing gh-126221 case. --- Lib/test/test_interpreters/test_api.py | 12 ++++++++++++ ...6-04-20-01-45-00.gh-issue-148798.interpconfig.rst | 5 +++++ Python/interpconfig.c | 7 ++++++- 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-04-20-01-45-00.gh-issue-148798.interpconfig.rst diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index 13d23af5aceb47..09bb902e96a8a5 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -117,6 +117,18 @@ def test_in_main(self): # GH-126221: Passing an invalid Unicode character used to cause a SystemError self.assertRaises(UnicodeEncodeError, _interpreters.create, '\udc80') + # A config object with a surrogate in a string field must raise, not crash. + class BadConfig: + use_main_obmalloc = False + allow_fork = False + allow_exec = False + allow_threads = False + allow_daemon_threads = False + check_multi_interp_extensions = False + own_gil = True + gil = 'own\udc80' + self.assertRaises(UnicodeEncodeError, _interpreters.create, BadConfig()) + def test_in_thread(self): lock = threading.Lock() interp = None diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-20-01-45-00.gh-issue-148798.interpconfig.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-20-01-45-00.gh-issue-148798.interpconfig.rst new file mode 100644 index 00000000000000..b5bb7a25abeb2e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-20-01-45-00.gh-issue-148798.interpconfig.rst @@ -0,0 +1,5 @@ +Fix a crash in :func:`!_interpreters.create` when a config object passes a +string with an unpaired surrogate as a value (for example ``gil``). The +internal helper ``_config_dict_copy_str`` now checks the return of +:c:func:`PyUnicode_AsUTF8` before copying, turning the segfault into a +:exc:`UnicodeEncodeError`. diff --git a/Python/interpconfig.c b/Python/interpconfig.c index a37bd3f5b23a01..6e5fc807641df0 100644 --- a/Python/interpconfig.c +++ b/Python/interpconfig.c @@ -133,7 +133,12 @@ _config_dict_copy_str(PyObject *dict, const char *name, config_dict_invalid_type(name); return -1; } - strncpy(buf, PyUnicode_AsUTF8(item), bufsize-1); + const char *utf8 = PyUnicode_AsUTF8(item); + if (utf8 == NULL) { + Py_DECREF(item); + return -1; + } + strncpy(buf, utf8, bufsize-1); buf[bufsize-1] = '\0'; Py_DECREF(item); return 0; From 02053a97bdc9faad6d93af25cd95159e19b4f7f4 Mon Sep 17 00:00:00 2001 From: Raj Kripal Danday Date: Mon, 20 Apr 2026 19:15:13 -0700 Subject: [PATCH 2/2] gh-148798: address review on _interpreters.create surrogate fix Reword the NEWS entry to drop the named config key and the reference to internal helpers. Move the regression test into its own method in CreateTests, and build the config from _interpreters.new_config() instead of a hand-rolled class, so only the surrogate path is exercised. Using new_config() also fixes the test on main, where an instance of BadConfig had an empty __dict__ and was rejected before the surrogate check. --- Lib/test/test_interpreters/test_api.py | 17 ++++++----------- ...20-01-45-00.gh-issue-148798.interpconfig.rst | 9 ++++----- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index 09bb902e96a8a5..4ccb1922dadd33 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -117,17 +117,12 @@ def test_in_main(self): # GH-126221: Passing an invalid Unicode character used to cause a SystemError self.assertRaises(UnicodeEncodeError, _interpreters.create, '\udc80') - # A config object with a surrogate in a string field must raise, not crash. - class BadConfig: - use_main_obmalloc = False - allow_fork = False - allow_exec = False - allow_threads = False - allow_daemon_threads = False - check_multi_interp_extensions = False - own_gil = True - gil = 'own\udc80' - self.assertRaises(UnicodeEncodeError, _interpreters.create, BadConfig()) + def test_config_with_surrogate_str_field(self): + # gh-148798: a config whose string field contains an unpaired + # surrogate used to crash the interpreter. It must raise instead. + config = _interpreters.new_config() + config.gil = 'own\udc80' + self.assertRaises(UnicodeEncodeError, _interpreters.create, config) def test_in_thread(self): lock = threading.Lock() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-20-01-45-00.gh-issue-148798.interpconfig.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-20-01-45-00.gh-issue-148798.interpconfig.rst index b5bb7a25abeb2e..e951b81752a821 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-20-01-45-00.gh-issue-148798.interpconfig.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-20-01-45-00.gh-issue-148798.interpconfig.rst @@ -1,5 +1,4 @@ -Fix a crash in :func:`!_interpreters.create` when a config object passes a -string with an unpaired surrogate as a value (for example ``gil``). The -internal helper ``_config_dict_copy_str`` now checks the return of -:c:func:`PyUnicode_AsUTF8` before copying, turning the segfault into a -:exc:`UnicodeEncodeError`. +Fix a crash in :func:`!_interpreters.create` when a config value contains +a string with an unpaired surrogate. :c:func:`PyUnicode_AsUTF8` returned +``NULL`` and the result was passed to :c:func:`!strncpy`, dereferencing +it. The caller now propagates the :exc:`UnicodeEncodeError` instead.