Skip to content

Commit 24216d0

Browse files
[3.12] gh-111942: Fix crashes in TextIOWrapper.reconfigure() (GH-111976) (GH-112058)
* Fix crash when encoding is not string or None. * Fix crash when both line_buffering and write_through raise exceptions when converted to int. * Add a number of tests for constructor and reconfigure() method with invalid arguments. (cherry picked from commit ee06fff) Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent c003de9 commit 24216d0

File tree

3 files changed

+122
-5
lines changed

3 files changed

+122
-5
lines changed

Lib/test/test_io.py

+84-2
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ def _default_chunk_size():
8181
)
8282

8383

84+
class BadIndex:
85+
def __index__(self):
86+
1/0
87+
8488
class MockRawIOWithoutRead:
8589
"""A RawIO implementation without read(), so as to exercise the default
8690
RawIO.read() which calls readinto()."""
@@ -2716,8 +2720,31 @@ def test_constructor(self):
27162720
self.assertEqual(t.encoding, "utf-8")
27172721
self.assertEqual(t.line_buffering, True)
27182722
self.assertEqual("\xe9\n", t.readline())
2719-
self.assertRaises(TypeError, t.__init__, b, encoding="utf-8", newline=42)
2720-
self.assertRaises(ValueError, t.__init__, b, encoding="utf-8", newline='xyzzy')
2723+
invalid_type = TypeError if self.is_C else ValueError
2724+
with self.assertRaises(invalid_type):
2725+
t.__init__(b, encoding=42)
2726+
with self.assertRaises(UnicodeEncodeError):
2727+
t.__init__(b, encoding='\udcfe')
2728+
with self.assertRaises(ValueError):
2729+
t.__init__(b, encoding='utf-8\0')
2730+
with self.assertRaises(invalid_type):
2731+
t.__init__(b, encoding="utf-8", errors=42)
2732+
if support.Py_DEBUG or sys.flags.dev_mode or self.is_C:
2733+
with self.assertRaises(UnicodeEncodeError):
2734+
t.__init__(b, encoding="utf-8", errors='\udcfe')
2735+
if support.Py_DEBUG or sys.flags.dev_mode:
2736+
# TODO: If encoded to UTF-8, should also be checked for
2737+
# embedded null characters.
2738+
with self.assertRaises(ValueError):
2739+
t.__init__(b, encoding="utf-8", errors='replace\0')
2740+
with self.assertRaises(TypeError):
2741+
t.__init__(b, encoding="utf-8", newline=42)
2742+
with self.assertRaises(ValueError):
2743+
t.__init__(b, encoding="utf-8", newline='\udcfe')
2744+
with self.assertRaises(ValueError):
2745+
t.__init__(b, encoding="utf-8", newline='\n\0')
2746+
with self.assertRaises(ValueError):
2747+
t.__init__(b, encoding="utf-8", newline='xyzzy')
27212748

27222749
def test_uninitialized(self):
27232750
t = self.TextIOWrapper.__new__(self.TextIOWrapper)
@@ -3766,6 +3793,59 @@ def test_reconfigure_defaults(self):
37663793

37673794
self.assertEqual(txt.detach().getvalue(), b'LF\nCRLF\r\n')
37683795

3796+
def test_reconfigure_errors(self):
3797+
txt = self.TextIOWrapper(self.BytesIO(), 'ascii', 'replace', '\r')
3798+
with self.assertRaises(TypeError): # there was a crash
3799+
txt.reconfigure(encoding=42)
3800+
if self.is_C:
3801+
with self.assertRaises(UnicodeEncodeError):
3802+
txt.reconfigure(encoding='\udcfe')
3803+
with self.assertRaises(LookupError):
3804+
txt.reconfigure(encoding='locale\0')
3805+
# TODO: txt.reconfigure(encoding='utf-8\0')
3806+
# TODO: txt.reconfigure(encoding='nonexisting')
3807+
with self.assertRaises(TypeError):
3808+
txt.reconfigure(errors=42)
3809+
if self.is_C:
3810+
with self.assertRaises(UnicodeEncodeError):
3811+
txt.reconfigure(errors='\udcfe')
3812+
# TODO: txt.reconfigure(errors='ignore\0')
3813+
# TODO: txt.reconfigure(errors='nonexisting')
3814+
with self.assertRaises(TypeError):
3815+
txt.reconfigure(newline=42)
3816+
with self.assertRaises(ValueError):
3817+
txt.reconfigure(newline='\udcfe')
3818+
with self.assertRaises(ValueError):
3819+
txt.reconfigure(newline='xyz')
3820+
if not self.is_C:
3821+
# TODO: Should fail in C too.
3822+
with self.assertRaises(ValueError):
3823+
txt.reconfigure(newline='\n\0')
3824+
if self.is_C:
3825+
# TODO: Use __bool__(), not __index__().
3826+
with self.assertRaises(ZeroDivisionError):
3827+
txt.reconfigure(line_buffering=BadIndex())
3828+
with self.assertRaises(OverflowError):
3829+
txt.reconfigure(line_buffering=2**1000)
3830+
with self.assertRaises(ZeroDivisionError):
3831+
txt.reconfigure(write_through=BadIndex())
3832+
with self.assertRaises(OverflowError):
3833+
txt.reconfigure(write_through=2**1000)
3834+
with self.assertRaises(ZeroDivisionError): # there was a crash
3835+
txt.reconfigure(line_buffering=BadIndex(),
3836+
write_through=BadIndex())
3837+
self.assertEqual(txt.encoding, 'ascii')
3838+
self.assertEqual(txt.errors, 'replace')
3839+
self.assertIs(txt.line_buffering, False)
3840+
self.assertIs(txt.write_through, False)
3841+
3842+
txt.reconfigure(encoding='latin1', errors='ignore', newline='\r\n',
3843+
line_buffering=True, write_through=True)
3844+
self.assertEqual(txt.encoding, 'latin1')
3845+
self.assertEqual(txt.errors, 'ignore')
3846+
self.assertIs(txt.line_buffering, True)
3847+
self.assertIs(txt.write_through, True)
3848+
37693849
def test_reconfigure_newline(self):
37703850
raw = self.BytesIO(b'CR\rEOF')
37713851
txt = self.TextIOWrapper(raw, 'ascii', newline='\n')
@@ -4791,9 +4871,11 @@ def load_tests(loader, tests, pattern):
47914871
if test.__name__.startswith("C"):
47924872
for name, obj in c_io_ns.items():
47934873
setattr(test, name, obj)
4874+
test.is_C = True
47944875
elif test.__name__.startswith("Py"):
47954876
for name, obj in py_io_ns.items():
47964877
setattr(test, name, obj)
4878+
test.is_C = False
47974879

47984880
suite = loader.suiteClass()
47994881
for test in tests:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix crashes in :meth:`io.TextIOWrapper.reconfigure` when passing invalid
2+
arguments, e.g. non-string encoding.

Modules/_io/textio.c

+36-3
Original file line numberDiff line numberDiff line change
@@ -1292,30 +1292,40 @@ textiowrapper_change_encoding(textio *self, PyObject *encoding,
12921292
errors = &_Py_ID(strict);
12931293
}
12941294
}
1295+
Py_INCREF(errors);
12951296

1297+
const char *c_encoding = PyUnicode_AsUTF8(encoding);
1298+
if (c_encoding == NULL) {
1299+
Py_DECREF(encoding);
1300+
Py_DECREF(errors);
1301+
return -1;
1302+
}
12961303
const char *c_errors = PyUnicode_AsUTF8(errors);
12971304
if (c_errors == NULL) {
12981305
Py_DECREF(encoding);
1306+
Py_DECREF(errors);
12991307
return -1;
13001308
}
13011309

13021310
// Create new encoder & decoder
13031311
PyObject *codec_info = _PyCodec_LookupTextEncoding(
1304-
PyUnicode_AsUTF8(encoding), "codecs.open()");
1312+
c_encoding, "codecs.open()");
13051313
if (codec_info == NULL) {
13061314
Py_DECREF(encoding);
1315+
Py_DECREF(errors);
13071316
return -1;
13081317
}
13091318
if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
13101319
_textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
13111320
Py_DECREF(codec_info);
13121321
Py_DECREF(encoding);
1322+
Py_DECREF(errors);
13131323
return -1;
13141324
}
13151325
Py_DECREF(codec_info);
13161326

13171327
Py_SETREF(self->encoding, encoding);
1318-
Py_SETREF(self->errors, Py_NewRef(errors));
1328+
Py_SETREF(self->errors, errors);
13191329

13201330
return _textiowrapper_fix_encoder_state(self);
13211331
}
@@ -1346,6 +1356,26 @@ _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
13461356
int write_through;
13471357
const char *newline = NULL;
13481358

1359+
if (encoding != Py_None && !PyUnicode_Check(encoding)) {
1360+
PyErr_Format(PyExc_TypeError,
1361+
"reconfigure() argument 'encoding' must be str or None, not %s",
1362+
Py_TYPE(encoding)->tp_name);
1363+
return NULL;
1364+
}
1365+
if (errors != Py_None && !PyUnicode_Check(errors)) {
1366+
PyErr_Format(PyExc_TypeError,
1367+
"reconfigure() argument 'errors' must be str or None, not %s",
1368+
Py_TYPE(errors)->tp_name);
1369+
return NULL;
1370+
}
1371+
if (newline_obj != NULL && newline_obj != Py_None &&
1372+
!PyUnicode_Check(newline_obj))
1373+
{
1374+
PyErr_Format(PyExc_TypeError,
1375+
"reconfigure() argument 'newline' must be str or None, not %s",
1376+
Py_TYPE(newline_obj)->tp_name);
1377+
return NULL;
1378+
}
13491379
/* Check if something is in the read buffer */
13501380
if (self->decoded_chars != NULL) {
13511381
if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
@@ -1365,9 +1395,12 @@ _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
13651395

13661396
line_buffering = convert_optional_bool(line_buffering_obj,
13671397
self->line_buffering);
1398+
if (line_buffering < 0) {
1399+
return NULL;
1400+
}
13681401
write_through = convert_optional_bool(write_through_obj,
13691402
self->write_through);
1370-
if (line_buffering < 0 || write_through < 0) {
1403+
if (write_through < 0) {
13711404
return NULL;
13721405
}
13731406

0 commit comments

Comments
 (0)