diff --git a/flanker/mime/message/charsets.py b/flanker/mime/message/charsets.py index 7ecded7a..220d0a1a 100644 --- a/flanker/mime/message/charsets.py +++ b/flanker/mime/message/charsets.py @@ -16,16 +16,22 @@ def _translate_charset(charset): Otherwise returns unmodified. """ # ev: (ticket #2819) - if "sjis" in charset.lower(): + if 'sjis' in charset.lower(): return 'shift_jis' # cp874 looks to be an alias for windows-874 - if "windows-874" == charset.lower(): - return "cp874" + if 'windows-874' == charset.lower(): + return 'cp874' if 'koi8-r' in charset.lower(): return 'koi8_r' + if 'gb2312' in charset.lower(): + return 'gb2312' + + if 'gb18030' in charset.lower(): + return 'gb18030' + if 'utf-8' in charset.lower() or charset.lower() == 'x-unknown': return 'utf-8' diff --git a/tests/mime/message/headers/encodedword_test.py b/tests/mime/message/headers/encodedword_test.py index 852109d6..037e6efc 100644 --- a/tests/mime/message/headers/encodedword_test.py +++ b/tests/mime/message/headers/encodedword_test.py @@ -131,6 +131,12 @@ def various_encodings_test(): v = u'=?utf-8?Q?Evaneos-Concepci=C3=B3n.pdf?=' eq_(u'Evaneos-Concepción.pdf', encodedword.mime_to_unicode(v)) + v = u'=?gb2312?Q?Hey_There=D7=B2=D8=B0?=' + eq_(u'Hey There撞匕', encodedword.mime_to_unicode(v)) + + v = u'=?gb18030?Q?Hey_There=D7=B2=D8=B0?=' + eq_(u'Hey There撞匕', encodedword.mime_to_unicode(v)) + @patch.object(utils, '_guess_and_convert', Mock(side_effect=errors.EncodingError())) def test_convert_to_utf8_unknown_encoding():