Skip to content

Commit

Permalink
Add support for Chinese character sets GB2312 and GB18030.
Browse files (browse the repository at this point in the history)
These character sets are mostly used in mainland China.
  • Loading branch information
khamidou authored and spang committed Oct 6, 2017
1 parent 1c3e780 commit 271a241
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
12 changes: 9 additions & 3 deletions flanker/mime/message/charsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,22 @@ def _translate_charset(charset):
Otherwise returns unmodified.
"""
# ev: (ticket #2819)
if "sjis" in charset.lower():
if 'sjis' in charset.lower():
return 'shift_jis'

# windows-874 looks to be an alias for cp874, so map it to that codec name
if "windows-874" == charset.lower():
return "cp874"
if 'windows-874' == charset.lower():
return 'cp874'

if 'koi8-r' in charset.lower():
return 'koi8_r'

if 'gb2312' in charset.lower():
return 'gb2312'

if 'gb18030' in charset.lower():
return 'gb18030'

if 'utf-8' in charset.lower() or charset.lower() == 'x-unknown':
return 'utf-8'

Expand Down
6 changes: 6 additions & 0 deletions tests/mime/message/headers/encodedword_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@ def various_encodings_test():
v = u'=?utf-8?Q?Evaneos-Concepci=C3=B3n.pdf?='
eq_(u'Evaneos-Concepción.pdf', encodedword.mime_to_unicode(v))

v = u'=?gb2312?Q?Hey_There=D7=B2=D8=B0?='
eq_(u'Hey There撞匕', encodedword.mime_to_unicode(v))

v = u'=?gb18030?Q?Hey_There=D7=B2=D8=B0?='
eq_(u'Hey There撞匕', encodedword.mime_to_unicode(v))


@patch.object(utils, '_guess_and_convert', Mock(side_effect=errors.EncodingError()))
def test_convert_to_utf8_unknown_encoding():
Expand Down

0 comments on commit 271a241

Please sign in to comment.