Skip to content

Commit

Permalink
Merge pull request #104 from rocky/unicode-from-python3
Browse files Browse the repository at this point in the history
Interpret Python2 unicode in Python3
  • Loading branch information
rocky authored Apr 23, 2023
2 parents 30fe258 + 5594cba commit c21eebc
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 53 deletions.
2 changes: 1 addition & 1 deletion admin-tools/pyenv-newest-versions
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ if [[ $0 == ${BASH_SOURCE[0]} ]] ; then
exit 1
fi

export PYVERSIONS='3.6.15 pypy3.6-7.3.1 3.7.16 pypy3.7-7.3.9 pypy3.8-7.3.10 pypy3.9-7.3.10 pyston-2.3.5 3.8.16 3.9.16 3.10.9'
export PYVERSIONS='3.6.15 pypy3.6-7.3.1 3.7.16 pypy3.7-7.3.9 pypy3.8-7.3.10 pypy3.9-7.3.10 pyston-2.3.5 3.8.16 3.9.16 3.10.10'
2 changes: 1 addition & 1 deletion admin-tools/setup-master.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
PYTHON_VERSION=3.10.9
PYTHON_VERSION=3.10.10
pyenv local $PYTHON_VERSION

if [[ $0 == $${BASH_SOURCE[0]} ]] ; then
Expand Down
51 changes: 21 additions & 30 deletions xdis/codetype/code13.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) Copyright 2020-2021 by Rocky Bernstein
# (C) Copyright 2020-2021, 2023 by Rocky Bernstein
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand All @@ -14,44 +14,35 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

from xdis.version_info import PYTHON_VERSION_TRIPLE
from xdis.codetype.base import CodeBase
from copy import deepcopy

# If there is a list of types, then any will work, but the 1st one is the correct one for types.CodeType
if PYTHON_VERSION_TRIPLE <= (2, 7):
Code13FieldTypes = {
"co_argcount": int,
"co_nlocals": int,
"co_flags": int,
"co_code": (str, bytes, list, tuple),
"co_consts": (tuple, list),
"co_names": (tuple, list),
"co_varnames": (tuple, list),
"co_filename": (str, unicode),
"co_name": (str, unicode),
}
else:
Code13FieldTypes = {
"co_argcount": int,
"co_nlocals": int,
"co_flags": int,
"co_code": (str, bytes, list, tuple),
"co_consts": (tuple, list),
"co_names": (tuple, list),
"co_varnames": (tuple, list),
"co_filename": (str, bytes),
"co_name": (str, bytes),
}
from xdis.codetype.base import CodeBase
from xdis.cross_types import UnicodeForPython3

# Table of the Code13 fields and the Python type(s) accepted for each.
# Code13.__init__ stores this table in self.fieldtypes.
# NOTE(review): presumably self.check() validates the fields against it --
# confirm in CodeBase.
#
# If there is a list of types, then any will work, but the 1st one is
# the correct one for types.CodeType
Code13FieldTypes = {
"co_argcount": int,
"co_nlocals": int,
"co_flags": int,
"co_code": (str, bytes, list, tuple),
"co_consts": (tuple, list),
"co_names": (tuple, list),
"co_varnames": (tuple, list),
# UnicodeForPython3 (from xdis.cross_types) is accepted for the two
# string-valued fields below, in addition to str and bytes.
"co_filename": (str, bytes, UnicodeForPython3),
"co_name": (str, bytes, UnicodeForPython3),
}


class Code13(CodeBase):
"""Class for a Python 1.0 .. 1.4 code object used for Python interpreters other than 1.0 .. 1.4
"""Class for a Python 1.0 .. 1.4 code object used for Python
interpreters other than 1.0 .. 1.4
For convenience in generating code objects, fields like
`co_consts`, co_names which are (immutable) tuples in the end-result can be stored
instead as (mutable) lists. Likewise the line number table `co_lnotab`
can be stored as a simple list of offset, line_number tuples.
"""

def __init__(
Expand All @@ -76,7 +67,7 @@ def __init__(
self.co_filename = co_filename
self.co_name = co_name
self.fieldtypes = Code13FieldTypes
if type(self) == Code13:
if type(self) is Code13:
self.check()
return

Expand Down
23 changes: 23 additions & 0 deletions xdis/cross_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,26 @@ def __repr__(self):
This ensures we get the "L" suffix on long types.
"""
return f"""{self.value}L"""


class UnicodeForPython3(str):
    """
    Stand-in for the Python 2 ``unicode`` type, which does not exist
    in Python 3.

    The object is a ``str`` subclass that additionally keeps the raw
    value it was constructed from (normally the undecoded bytes) in
    ``self.value``.  Its ``repr()`` shows the value as a Python 2
    unicode literal, i.e. with a leading ``u``.
    """

    def __init__(self, value):
        # str.__new__() has already used ``value`` to build the string
        # part of this object; keep the original around for __repr__().
        self.value = value

    def __repr__(self):
        """
        Replacement repr() for Python 3.
        This ensures we get the "u" prefix on unicode types.

        Returns ``u'<text>'`` when the stored value is valid UTF-8 (or
        is not a bytes-like value at all).  When the bytes cannot be
        decoded, the bytes repr is reused with its ``b`` prefix replaced
        by ``u``, so undecodable bytes show up as escapes.
        """
        raw = self.value
        if not isinstance(raw, (bytes, bytearray)):
            # Already text (or some other object): there is nothing to
            # decode, so format it directly instead of failing on .decode.
            return f"""u'{str(raw)}'"""

        try:
            text = raw.decode("utf-8")
            # Do we need to handle utf-16 and utf-32?
        except UnicodeDecodeError:
            # repr(bytes) looks like b'...' and already includes its own
            # quotes and escapes.  Only swap the prefix; wrapping it in
            # another pair of quotes would produce u''...''.
            return "u" + repr(bytes(raw))[1:]
        return f"""u'{text}'"""
18 changes: 11 additions & 7 deletions xdis/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,29 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

import marshal, py_compile, sys, tempfile, types
from struct import unpack, pack
from datetime import datetime
import marshal
import os.path as osp
import py_compile
import sys
import tempfile
import types
from datetime import datetime
from struct import pack, unpack

import xdis.marsh
import xdis.unmarshal
from xdis.version_info import PYTHON3, PYTHON_VERSION_TRIPLE
from xdis.dropbox.decrypt25 import fix_dropbox_pyc
from xdis.magics import (
IS_PYPY3,
PYTHON_MAGIC_INT,
int2magic,
magic_int2tuple,
magic2int,
magic_int2tuple,
magicint2version,
py_str2tuple,
versions,
)
from xdis.dropbox.decrypt25 import fix_dropbox_pyc
from xdis.version_info import PYTHON3, PYTHON_VERSION_TRIPLE


def is_python_source(path):
Expand Down Expand Up @@ -305,7 +309,7 @@ def load_module_from_file_object(
pass
else:
co = None
except:
except Exception:
kind, msg = sys.exc_info()[0:2]
import traceback

Expand Down
18 changes: 4 additions & 14 deletions xdis/unmarshal.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@
from struct import unpack

from xdis.codetype import to_portable
from xdis.cross_types import LongTypeForPython3
from xdis.cross_types import LongTypeForPython3, UnicodeForPython3
from xdis.magics import magic_int2tuple
from xdis.version_info import IS_PYPY, PYTHON3, PYTHON_VERSION_TRIPLE
from xdis.version_info import PYTHON3, PYTHON_VERSION_TRIPLE

if PYTHON3:

Expand Down Expand Up @@ -361,18 +361,8 @@ def t_interned(self, save_ref, bytes_for_s=False):
def t_unicode(self, save_ref, bytes_for_s=False):
strsize = unpack("<i", self.fp.read(4))[0]
unicodestring = self.fp.read(strsize)
if PYTHON_VERSION_TRIPLE == (3, 2) and IS_PYPY:
# FIXME: this isn't quite right. See
# pypy3-2.4.0/lib-python/3/email/message.py
# '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)')
return self.r_ref(unicodestring.decode("utf-8", errors="ignore"), save_ref)
else:
try:
return self.r_ref(unicodestring.decode("utf-8"), save_ref)
except UnicodeDecodeError as e:
return self.r_ref(
unicodestring.decode("utf-8", errors="ignore"), save_ref
)
if PYTHON_VERSION_TRIPLE >= (3, 0):
return self.r_ref(UnicodeForPython3(unicodestring), save_ref)

# Since Python 3.4
def t_small_tuple(self, save_ref, bytes_for_s=False):
Expand Down

0 comments on commit c21eebc

Please sign in to comment.