Skip to content
This repository has been archived by the owner on Sep 14, 2018. It is now read-only.

Commit

Permalink
Merge remote-tracking branch 'paweljasinski/cp34951' into ipy-2.7-maint
Browse files Browse the repository at this point in the history
  • Loading branch information
jdhardy committed Mar 19, 2014
2 parents 3881f69 + 1efe917 commit edd1cc4
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 5 deletions.
62 changes: 57 additions & 5 deletions Languages/IronPython/IronPython.Modules/_codecs.cs
Original file line number Diff line number Diff line change
Expand Up @@ -641,18 +641,22 @@ private static PythonTuple DoDecode(Encoding encoding, object input, string erro

#if FEATURE_ENCODING // DecoderFallback
encoding = (Encoding)encoding.Clone();

ExceptionFallBack fallback = null;
if (fAlwaysThrow) {
encoding.DecoderFallback = DecoderFallback.ExceptionFallback;
} else {
fallback = new ExceptionFallBack(bytes);
fallback = (encoding is UTF8Encoding && DotNet) ?
// This is a workaround for a bug, see ExceptionFallbackBufferUtf8DotNet
// for more details.
new ExceptionFallBackUtf8DotNet(bytes):
new ExceptionFallBack(bytes);
encoding.DecoderFallback = fallback;
}
#endif
string decoded = encoding.GetString(bytes, 0, bytes.Length);
int badByteCount = 0;


#if FEATURE_ENCODING // DecoderFallback
if (!fAlwaysThrow) {
byte[] badBytes = fallback.buffer.badBytes;
Expand All @@ -666,6 +670,14 @@ private static PythonTuple DoDecode(Encoding encoding, object input, string erro
return tuple;
}


// True when the type "Mono.Runtime" cannot be resolved, i.e. the process is
// presumably not running on Mono. Assigned once by the static constructor.
internal static readonly bool DotNet;

// Probes for the Mono runtime marker type and records the result in DotNet.
static PythonCodecs() {
    Type monoRuntimeType = Type.GetType("Mono.Runtime");
    DotNet = monoRuntimeType == null;
}


private static int CheckPreamble(Encoding enc, string buffer) {
byte[] preamble = enc.GetPreamble();

Expand Down Expand Up @@ -723,6 +735,11 @@ private static PythonTuple DoEncode(Encoding encoding, object input, string erro
class ExceptionFallBack : DecoderFallback {
internal ExceptionFallbackBuffer buffer;

// This ctor can be removed as soon as workaround for utf8 encoding in .net is
// no longer necessary.
protected ExceptionFallBack() {
}

public ExceptionFallBack(byte[] bytes) {
buffer = new ExceptionFallbackBuffer(bytes);
}
Expand All @@ -738,16 +755,17 @@ public override int MaxCharCount {

class ExceptionFallbackBuffer : DecoderFallbackBuffer {
internal byte[] badBytes;
private byte[] inputBytes;
protected byte[] inputBytes;

public ExceptionFallbackBuffer(byte[] bytes) {
inputBytes = bytes;
}

public override bool Fallback(byte[] bytesUnknown, int index) {
if (index > 0 && index + bytesUnknown.Length != inputBytes.Length) {
throw PythonOps.UnicodeEncodeError("failed to decode bytes at index {0}", index);
throw PythonOps.UnicodeDecodeError(
String.Format("failed to decode bytes at index: {0}", index), bytesUnknown, index);
}

// just some bad bytes at the end
badBytes = bytesUnknown;
return false;
Expand All @@ -765,6 +783,40 @@ public override int Remaining {
get { return 0; }
}
}

// Decoder fallback that installs an ExceptionFallbackBufferUtf8DotNet as its buffer.
// Part of the UTF-8 workaround for .NET; delete this class once that workaround
// is no longer necessary.
class ExceptionFallBackUtf8DotNet : ExceptionFallBack {
    public ExceptionFallBackUtf8DotNet(byte[] bytes)
        : base() {
        // The parameterless base constructor leaves the buffer unset, so it is
        // assigned here with the index-correcting variant.
        this.buffer = new ExceptionFallbackBufferUtf8DotNet(bytes);
    }
}

// Fallback buffer that corrects the index reported for UTF-8 input on .NET before
// delegating to ExceptionFallbackBuffer. Delete this class once the workaround
// for UTF-8 decoding on .NET is no longer necessary.
class ExceptionFallbackBufferUtf8DotNet : ExceptionFallbackBuffer {
    // Becomes true once the first Fallback call has been forwarded to the base class;
    // every later call is answered with false without further processing.
    private bool ignoreNext = false;

    public ExceptionFallbackBufferUtf8DotNet(byte[] bytes)
        : base(bytes) {
    }

    public override bool Fallback(byte[] bytesUnknown, int index) {
        // On .NET with UTF-8 the value of index does not match Microsoft's documentation
        // (http://msdn.microsoft.com/en-us/library/bdftay9c%28v=vs.100%29.aspx):
        // it arrives decreased by the size of bytesUnknown.
        // Observed on Windows 7 64, .NET 4.0.30319.18408, all recommended patches as of 06.02.2014
        if (!ignoreNext) {
            // Undo the unexpected offset before handing the call to the base implementation.
            int adjustedIndex = index + bytesUnknown.Length;
            ignoreNext = true;
            return base.Fallback(bytesUnknown, adjustedIndex);
        }

        // .NET sometimes calls a second time after this method has returned false;
        // in that case there is nothing left to do.
        return false;
    }
}
#endif

}
Original file line number Diff line number Diff line change
Expand Up @@ -4053,6 +4053,10 @@ public static Exception UnicodeDecodeError(string format, params object[] args)
return new System.Text.DecoderFallbackException(string.Format(format, args));
}

/// <summary>
/// Creates a <see cref="System.Text.DecoderFallbackException"/> carrying the given
/// message, the bytes that could not be decoded and their index.
/// </summary>
/// <param name="message">The error message for the exception.</param>
/// <param name="bytesUnknown">The input bytes that could not be decoded.</param>
/// <param name="index">The index passed on to the exception together with <paramref name="bytesUnknown"/>.</param>
/// <returns>The newly created exception; it is returned, not thrown.</returns>
public static Exception UnicodeDecodeError(string message, byte[] bytesUnknown, int index) {
    System.Text.DecoderFallbackException decodeError =
        new System.Text.DecoderFallbackException(message, bytesUnknown, index);
    return decodeError;
}

/// <summary>
/// Creates a <see cref="System.Text.EncoderFallbackException"/> whose message is
/// <paramref name="format"/> formatted with <paramref name="args"/>.
/// </summary>
/// <param name="format">A composite format string as accepted by <c>string.Format</c>.</param>
/// <param name="args">The values substituted into <paramref name="format"/>.</param>
/// <returns>The newly created exception; it is returned, not thrown.</returns>
public static Exception UnicodeEncodeError(string format, params object[] args) {
    string message = string.Format(format, args);
    return new System.Text.EncoderFallbackException(message);
}
Expand Down
20 changes: 20 additions & 0 deletions Languages/IronPython/Tests/modules/io_related/codecs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,26 @@ def test_utf_8_decode():
AreEqual(new_str, u'abc')
AreEqual(size, 3)


def test_cp34951():
    '''
    Regression test for cp34951: utf_8_decode must stop quietly before a
    multi-byte sequence that is merely truncated at the end of the input,
    but must raise UnicodeDecodeError when the sequence is actually invalid.
    '''
    def internal_cp34951(sample1):
        AreEqual(codecs.utf_8_decode(sample1), (u'12\u20ac\x0a', 6))
        sample1 = sample1[:-1] # 12<euro>
        AreEqual(codecs.utf_8_decode(sample1), (u'12\u20ac', 5))
        sample1 = sample1[:-1] # 12<incomplete euro>
        AreEqual(codecs.utf_8_decode(sample1), (u'12', 2))

        # Following the truncated euro sequence with 0x7f (not a continuation
        # byte) makes the input invalid rather than just incomplete.
        sample1 = sample1 + b'\x7f'
        try:
            codecs.utf_8_decode(sample1)
        except Exception as e:
            AreEqual(type(e), UnicodeDecodeError)
        else:
            # Kept outside the try block so this failure is not swallowed by
            # the except clause above and reported as a type mismatch.
            Assert(False, "expected UnicodeDecodeError not raised")

    internal_cp34951(b'\x31\x32\xe2\x82\xac\x0a') # 12<euro><lf>
    internal_cp34951(b'\xef\xbb\xbf\x31\x32\xe2\x82\xac\x0a') # <BOM>12<euro><lf>


def test_utf_8_encode():
'''
'''
Expand Down

0 comments on commit edd1cc4

Please sign in to comment.