Skip to content
This repository has been archived by the owner on Sep 14, 2018. It is now read-only.

Commit

Permalink
moved workaround into separate classes
Browse files Browse the repository at this point in the history
  • Loading branch information
paweljasinski committed Feb 11, 2014
1 parent bba3817 commit df802a5
Showing 1 changed file with 48 additions and 22 deletions.
70 changes: 48 additions & 22 deletions Languages/IronPython/IronPython.Modules/_codecs.cs
Original file line number Diff line number Diff line change
Expand Up @@ -644,8 +644,11 @@ private static PythonTuple DoDecode(Encoding encoding, object input, string erro
if (fAlwaysThrow) {
encoding.DecoderFallback = DecoderFallback.ExceptionFallback;
} else {
var utf8Workaround = encoding == Encoding.UTF8 && DotNet;
fallback = new ExceptionFallBack(bytes, utf8Workaround);
fallback = (encoding is UTF8Encoding && DotNet) ?
// This is a workaround for a bug, see ExceptionFallbackBufferUtf8DotNet
// for more details.
new ExceptionFallBackUtf8DotNet(bytes):
new ExceptionFallBack(bytes);
encoding.DecoderFallback = fallback;
}
#endif
Expand Down Expand Up @@ -731,8 +734,13 @@ private static PythonTuple DoEncode(Encoding encoding, object input, string erro
class ExceptionFallBack : DecoderFallback {
internal ExceptionFallbackBuffer buffer;

public ExceptionFallBack(byte[] bytes, bool workaround) {
buffer = new ExceptionFallbackBuffer(bytes, workaround);
// This ctor can be removed as soon as the workaround for the UTF-8 encoding in .NET
// is no longer necessary.
protected ExceptionFallBack() {
}

public ExceptionFallBack(byte[] bytes) {
buffer = new ExceptionFallbackBuffer(bytes);
}

public override DecoderFallbackBuffer CreateFallbackBuffer() {
Expand All @@ -746,35 +754,19 @@ public override int MaxCharCount {

class ExceptionFallbackBuffer : DecoderFallbackBuffer {
internal byte[] badBytes;
private byte[] inputBytes;
private bool ignoreNext = false;
private bool _dotNetUtf8 = false;
protected byte[] inputBytes;

public ExceptionFallbackBuffer(byte[] bytes, bool dotNetUtf8) {
public ExceptionFallbackBuffer(byte[] bytes) {
inputBytes = bytes;
_dotNetUtf8 = dotNetUtf8;
}

public override bool Fallback(byte[] bytesUnknown, int index) {
if (_dotNetUtf8) {
// in case of dot net and utf-8 value index does not conform to spec
if (ignoreNext) {
// dot net sometimes calls second time after this method returns false
// if this is the case, do nothing
return false;
}
// adjust index, adjustment value was discovered experimentally
index = index + bytesUnknown.Length;
}

if (index > 0 && index + bytesUnknown.Length != inputBytes.Length) {
throw PythonOps.UnicodeDecodeError(
String.Format("failed to decode bytes at index: {0}", index), bytesUnknown, index);
}

// just some bad bytes at the end
badBytes = bytesUnknown;
ignoreNext = true;
return false;
}

Expand All @@ -790,6 +782,40 @@ public override int Remaining {
get { return 0; }
}
}

// Decoder fallback that hands out an ExceptionFallbackBufferUtf8DotNet instead of the
// regular ExceptionFallbackBuffer.
// Delete this class once the workaround for the UTF-8 encoding in .NET is no longer needed.
class ExceptionFallBackUtf8DotNet : ExceptionFallBack {
    public ExceptionFallBackUtf8DotNet(byte[] bytes)
        : base() {
        // The parameterless base ctor does not assign 'buffer', so it is set here.
        this.buffer = new ExceptionFallbackBufferUtf8DotNet(bytes);
    }
}

// Fallback buffer that corrects the index reported for UTF-8 on .NET before running the
// regular ExceptionFallbackBuffer handling.
// Delete this class once the workaround for the UTF-8 encoding in .NET is no longer needed.
class ExceptionFallbackBufferUtf8DotNet : ExceptionFallbackBuffer {
    // Becomes true once the first Fallback call has been forwarded; later calls are ignored.
    private bool _handled;

    public ExceptionFallbackBufferUtf8DotNet(byte[] bytes)
        : base(bytes) {
    }

    // With .NET and UTF-8 the value of 'index' does not conform to the documentation provided
    // by Microsoft: it is mysteriously decreased by the size of bytesUnknown.
    // Tested on Windows 7 64, .NET 4.0.30319.18408, all recommended patches as of 06.02.2014.
    // http://msdn.microsoft.com/en-us/library/System.Text.EncoderFallbackBuffer%28v=vs.110%29.aspx
    // NOTE(review): the link above is for EncoderFallbackBuffer, while this type derives from
    // a DecoderFallbackBuffer - confirm the intended page.
    public override bool Fallback(byte[] bytesUnknown, int index) {
        if (!_handled) {
            // Undo the offset, then let the base class decide whether to throw.
            int correctedIndex = index + bytesUnknown.Length;
            _handled = true;
            return base.Fallback(bytesUnknown, correctedIndex);
        }

        // .NET sometimes calls a second time after this method has returned false;
        // in that case do nothing.
        return false;
    }
}
#endif

}

0 comments on commit df802a5

Please sign in to comment.