Skip to content

Commit

Permalink
Trim codepoint indices when trimming line data
Browse files Browse the repository at this point in the history
Trim the codepoint indices so that they stay consistent with the trimmed string.
This prevents a potential index-out-of-bounds panic when
padding is trimmed.
  • Loading branch information
sidkurella committed Feb 8, 2024
1 parent ee607f5 commit 4fb0249
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 16 deletions.
56 changes: 40 additions & 16 deletions decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,20 +197,26 @@ func (d *Decoder) readLine(v reflect.Value) (err error, ok bool) {
}

func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawValue {
var trimFunc func(string) string
var trimFunc func(in string) (out string, leftRemoved int, rightRemoved int)

switch format.alignment {
case left:
trimFunc = func(s string) string {
return strings.TrimRight(s, string(format.padChar))
trimFunc = func(s string) (out string, leftRemoved int, rightRemoved int) {
out = strings.TrimRight(s, string(format.padChar))
return out, 0, len(s) - len(out)
}
case right:
trimFunc = func(s string) string {
return strings.TrimLeft(s, string(format.padChar))
trimFunc = func(s string) (out string, leftRemoved int, rightRemoved int) {
out = strings.TrimLeft(s, string(format.padChar))
return out, len(s) - len(out), 0
}
default:
trimFunc = func(s string) string {
return strings.Trim(s, string(format.padChar))
trimFunc = func(s string) (out string, leftRemoved int, rightRemoved int) {
leftTrimmed := strings.TrimLeft(s, string(format.padChar))
leftRemoved = len(s) - len(leftTrimmed)
rightTrimmed := strings.TrimRight(leftTrimmed, string(format.padChar))
rightRemoved = len(leftTrimmed) - len(rightTrimmed)
return rightTrimmed, leftRemoved, rightRemoved
}
}

Expand All @@ -228,17 +234,34 @@ func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawVa
lineData = value.data[relevantIndices[0]:value.codepointIndices[endPos]]
}

// We trimmed data from the front of the string.
// We need to adjust the codepoint indices to reflect this, as they have shifted.
removedFromFront := relevantIndices[0]
newIndices := make([]int, 0, len(relevantIndices))
for _, idx := range relevantIndices {
newIndices = append(newIndices, idx-removedFromFront)
newIndices := relevantIndices
if relevantIndices[0] > 0 {
// We trimmed data from the front of the string.
// We need to adjust the codepoint indices to reflect this, as they have shifted.
removedFromFront := relevantIndices[0]
newIndices = make([]int, 0, len(relevantIndices))
for _, idx := range relevantIndices {
newIndices = append(newIndices, idx-removedFromFront)
}
}

// Trim the new line data.
newLineData, leftRemovedBytes, rightRemovedBytes := trimFunc(lineData)
trimmedIndices := newIndices
if leftRemovedBytes > 0 || rightRemovedBytes > 0 {
// We must trim our codepoint indices list in order to match
// the newly trimmed line data string.
trimmedIndices = []int{}
for _, idx := range newIndices {
if idx >= leftRemovedBytes && idx < len(lineData)-rightRemovedBytes {
trimmedIndices = append(trimmedIndices, idx-leftRemovedBytes)
}
}
}

return rawValue{
data: trimFunc(lineData),
codepointIndices: newIndices,
data: newLineData,
codepointIndices: trimmedIndices,
}
} else {
if len(value.data) == 0 || startPos > len(value.data) {
Expand All @@ -247,8 +270,9 @@ func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawVa
if endPos > len(value.data) {
endPos = len(value.data)
}
newLineData, _, _ := trimFunc(value.data[startPos-1 : endPos])
return rawValue{
data: trimFunc(value.data[startPos-1 : endPos]),
data: newLineData,
}
}
}
Expand Down
59 changes: 59 additions & 0 deletions decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,65 @@ func TestDecodeSetUseCodepointIndices_Nested(t *testing.T) {
}
}

// TestDecodeSetUseCodepointIndices_PaddingTrimmed decodes a line into a
// struct containing a nested struct while codepoint indices are enabled, where
// most of the nested struct's span is padding that is trimmed away.
//
// Each fixture line is exactly 10 columns wide:
//
//	columns 1-2  -> Nested.First  ("00", right-aligned, padded with '0')
//	columns 3-8  -> Nested.Second/Third/Fourth (padding only, expected "")
//	columns 9-10 -> Test.Second
//
// The six padding columns are significant: without them Test.Second would
// fall outside the line and could not decode to the expected value.
// NOTE(review): the padding is written as spaces on the assumption that a
// space is the package's default pad character — confirm.
func TestDecodeSetUseCodepointIndices_PaddingTrimmed(t *testing.T) {
	type Nested struct {
		First  int64  `fixed:"1,2,right,0"`
		Second string `fixed:"3,4"`
		Third  string `fixed:"5,6"`
		Fourth string `fixed:"7,8"`
	}
	type Test struct {
		First  Nested `fixed:"1,8"`
		Second string `fixed:"9,10"`
	}

	for _, tt := range []struct {
		name     string
		raw      []byte
		expected Test
	}{
		{
			name: "All ASCII characters",
			// "00" + six padding columns + "11".
			raw: []byte("00      11"),
			expected: Test{
				First: Nested{
					First:  0,
					Second: "",
					Third:  "",
					Fourth: "",
				},
				Second: "11",
			},
		},
		{
			name: "Multi-byte characters",
			// "00" + six padding columns + two multi-byte codepoints.
			raw: []byte("00      ☃☃"),
			expected: Test{
				First: Nested{
					First:  0,
					Second: "",
					Third:  "",
					Fourth: "",
				},
				Second: "☃☃",
			},
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			d := NewDecoder(bytes.NewReader(tt.raw))
			d.SetUseCodepointIndices(true)

			var s Test
			if err := d.Decode(&s); err != nil {
				// A failed decode leaves s meaningless, so stop here
				// rather than also reporting a value mismatch.
				t.Fatalf("Unexpected err: %v", err)
			}
			if !reflect.DeepEqual(tt.expected, s) {
				t.Errorf("Decode(%q) want %+v, have %+v", tt.raw, tt.expected, s)
			}
		})
	}
}

// Verify the behavior of Decoder.Decode at the end of a file. See
// https://github.com/ianlopshire/go-fixedwidth/issues/6 for more details.
func TestDecode_EOF(t *testing.T) {
Expand Down

0 comments on commit 4fb0249

Please sign in to comment.