Skip to content

Commit

Permalink
flate: Cleanup & reduce casts (#1050)
Browse files Browse the repository at this point in the history
Small improvement by reducing casts for matchlen.
  • Loading branch information
klauspost authored Jan 30, 2025
1 parent e0f89a9 commit 0bf3ecb
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 35 deletions.
8 changes: 4 additions & 4 deletions flate/fast_encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ func hashLen(u uint64, length, mls uint8) uint32 {
// matchlen will return the match length between offsets and t in src.
// The maximum length returned is maxMatchLength - 4.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastGen) matchlen(s, t int32, src []byte) int32 {
func (e *fastGen) matchlen(s, t int, src []byte) int32 {
if debugDeflate {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
Expand All @@ -151,7 +151,7 @@ func (e *fastGen) matchlen(s, t int32, src []byte) int32 {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
s1 := min(s+maxMatchLength-4, int32(len(src)))
s1 := min(s+maxMatchLength-4, len(src))
left := s1 - s
n := int32(0)
for left >= 8 {
Expand All @@ -178,7 +178,7 @@ func (e *fastGen) matchlen(s, t int32, src []byte) int32 {

// matchlenLong will return the match length between offsets and t in src.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
func (e *fastGen) matchlenLong(s, t int, src []byte) int32 {
if debugDeflate {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
Expand All @@ -194,7 +194,7 @@ func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
}
}
// Extend the match to be as long as possible.
left := int32(len(src)) - s
left := len(src) - s
n := int32(0)
for left >= 8 {
diff := le.Load64(src, s) ^ le.Load64(src, t)
Expand Down
21 changes: 11 additions & 10 deletions flate/level1.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package flate

import (
"fmt"

"github.com/klauspost/compress/internal/le"
)

// fastGen maintains the table for matches,
Expand Down Expand Up @@ -75,6 +77,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {

nextS := s
var candidate tableEntry
var t int32
for {
nextHash := hashLen(cv, tableBits, hashBytes)
candidate = e.table[nextHash]
Expand All @@ -86,9 +89,8 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hashLen(now, tableBits, hashBytes)

offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
t = candidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
Expand All @@ -101,8 +103,8 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur}

offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
t = candidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
Expand All @@ -118,11 +120,10 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
// literal bytes prior to s.

// Extend the 4-byte match as long as possible.
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
l := e.matchlenLong(int(s+4), int(t+4), src) + 4

// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
for t > 0 && s > nextEmit && le.Load8(src, t-1) == le.Load8(src, s-1) {
s--
t--
l++
Expand Down Expand Up @@ -194,8 +195,8 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2}

offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
t = candidate.offset - e.cur
if s-t > maxMatchOffset || uint32(x) != load3232(src, t) {
cv = x >> 8
s++
break
Expand Down
2 changes: 1 addition & 1 deletion flate/level2.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {

// Extend the 4-byte match as long as possible.
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
l := e.matchlenLong(int(s+4), int(t+4), src) + 4

// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
Expand Down
2 changes: 1 addition & 1 deletion flate/level3.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
// Extend the 4-byte match as long as possible.
//
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
l := e.matchlenLong(int(s+4), int(t+4), src) + 4

// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
Expand Down
2 changes: 1 addition & 1 deletion flate/level4.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
// them as literal bytes.

// Extend the 4-byte match as long as possible.
l := e.matchlenLong(s+4, t+4, src) + 4
l := e.matchlenLong(int(s+4), int(t+4), src) + 4

// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
Expand Down
16 changes: 8 additions & 8 deletions flate/level5.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {

t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, t2) {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
l = e.matchlen(int(s+4), int(t+4), src) + 4
ml1 := e.matchlen(int(s+4), int(t2+4), src) + 4
if ml1 > l {
t = t2
l = ml1
Expand All @@ -142,7 +142,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
l = e.matchlen(int(s+4), int(t+4), src) + 4
lCandidate = e.bTable[nextHashL]
// Store the next match

Expand All @@ -154,7 +154,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
t2 := lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if load3232(src, t2) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
ml := e.matchlen(int(nextS+4), int(t2+4), src) + 4
if ml > l {
t = t2
s = nextS
Expand All @@ -165,7 +165,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && load3232(src, t2) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
ml := e.matchlen(int(nextS+4), int(t2+4), src) + 4
if ml > l {
t = t2
s = nextS
Expand All @@ -185,9 +185,9 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {

if l == 0 {
// Extend the 4-byte match as long as possible.
l = e.matchlenLong(s+4, t+4, src) + 4
l = e.matchlenLong(int(s+4), int(t+4), src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
l += e.matchlenLong(int(s+l), int(t+l), src)
}

// Try to locate a better match by checking the end of best match...
Expand All @@ -203,7 +203,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
s2 := s + skipBeginning
off := s2 - t2
if t2 >= 0 && off < maxMatchOffset && off > 0 {
if l2 := e.matchlenLong(s2, t2, src); l2 > l {
if l2 := e.matchlenLong(int(s2), int(t2), src); l2 > l {
t = t2
l = l2
s = s2
Expand Down
20 changes: 10 additions & 10 deletions flate/level6.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
// Check the previous long candidate as well.
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, t2) {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
l = e.matchlen(int(s+4), int(t+4), src) + 4
ml1 := e.matchlen(int(s+4), int(t2+4), src) + 4
if ml1 > l {
t = t2
l = ml1
Expand All @@ -148,7 +148,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
l = e.matchlen(int(s+4), int(t+4), src) + 4

// Look up next long candidate (at nextS)
lCandidate = e.bTable[nextHashL]
Expand All @@ -162,7 +162,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
const repOff = 1
t2 := s - repeat + repOff
if load3232(src, t2) == uint32(cv>>(8*repOff)) {
ml := e.matchlen(s+4+repOff, t2+4, src) + 4
ml := e.matchlen(int(s+4+repOff), int(t2+4), src) + 4
if ml > l {
t = t2
l = ml
Expand All @@ -176,7 +176,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
t2 = lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if load3232(src, t2) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
ml := e.matchlen(int(nextS+4), int(t2+4), src) + 4
if ml > l {
t = t2
s = nextS
Expand All @@ -187,7 +187,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && load3232(src, t2) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
ml := e.matchlen(int(nextS+4), int(t2+4), src) + 4
if ml > l {
t = t2
s = nextS
Expand All @@ -207,9 +207,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {

// Extend the 4-byte match as long as possible.
if l == 0 {
l = e.matchlenLong(s+4, t+4, src) + 4
l = e.matchlenLong(int(s+4), int(t+4), src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
l += e.matchlenLong(int(s+l), int(t+l), src)
}

// Try to locate a better match by checking the end-of-match...
Expand All @@ -227,7 +227,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
off := s2 - t2
if off < maxMatchOffset {
if off > 0 && t2 >= 0 {
if l2 := e.matchlenLong(s2, t2, src); l2 > l {
if l2 := e.matchlenLong(int(s2), int(t2), src); l2 > l {
t = t2
l = l2
s = s2
Expand All @@ -237,7 +237,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
t2 = eLong.Prev.offset - e.cur - l + skipBeginning
off := s2 - t2
if off > 0 && off < maxMatchOffset && t2 >= 0 {
if l2 := e.matchlenLong(s2, t2, src); l2 > l {
if l2 := e.matchlenLong(int(s2), int(t2), src); l2 > l {
t = t2
l = l2
s = s2
Expand Down
11 changes: 11 additions & 0 deletions internal/le/unsafe_disabled.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,37 @@ import (
"encoding/binary"
)

// Load8 will load from b at index i.
func Load8[I Indexer](b []byte, i I) byte {
return b[i]
}

// Load16 will load from b at index i.
func Load16[I Indexer](b []byte, i I) uint16 {
return binary.LittleEndian.Uint16(b[i:])
}

// Load32 will load from b at index i.
func Load32[I Indexer](b []byte, i I) uint32 {
return binary.LittleEndian.Uint32(b[i:])
}

// Load64 will load from b at index i.
func Load64[I Indexer](b []byte, i I) uint64 {
return binary.LittleEndian.Uint64(b[i:])
}

// Store16 will store v at b.
func Store16(b []byte, v uint16) {
binary.LittleEndian.PutUint16(b, v)
}

// Store32 will store v at b.
func Store32(b []byte, v uint32) {
binary.LittleEndian.PutUint32(b, v)
}

// Store64 will store v at b.
func Store64(b []byte, v uint64) {
binary.LittleEndian.PutUint64(b, v)
}
7 changes: 7 additions & 0 deletions internal/le/unsafe_enabled.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ import (
"unsafe"
)

// Load8 will load from b at index i.
func Load8[I Indexer](b []byte, i I) byte {
//return binary.LittleEndian.Uint16(b[i:])
//return *(*uint16)(unsafe.Pointer(&b[i]))
return *(*byte)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(b)), i))
}

// Load16 will load from b at index i.
func Load16[I Indexer](b []byte, i I) uint16 {
//return binary.LittleEndian.Uint16(b[i:])
Expand Down

0 comments on commit 0bf3ecb

Please sign in to comment.