Skip to content

Commit 09d76e5

Browse files
limeidan authored and gopherbot committed
cmd/compile: set unalignedOK to make memcombine work properly on loong64
goos: linux
goarch: loong64
pkg: unicode/utf8
cpu: Loongson-3A6000-HV @ 2500.00MHz
                             │     old     │                 new                 │
                             │   sec/op    │   sec/op     vs base                │
ValidTenASCIIChars             7.604n ± 0%   6.805n ± 0%  -10.51% (p=0.000 n=10)
Valid100KASCIIChars            37.41µ ± 0%   16.58µ ± 0%  -55.67% (p=0.000 n=10)
ValidTenJapaneseChars          60.84n ± 0%   58.62n ± 0%   -3.64% (p=0.000 n=10)
ValidLongMostlyASCII           113.5µ ± 0%   113.5µ ± 0%        ~ (p=0.303 n=10)
ValidLongJapanese              204.6µ ± 0%   206.8µ ± 0%   +1.07% (p=0.000 n=10)
ValidStringTenASCIIChars       7.604n ± 0%   6.803n ± 0%  -10.53% (p=0.000 n=10)
ValidString100KASCIIChars      38.05µ ± 0%   17.14µ ± 0%  -54.97% (p=0.000 n=10)
ValidStringTenJapaneseChars    60.58n ± 0%   59.48n ± 0%   -1.82% (p=0.000 n=10)
ValidStringLongMostlyASCII     113.5µ ± 0%   113.4µ ± 0%   -0.10% (p=0.000 n=10)
ValidStringLongJapanese        205.9µ ± 0%   207.3µ ± 0%   +0.67% (p=0.000 n=10)
geomean                        3.324µ        2.756µ       -17.08%

Change-Id: Id43b6e2e41907bd4b92f421dacde31f048db47d6
Reviewed-on: https://go-review.googlesource.com/c/go/+/662495
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Dmitri Shuralyov <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
Auto-Submit: Keith Randall <[email protected]>
Reviewed-by: abner chenc <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
1 parent ecc06f0 commit 09d76e5

File tree

3 files changed

+8
-7
lines changed

3 files changed

+8
-7
lines changed

src/cmd/compile/internal/ssa/config.go

+1
Original file line number | Diff line number | Diff line change
@@ -283,6 +283,7 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
283 283   c.FPReg = framepointerRegLOONG64
284 284   c.LinkReg = linkRegLOONG64
285 285   c.hasGReg = true
    286 + c.unalignedOK = true
286 287   case "s390x":
287 288   c.PtrSize = 8
288 289   c.RegSize = 8

src/cmd/internal/sys/arch.go

+1 -1
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,7 @@ var ArchLoong64 = &Arch{
144 144   RegSize: 8,
145 145   MinLC: 4,
146 146   Alignment: 8, // Unaligned accesses are not guaranteed to be fast
147     - CanMergeLoads: false,
    147 + CanMergeLoads: true,
148 148   HasLR: true,
149 149   FixedFrameSize: 8, // LR
150 150   }

test/codegen/memcombine.go

+6 -6
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ func load_le64(b []byte) uint64 {
19 19   // amd64:`MOVQ\s\(.*\),`,-`MOV[BWL]\t[^$]`,-`OR`
20 20   // s390x:`MOVDBR\s\(.*\),`
21 21   // arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`
22    - // loong64:`MOVBU\s\(R[0-9]+\),`
   22 + // loong64:`MOVV\s\(R[0-9]+\),`
23 23   // ppc64le:`MOVD\s`,-`MOV[BHW]Z`
24 24   // ppc64:`MOVDBR\s`,-`MOV[BHW]Z`
25 25   return binary.LittleEndian.Uint64(b)
@@ -29,7 +29,7 @@ func load_le64_idx(b []byte, idx int) uint64 {
29 29   // amd64:`MOVQ\s\(.*\)\(.*\*1\),`,-`MOV[BWL]\t[^$]`,-`OR`
30 30   // s390x:`MOVDBR\s\(.*\)\(.*\*1\),`
31 31   // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]`
32    - // loong64:`MOVBU\s\(R[0-9]+\)\(R[0-9]+\),`
   32 + // loong64:`MOVV\s\(R[0-9]+\)\(R[0-9]+\),`
33 33   // ppc64le:`MOVD\s`,-`MOV[BHW]Z\s`
34 34   // ppc64:`MOVDBR\s`,-`MOV[BHW]Z\s`
35 35   return binary.LittleEndian.Uint64(b[idx:])
@@ -40,7 +40,7 @@ func load_le32(b []byte) uint32 {
40 40   // 386:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
41 41   // s390x:`MOVWBR\s\(.*\),`
42 42   // arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`
43    - // loong64:`MOVBU\s\(R[0-9]+\),`
   43 + // loong64:`MOVWU\s\(R[0-9]+\),`
44 44   // ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
45 45   // ppc64:`MOVWBR\s`,-`MOV[BH]Z\s`
46 46   return binary.LittleEndian.Uint32(b)
@@ -51,7 +51,7 @@ func load_le32_idx(b []byte, idx int) uint32 {
51 51   // 386:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
52 52   // s390x:`MOVWBR\s\(.*\)\(.*\*1\),`
53 53   // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]`
54    - // loong64:`MOVBU\s\(R[0-9]+\)\(R[0-9]+\),`
   54 + // loong64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`
55 55   // ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
56 56   // ppc64:`MOVWBR\s`,-`MOV[BH]Z\s'
57 57   return binary.LittleEndian.Uint32(b[idx:])
@@ -61,7 +61,7 @@ func load_le16(b []byte) uint16 {
61 61   // amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
62 62   // ppc64le:`MOVHZ\s`,-`MOVBZ`
63 63   // arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB`
64    - // loong64:`MOVBU\s\(R[0-9]+\),`
   64 + // loong64:`MOVHU\s\(R[0-9]+\),`
65 65   // s390x:`MOVHBR\s\(.*\),`
66 66   // ppc64:`MOVHBR\s`,-`MOVBZ`
67 67   return binary.LittleEndian.Uint16(b)
@@ -72,7 +72,7 @@ func load_le16_idx(b []byte, idx int) uint16 {
72 72   // ppc64le:`MOVHZ\s`,-`MOVBZ`
73 73   // ppc64:`MOVHBR\s`,-`MOVBZ`
74 74   // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
75    - // loong64:`MOVBU\s\(R[0-9]+\)\(R[0-9]+\),`
   75 + // loong64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`
76 76   // s390x:`MOVHBR\s\(.*\)\(.*\*1\),`
77 77   return binary.LittleEndian.Uint16(b[idx:])
78 78   }

0 commit comments

Comments
 (0)