From 179cf086e7e06245208704de4b2548a2db1b1eaa Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Sun, 17 Sep 2023 13:18:24 +0900 Subject: [PATCH] Relax a GOT load into a PC-relative address materialization https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/397 --- elf/arch-riscv.cc | 71 ++++++++++++++++++++++++++++++++++- test/elf/riscv64_relax-got.sh | 53 ++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 2 deletions(-) create mode 100755 test/elf/riscv64_relax-got.sh diff --git a/elf/arch-riscv.cc b/elf/arch-riscv.cc index c6fb8b36b1..552e941baa 100644 --- a/elf/arch-riscv.cc +++ b/elf/arch-riscv.cc @@ -362,9 +362,55 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { } break; } - case R_RISCV_GOT_HI20: - write_utype(loc, G + GOT + A - P); + case R_RISCV_GOT_HI20: { + // This relocation usually refers to an AUIPC + LD instruction + // pair to load a symbol value from the GOT. If the symbol value + // is actually a link-time constant, we can materialize the value + // directly into a register to eliminate a memory load. + i64 rd = get_rd(rel.r_offset); + + switch (removed_bytes) { + case 6: + // c.li , val + *(ul16 *)loc = 0b010'0'00000'00000'01 | (rd << 7); + write_citype(loc, sym.get_addr(ctx)); + i += 3; + break; + case 4: + // addi , zero, val + *(ul32 *)loc = 0b0010011 | (rd << 7); + write_itype(loc, sym.get_addr(ctx)); + i += 3; + break; + case 0: + if (ctx.arg.relax && + sym.is_pcrel_linktime_const(ctx) && + i + 3 < rels.size() && + rels[i + 1].r_type == R_RISCV_RELAX && + rels[i + 2].r_type == R_RISCV_PCREL_LO12_I && + rels[i + 2].r_offset == rels[i].r_offset + 4 && + file.symbols[rels[i + 1].r_sym]->value == r_offset && + rels[i + 3].r_type == R_RISCV_RELAX) { + i64 val = S + A - P; + if ((i32)val == val) { + // auipc , %hi20(val) + write_utype(loc, val); + + // addi , , %lo12(val) + *(ul32 *)(loc + 4) = 0b0010011 | (rd << 15) | (rd << 7); + write_itype(loc + 4, val); + i += 3; + break; + } + } + + write_utype(loc, G + GOT + A - P); + break; + default: + unreachable(); + } break; + } case R_RISCV_TLS_GOT_HI20: write_utype(loc, sym.get_gottp_addr(ctx) + A - P); break; @@ -932,6 +978,27 @@ static void shrink_section(Context &ctx, InputSection &isec, bool use_rvc) } break; } + case R_RISCV_GOT_HI20: { + if (sym.is_absolute() && + i + 3 < rels.size() && + rels[i + 1].r_type == R_RISCV_RELAX && + rels[i + 2].r_type == R_RISCV_PCREL_LO12_I && + rels[i + 2].r_offset == rels[i].r_offset + 4 && + isec.file.symbols[rels[i + 2].r_sym]->value == rels[i].r_offset && + rels[i + 3].r_type == R_RISCV_RELAX) { + u64 val = sym.get_addr(ctx) + r.r_addend; + i64 rd = get_rd(r.r_offset); + + if (use_rvc && rd != 0 && sign_extend(val, 5) == val) { + // Replace AUIPC + LD with C.LI. + delta += 6; + } else if (sign_extend(val, 11) == val) { + // Replace AUIPC + LD with ADDI. + delta += 4; + } + } + break; + } case R_RISCV_HI20: { u64 val = sym.get_addr(ctx) + r.r_addend; i64 rd = get_rd(r.r_offset); diff --git a/test/elf/riscv64_relax-got.sh b/test/elf/riscv64_relax-got.sh new file mode 100755 index 0000000000..257d4b5646 --- /dev/null +++ b/test/elf/riscv64_relax-got.sh @@ -0,0 +1,53 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +[[ $MACHINE = riscv* ]] || skip + +cat < + +int get_sym1(); +int get_sym2(); +int get_sym3(); +int get_sym4(); +int get_sym5(); + +int main() { + printf("%x %x %x %x %x\n", + get_sym1(), get_sym2(), get_sym3(), get_sym4(), get_sym5()); +} +EOF + +$CC -B. -o $t/exe1 $t/a.o $t/b.o $t/c.o -Wl,--no-relax +$QEMU $t/exe1 | grep -Eq '^0 ba beef 11beef deadbeef$' + +$CC -B. -o $t/exe2 $t/a.o $t/b.o $t/c.o +$QEMU $t/exe2 | grep -Eq '^0 ba beef 11beef deadbeef$'