Skip to content

Commit

Permalink
pulley: Implement a new br_table instruction
Browse files Browse the repository at this point in the history
This is intended to match WebAssembly's `br_table` as well as Cranelift's
version of it. It is implemented as a new `br_table32` opcode that carries
a 32-bit count of branch targets; the targets themselves are encoded
immediately after `br_table32`, each as a `PcRelOffset` (a 32-bit offset).
This bakes more of a "macro opcode" into the interpreter, rather than
relying on a handful of more primitive opcodes that would achieve the same
result with loads/indirect jumps/comparisons/etc.
  • Loading branch information
alexcrichton committed Nov 22, 2024
1 parent 6bc7c44 commit 708fa5d
Show file tree
Hide file tree
Showing 14 changed files with 369 additions and 44 deletions.
11 changes: 11 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@
;; A pseudo-instruction to update unwind info.
(Unwind (inst UnwindInst))

;; Implementation of `br_table`, uses `idx` to jump to one of `targets` or
;; jumps to `default` if it's out-of-bounds.
(BrTable
(idx XReg)
(default MachLabel)
(targets BoxVecMachLabel))

;;;; Actual Instructions ;;;;

;; Raise a trap.
Expand Down Expand Up @@ -547,6 +554,10 @@
(_ Unit (emit (MInst.BitcastIntFromFloat64 dst src))))
dst))

;; Helper that emits the `BrTable` pseudo-instruction: branches on `idx` to
;; one of `labels`, or to `default` when `idx` is out-of-bounds. Produces no
;; value (`Unit`); the only effect is the emitted instruction.
(decl gen_br_table (XReg MachLabel BoxVecMachLabel) Unit)
(rule (gen_br_table idx default labels)
(emit (MInst.BrTable idx default labels)))

;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl gen_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput)
Expand Down
71 changes: 56 additions & 15 deletions cranelift/codegen/src/isa/pulley_shared/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ where
// (with an `EmitIsland`). We check this in debug builds. This is `mut`
// to allow disabling the check for `BrTable`, which emits its own island
// when needed and then resets the start offset to the current offset.
let start = sink.cur_offset();
pulley_emit(self, sink, emit_info, state, start);
let mut start = sink.cur_offset();
pulley_emit(self, sink, emit_info, state, &mut start);

let end = sink.cur_offset();
assert!(
Expand All @@ -124,9 +124,9 @@ where
fn pulley_emit<P>(
inst: &Inst,
sink: &mut MachBuffer<InstAndKind<P>>,
_emit_info: &EmitInfo,
emit_info: &EmitInfo,
state: &mut EmitState<P>,
start_offset: u32,
start_offset: &mut u32,
) where
P: PulleyTargetKind,
{
Expand Down Expand Up @@ -218,8 +218,8 @@ fn pulley_emit<P>(
Inst::IndirectCall { .. } => todo!(),

Inst::Jump { label } => {
sink.use_label_at_offset(start_offset + 1, *label, LabelUse::Jump(1));
sink.add_uncond_branch(start_offset, start_offset + 5, *label);
sink.use_label_at_offset(*start_offset + 1, *label, LabelUse::Jump(1));
sink.add_uncond_branch(*start_offset, *start_offset + 5, *label);
enc::jump(sink, 0x00000000);
}

Expand All @@ -229,18 +229,18 @@ fn pulley_emit<P>(
not_taken,
} => {
// If taken.
let taken_start = start_offset + 2;
let taken_start = *start_offset + 2;
let taken_end = taken_start + 4;

sink.use_label_at_offset(taken_start, *taken, LabelUse::Jump(2));
let mut inverted = SmallVec::<[u8; 16]>::new();
enc::br_if_not(&mut inverted, c, 0x00000000);
debug_assert_eq!(
inverted.len(),
usize::try_from(taken_end - start_offset).unwrap()
usize::try_from(taken_end - *start_offset).unwrap()
);

sink.add_cond_branch(start_offset, taken_end, *taken, &inverted);
sink.add_cond_branch(*start_offset, taken_end, *taken, &inverted);
enc::br_if(sink, c, 0x00000000);
debug_assert_eq!(sink.cur_offset(), taken_end);

Expand All @@ -261,7 +261,7 @@ fn pulley_emit<P>(
} => {
br_if_cond_helper(
sink,
start_offset,
*start_offset,
*src1,
*src2,
taken,
Expand All @@ -279,7 +279,7 @@ fn pulley_emit<P>(
} => {
br_if_cond_helper(
sink,
start_offset,
*start_offset,
*src1,
*src2,
taken,
Expand All @@ -297,7 +297,7 @@ fn pulley_emit<P>(
} => {
br_if_cond_helper(
sink,
start_offset,
*start_offset,
*src1,
*src2,
taken,
Expand All @@ -315,7 +315,7 @@ fn pulley_emit<P>(
} => {
br_if_cond_helper(
sink,
start_offset,
*start_offset,
*src1,
*src2,
taken,
Expand All @@ -333,7 +333,7 @@ fn pulley_emit<P>(
} => {
br_if_cond_helper(
sink,
start_offset,
*start_offset,
*src1,
*src2,
taken,
Expand All @@ -351,7 +351,7 @@ fn pulley_emit<P>(
} => {
br_if_cond_helper(
sink,
start_offset,
*start_offset,
*src1,
*src2,
taken,
Expand Down Expand Up @@ -484,6 +484,47 @@ fn pulley_emit<P>(
Inst::BitcastIntFromFloat64 { dst, src } => enc::bitcast_int_from_float_64(sink, dst, src),
Inst::BitcastFloatFromInt32 { dst, src } => enc::bitcast_float_from_int_32(sink, dst, src),
Inst::BitcastFloatFromInt64 { dst, src } => enc::bitcast_float_from_int_64(sink, dst, src),

Inst::BrTable {
idx,
default,
targets,
} => {
// Encode the `br_table32` instruction directly which expects the
// next `amt` 4-byte integers to all be relative offsets. Each
// offset is the pc-relative offset of the branch destination.
//
// Pulley clamps the branch targets to the `amt` specified so the
// final branch target is the default jump target.
//
// Note that this instruction may have many branch targets so it
// manually checks to see if an island is needed. If so we emit a
// jump around the island before the `br_table32` itself gets
// emitted.
let amt = u32::try_from(targets.len() + 1).expect("too many branch targets");
let br_table_size = amt * 4 + 6;
if sink.island_needed(br_table_size) {
let label = sink.get_label();
<InstAndKind<P>>::from(Inst::Jump { label }).emit(sink, emit_info, state);
sink.emit_island(br_table_size, &mut state.ctrl_plane);
sink.bind_label(label, &mut state.ctrl_plane);
}
enc::br_table32(sink, *idx, amt);
for target in targets.iter() {
let offset = sink.cur_offset();
sink.use_label_at_offset(offset, *target, LabelUse::Jump(0));
sink.put4(0);
}
let offset = sink.cur_offset();
sink.use_label_at_offset(offset, *default, LabelUse::Jump(0));
sink.put4(0);

// We manually handled `emit_island` above when dealing with
// `island_needed` so update the starting offset to the current
// offset so this instruction doesn't accidentally trigger
// the assertion that we're always under worst-case-size.
*start_offset = sink.cur_offset();
}
}
}

Expand Down
18 changes: 16 additions & 2 deletions cranelift/codegen/src/isa/pulley_shared/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,10 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
collector.reg_use(src);
collector.reg_def(dst);
}

Inst::BrTable { idx, .. } => {
collector.reg_use(idx);
}
}
}

Expand Down Expand Up @@ -370,6 +374,7 @@ where
| Inst::BrIfXslteq32 { .. }
| Inst::BrIfXult32 { .. }
| Inst::BrIfXulteq32 { .. } => MachTerminator::Cond,
Inst::BrTable { .. } => MachTerminator::Indirect,
_ => MachTerminator::None,
}
}
Expand Down Expand Up @@ -437,8 +442,8 @@ where
}
}

fn gen_jump(_target: MachLabel) -> Self {
todo!()
fn gen_jump(target: MachLabel) -> Self {
Inst::Jump { label: target }.into()
}

fn worst_case_size() -> CodeOffset {
Expand Down Expand Up @@ -839,6 +844,15 @@ impl Inst {
let src = format_reg(**src);
format!("{dst} = bitcast_float_from_int64 {src}")
}

Inst::BrTable {
idx,
default,
targets,
} => {
let idx = format_reg(**idx);
format!("br_table {idx} {default:?} {targets:?}")
}
}
}
}
Expand Down
6 changes: 2 additions & 4 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,8 @@
(lower_brif_of_icmp32 (IntCC.UnsignedLessThanOrEqual) b a then else))

;; Branch tables.
(decl lower_br_table (Reg MachLabelSlice) Unit)
(extern constructor lower_br_table lower_br_table)
(rule (lower_branch (br_table index _) targets)
(lower_br_table index targets))
(rule (lower_branch (br_table index _) (jump_table_targets default targets))
(gen_br_table index default targets))

;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
4 changes: 0 additions & 4 deletions cranelift/codegen/src/isa/pulley_shared/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@ where
crate::isle_lower_prelude_methods!(InstAndKind<P>);
crate::isle_prelude_caller_methods!(PulleyABICallSite<P>);

fn lower_br_table(&mut self, _index: Reg, _targets: &[MachLabel]) -> Unit {
todo!()
}

fn vreg_new(&mut self, r: Reg) -> VReg {
VReg::new(r).unwrap()
}
Expand Down
69 changes: 69 additions & 0 deletions cranelift/filetests/filetests/isa/pulley32/br_table.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
test compile precise-output
target pulley32

function %br_table(i32) -> i32 {
block0(v0: i32):
br_table v0, block4, [block1, block2, block2, block3]

block1:
v1 = iconst.i32 1
jump block5(v1)

block2:
v2 = iconst.i32 2
jump block5(v2)

block3:
v3 = iconst.i32 3
jump block5(v3)

block4:
v4 = iconst.i32 4
jump block5(v4)

block5(v5: i32):
v6 = iadd.i32 v0, v5
return v6
}

; VCode:
; block0:
; br_table x0 MachLabel(6) [MachLabel(5), MachLabel(1), MachLabel(2), MachLabel(3)]
; block1:
; jump label4
; block2:
; jump label4
; block3:
; x5 = xconst8 3
; jump label7
; block4:
; x5 = xconst8 2
; jump label7
; block5:
; x5 = xconst8 1
; jump label7
; block6:
; x5 = xconst8 4
; jump label7
; block7:
; x0 = xadd32 x0, x5
; ret
;
; Disassembled:
; br_table32 x0, 5
; 0x29 // target = 0x2f
; 0x1d // target = 0x27
; 0x19 // target = 0x27
; 0xd // target = 0x1f
; 0x21 // target = 0x37
; jump 0xd // target = 0x27
; xconst8 x5, 3
; jump 0x18 // target = 0x3a
; xconst8 x5, 2
; jump 0x10 // target = 0x3a
; xconst8 x5, 1
; jump 0x8 // target = 0x3a
; xconst8 x5, 4
; xadd32 x0, x0, x5
; ret

69 changes: 69 additions & 0 deletions cranelift/filetests/filetests/isa/pulley64/br_table.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
test compile precise-output
target pulley64

function %br_table(i32) -> i32 {
block0(v0: i32):
br_table v0, block4, [block1, block2, block2, block3]

block1:
v1 = iconst.i32 1
jump block5(v1)

block2:
v2 = iconst.i32 2
jump block5(v2)

block3:
v3 = iconst.i32 3
jump block5(v3)

block4:
v4 = iconst.i32 4
jump block5(v4)

block5(v5: i32):
v6 = iadd.i32 v0, v5
return v6
}

; VCode:
; block0:
; br_table x0 MachLabel(6) [MachLabel(5), MachLabel(1), MachLabel(2), MachLabel(3)]
; block1:
; jump label4
; block2:
; jump label4
; block3:
; x5 = xconst8 3
; jump label7
; block4:
; x5 = xconst8 2
; jump label7
; block5:
; x5 = xconst8 1
; jump label7
; block6:
; x5 = xconst8 4
; jump label7
; block7:
; x0 = xadd32 x0, x5
; ret
;
; Disassembled:
; br_table32 x0, 5
; 0x29 // target = 0x2f
; 0x1d // target = 0x27
; 0x19 // target = 0x27
; 0xd // target = 0x1f
; 0x21 // target = 0x37
; jump 0xd // target = 0x27
; xconst8 x5, 3
; jump 0x18 // target = 0x3a
; xconst8 x5, 2
; jump 0x10 // target = 0x3a
; xconst8 x5, 1
; jump 0x8 // target = 0x3a
; xconst8 x5, 4
; xadd32 x0, x0, x5
; ret

Loading

0 comments on commit 708fa5d

Please sign in to comment.