From 15ce5b7fff912d3dfa154e58da2dc1860b6b2e0c Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Thu, 22 Aug 2024 01:41:04 +0200 Subject: [PATCH 01/15] Rewrite fuel_asm::impl_instructions! to a proc macro --- fuel-asm/Cargo.toml | 1 + fuel-asm/src/lib.rs | 51 +- fuel-asm/src/macros.rs | 1403 ---------------------------------------- fuel-asm/src/pack.rs | 112 ---- fuel-asm/src/unpack.rs | 118 ---- fuel-derive/Cargo.toml | 1 + fuel-derive/src/asm.rs | 794 +++++++++++++++++++++++ fuel-derive/src/lib.rs | 7 + 8 files changed, 823 insertions(+), 1664 deletions(-) delete mode 100644 fuel-asm/src/pack.rs delete mode 100644 fuel-asm/src/unpack.rs create mode 100644 fuel-derive/src/asm.rs diff --git a/fuel-asm/Cargo.toml b/fuel-asm/Cargo.toml index 5d5b7bd201..1568e1ef4c 100644 --- a/fuel-asm/Cargo.toml +++ b/fuel-asm/Cargo.toml @@ -14,6 +14,7 @@ description = "Atomic types of the FuelVM." arbitrary = { version = "1.1", features = ["derive"], optional = true } bitflags = { workspace = true } fuel-types = { workspace = true } +fuel-derive = { workspace = true } serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } strum = { version = "0.24", default-features = false, features = ["derive"] } wasm-bindgen = { version = "0.2.88", optional = true } diff --git a/fuel-asm/src/lib.rs b/fuel-asm/src/lib.rs index 9c89930cc5..19d1d6bb17 100644 --- a/fuel-asm/src/lib.rs +++ b/fuel-asm/src/lib.rs @@ -18,15 +18,10 @@ extern crate alloc; mod args; -mod panic_instruction; -// This is `pub` to make documentation for the private `impl_instructions!` macro more -// accessible. -#[macro_use] -pub mod macros; +mod macros; pub mod op; -mod pack; +mod panic_instruction; mod panic_reason; -mod unpack; #[cfg(test)] mod encoding_tests; @@ -93,6 +88,7 @@ bitflags::bitflags! 
{ const WRAPPING = 0x02; } } + /// Type is convertible to a [`RegId`] pub trait CheckRegId { /// Convert to a [`RegId`], or panic @@ -114,7 +110,7 @@ impl CheckRegId for u8 { // Defines the `Instruction` and `Opcode` types, along with an `op` module declaring a // unique type for each opcode's instruction variant. For a detailed explanation of how // this works, see the `fuel_asm::macros` module level documentation. -impl_instructions! { +fuel_derive::impl_instructions! { "Adds two registers." 0x10 ADD add [dst: RegId lhs: RegId rhs: RegId] "Bitwise ANDs two registers." @@ -608,6 +604,10 @@ impl Imm06 { pub const fn to_u8(self) -> u8 { self.0 } + + pub(crate) fn to_smallest_int(self) -> u8 { + self.to_u8() + } } impl Imm12 { @@ -633,6 +633,10 @@ impl Imm12 { pub const fn to_u16(self) -> u16 { self.0 } + + pub(crate) fn to_smallest_int(self) -> u16 { + self.to_u16() + } } impl Imm18 { @@ -658,6 +662,10 @@ impl Imm18 { pub const fn to_u32(self) -> u32 { self.0 } + + pub(crate) fn to_smallest_int(self) -> u32 { + self.to_u32() + } } impl Imm24 { @@ -683,6 +691,10 @@ impl Imm24 { pub const fn to_u32(self) -> u32 { self.0 } + + pub(crate) fn to_smallest_int(self) -> u32 { + self.to_u32() + } } impl Opcode { @@ -934,29 +946,6 @@ where us.into_iter().map(Instruction::try_from) } -// Short-hand, `panic!`ing constructors for the short-hand instruction construtors (e.g -// op::add). 
- -fn check_imm06(u: u8) -> Imm06 { - Imm06::new_checked(u) - .unwrap_or_else(|| panic!("Value `{u}` out of range for 6-bit immediate")) -} - -fn check_imm12(u: u16) -> Imm12 { - Imm12::new_checked(u) - .unwrap_or_else(|| panic!("Value `{u}` out of range for 12-bit immediate")) -} - -fn check_imm18(u: u32) -> Imm18 { - Imm18::new_checked(u) - .unwrap_or_else(|| panic!("Value `{u}` out of range for 18-bit immediate")) -} - -fn check_imm24(u: u32) -> Imm24 { - Imm24::new_checked(u) - .unwrap_or_else(|| panic!("Value `{u}` out of range for 24-bit immediate")) -} - // -------------------------------------------------------- // The size of the instruction isn't larger than necessary. diff --git a/fuel-asm/src/macros.rs b/fuel-asm/src/macros.rs index 45bef211a3..59195aa6c9 100644 --- a/fuel-asm/src/macros.rs +++ b/fuel-asm/src/macros.rs @@ -1,1285 +1,3 @@ -//! # The `impl_instructions!` macro -//! -//! The heart of this crate's implementation is the private `impl_instructions!` macro. -//! This macro is used to generate the `Instruction` and `Opcode` types along with their -//! implementations. -//! -//! The intention is to allow for having a single source of truth from which each of the -//! instruction-related types and implementations are derived. -//! -//! Its usage looks like this: -//! -//! ```rust,ignore -//! impl_instructions! { -//! "Adds two registers." -//! 0x10 ADD add [RegId RegId RegId] -//! "Bitwise ANDs two registers." -//! 0x11 AND and [RegId RegId RegId] -//! // ... -//! } -//! ``` -//! -//! Each instruction's row includes: -//! -//! - A short docstring. -//! - The Opcode byte value. -//! - An uppercase identifier (for generating variants and types). -//! - A lowercase identifier (for generating the shorthand instruction constructor). -//! - The instruction layout (for the `new` and `unpack` functions). -//! -//! The following sections describe each of the items that are derived from the -//! `impl_instructions!` table in more detail. -//! -//! 
## The `Opcode` enum -//! -//! Represents the bytecode portion of an instruction. -//! -//! ```rust,ignore -//! /// Solely the opcode portion of an instruction represented as a single byte. -//! #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -//! #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -//! #[repr(u8)] -//! pub enum Opcode { -//! /// Adds two registers. -//! ADD = 0x10, -//! /// Bitwise ANDs two registers. -//! AND = 0x11, -//! // ... -//! } -//! ``` -//! -//! A `TryFrom` implementation is also provided, producing an `Err(InvalidOpcode)` in -//! the case that the byte represents a reserved or undefined value. -//! -//! ```rust -//! # use fuel_asm::{InvalidOpcode, Opcode}; -//! assert_eq!(Opcode::try_from(0x10), Ok(Opcode::ADD)); -//! assert_eq!(Opcode::try_from(0x11), Ok(Opcode::AND)); -//! assert_eq!(Opcode::try_from(0), Err(InvalidOpcode)); -//! ``` -//! -//! ## The `Instruction` enum -//! -//! Represents a single, full instruction, discriminated by its `Opcode`. -//! -//! ```rust,ignore -//! /// Representation of a single instruction for the interpreter. -//! /// -//! /// The opcode is represented in the tag (variant), or may be retrieved in the form of an -//! /// `Opcode` byte using the `opcode` method. -//! /// -//! /// The register and immediate data associated with the instruction is represented within -//! /// an inner unit type wrapper around the 3 remaining bytes. -//! #[derive(Clone, Copy, Eq, Hash, PartialEq)] -//! #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -//! pub enum Instruction { -//! /// Adds two registers. -//! ADD(op::ADD), -//! /// Bitwise ANDs two registers. -//! AND(op::AND), -//! // ... -//! } -//! ``` -//! -//! The `From for u32` (aka `RawInstruction`) and `TryFrom for -//! Instruction` implementations can be found in the crate root. -//! -//! ## A unique unit type per operation -//! -//! 
In order to reduce the likelihood of misusing unrelated register IDs or immediate -//! values, we generate a unique unit type for each type of operation (i.e instruction -//! variant) and guard access to the relevant register IDs and immediate values behind -//! each type's unique methods. -//! -//! These unique operation types are generated as follows within a dedicated `op` module: -//! -//! ```rust,ignore -//! pub mod op { -//! //! Definitions and implementations for each unique instruction type, one for each -//! //! unique `Opcode` variant. -//! -//! // A unique type for each operation. -//! -//! /// Adds two registers. -//! pub struct ADD([u8; 3]); -//! -//! /// Bitwise ANDs two registers. -//! pub struct AND([u8; 3]); -//! -//! // ... -//! -//! // An implementation for each unique type. -//! -//! impl ADD { -//! pub const OPCODE: Opcode = Opcode::ADD; -//! -//! /// Construct the instruction from its parts. -//! pub fn new(ra: RegId, rb: RegId, rc: RegId) -> Self { -//! Self(pack::bytes_from_ra_rb_rc(ra, rb, rc)) -//! } -//! -//! /// Convert the instruction into its parts. -//! pub fn unpack(self) -> (RegId, RegId, RegId) { -//! unpack::ra_rb_rc_from_bytes(self.0) -//! } -//! } -//! -//! impl AND { -//! // ... -//! } -//! -//! // ... -//! -//! // A short-hand `Instruction` constructor for each operation to make it easier to -//! // hand-write assembly for tests and benchmarking. As these constructors are public and -//! // accept literal values, we check that the values are within range. -//! -//! /// Adds two registers. -//! pub fn add(ra: u8, rb: u8, rc: u8) -> Instruction { -//! ADD::new(check_reg_id(ra), check_reg_id(rb), check_reg_id(rc)).into() -//! } -//! -//! /// Bitwise ANDs two registers. -//! pub fn and(ra: u8, rb: u8, rc: u8) -> Instruction { -//! AND::new(check_reg_id(ra), check_reg_id(rb), check_reg_id(rc)).into() -//! } -//! -//! // ... -//! }; -//! ``` -//! -//! ### Instruction Layout -//! -//! 
The function signatures of the `new` and `unpack` functions are derived from the -//! instruction's data layout described in the `impl_instructions!` table. -//! -//! For example, the `unpack` method for `ADD` looks like this: -//! -//! ```rust,ignore -//! // 0x10 ADD add [RegId RegId RegId] -//! pub fn unpack(self) -> (RegId, RegId, RegId) -//! ``` -//! -//! While the `unpack` method for `ADDI` looks like this: -//! -//! ```rust,ignore -//! // 0x50 ADDI addi [RegId RegId Imm12] -//! pub fn unpack(self) -> (RegId, RegId, Imm12) -//! ``` -//! -//! ### Shorthand Constructors -//! -//! The shorthand instruction constructors (e.g. `add`, `and`, etc) are specifically -//! designed to make it easier to handwrite assembly for tests or benchmarking. Unlike the -//! `$OP::new` constructors which require typed register ID or immediate inputs, the -//! instruction constructors allow for constructing `Instruction`s from convenient literal -//! value inputs. E.g. -//! -//! ```rust -//! use fuel_asm::{op, Instruction}; -//! -//! // A sample program to perform ecrecover -//! let program: Vec = vec![ -//! op::move_(0x10, 0x01), // set r[0x10] := $one -//! op::slli(0x20, 0x10, 5), // set r[0x20] := `r[0x10] << 5 == 32` -//! op::slli(0x21, 0x10, 6), // set r[0x21] := `r[0x10] << 6 == 64` -//! op::aloc(0x21), // alloc `r[0x21] == 64` to the heap -//! op::addi(0x10, 0x07, 1), // set r[0x10] := `$hp + 1` (allocated heap) -//! op::move_(0x11, 0x04), // set r[0x11] := $ssp -//! op::add(0x12, 0x04, 0x20), // set r[0x12] := `$ssp + r[0x20]` -//! op::eck1(0x10, 0x11, 0x12),// recover public key in memory[r[0x10], 64] -//! op::ret(0x01), // return `1` -//! ]; -//! ``` - -// Generate a shorthand free function named after the $op for constructing an -// `Instruction`. -macro_rules! 
op_constructor { - ($doc:literal $Op:ident $op:ident[$ra:ident : RegId]) => { - #[doc = $doc] - pub fn $op($ra: A) -> Instruction { - $Op::new($ra.check()).into() - } - - #[cfg(feature = "typescript")] - const _: () = { - use super::*; - - #[wasm_bindgen::prelude::wasm_bindgen] - #[doc = $doc] - pub fn $op($ra: u8) -> typescript::Instruction { - crate::op::$op($ra).into() - } - }; - }; - ($doc:literal $Op:ident $op:ident[$ra:ident : RegId $rb:ident : RegId]) => { - #[doc = $doc] - pub fn $op($ra: A, $rb: B) -> Instruction { - $Op::new($ra.check(), $rb.check()).into() - } - - #[cfg(feature = "typescript")] - const _: () = { - use super::*; - - #[wasm_bindgen::prelude::wasm_bindgen] - #[doc = $doc] - pub fn $op($ra: u8, $rb: u8) -> typescript::Instruction { - crate::op::$op($ra, $rb).into() - } - }; - }; - ( - $doc:literal - $Op:ident - $op:ident[$ra:ident : RegId $rb:ident : RegId $rc:ident : RegId] - ) => { - #[doc = $doc] - pub fn $op( - $ra: A, - $rb: B, - $rc: C, - ) -> Instruction { - $Op::new($ra.check(), $rb.check(), $rc.check()).into() - } - - #[cfg(feature = "typescript")] - const _: () = { - use super::*; - - #[wasm_bindgen::prelude::wasm_bindgen] - #[doc = $doc] - pub fn $op($ra: u8, $rb: u8, $rc: u8) -> typescript::Instruction { - crate::op::$op($ra, $rb, $rc).into() - } - }; - }; - ( - $doc:literal - $Op:ident - $op:ident[$ra:ident : RegId $rb:ident : RegId $rc:ident : RegId $rd:ident : RegId] - ) => { - #[doc = $doc] - pub fn $op( - $ra: A, - $rb: B, - $rc: C, - $rd: D, - ) -> Instruction { - $Op::new($ra.check(), $rb.check(), $rc.check(), $rd.check()).into() - } - - #[cfg(feature = "typescript")] - const _: () = { - use super::*; - - #[wasm_bindgen::prelude::wasm_bindgen] - #[doc = $doc] - pub fn $op($ra: u8, $rb: u8, $rc: u8, $rd: u8) -> typescript::Instruction { - crate::op::$op($ra, $rb, $rc, $rd).into() - } - }; - }; - ( - $doc:literal - $Op:ident - $op:ident[$ra:ident : RegId $rb:ident : RegId $rc:ident : RegId $imm:ident : Imm06] - ) => { - 
#[doc = $doc] - pub fn $op( - $ra: A, - $rb: B, - $rc: C, - $imm: u8, - ) -> Instruction { - $Op::new($ra.check(), $rb.check(), $rc.check(), check_imm06($imm)).into() - } - - #[cfg(feature = "typescript")] - const _: () = { - use super::*; - - #[wasm_bindgen::prelude::wasm_bindgen] - #[doc = $doc] - pub fn $op($ra: u8, $rb: u8, $rc: u8, $imm: u8) -> typescript::Instruction { - crate::op::$op($ra, $rb, $rc, $imm).into() - } - }; - }; - ( - $doc:literal - $Op:ident - $op:ident[$ra:ident : RegId $rb:ident : RegId $imm:ident : Imm12] - ) => { - #[doc = $doc] - pub fn $op( - $ra: A, - $rb: B, - $imm: u16, - ) -> Instruction { - $Op::new($ra.check(), $rb.check(), check_imm12($imm)).into() - } - - #[cfg(feature = "typescript")] - const _: () = { - use super::*; - - #[wasm_bindgen::prelude::wasm_bindgen] - #[doc = $doc] - pub fn $op($ra: u8, $rb: u8, $imm: u16) -> typescript::Instruction { - crate::op::$op($ra, $rb, $imm).into() - } - }; - }; - ($doc:literal $Op:ident $op:ident[$ra:ident : RegId $imm:ident : Imm18]) => { - #[doc = $doc] - pub fn $op($ra: A, $imm: u32) -> Instruction { - $Op::new($ra.check(), check_imm18($imm)).into() - } - - #[cfg(feature = "typescript")] - const _: () = { - use super::*; - - #[wasm_bindgen::prelude::wasm_bindgen] - #[doc = $doc] - pub fn $op($ra: u8, $imm: u32) -> typescript::Instruction { - crate::op::$op($ra, $imm).into() - } - }; - }; - ($doc:literal $Op:ident $op:ident[$imm:ident : Imm24]) => { - #[doc = $doc] - pub fn $op($imm: u32) -> Instruction { - $Op::new(check_imm24($imm)).into() - } - - #[cfg(feature = "typescript")] - const _: () = { - use super::*; - - #[wasm_bindgen::prelude::wasm_bindgen] - #[doc = $doc] - pub fn $op($imm: u32) -> typescript::Instruction { - crate::op::$op($imm).into() - } - }; - }; - ($doc:literal $Op:ident $op:ident[]) => { - #[doc = $doc] - pub fn $op() -> Instruction { - $Op::new().into() - } - - #[cfg(feature = "typescript")] - const _: () = { - use super::*; - - #[wasm_bindgen::prelude::wasm_bindgen] 
- #[doc = $doc] - pub fn $op() -> typescript::Instruction { - crate::op::$op().into() - } - }; - }; -} - -// Generate approriate `new` constructor for the instruction -macro_rules! op_new { - // Generate a constructor based on the field layout. - ($Op:ident $ra:ident : RegId) => { - impl $Op { - /// Construct the instruction from its parts. - pub fn new($ra: RegId) -> Self { - Self(pack::bytes_from_ra($ra)) - } - } - - #[cfg(feature = "typescript")] - #[wasm_bindgen::prelude::wasm_bindgen] - impl $Op { - #[wasm_bindgen(constructor)] - /// Construct the instruction from its parts. - pub fn new_typescript($ra: RegId) -> Self { - Self::new($ra) - } - } - }; - ($Op:ident $ra:ident : RegId $rb:ident : RegId) => { - impl $Op { - /// Construct the instruction from its parts. - pub fn new($ra: RegId, $rb: RegId) -> Self { - Self(pack::bytes_from_ra_rb($ra, $rb)) - } - } - - #[cfg(feature = "typescript")] - #[wasm_bindgen::prelude::wasm_bindgen] - impl $Op { - #[wasm_bindgen(constructor)] - /// Construct the instruction from its parts. - pub fn new_typescript($ra: RegId, $rb: RegId) -> Self { - Self::new($ra, $rb) - } - } - }; - ($Op:ident $ra:ident : RegId $rb:ident : RegId $rc:ident : RegId) => { - impl $Op { - /// Construct the instruction from its parts. - pub fn new($ra: RegId, $rb: RegId, $rc: RegId) -> Self { - Self(pack::bytes_from_ra_rb_rc($ra, $rb, $rc)) - } - } - - #[cfg(feature = "typescript")] - #[wasm_bindgen::prelude::wasm_bindgen] - impl $Op { - #[wasm_bindgen(constructor)] - /// Construct the instruction from its parts. - pub fn new_typescript($ra: RegId, $rb: RegId, $rc: RegId) -> Self { - Self::new($ra, $rb, $rc) - } - } - }; - ( - $Op:ident $ra:ident : RegId $rb:ident : RegId $rc:ident : RegId $rd:ident : RegId - ) => { - impl $Op { - /// Construct the instruction from its parts. 
- pub fn new($ra: RegId, $rb: RegId, $rc: RegId, $rd: RegId) -> Self { - Self(pack::bytes_from_ra_rb_rc_rd($ra, $rb, $rc, $rd)) - } - } - - #[cfg(feature = "typescript")] - #[wasm_bindgen::prelude::wasm_bindgen] - impl $Op { - #[wasm_bindgen(constructor)] - /// Construct the instruction from its parts. - pub fn new_typescript( - $ra: RegId, - $rb: RegId, - $rc: RegId, - $rd: RegId, - ) -> Self { - Self::new($ra, $rb, $rc, $rd) - } - } - }; - ( - $Op:ident - $ra:ident : RegId - $rb:ident : RegId - $rc:ident : RegId - $imm:ident : Imm06 - ) => { - impl $Op { - /// Construct the instruction from its parts. - pub fn new($ra: RegId, $rb: RegId, $rc: RegId, $imm: Imm06) -> Self { - Self(pack::bytes_from_ra_rb_rc_imm06($ra, $rb, $rc, $imm)) - } - } - - #[cfg(feature = "typescript")] - #[wasm_bindgen::prelude::wasm_bindgen] - impl $Op { - #[wasm_bindgen(constructor)] - /// Construct the instruction from its parts. - pub fn new_typescript( - $ra: RegId, - $rb: RegId, - $rc: RegId, - $imm: Imm06, - ) -> Self { - Self::new($ra, $rb, $rc, $imm) - } - } - }; - ($Op:ident $ra:ident : RegId $rb:ident : RegId $imm:ident : Imm12) => { - impl $Op { - /// Construct the instruction from its parts. - pub fn new($ra: RegId, $rb: RegId, $imm: Imm12) -> Self { - Self(pack::bytes_from_ra_rb_imm12($ra, $rb, $imm)) - } - } - - #[cfg(feature = "typescript")] - #[wasm_bindgen::prelude::wasm_bindgen] - impl $Op { - #[wasm_bindgen(constructor)] - /// Construct the instruction from its parts. - pub fn new_typescript($ra: RegId, $rb: RegId, $imm: Imm12) -> Self { - Self::new($ra, $rb, $imm) - } - } - }; - ($Op:ident $ra:ident : RegId $imm:ident : Imm18) => { - impl $Op { - /// Construct the instruction from its parts. - pub fn new($ra: RegId, $imm: Imm18) -> Self { - Self(pack::bytes_from_ra_imm18($ra, $imm)) - } - } - - #[cfg(feature = "typescript")] - #[wasm_bindgen::prelude::wasm_bindgen] - impl $Op { - #[wasm_bindgen(constructor)] - /// Construct the instruction from its parts. 
- pub fn new_typescript($ra: RegId, $imm: Imm18) -> Self { - Self::new($ra, $imm) - } - } - }; - ($Op:ident $imm:ident : Imm24) => { - impl $Op { - /// Construct the instruction from its parts. - pub fn new($imm: Imm24) -> Self { - Self(pack::bytes_from_imm24($imm)) - } - } - - #[cfg(feature = "typescript")] - #[wasm_bindgen::prelude::wasm_bindgen] - impl $Op { - #[wasm_bindgen(constructor)] - /// Construct the instruction from its parts. - pub fn new_typescript($imm: Imm24) -> Self { - Self::new($imm) - } - } - }; - ($Op:ident) => { - impl $Op { - /// Construct the instruction. - #[allow(clippy::new_without_default)] - pub fn new() -> Self { - Self([0; 3]) - } - } - - #[cfg(feature = "typescript")] - #[wasm_bindgen::prelude::wasm_bindgen] - impl $Op { - #[wasm_bindgen(constructor)] - /// Construct the instruction. - #[allow(clippy::new_without_default)] - pub fn new_typescript() -> Self { - Self::new() - } - } - }; -} - -// Generate an accessor method for each field. Recurse based on layout. -macro_rules! op_accessors { - ($Op:ident $ra:ident: RegId) => { - #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] - impl $Op { - /// Access the ID for register A. - pub fn ra(&self) -> RegId { - unpack::ra_from_bytes(self.0) - } - } - }; - ($Op:ident $ra:ident: RegId $rb:ident: RegId) => { - op_accessors!($Op ra: RegId); - - #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] - impl $Op { - /// Access the ID for register B. - pub fn rb(&self) -> RegId { - unpack::rb_from_bytes(self.0) - } - } - }; - ($Op:ident $ra:ident: RegId $rb:ident: RegId $rc:ident: RegId) => { - op_accessors!($Op $ra: RegId $rb: RegId); - - #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] - impl $Op { - /// Access the ID for register C. 
- pub fn rc(&self) -> RegId { - unpack::rc_from_bytes(self.0) - } - } - }; - ($Op:ident $ra:ident: RegId $rb:ident: RegId $rc:ident: RegId $rd:ident: RegId) => { - op_accessors!($Op $ra: RegId $rb: RegId $rc: RegId); - - #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] - impl $Op { - /// Access the ID for register D. - pub fn rd(&self) -> RegId { - unpack::rd_from_bytes(self.0) - } - } - }; - ($Op:ident $ra:ident: RegId $rb:ident: RegId $rc:ident: RegId $imm:ident: Imm06) => { - op_accessors!($Op $ra: RegId rb: RegId $rc: RegId); - - #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] - impl $Op { - /// Access the 6-bit immediate value. - pub fn imm06(&self) -> Imm06 { - unpack::imm06_from_bytes(self.0) - } - } - }; - ($Op:ident $ra:ident: RegId $rb:ident: RegId $imm:ident: Imm12) => { - op_accessors!($Op $ra: RegId $rb: RegId); - - #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] - impl $Op { - /// Access the 12-bit immediate value. - pub fn imm12(&self) -> Imm12 { - unpack::imm12_from_bytes(self.0) - } - } - }; - ($Op:ident $ra:ident: RegId $imm:ident: Imm18) => { - op_accessors!($Op $ra: RegId); - - #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] - impl $Op { - /// Access the 18-bit immediate value. - pub fn imm18(&self) -> Imm18 { - unpack::imm18_from_bytes(self.0) - } - } - }; - ($Op:ident $ra:ident: Imm24) => { - #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] - impl $Op { - /// Access the 24-bit immediate value. - pub fn imm24(&self) -> Imm24 { - unpack::imm24_from_bytes(self.0) - } - } - }; - ($Op:ident) => {}; -} - -// Generate a method for converting the instruction into its parts. -macro_rules! op_unpack { - (RegId) => { - /// Convert the instruction into its parts. - pub fn unpack(self) -> RegId { - unpack::ra_from_bytes(self.0) - } - }; - (RegId RegId) => { - /// Convert the instruction into its parts. 
- pub fn unpack(self) -> (RegId, RegId) { - unpack::ra_rb_from_bytes(self.0) - } - }; - (RegId RegId RegId) => { - /// Convert the instruction into its parts. - pub fn unpack(self) -> (RegId, RegId, RegId) { - unpack::ra_rb_rc_from_bytes(self.0) - } - }; - (RegId RegId RegId RegId) => { - /// Convert the instruction into its parts. - pub fn unpack(self) -> (RegId, RegId, RegId, RegId) { - unpack::ra_rb_rc_rd_from_bytes(self.0) - } - }; - (RegId RegId RegId Imm06) => { - /// Convert the instruction into its parts. - pub fn unpack(self) -> (RegId, RegId, RegId, Imm06) { - unpack::ra_rb_rc_imm06_from_bytes(self.0) - } - }; - (RegId RegId Imm12) => { - /// Convert the instruction into its parts. - pub fn unpack(self) -> (RegId, RegId, Imm12) { - unpack::ra_rb_imm12_from_bytes(self.0) - } - }; - (RegId Imm18) => { - /// Convert the instruction into its parts. - pub fn unpack(self) -> (RegId, Imm18) { - unpack::ra_imm18_from_bytes(self.0) - } - }; - (Imm24) => { - /// Convert the instruction into its parts. - pub fn unpack(self) -> Imm24 { - unpack::imm24_from_bytes(self.0) - } - }; - () => {}; -} - -// Generate a method for checking that the reserved part of the -// instruction is zero. This is private, as invalid instructions -// cannot be constructed outside this crate. -macro_rules! 
op_reserved_part { - (RegId) => { - pub(crate) fn reserved_part_is_zero(self) -> bool { - let (_, imm) = unpack::ra_imm18_from_bytes(self.0); - imm.0 == 0 - } - }; - (RegId RegId) => { - pub(crate) fn reserved_part_is_zero(self) -> bool { - let (_, _, imm) = unpack::ra_rb_imm12_from_bytes(self.0); - imm.0 == 0 - } - }; - (RegId RegId RegId) => { - pub(crate) fn reserved_part_is_zero(self) -> bool { - let (_, _, _, imm) = unpack::ra_rb_rc_imm06_from_bytes(self.0); - imm.0 == 0 - } - }; - (RegId RegId RegId RegId) => { - pub(crate) fn reserved_part_is_zero(self) -> bool { - true - } - }; - (RegId RegId RegId Imm06) => { - pub(crate) fn reserved_part_is_zero(self) -> bool { - true - } - }; - (RegId RegId Imm12) => { - pub(crate) fn reserved_part_is_zero(self) -> bool { - true - } - }; - (RegId Imm18) => { - pub(crate) fn reserved_part_is_zero(self) -> bool { - true - } - }; - (Imm24) => { - pub(crate) fn reserved_part_is_zero(self) -> bool { - true - } - }; - () => { - pub(crate) fn reserved_part_is_zero(self) -> bool { - self.0 == [0; 3] - } - }; -} - -// Generate a private fn for use within the `Instruction::reg_ids` implementation. -macro_rules! 
op_reg_ids { - (RegId) => { - pub(super) fn reg_ids(&self) -> [Option; 4] { - let ra = self.unpack(); - [Some(ra), None, None, None] - } - }; - (RegId RegId) => { - pub(super) fn reg_ids(&self) -> [Option; 4] { - let (ra, rb) = self.unpack(); - [Some(ra), Some(rb), None, None] - } - }; - (RegId RegId RegId) => { - pub(super) fn reg_ids(&self) -> [Option; 4] { - let (ra, rb, rc) = self.unpack(); - [Some(ra), Some(rb), Some(rc), None] - } - }; - (RegId RegId RegId RegId) => { - pub(super) fn reg_ids(&self) -> [Option; 4] { - let (ra, rb, rc, rd) = self.unpack(); - [Some(ra), Some(rb), Some(rc), Some(rd)] - } - }; - (RegId RegId RegId Imm06) => { - pub(super) fn reg_ids(&self) -> [Option; 4] { - let (ra, rb, rc, _) = self.unpack(); - [Some(ra), Some(rb), Some(rc), None] - } - }; - (RegId RegId Imm12) => { - pub(super) fn reg_ids(&self) -> [Option; 4] { - let (ra, rb, _) = self.unpack(); - [Some(ra), Some(rb), None, None] - } - }; - (RegId Imm18) => { - pub(super) fn reg_ids(&self) -> [Option; 4] { - let (ra, _) = self.unpack(); - [Some(ra), None, None, None] - } - }; - ($($rest:tt)*) => { - pub(super) fn reg_ids(&self) -> [Option; 4] { - [None; 4] - } - }; -} - -// Generate test constructors that can be used to generate instructions from non-matching -// input. -#[cfg(test)] -macro_rules! op_test_construct_fn { - (RegId) => { - /// Construct the instruction from all possible raw fields, ignoring inapplicable - /// ones. - pub fn test_construct( - ra: RegId, - _rb: RegId, - _rc: RegId, - _rd: RegId, - _imm: u32, - ) -> Self { - Self(pack::bytes_from_ra(ra)) - } - }; - (RegId RegId) => { - /// Construct the instruction from all possible raw fields, ignoring inapplicable - /// ones. - pub fn test_construct( - ra: RegId, - rb: RegId, - _rc: RegId, - _rd: RegId, - _imm: u32, - ) -> Self { - Self(pack::bytes_from_ra_rb(ra, rb)) - } - }; - (RegId RegId RegId) => { - /// Construct the instruction from all possible raw fields, ignoring inapplicable - /// ones. 
- pub fn test_construct( - ra: RegId, - rb: RegId, - rc: RegId, - _rd: RegId, - _imm: u32, - ) -> Self { - Self(pack::bytes_from_ra_rb_rc(ra, rb, rc)) - } - }; - (RegId RegId RegId RegId) => { - /// Construct the instruction from all possible raw fields, ignoring inapplicable - /// ones. - pub fn test_construct( - ra: RegId, - rb: RegId, - rc: RegId, - rd: RegId, - _imm: u32, - ) -> Self { - Self(pack::bytes_from_ra_rb_rc_rd(ra, rb, rc, rd)) - } - }; - (RegId RegId RegId Imm06) => { - /// Construct the instruction from all possible raw fields, ignoring inapplicable - /// ones. - pub fn test_construct( - ra: RegId, - rb: RegId, - rc: RegId, - _rd: RegId, - imm: u32, - ) -> Self { - Self(pack::bytes_from_ra_rb_rc_imm06( - ra, - rb, - rc, - Imm06::from(imm as u8), - )) - } - }; - (RegId RegId Imm12) => { - /// Construct the instruction from all possible raw fields, ignoring inapplicable - /// ones. - pub fn test_construct( - ra: RegId, - rb: RegId, - _rc: RegId, - _rd: RegId, - imm: u32, - ) -> Self { - Self(pack::bytes_from_ra_rb_imm12( - ra, - rb, - Imm12::from(imm as u16), - )) - } - }; - (RegId Imm18) => { - /// Construct the instruction from all possible raw fields, ignoring inapplicable - /// ones. - pub fn test_construct( - ra: RegId, - _rb: RegId, - _rc: RegId, - _rd: RegId, - imm: u32, - ) -> Self { - Self(pack::bytes_from_ra_imm18(ra, Imm18::from(imm))) - } - }; - (Imm24) => { - /// Construct the instruction from all possible raw fields, ignoring inapplicable - /// ones. - pub fn test_construct( - _ra: RegId, - _rb: RegId, - _rc: RegId, - _rd: RegId, - imm: u32, - ) -> Self { - Self(pack::bytes_from_imm24(Imm24::from(imm))) - } - }; - () => { - /// Construct the instruction from all possible raw fields, ignoring inapplicable - /// ones. - #[allow(clippy::new_without_default)] - pub fn test_construct( - _ra: RegId, - _rb: RegId, - _rc: RegId, - _rd: RegId, - _imm: u32, - ) -> Self { - Self([0; 3]) - } - }; -} - -// Debug implementations for each instruction. 
-macro_rules! op_debug_fmt { - ($Op:ident[$ra:ident : RegId]) => { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - let ra = self.unpack(); - f.debug_struct(stringify!($Op)) - .field(stringify!($ra), &format_args!("{:#02x}", u8::from(ra))) - .finish() - } - }; - ($Op:ident[$ra:ident : RegId $rb:ident : RegId]) => { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - let (ra, rb) = self.unpack(); - f.debug_struct(stringify!($Op)) - .field(stringify!($ra), &format_args!("{:#02x}", u8::from(ra))) - .field(stringify!($rb), &format_args!("{:#02x}", u8::from(rb))) - .finish() - } - }; - ($Op:ident[$ra:ident : RegId $rb:ident : RegId $rc:ident : RegId]) => { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - let (ra, rb, rc) = self.unpack(); - f.debug_struct(stringify!($Op)) - .field(stringify!($ra), &format_args!("{:#02x}", u8::from(ra))) - .field(stringify!($rb), &format_args!("{:#02x}", u8::from(rb))) - .field(stringify!($rc), &format_args!("{:#02x}", u8::from(rc))) - .finish() - } - }; - ( - $Op:ident[$ra:ident : RegId $rb:ident : RegId $rc:ident : RegId $rd:ident : RegId] - ) => { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - let (ra, rb, rc, rd) = self.unpack(); - f.debug_struct(stringify!($Op)) - .field(stringify!($ra), &format_args!("{:#02x}", u8::from(ra))) - .field(stringify!($rb), &format_args!("{:#02x}", u8::from(rb))) - .field(stringify!($rc), &format_args!("{:#02x}", u8::from(rc))) - .field(stringify!($rd), &format_args!("{:#02x}", u8::from(rd))) - .finish() - } - }; - ( - $Op:ident[$ra:ident : RegId $rb:ident : RegId $rc:ident : RegId $imm:ident : Imm06] - ) => { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - let (ra, rb, rc, imm) = self.unpack(); - f.debug_struct(stringify!($Op)) - .field(stringify!($ra), &format_args!("{:#02x}", u8::from(ra))) - .field(stringify!($rb), &format_args!("{:#02x}", u8::from(rb))) - .field(stringify!($rc), 
&format_args!("{:#02x}", u8::from(rc))) - .field(stringify!($imm), &u8::from(imm)) - .finish() - } - }; - ($Op:ident[$ra:ident : RegId $rb:ident : RegId $imm:ident : Imm12]) => { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - let (ra, rb, imm) = self.unpack(); - f.debug_struct(stringify!($Op)) - .field(stringify!($ra), &format_args!("{:#02x}", u8::from(ra))) - .field(stringify!($rb), &format_args!("{:#02x}", u8::from(rb))) - .field(stringify!($imm), &u16::from(imm)) - .finish() - } - }; - ($Op:ident[$ra:ident : RegId $imm:ident : Imm18]) => { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - let (ra, imm) = self.unpack(); - f.debug_struct(stringify!($Op)) - .field(stringify!($ra), &format_args!("{:#02x}", u8::from(ra))) - .field(stringify!($imm), &u32::from(imm)) - .finish() - } - }; - ($Op:ident[$imm:ident : Imm24]) => { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - let imm = self.unpack(); - f.debug_struct(stringify!($Op)) - .field(stringify!($imm), &u32::from(imm)) - .finish() - } - }; - ($Op:ident[]) => { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - f.debug_struct(stringify!($Op)).finish() - } - }; -} - -// Recursively declares a unique struct for each opcode. -macro_rules! decl_op_struct { - ($doc:literal $ix:literal $Op:ident $op:ident [$($fname:ident: $field:ident)*] $($rest:tt)*) => { - #[doc = $doc] - #[derive(Clone, Copy, Eq, Hash, PartialEq)] - #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] - #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] - pub struct $Op(pub (super) [u8; 3]); - decl_op_struct!($($rest)*); - }; - () => {}; -} - -/// This macro is intentionaly private. See the module-level documentation for a thorough -/// explanation of how this macro works. -macro_rules! impl_instructions { - // Define the `Opcode` enum. 
- (decl_opcode_enum $($doc:literal $ix:literal $Op:ident $op:ident [$($fname:ident: $field:ident)*])*) => { - /// Solely the opcode portion of an instruction represented as a single byte. - #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] - #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] - #[repr(u8)] - pub enum Opcode { - $( - #[doc = $doc] - $Op = $ix, - )* - } - }; - - // Define the `Instruction` enum. - (decl_instruction_enum $($doc:literal $ix:literal $Op:ident $op:ident [$($fname:ident: $field:ident)*])*) => { - /// Representation of a single instruction for the interpreter. - /// - /// The opcode is represented in the tag (variant), or may be retrieved in the form of an - /// `Opcode` byte using the `opcode` method. - /// - /// The register and immediate data associated with the instruction is represented within - /// an inner unit type wrapper around the 3 remaining bytes. - #[derive(Clone, Copy, Eq, Hash, PartialEq)] - #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] - pub enum Instruction { - $( - #[doc = $doc] - $Op(op::$Op), - )* - } - }; - - // Recursively generate a test constructor for each opcode - (impl_opcode_test_construct $doc:literal $ix:literal $Op:ident $op:ident [$($fname:ident: $field:ident)*] $($rest:tt)*) => { - #[cfg(test)] - impl crate::_op::$Op { - op_test_construct_fn!($($field)*); - } - impl_instructions!(impl_opcode_test_construct $($rest)*); - }; - (impl_opcode_test_construct) => {}; - - // Recursively generate a test constructor for each opcode - (tests $doc:literal $ix:literal $Op:ident $op:ident [$($fname:ident: $field:ident)*] $($rest:tt)*) => { - op_test!($Op $op [$($field)*]); - impl_instructions!(tests $($rest)*); - }; - (tests) => {}; - - // Implement constructors and accessors for register and immediate values. 
- (impl_op $doc:literal $ix:literal $Op:ident $op:ident [$($fname:ident: $field:ident)*] $($rest:tt)*) => { - impl $Op { - /// The associated 8-bit Opcode value. - pub const OPCODE: Opcode = Opcode::$Op; - } - - op_new!($Op $($fname: $field)*); - op_accessors!($Op $($fname: $field)*); - - impl $Op { - op_unpack!($($field)*); - op_reserved_part!($($field)*); - op_reg_ids!($($field)*); - } - - op_constructor!($doc $Op $op [$($fname: $field)*]); - - impl From<$Op> for [u8; 3] { - fn from($Op(arr): $Op) -> Self { - arr - } - } - - impl From<$Op> for [u8; 4] { - fn from($Op([a, b, c]): $Op) -> Self { - [$Op::OPCODE as u8, a, b, c] - } - } - - impl From<$Op> for u32 { - fn from(op: $Op) -> Self { - u32::from_be_bytes(op.into()) - } - } - - impl From<$Op> for Instruction { - fn from(op: $Op) -> Self { - Instruction::$Op(op) - } - } - - #[cfg(feature = "typescript")] - impl From<$Op> for typescript::Instruction { - fn from(opcode: $Op) -> Self { - typescript::Instruction::new(opcode.into()) - } - } - - impl core::fmt::Debug for $Op { - op_debug_fmt!($Op [$($fname: $field)*]); - } - - impl_instructions!(impl_op $($rest)*); - }; - (impl_op) => {}; - - // Implement functions for all opcode variants - (impl_opcode $($doc:literal $ix:literal $Op:ident $op:ident [$($fname:ident: $field:ident)*])*) => { - impl core::convert::TryFrom<u8> for Opcode { - type Error = InvalidOpcode; - fn try_from(u: u8) -> Result<Self, Self::Error> { - match u { - $( - $ix => Ok(Opcode::$Op), - )* - _ => Err(InvalidOpcode), - } - } - } - - impl Opcode { - /// Construct the instruction from all possible raw fields, ignoring inapplicable ones. - #[cfg(test)] - pub fn test_construct(self, ra: RegId, rb: RegId, rc: RegId, rd: RegId, imm: u32) -> Instruction { - match self { - $( - Self::$Op => Instruction::$Op(crate::_op::$Op::test_construct(ra, rb, rc, rd, imm)), - )* - } - } - } - }; - - // Implement accessors for register and immediate values. 
- (impl_instruction $($doc:literal $ix:literal $Op:ident $op:ident [$($fname:ident: $field:ident)*])*) => { - impl Instruction { - /// This instruction's opcode. - pub fn opcode(&self) -> Opcode { - match self { - $( - Self::$Op(_) => Opcode::$Op, - )* - } - } - - /// Unpacks all register IDs into a slice of options. - pub fn reg_ids(&self) -> [Option<RegId>; 4] { - match self { - $( - Self::$Op(op) => op.reg_ids(), - )* - } - } - } - - impl From<Instruction> for [u8; 4] { - fn from(inst: Instruction) -> Self { - match inst { - $( - Instruction::$Op(op) => op.into(), - )* - } - } - } - - #[cfg(feature = "typescript")] - impl From<Instruction> for typescript::Instruction { - fn from(inst: Instruction) -> Self { - typescript::Instruction::new(inst) - } - } - - impl core::convert::TryFrom<[u8; 4]> for Instruction { - type Error = InvalidOpcode; - fn try_from([op, a, b, c]: [u8; 4]) -> Result<Self, Self::Error> { - match Opcode::try_from(op)? { - $( - Opcode::$Op => Ok(Self::$Op({ - let op = op::$Op([a, b, c]); - if !op.reserved_part_is_zero() { - return Err(InvalidOpcode); - } - op - })), - )* - } - } - } - - impl core::fmt::Debug for Instruction { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - match self { - $( - Self::$Op(op) => op.fmt(f), - )* - } - } - } - }; - - // Entrypoint to the macro, generates structs, methods, opcode enum and instruction enum - // separately. - ($($tts:tt)*) => { - mod _op { - use super::*; - decl_op_struct!($($tts)*); - impl_instructions!(impl_op $($tts)*); - } - impl_instructions!(decl_opcode_enum $($tts)*); - impl_instructions!(decl_instruction_enum $($tts)*); - impl_instructions!(impl_opcode $($tts)*); - impl_instructions!(impl_instruction $($tts)*); - impl_instructions!(impl_opcode_test_construct $($tts)*); - - - #[cfg(test)] - mod opcode_tests { - use super::*; - impl_instructions!(tests $($tts)*); - } - }; -} - /// Defines the enum with `TryFrom` trait implementation. #[macro_export] macro_rules! enum_try_from { @@ -1306,124 +24,3 @@ macro_rules! 
enum_try_from { } } } - -#[cfg(test)] -// Generate a test for the instruction. -macro_rules! op_test { - ($Op:ident $op:ident[RegId]) => { - #[test] - fn $op() { - crate::macros::test_reserved_part(Opcode::$Op, true, false, false, false); - } - }; - ($Op:ident $op:ident[RegId RegId]) => { - #[test] - fn $op() { - crate::macros::test_reserved_part(Opcode::$Op, true, true, false, false); - } - }; - ($Op:ident $op:ident[RegId RegId RegId]) => { - #[test] - fn $op() { - crate::macros::test_reserved_part(Opcode::$Op, true, true, true, false); - } - }; - ($Op:ident $op:ident[RegId RegId RegId RegId]) => { - #[test] - fn $op() { - crate::macros::test_reserved_part(Opcode::$Op, true, true, true, true); - } - }; - ($Op:ident $op:ident[RegId RegId RegId Imm06]) => { - #[test] - fn $op() { - crate::macros::test_reserved_part(Opcode::$Op, true, true, true, true); - } - }; - ($Op:ident $op:ident[RegId RegId Imm12]) => { - #[test] - fn $op() { - crate::macros::test_reserved_part(Opcode::$Op, true, true, true, true); - } - }; - ($Op:ident $op:ident[RegId Imm18]) => { - #[test] - fn $op() { - crate::macros::test_reserved_part(Opcode::$Op, true, true, true, true); - } - }; - ($Op:ident $op:ident[Imm24]) => { - #[test] - fn $op() { - crate::macros::test_reserved_part(Opcode::$Op, true, true, true, true); - } - }; - ($Op:ident $op:ident[]) => { - #[test] - fn $op() { - crate::macros::test_reserved_part(Opcode::$Op, false, false, false, false); - } - }; -} - -#[cfg(test)] -fn bytes(a: u8, b: u8, c: u8, d: u8) -> [u8; 3] { - use crate::RegId; - crate::pack::bytes_from_ra_rb_rc_rd( - RegId::new(a), - RegId::new(b), - RegId::new(c), - RegId::new(d), - ) -} - -#[cfg(test)] -pub(crate) fn test_reserved_part( - opcode: crate::Opcode, - zero_should_pass: bool, - first_should_pass: bool, - second_should_pass: bool, - third_should_pass: bool, -) { - use crate::Instruction; - - // Args: 0 - let [a, b, c] = bytes(0, 0, 0, 0); - Instruction::try_from([opcode as u8, a, b, c]).unwrap(); - let [a, 
b, c] = bytes(1, 0, 0, 0); - let zero_is_error = Instruction::try_from([opcode as u8, a, b, c]).is_ok(); - assert_eq!( - zero_should_pass, zero_is_error, - "Opcode: {opcode:?} failed zero" - ); - - // Args: 1 - let [a, b, c] = bytes(0, 0, 0, 0); - Instruction::try_from([opcode as u8, a, b, c]).unwrap(); - let [a, b, c] = bytes(0, 1, 0, 0); - let first_is_error = Instruction::try_from([opcode as u8, a, b, c]).is_ok(); - assert_eq!( - first_should_pass, first_is_error, - "Opcode: {opcode:?} failed first" - ); - - // Args: 2 - let [a, b, c] = bytes(0, 0, 0, 0); - Instruction::try_from([opcode as u8, a, b, c]).unwrap(); - let [a, b, c] = bytes(0, 0, 1, 0); - let second_is_error = Instruction::try_from([opcode as u8, a, b, c]).is_ok(); - assert_eq!( - second_should_pass, second_is_error, - "Opcode: {opcode:?} failed second" - ); - - // Args: 3 - let [a, b, c] = bytes(0, 0, 0, 0); - Instruction::try_from([opcode as u8, a, b, c]).unwrap(); - let [a, b, c] = bytes(0, 0, 0, 1); - let third_is_error = Instruction::try_from([opcode as u8, a, b, c]).is_ok(); - assert_eq!( - third_should_pass, third_is_error, - "Opcode: {opcode:?} failed third" - ); -} diff --git a/fuel-asm/src/pack.rs b/fuel-asm/src/pack.rs deleted file mode 100644 index 6eedeb59a9..0000000000 --- a/fuel-asm/src/pack.rs +++ /dev/null @@ -1,112 +0,0 @@ -//! Functions for packing instruction data into bytes or u32s. 
- -use crate::{ - Imm06, - Imm12, - Imm18, - Imm24, - RegId, -}; - -pub(super) fn bytes_from_ra(ra: RegId) -> [u8; 3] { - u8x3_from_u8x4(u32_from_ra(ra).to_be_bytes()) -} - -pub(super) fn bytes_from_ra_rb(ra: RegId, rb: RegId) -> [u8; 3] { - u8x3_from_u8x4(u32_from_ra_rb(ra, rb).to_be_bytes()) -} - -pub(super) fn bytes_from_ra_rb_rc(ra: RegId, rb: RegId, rc: RegId) -> [u8; 3] { - u8x3_from_u8x4(u32_from_ra_rb_rc(ra, rb, rc).to_be_bytes()) -} - -pub(super) fn bytes_from_ra_rb_rc_rd( - ra: RegId, - rb: RegId, - rc: RegId, - rd: RegId, -) -> [u8; 3] { - u8x3_from_u8x4(u32_from_ra_rb_rc_rd(ra, rb, rc, rd).to_be_bytes()) -} - -pub(super) fn bytes_from_ra_rb_rc_imm06( - ra: RegId, - rb: RegId, - rc: RegId, - imm: Imm06, -) -> [u8; 3] { - u8x3_from_u8x4(u32_from_ra_rb_rc_imm06(ra, rb, rc, imm).to_be_bytes()) -} - -pub(super) fn bytes_from_ra_rb_imm12(ra: RegId, rb: RegId, imm: Imm12) -> [u8; 3] { - u8x3_from_u8x4(u32_from_ra_rb_imm12(ra, rb, imm).to_be_bytes()) -} - -pub(super) fn bytes_from_ra_imm18(ra: RegId, imm: Imm18) -> [u8; 3] { - u8x3_from_u8x4(u32_from_ra_imm18(ra, imm).to_be_bytes()) -} - -pub(super) fn bytes_from_imm24(imm: Imm24) -> [u8; 3] { - u8x3_from_u8x4(u32_from_imm24(imm).to_be_bytes()) -} - -fn u32_from_ra(r: RegId) -> u32 { - (r.0 as u32) << 18 -} - -fn u32_from_rb(r: RegId) -> u32 { - (r.0 as u32) << 12 -} - -fn u32_from_rc(r: RegId) -> u32 { - (r.0 as u32) << 6 -} - -fn u32_from_rd(r: RegId) -> u32 { - r.0 as u32 -} - -fn u32_from_imm06(imm: Imm06) -> u32 { - imm.0 as u32 -} - -fn u32_from_imm12(imm: Imm12) -> u32 { - imm.0 as u32 -} - -fn u32_from_imm18(imm: Imm18) -> u32 { - imm.0 -} - -fn u32_from_imm24(imm: Imm24) -> u32 { - imm.0 -} - -fn u32_from_ra_rb(ra: RegId, rb: RegId) -> u32 { - u32_from_ra(ra) | u32_from_rb(rb) -} - -fn u32_from_ra_rb_rc(ra: RegId, rb: RegId, rc: RegId) -> u32 { - u32_from_ra_rb(ra, rb) | u32_from_rc(rc) -} - -fn u32_from_ra_rb_rc_rd(ra: RegId, rb: RegId, rc: RegId, rd: RegId) -> u32 { - u32_from_ra_rb_rc(ra, rb, rc) | 
u32_from_rd(rd) -} - -fn u32_from_ra_rb_rc_imm06(ra: RegId, rb: RegId, rc: RegId, imm: Imm06) -> u32 { - u32_from_ra_rb_rc(ra, rb, rc) | u32_from_imm06(imm) -} - -fn u32_from_ra_rb_imm12(ra: RegId, rb: RegId, imm: Imm12) -> u32 { - u32_from_ra_rb(ra, rb) | u32_from_imm12(imm) -} - -fn u32_from_ra_imm18(ra: RegId, imm: Imm18) -> u32 { - u32_from_ra(ra) | u32_from_imm18(imm) -} - -// Ignore the opcode byte, take the remaining instruction data. -fn u8x3_from_u8x4([_, a, b, c]: [u8; 4]) -> [u8; 3] { - [a, b, c] -} diff --git a/fuel-asm/src/unpack.rs b/fuel-asm/src/unpack.rs deleted file mode 100644 index 0bd19b5b9d..0000000000 --- a/fuel-asm/src/unpack.rs +++ /dev/null @@ -1,118 +0,0 @@ -//! Functions for unpacking instruction data from bytes or u32s. - -use crate::{ - Imm06, - Imm12, - Imm18, - Imm24, - RegId, -}; - -pub(super) fn rd_from_bytes(bs: [u8; 3]) -> RegId { - rd_from_u32(u32::from_be_bytes(u8x4_from_u8x3(bs))) -} - -pub(super) fn imm06_from_bytes(bs: [u8; 3]) -> Imm06 { - imm06_from_u32(u32::from_be_bytes(u8x4_from_u8x3(bs))) -} - -pub(super) fn imm12_from_bytes(bs: [u8; 3]) -> Imm12 { - imm12_from_u32(u32::from_be_bytes(u8x4_from_u8x3(bs))) -} - -pub(super) fn imm18_from_bytes(bs: [u8; 3]) -> Imm18 { - imm18_from_u32(u32::from_be_bytes(u8x4_from_u8x3(bs))) -} - -pub(super) fn imm24_from_bytes(bs: [u8; 3]) -> Imm24 { - imm24_from_u32(u32::from_be_bytes(u8x4_from_u8x3(bs))) -} - -pub(super) fn ra_rb_from_bytes(bs: [u8; 3]) -> (RegId, RegId) { - (ra_from_bytes(bs), rb_from_bytes(bs)) -} - -pub(super) fn ra_rb_rc_from_bytes(bs: [u8; 3]) -> (RegId, RegId, RegId) { - (ra_from_bytes(bs), rb_from_bytes(bs), rc_from_bytes(bs)) -} - -pub(super) fn ra_rb_rc_rd_from_bytes(bs: [u8; 3]) -> (RegId, RegId, RegId, RegId) { - ( - ra_from_bytes(bs), - rb_from_bytes(bs), - rc_from_bytes(bs), - rd_from_bytes(bs), - ) -} - -pub(super) fn ra_rb_rc_imm06_from_bytes(bs: [u8; 3]) -> (RegId, RegId, RegId, Imm06) { - ( - ra_from_bytes(bs), - rb_from_bytes(bs), - rc_from_bytes(bs), - 
imm06_from_bytes(bs), - ) -} - -pub(super) fn ra_rb_imm12_from_bytes(bs: [u8; 3]) -> (RegId, RegId, Imm12) { - (ra_from_bytes(bs), rb_from_bytes(bs), imm12_from_bytes(bs)) -} - -pub(super) fn ra_imm18_from_bytes(bs: [u8; 3]) -> (RegId, Imm18) { - (ra_from_bytes(bs), imm18_from_bytes(bs)) -} - -#[allow(clippy::cast_possible_truncation)] -fn ra_from_u32(u: u32) -> RegId { - RegId::new((u >> 18) as u8) -} - -#[allow(clippy::cast_possible_truncation)] -fn rb_from_u32(u: u32) -> RegId { - RegId::new((u >> 12) as u8) -} - -#[allow(clippy::cast_possible_truncation)] -fn rc_from_u32(u: u32) -> RegId { - RegId::new((u >> 6) as u8) -} - -#[allow(clippy::cast_possible_truncation)] -fn rd_from_u32(u: u32) -> RegId { - RegId::new(u as u8) -} - -#[allow(clippy::cast_possible_truncation)] -fn imm06_from_u32(u: u32) -> Imm06 { - Imm06::new(u as u8) -} - -#[allow(clippy::cast_possible_truncation)] -fn imm12_from_u32(u: u32) -> Imm12 { - Imm12::new(u as u16) -} - -fn imm18_from_u32(u: u32) -> Imm18 { - Imm18::new(u) -} - -fn imm24_from_u32(u: u32) -> Imm24 { - Imm24::new(u) -} - -pub(super) fn ra_from_bytes(bs: [u8; 3]) -> RegId { - ra_from_u32(u32::from_be_bytes(u8x4_from_u8x3(bs))) -} - -pub(super) fn rb_from_bytes(bs: [u8; 3]) -> RegId { - rb_from_u32(u32::from_be_bytes(u8x4_from_u8x3(bs))) -} - -pub(super) fn rc_from_bytes(bs: [u8; 3]) -> RegId { - rc_from_u32(u32::from_be_bytes(u8x4_from_u8x3(bs))) -} - -// Produce the big-endian bytes for an instruction's data, with a zeroed opcode byte. 
-fn u8x4_from_u8x3([a, b, c]: [u8; 3]) -> [u8; 4] { - [0, a, b, c] -} diff --git a/fuel-derive/Cargo.toml b/fuel-derive/Cargo.toml index 6bddd0d857..77d163ae74 100644 --- a/fuel-derive/Cargo.toml +++ b/fuel-derive/Cargo.toml @@ -18,3 +18,4 @@ quote = "1" syn = { version = "2", features = ["full"] } proc-macro2 = "1" synstructure = "0.13" +itertools = "0.13" diff --git a/fuel-derive/src/asm.rs b/fuel-derive/src/asm.rs new file mode 100644 index 0000000000..1bc4fc7777 --- /dev/null +++ b/fuel-derive/src/asm.rs @@ -0,0 +1,794 @@ +//! fuel-asm types from macros + +use proc_macro2::{ + Ident, + Span, + TokenStream, +}; +use quote::quote; +use syn::parse::Parse; + +const IMM_TYPES: &[&str] = &["Imm06", "Imm12", "Imm18", "Imm24"]; + +enum ArgType { + Reg, + Imm(usize), +} +impl ArgType { + fn size_bits(&self) -> usize { + match self { + ArgType::Reg => 6, + ArgType::Imm(bits) => *bits, + } + } + + fn smallest_containing_integer_type(&self) -> syn::Ident { + match self { + Self::Reg => syn::Ident::new("u8", Span::call_site()), + Self::Imm(6) => syn::Ident::new("u8", Span::call_site()), + Self::Imm(12) => syn::Ident::new("u16", Span::call_site()), + Self::Imm(18) => syn::Ident::new("u32", Span::call_site()), + Self::Imm(24) => syn::Ident::new("u32", Span::call_site()), + _ => panic!("Invalid immediate size"), + } + } +} + +struct InstructionArgument { + name: syn::Ident, + type_: syn::Ident, +} +impl Parse for InstructionArgument { + fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> { + let name: syn::Ident = input.parse()?; + let _: syn::Token![:] = input.parse()?; + let type_: syn::Ident = input.parse()?; + + let tn = type_.to_string(); + if !(tn == "RegId" || IMM_TYPES.contains(&tn.as_str())) { + return Err(syn::Error::new_spanned( + &type_, + format!("Invalid argument type: {}", tn), + )); + } + + Ok(Self { name, type_ }) + } +} +impl InstructionArgument { + fn is_imm(&self) -> bool { + self.type_.to_string().starts_with("Imm") + } + + fn typeinfo(&self) -> 
ArgType { + if self.is_imm() { + let imm_size = self + .type_ + .to_string() + .trim_start_matches("Imm") + .parse() + .unwrap(); + ArgType::Imm(imm_size) + } else { + ArgType::Reg + } + } +} + +struct Instruction { + description: syn::LitStr, + opcode_number: syn::LitInt, + opcode_name: syn::Ident, + opcode_fn_name: syn::Ident, + args: Vec<InstructionArgument>, +} +impl Parse for Instruction { + fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> { + let description: syn::LitStr = input.parse()?; + let opcode_number: syn::LitInt = input.parse()?; + let opcode_name: syn::Ident = input.parse()?; + let opcode_fn_name: syn::Ident = input.parse()?; + let mut args = Vec::new(); + + let content; + let _bracket_token = syn::bracketed!(content in input); + + while !content.is_empty() { + let item: InstructionArgument = content.parse()?; + args.push(item); + } + + // Check argument format + if args.len() > 4 { + return Err(syn::Error::new_spanned( + &opcode_name, + format!("Too many arguments: {}", args.len()), + )); + } + + for arg in args.iter().rev().skip(1) { + if arg.is_imm() { + return Err(syn::Error::new_spanned( + &arg.type_, + "Immediate argument only allowed as last argument", + )); + } + } + + if args.iter().map(|a| a.typeinfo().size_bits()).sum::<usize>() > 24 { + return Err(syn::Error::new_spanned( + &opcode_name, + "Arguments exceed 24 bits", + )); + } + + Ok(Self { + description, + opcode_number, + opcode_name, + opcode_fn_name, + args, + }) + } +} +impl Instruction { + fn has_imm(&self) -> bool { + self.args.last().map(|arg| arg.is_imm()).unwrap_or(false) + } + + #[allow(clippy::arithmetic_side_effects)] // Checked in opcode construction + fn reserved_bits(&self) -> usize { + if self.has_imm() { + 0 + } else { + 24 - self.args.len() * 6 + } + } +} + +struct InstructionList(Vec<Instruction>); +impl Parse for InstructionList { + fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> { + let mut instructions = Vec::new(); + while !input.is_empty() { + let item: Instruction = input.parse()?; + 
instructions.push(item); + } + Ok(Self(instructions)) + } +} + +/// Constructor functions and theirs shorthands +fn make_constructors(instructions: &InstructionList) -> TokenStream { + instructions + .0 + .iter() + .map( + |Instruction { + description, + opcode_name, + opcode_fn_name, + args, + .. + }| { + let strict_arguments: TokenStream = args + .iter() + .map(|arg| { + let name = &arg.name; + let type_ = &arg.type_; + quote! { #name: #type_, } + }) + .collect(); + + let pack_strict_arguments: TokenStream = args + .iter() + .enumerate() + .map(|(i, arg)| { + let name = &arg.name; + if arg.is_imm() { + quote! { + packed_integer |= (#name.to_smallest_int() as u32); + } + } else { + quote! { + packed_integer |= (#name.to_u8() as u32) << (6 * (3 - #i)); + } + } + }) + .collect(); + + let pack_test_arguments: TokenStream = args + .iter() + .enumerate() + .map(|(i, arg)| { + let reg_name = Ident::new(&format!("reg{i}"), Span::call_site()); + match arg.typeinfo() { + ArgType::Imm(bits) =>{ + let bits: u32 = bits.try_into().expect("Type size is checked"); + quote! { + packed_integer |= imm & ((#bits << 1u32) -1); + } + }, + ArgType::Reg => quote! { + packed_integer |= (#reg_name.to_u8() as u32) << (6 * (3 - #i)); + } + } + }) + .collect(); + + let flexible_arguments: TokenStream = args + .iter() + .map(|arg| { + let name = &arg.name; + let type_ = &arg.type_; + if arg.is_imm() { + let int_type = arg.typeinfo().smallest_containing_integer_type(); + quote! { #name: #int_type, } + } else { + let check_trait = Ident::new( + &format!("Check{type_}"), + Span::call_site(), + ); + quote! { #name: impl crate::#check_trait, } + } + }) + .collect(); + + let check_flexible_arguments: TokenStream = args + .iter() + .map(|arg| if arg.is_imm() { + let name = &arg.name; + let type_ = &arg.type_; + quote! { #type_::new_checked(#name).expect("Immediate value overflows"), } + } else { + let name = &arg.name; + quote! 
{ #name.check(), } + }) + .collect(); + + let pass_arguments: TokenStream = args + .iter() + .map(|InstructionArgument { name, .. }| quote! { #name, }) + .collect(); + + quote! { + #[doc = #description] + pub fn #opcode_fn_name(#flexible_arguments) -> Instruction { + #opcode_name::new(#check_flexible_arguments).into() + } + + impl #opcode_name { + #[doc = "Construct the instruction from its parts."] + pub fn new(#strict_arguments) -> Self { + let mut packed_integer: u32 = 0; + #pack_strict_arguments + let packed = packed_integer.to_be_bytes(); + Self([packed[1], packed[2], packed[3]]) + } + + #[doc = "Construct the instruction from all possible raw fields, ignoring inapplicable ones."] + pub fn test_construct( + reg0: RegId, + reg1: RegId, + reg2: RegId, + reg3: RegId, + imm: u32, + ) -> Self { + let mut packed_integer: u32 = 0; + #pack_test_arguments + let packed = packed_integer.to_be_bytes(); + Self([packed[1], packed[2], packed[3]]) + } + } + + + #[cfg(feature = "typescript")] + #[wasm_bindgen::prelude::wasm_bindgen] + impl #opcode_name { + #[wasm_bindgen(constructor)] + #[doc = "Construct the instruction from its parts."] + pub fn new_typescript(#strict_arguments) -> Self { + Self::new(#pass_arguments) + } + } + } + }, + ) + .collect() +} + +fn make_op_unpacks(instructions: &InstructionList) -> TokenStream { + instructions + .0 + .iter() + .map( + |instr| { + let Instruction { + opcode_name, args, .. + } = instr; + let arg_types: Vec<_> = args + .iter() + .map(|InstructionArgument { type_, .. }| type_) + .collect(); + let convert_reg_args: Vec<_> = args + .iter() + .enumerate() + .filter_map( + |(i, arg)| { + let type_ = &arg.type_; + if arg.is_imm() { + None + } else { + Some(quote! 
{ + #type_::new((integer >> (6 * (3 - #i))) as u8) + }) + } + }, + ) + .collect(); + let reserved_bits = instr.reserved_bits(); + + let mut ret_args = convert_reg_args; + if let Some(convert_imm_arg) = args.last().and_then(|arg| { + let type_: &Ident = &arg.type_; + if arg.is_imm() { + Some(quote! { #type_::new(integer as _) }) + } else {None}} + ) { + ret_args.push(convert_imm_arg); + } + + + // Return value for unpack. If there is only one argument, doesn't wrap it in a tuple. + let retval = if ret_args.len() == 1 { + let ra = &ret_args[0]; + quote! { #ra } + } else { + let ra: TokenStream = itertools::Itertools::intersperse( + ret_args.iter().cloned(), + quote!{,} + ) + .collect(); + quote! { ( #ra ) } + }; + let arg_types = if arg_types.len() == 1 { + let at = arg_types[0]; + quote! { #at } + } else { + let ats: TokenStream = arg_types.iter().map(|at| quote! {#at,} ).collect(); + quote! { (#ats) } + }; + + // Like above but always tuple-wraps + let raw_regs = { + let ra: TokenStream = + ret_args.iter().map(|a| quote! {#a,}) + .collect(); + quote! { ( #ra ) } + }; + + let reg_ids: TokenStream = (0..4).map(|i| { + if let Some(arg) = args.get(i) { + let tuple_index = proc_macro2::Literal::usize_unsuffixed(i); + if !arg.is_imm() { + return quote! { Some(fields.#tuple_index), }; + } + } + quote![ None, ] + }).collect(); + + quote! 
{ + impl #opcode_name { + #[doc = "Convert the instruction into its parts, without checking for correctness."] + pub fn unpack(self) -> #arg_types { + let integer = u32::from_be_bytes([0, self.0[0], self.0[1], self.0[2]]); + #retval + } + + #[doc = "Verify that the unused bits after the instruction are zero."] + pub(crate) fn reserved_part_is_zero(self) -> bool { + let integer = u32::from_be_bytes([0, self.0[0], self.0[1], self.0[2]]); + let mask = (1u32 << #reserved_bits) - 1; + (integer & mask) == 0 + } + + pub(crate) fn reg_ids(self) -> [Option<RegId>; 4] { + let integer = u32::from_be_bytes([0, self.0[0], self.0[1], self.0[2]]); + let fields = #raw_regs; + [ #reg_ids ] + } + } + } + }, + ) + .collect() +} + +/// Make a struct for each opcode +fn make_op_structs(instructions: &InstructionList) -> TokenStream { + instructions + .0 + .iter() + .map( + |Instruction { + description, + opcode_name, + .. + }| { + quote! { + #[doc = #description] + #[derive(Clone, Copy, Eq, Hash, PartialEq)] + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] + #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] + pub struct #opcode_name(pub (super) [u8; 3]); + + impl #opcode_name { + /// The opcode number for this instruction. + pub const OPCODE: Opcode = Opcode::#opcode_name; + } + } + }) + .collect() +} + +fn make_op_debug_impl(instructions: &InstructionList) -> TokenStream { + instructions + .0 + .iter() + .map( + |Instruction { + opcode_name, + args, + .. + }| { + let values: TokenStream = itertools::Itertools::intersperse(args.iter().map(|arg| { + let name = &arg.name; + quote! { + #name + } + }), quote!{,}).collect(); + let fields: TokenStream = args.iter().map(|arg| { + let name = &arg.name; + if arg.is_imm() { + quote! { + .field(stringify!(#name), &format_args!("{}", #name.to_smallest_int())) + } + } else { + quote! 
{ + .field(stringify!(#name), &format_args!("{:#02x}", u8::from(#name))) + } + } + }).collect(); + + let unpack_if_needed = if args.is_empty() { + quote! {} + } else { + quote! { + let (#values) = self.unpack(); + } + }; + + quote! { + impl core::fmt::Debug for #opcode_name { + #[warn(clippy::unused_unit)] // Simplify code + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + #unpack_if_needed + f.debug_struct(stringify!(#opcode_name)) + #fields + .finish() + } + } + } + }, + ) + .collect() +} + +fn make_opcode_enum(instructions: &InstructionList) -> TokenStream { + let variants: TokenStream = instructions + .0 + .iter() + .map( + |Instruction { + description, + opcode_name, + opcode_number, + .. + }| { + quote! { + #[doc = #description] + #opcode_name = #opcode_number, + } + }, + ) + .collect(); + let variants_test_construct: TokenStream = instructions + .0 + .iter() + .map( + |Instruction { + description, + opcode_name, + .. + }| { + quote! { + #[doc = #description] + Self::#opcode_name => Instruction::#opcode_name( + crate::_op::#opcode_name::test_construct(ra, rb, rc, rd, imm) + ), + } + }, + ) + .collect(); + quote! { + #[doc = "The opcode numbers for each instruction."] + #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] + #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] + pub enum Opcode { + #variants + } + + impl Opcode { + /// Construct the instruction from all possible raw fields, ignoring inapplicable ones. + #[cfg(test)] + pub fn test_construct(self, ra: RegId, rb: RegId, rc: RegId, rd: RegId, imm: u32) -> Instruction { + match self { + #variants_test_construct + } + } + } + } +} + +fn make_opcode_try_from(instructions: &InstructionList) -> TokenStream { + let arms: TokenStream = instructions + .0 + .iter() + .map( + |Instruction { + opcode_number, + opcode_name, + .. + }| { + quote! 
{ + #opcode_number => Ok(Opcode::#opcode_name), + } + }, + ) + .collect(); + quote! { + impl std::convert::TryFrom<u8> for Opcode { + type Error = InvalidOpcode; + + fn try_from(value: u8) -> Result<Self, Self::Error> { + match value { + #arms + _ => Err(InvalidOpcode), + } + } + } + } +} + +fn make_from_op(instructions: &InstructionList) -> TokenStream { + instructions + .0 + .iter() + .map(|Instruction { opcode_name, .. }| { + quote! { + impl From<#opcode_name> for [u8; 3] { + fn from(#opcode_name(arr): #opcode_name) -> Self { + arr + } + } + + impl From<#opcode_name> for [u8; 4] { + fn from(#opcode_name([a, b, c]): #opcode_name) -> Self { + [#opcode_name::OPCODE as u8, a, b, c] + } + } + + impl From<#opcode_name> for u32 { + fn from(op: #opcode_name) -> Self { + u32::from_be_bytes(op.into()) + } + } + + impl From<#opcode_name> for Instruction { + fn from(op: #opcode_name) -> Self { + Instruction::#opcode_name(op) + } + } + + #[cfg(feature = "typescript")] + impl From<#opcode_name> for typescript::Instruction { + fn from(opcode: #opcode_name) -> Self { + typescript::Instruction::new(opcode.into()) + } + } + } + }) + .collect() +} + +fn make_instruction_enum(instructions: &InstructionList) -> TokenStream { + let variants: TokenStream = instructions + .0 + .iter() + .map( + |Instruction { + description, + opcode_name, + .. + }| { + quote! { + #[doc = #description] + #opcode_name(_op::#opcode_name), + } + }, + ) + .collect(); + let variant_opcodes: TokenStream = instructions + .0 + .iter() + .map(|Instruction { opcode_name, .. }| { + quote! { + Self::#opcode_name(_) => Opcode::#opcode_name, + } + }) + .collect(); + let variant_reg_ids: TokenStream = instructions + .0 + .iter() + .map(|Instruction { opcode_name, .. }| { + quote! { + Self::#opcode_name(op) => op.reg_ids(), + } + }) + .collect(); + + let variant_debug: TokenStream = instructions + .0 + .iter() + .map(|Instruction { opcode_name, .. }| { + quote! { + Self::#opcode_name(op) => op.fmt(f), + } + }) + .collect(); + + quote! 
{ + #[doc = r" + Representation of a single instruction for the interpreter. + + The opcode is represented in the tag (variant), or may be retrieved in the form of an + `Opcode` byte using the `opcode` method. + + The register and immediate data associated with the instruction is represented within + an inner unit type wrapper around the 3 remaining bytes. + "] + #[derive(Clone, Copy, Eq, Hash, PartialEq)] + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] + pub enum Instruction { + #variants + } + + impl Instruction { + #[doc = "This instruction's opcode."] + pub fn opcode(&self) -> Opcode { + match self { + #variant_opcodes + } + } + + #[doc = "Unpacks all register IDs into a slice of options."] + pub fn reg_ids(&self) -> [Option<RegId>; 4] { + match self { + #variant_reg_ids + } + } + } + + impl core::fmt::Debug for Instruction { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match self { + #variant_debug + } + } + } + } +} + +fn make_instruction_try_from_bytes(instructions: &InstructionList) -> TokenStream { + let arms: TokenStream = instructions + .0 + .iter() + .map(|Instruction { opcode_name, .. }| { + quote! { + Opcode::#opcode_name => Ok(Self::#opcode_name({ + let op = op::#opcode_name([a, b, c]); + if !op.reserved_part_is_zero() { + return Err(InvalidOpcode); + } + op + })), + } + }) + .collect(); + quote! { + impl std::convert::TryFrom<[u8; 4]> for Instruction { + type Error = InvalidOpcode; + + fn try_from([op, a, b, c]: [u8; 4]) -> Result<Self, Self::Error> { + match Opcode::try_from(op)? { + #arms + _ => Err(InvalidOpcode), + } + } + } + } +} + +fn make_bytes_from_instruction(instructions: &InstructionList) -> TokenStream { + let arms: TokenStream = instructions + .0 + .iter() + .map(|Instruction { opcode_name, .. }| { + quote! { + Instruction::#opcode_name(op) => op.into(), + } + }) + .collect(); + quote! 
{ + impl std::convert::From<Instruction> for [u8; 4] { + fn from(instruction: Instruction) -> [u8; 4] { + match instruction { + #arms + } + } + } + } +} + +/// TODO: docs +pub fn impl_instructions(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let instructions: InstructionList = syn::parse_macro_input!(input as InstructionList); + + let op_structs = make_op_structs(&instructions); + let op_debug_impl = make_op_debug_impl(&instructions); + let from_op = make_from_op(&instructions); + let constructors = make_constructors(&instructions); + let op_unpacks = make_op_unpacks(&instructions); + let opcode_enum = make_opcode_enum(&instructions); + let opcode_try_from = make_opcode_try_from(&instructions); + let instruction_enum = make_instruction_enum(&instructions); + let instruction_try_from_bytes = make_instruction_try_from_bytes(&instructions); + let bytes_from_instruction = make_bytes_from_instruction(&instructions); + (quote! { + /// Opcode-specific definitions and implementations. + #[allow(clippy::unused_unit)] // Simplify codegen + pub mod _op { + use super::*; + #op_structs + #op_debug_impl + #from_op + #constructors + #op_unpacks + } + #opcode_enum + #opcode_try_from + #instruction_enum + #instruction_try_from_bytes + #bytes_from_instruction + + #[cfg(feature = "typescript")] + impl From<Instruction> for typescript::Instruction { + fn from(inst: Instruction) -> Self { + typescript::Instruction::new(inst) + } + } + + }) + .into() +} diff --git a/fuel-derive/src/lib.rs b/fuel-derive/src/lib.rs index 758a7d4a07..a462dcb28e 100644 --- a/fuel-derive/src/lib.rs +++ b/fuel-derive/src/lib.rs @@ -10,6 +10,7 @@ )] extern crate proc_macro; +mod asm; mod attribute; mod deserialize; mod serialize; @@ -28,3 +29,9 @@ synstructure::decl_derive!( /// Derives `Serialize` trait for the given `struct` or `enum`. 
serialize_derive ); + +/// TODO: docs +#[proc_macro] +pub fn impl_instructions(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + asm::impl_instructions(input) +} From bde9ee2847f53ae4d78e01a2cca5072693e7f855 Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Thu, 22 Aug 2024 09:29:37 +0200 Subject: [PATCH 02/15] Add changelog entry --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b75a5a2d7..ca78d86cbe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] +### Changed +- [#804](https://github.com/FuelLabs/fuel-vm/pull/804): Refactor `impl_instructions!` macro. No externally visible changes. + ## [Version 0.56.0] ### Added From 26d3d67986a5a4aa3828490df6a6853e61602d32 Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Thu, 22 Aug 2024 09:29:58 +0200 Subject: [PATCH 03/15] cargo sort --- fuel-asm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuel-asm/Cargo.toml b/fuel-asm/Cargo.toml index 1568e1ef4c..28e4f43262 100644 --- a/fuel-asm/Cargo.toml +++ b/fuel-asm/Cargo.toml @@ -13,8 +13,8 @@ description = "Atomic types of the FuelVM." 
[dependencies] arbitrary = { version = "1.1", features = ["derive"], optional = true } bitflags = { workspace = true } -fuel-types = { workspace = true } fuel-derive = { workspace = true } +fuel-types = { workspace = true } serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } strum = { version = "0.24", default-features = false, features = ["derive"] } wasm-bindgen = { version = "0.2.88", optional = true } From e997a5ee4a6c4e1d7bac3564c8bb607877801235 Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Thu, 22 Aug 2024 09:37:42 +0200 Subject: [PATCH 04/15] no_std fix for fuel-asm --- fuel-derive/src/asm.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fuel-derive/src/asm.rs b/fuel-derive/src/asm.rs index 1bc4fc7777..e7ed51729b 100644 --- a/fuel-derive/src/asm.rs +++ b/fuel-derive/src/asm.rs @@ -558,7 +558,7 @@ fn make_opcode_try_from(instructions: &InstructionList) -> TokenStream { ) .collect(); quote! { - impl std::convert::TryFrom for Opcode { + impl core::convert::TryFrom for Opcode { type Error = InvalidOpcode; fn try_from(value: u8) -> Result { @@ -717,7 +717,7 @@ fn make_instruction_try_from_bytes(instructions: &InstructionList) -> TokenStrea }) .collect(); quote! { - impl std::convert::TryFrom<[u8; 4]> for Instruction { + impl core::convert::TryFrom<[u8; 4]> for Instruction { type Error = InvalidOpcode; fn try_from([op, a, b, c]: [u8; 4]) -> Result { @@ -741,7 +741,7 @@ fn make_bytes_from_instruction(instructions: &InstructionList) -> TokenStream { }) .collect(); quote! 
{ - impl std::convert::From for [u8; 4] { + impl core::convert::From for [u8; 4] { fn from(instruction: Instruction) -> [u8; 4] { match instruction { #arms From 079c666e1d5406a8674f40e503869b1776ecfbcd Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Thu, 22 Aug 2024 09:42:59 +0200 Subject: [PATCH 05/15] Resolve doc todos --- fuel-derive/src/asm.rs | 2 +- fuel-derive/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fuel-derive/src/asm.rs b/fuel-derive/src/asm.rs index e7ed51729b..2e6f94e667 100644 --- a/fuel-derive/src/asm.rs +++ b/fuel-derive/src/asm.rs @@ -751,7 +751,7 @@ fn make_bytes_from_instruction(instructions: &InstructionList) -> TokenStream { } } -/// TODO: docs +/// Generates implementations for the FuelVM instruction types. pub fn impl_instructions(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let instructions: InstructionList = syn::parse_macro_input!(input as InstructionList); diff --git a/fuel-derive/src/lib.rs b/fuel-derive/src/lib.rs index a462dcb28e..fdf10e3911 100644 --- a/fuel-derive/src/lib.rs +++ b/fuel-derive/src/lib.rs @@ -30,7 +30,7 @@ synstructure::decl_derive!( serialize_derive ); -/// TODO: docs +/// Generates implementations for the FuelVM instruction types. 
#[proc_macro] pub fn impl_instructions(input: proc_macro::TokenStream) -> proc_macro::TokenStream { asm::impl_instructions(input) From 892b688dc0564922e7844e5030b90a136dc369ea Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Thu, 22 Aug 2024 15:48:51 +0200 Subject: [PATCH 06/15] Fix some attributes in proc-macro --- fuel-derive/src/asm.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fuel-derive/src/asm.rs b/fuel-derive/src/asm.rs index 2e6f94e667..8bfb001744 100644 --- a/fuel-derive/src/asm.rs +++ b/fuel-derive/src/asm.rs @@ -267,6 +267,7 @@ fn make_constructors(instructions: &InstructionList) -> TokenStream { } #[doc = "Construct the instruction from all possible raw fields, ignoring inapplicable ones."] + #[cfg(test)] pub fn test_construct( reg0: RegId, reg1: RegId, @@ -530,7 +531,7 @@ fn make_opcode_enum(instructions: &InstructionList) -> TokenStream { } impl Opcode { - /// Construct the instruction from all possible raw fields, ignoring inapplicable ones. + #[doc = "Construct the instruction from all possible raw fields, ignoring inapplicable ones."] #[cfg(test)] pub fn test_construct(self, ra: RegId, rb: RegId, rc: RegId, rd: RegId, imm: u32) -> Instruction { match self { From 5090fa819b9a3312290ce5c70b33449e160b590e Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Thu, 22 Aug 2024 15:56:01 +0200 Subject: [PATCH 07/15] Re-add the doc explaining what the impl_instructions! macro does --- fuel-derive/src/asm.rs | 194 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 193 insertions(+), 1 deletion(-) diff --git a/fuel-derive/src/asm.rs b/fuel-derive/src/asm.rs index 8bfb001744..973f0fd49d 100644 --- a/fuel-derive/src/asm.rs +++ b/fuel-derive/src/asm.rs @@ -1,4 +1,196 @@ -//! fuel-asm types from macros +//! # The `impl_instructions!` macro +//! +//! The heart of this crate's implementation is the private `impl_instructions!` macro. +//! This macro is used to generate the `Instruction` and `Opcode` types along with their +//! 
implementations. +//! +//! The intention is to allow for having a single source of truth from which each of the +//! instruction-related types and implementations are derived. +//! +//! Its usage looks like this: +//! +//! ```rust,ignore +//! impl_instructions! { +//! "Adds two registers." +//! 0x10 ADD add [RegId RegId RegId] +//! "Bitwise ANDs two registers." +//! 0x11 AND and [RegId RegId RegId] +//! // ... +//! } +//! ``` +//! +//! Each instruction's row includes: +//! +//! - A short docstring. +//! - The Opcode byte value. +//! - An uppercase identifier (for generating variants and types). +//! - A lowercase identifier (for generating the shorthand instruction constructor). +//! - The instruction layout (for the `new` and `unpack` functions). +//! +//! The following sections describe each of the items that are derived from the +//! `impl_instructions!` table in more detail. +//! +//! ## The `Opcode` enum +//! +//! Represents the bytecode portion of an instruction. +//! +//! ```rust,ignore +//! /// Solely the opcode portion of an instruction represented as a single byte. +//! #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +//! #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +//! #[repr(u8)] +//! pub enum Opcode { +//! /// Adds two registers. +//! ADD = 0x10, +//! /// Bitwise ANDs two registers. +//! AND = 0x11, +//! // ... +//! } +//! ``` +//! +//! A `TryFrom` implementation is also provided, producing an `Err(InvalidOpcode)` in +//! the case that the byte represents a reserved or undefined value. +//! +//! ```rust +//! # use fuel_asm::{InvalidOpcode, Opcode}; +//! assert_eq!(Opcode::try_from(0x10), Ok(Opcode::ADD)); +//! assert_eq!(Opcode::try_from(0x11), Ok(Opcode::AND)); +//! assert_eq!(Opcode::try_from(0), Err(InvalidOpcode)); +//! ``` +//! +//! ## The `Instruction` enum +//! +//! Represents a single, full instruction, discriminated by its `Opcode`. +//! +//! ```rust,ignore +//! 
/// Representation of a single instruction for the interpreter. +//! /// +//! /// The opcode is represented in the tag (variant), or may be retrieved in the form of an +//! /// `Opcode` byte using the `opcode` method. +//! /// +//! /// The register and immediate data associated with the instruction is represented within +//! /// an inner unit type wrapper around the 3 remaining bytes. +//! #[derive(Clone, Copy, Eq, Hash, PartialEq)] +//! #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +//! pub enum Instruction { +//! /// Adds two registers. +//! ADD(op::ADD), +//! /// Bitwise ANDs two registers. +//! AND(op::AND), +//! // ... +//! } +//! ``` +//! +//! The `From for u32` (aka `RawInstruction`) and `TryFrom for +//! Instruction` implementations can be found in the crate root. +//! +//! ## A unique unit type per operation +//! +//! In order to reduce the likelihood of misusing unrelated register IDs or immediate +//! values, we generate a unique unit type for each type of operation (i.e instruction +//! variant) and guard access to the relevant register IDs and immediate values behind +//! each type's unique methods. +//! +//! These unique operation types are generated as follows within a dedicated `op` module: +//! +//! ```rust,ignore +//! pub mod op { +//! //! Definitions and implementations for each unique instruction type, one for each +//! //! unique `Opcode` variant. +//! +//! // A unique type for each operation. +//! +//! /// Adds two registers. +//! pub struct ADD([u8; 3]); +//! +//! /// Bitwise ANDs two registers. +//! pub struct AND([u8; 3]); +//! +//! // ... +//! +//! // An implementation for each unique type. +//! +//! impl ADD { +//! pub const OPCODE: Opcode = Opcode::ADD; +//! +//! /// Construct the instruction from its parts. +//! pub fn new(ra: RegId, rb: RegId, rc: RegId) -> Self { +//! Self(pack::bytes_from_ra_rb_rc(ra, rb, rc)) +//! } +//! +//! /// Convert the instruction into its parts. +//! 
pub fn unpack(self) -> (RegId, RegId, RegId) { +//! unpack::ra_rb_rc_from_bytes(self.0) +//! } +//! } +//! +//! impl AND { +//! // ... +//! } +//! +//! // ... +//! +//! // A short-hand `Instruction` constructor for each operation to make it easier to +//! // hand-write assembly for tests and benchmarking. As these constructors are public and +//! // accept literal values, we check that the values are within range. +//! +//! /// Adds two registers. +//! pub fn add(ra: u8, rb: u8, rc: u8) -> Instruction { +//! ADD::new(check_reg_id(ra), check_reg_id(rb), check_reg_id(rc)).into() +//! } +//! +//! /// Bitwise ANDs two registers. +//! pub fn and(ra: u8, rb: u8, rc: u8) -> Instruction { +//! AND::new(check_reg_id(ra), check_reg_id(rb), check_reg_id(rc)).into() +//! } +//! +//! // ... +//! }; +//! ``` +//! +//! ### Instruction Layout +//! +//! The function signatures of the `new` and `unpack` functions are derived from the +//! instruction's data layout described in the `impl_instructions!` table. +//! +//! For example, the `unpack` method for `ADD` looks like this: +//! +//! ```rust,ignore +//! // 0x10 ADD add [RegId RegId RegId] +//! pub fn unpack(self) -> (RegId, RegId, RegId) +//! ``` +//! +//! While the `unpack` method for `ADDI` looks like this: +//! +//! ```rust,ignore +//! // 0x50 ADDI addi [RegId RegId Imm12] +//! pub fn unpack(self) -> (RegId, RegId, Imm12) +//! ``` +//! +//! ### Shorthand Constructors +//! +//! The shorthand instruction constructors (e.g. `add`, `and`, etc) are specifically +//! designed to make it easier to handwrite assembly for tests or benchmarking. Unlike the +//! `$OP::new` constructors which require typed register ID or immediate inputs, the +//! instruction constructors allow for constructing `Instruction`s from convenient literal +//! value inputs. E.g. +//! +//! ```rust +//! use fuel_asm::{op, Instruction}; +//! +//! // A sample program to perform ecrecover +//! let program: Vec = vec![ +//! 
op::move_(0x10, 0x01), // set r[0x10] := $one +//! op::slli(0x20, 0x10, 5), // set r[0x20] := `r[0x10] << 5 == 32` +//! op::slli(0x21, 0x10, 6), // set r[0x21] := `r[0x10] << 6 == 64` +//! op::aloc(0x21), // alloc `r[0x21] == 64` to the heap +//! op::addi(0x10, 0x07, 1), // set r[0x10] := `$hp + 1` (allocated heap) +//! op::move_(0x11, 0x04), // set r[0x11] := $ssp +//! op::add(0x12, 0x04, 0x20), // set r[0x12] := `$ssp + r[0x20]` +//! op::eck1(0x10, 0x11, 0x12),// recover public key in memory[r[0x10], 64] +//! op::ret(0x01), // return `1` +//! ]; +//! ``` use proc_macro2::{ Ident, From 3597b6080088c766672ba9f3cd102ea7a1308a15 Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Thu, 22 Aug 2024 16:17:29 +0200 Subject: [PATCH 08/15] Typescript fixes --- fuel-derive/Cargo.toml | 3 +++ fuel-derive/src/asm.rs | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/fuel-derive/Cargo.toml b/fuel-derive/Cargo.toml index 77d163ae74..1d8fe30f05 100644 --- a/fuel-derive/Cargo.toml +++ b/fuel-derive/Cargo.toml @@ -19,3 +19,6 @@ syn = { version = "2", features = ["full"] } proc-macro2 = "1" synstructure = "0.13" itertools = "0.13" + +[dev-dependencies] +fuel-asm = { workspace = true } diff --git a/fuel-derive/src/asm.rs b/fuel-derive/src/asm.rs index 973f0fd49d..5016a8d8e8 100644 --- a/fuel-derive/src/asm.rs +++ b/fuel-derive/src/asm.rs @@ -443,12 +443,31 @@ fn make_constructors(instructions: &InstructionList) -> TokenStream { .map(|InstructionArgument { name, .. }| quote! { #name, }) .collect(); + let typescript_arguments: TokenStream = args + .iter() + .map(|arg| { + let name = &arg.name; + let inttype = arg.typeinfo().smallest_containing_integer_type(); + quote! { #name: #inttype, } + }) + .collect(); + quote! 
{ #[doc = #description] pub fn #opcode_fn_name(#flexible_arguments) -> Instruction { #opcode_name::new(#check_flexible_arguments).into() } + #[cfg(feature = "typescript")] + const _: () = { + use super::*; + #[wasm_bindgen::prelude::wasm_bindgen] + #[doc = #description] + pub fn #opcode_fn_name(#typescript_arguments) -> typescript::Instruction { + crate::op::#opcode_fn_name(#pass_arguments).into() + } + }; + impl #opcode_name { #[doc = "Construct the instruction from its parts."] pub fn new(#strict_arguments) -> Self { From 2c4be2ac075e6de950bd0ae6fb08a93ec26aad4a Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Fri, 23 Aug 2024 01:19:41 +0200 Subject: [PATCH 09/15] Apply PR review comments --- Cargo.toml | 2 + fuel-asm/Cargo.toml | 3 +- fuel-asm/derive/Cargo.toml | 24 + fuel-asm/derive/src/codegen.rs | 507 +++++++++++++++ fuel-asm/derive/src/input.rs | 299 +++++++++ fuel-asm/derive/src/lib.rs | 270 ++++++++ fuel-asm/derive/src/packing.rs | 7 + fuel-asm/derive/src/serialize.rs | 223 +++++++ fuel-asm/src/encoding_tests.rs | 101 ++- fuel-asm/src/lib.rs | 2 +- fuel-derive/src/asm.rs | 1006 ------------------------------ fuel-derive/src/lib.rs | 7 - 12 files changed, 1410 insertions(+), 1041 deletions(-) create mode 100644 fuel-asm/derive/Cargo.toml create mode 100644 fuel-asm/derive/src/codegen.rs create mode 100644 fuel-asm/derive/src/input.rs create mode 100644 fuel-asm/derive/src/lib.rs create mode 100644 fuel-asm/derive/src/packing.rs create mode 100644 fuel-asm/derive/src/serialize.rs delete mode 100644 fuel-derive/src/asm.rs diff --git a/Cargo.toml b/Cargo.toml index 0d41d91da7..fa1d8a7ff0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] members = [ "fuel-asm", + "fuel-asm/derive", "fuel-crypto", "fuel-merkle", "fuel-storage", @@ -21,6 +22,7 @@ version = "0.56.0" [workspace.dependencies] fuel-asm = { version = "0.56.0", path = "fuel-asm", default-features = false } +fuel-asm-derive = { version = "0.56.0", path = "fuel-asm/derive", 
default-features = false } fuel-crypto = { version = "0.56.0", path = "fuel-crypto", default-features = false } fuel-derive = { version = "0.56.0", path = "fuel-derive", default-features = false } fuel-merkle = { version = "0.56.0", path = "fuel-merkle", default-features = false } diff --git a/fuel-asm/Cargo.toml b/fuel-asm/Cargo.toml index 28e4f43262..faafbb49a6 100644 --- a/fuel-asm/Cargo.toml +++ b/fuel-asm/Cargo.toml @@ -13,7 +13,7 @@ description = "Atomic types of the FuelVM." [dependencies] arbitrary = { version = "1.1", features = ["derive"], optional = true } bitflags = { workspace = true } -fuel-derive = { workspace = true } +fuel-asm-derive = { workspace = true } fuel-types = { workspace = true } serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } strum = { version = "0.24", default-features = false, features = ["derive"] } @@ -23,6 +23,7 @@ wasm-bindgen = { version = "0.2.88", optional = true } bincode = { workspace = true } fuel-asm = { path = ".", features = ["serde"] } rstest = "0.16" +proptest = "1.5" [features] default = ["std"] diff --git a/fuel-asm/derive/Cargo.toml b/fuel-asm/derive/Cargo.toml new file mode 100644 index 0000000000..2f1379b292 --- /dev/null +++ b/fuel-asm/derive/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "fuel-asm-derive" +version = { workspace = true } +authors = { workspace = true } +categories = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +keywords = ["blockchain", "cryptocurrencies", "fuel-vm", "vm"] +license = { workspace = true } +repository = { workspace = true } +description = "FuelVM (de)serialization derive macros for `fuel-vm` data structures." 
+ +[lib] +proc-macro = true + +[dependencies] +quote = "1" +syn = { version = "2", features = ["full"] } +proc-macro2 = "1" +synstructure = "0.13" +itertools = "0.13" + +[dev-dependencies] +fuel-asm = { workspace = true } diff --git a/fuel-asm/derive/src/codegen.rs b/fuel-asm/derive/src/codegen.rs new file mode 100644 index 0000000000..05b685a5fd --- /dev/null +++ b/fuel-asm/derive/src/codegen.rs @@ -0,0 +1,507 @@ +use proc_macro2::{ + Ident, + Span, + TokenStream, +}; +use quote::quote; + +use crate::{ + input::{ + Instruction, + InstructionList, + }, + packing, +}; + +/// Helper function to generate a comma-separated list of tokens. +fn comma_separated(items: impl Iterator) -> TokenStream { + itertools::Itertools::intersperse(items, quote! {,}).collect() +} + +/// Wraps the items in a tuple, unless there is exactly one item. +fn tuple_or_single(items: impl IntoIterator + Clone) -> TokenStream { + let items: Vec<_> = items.clone().into_iter().collect(); + if items.len() == 1 { + items.into_iter().next().unwrap() + } else { + quote! { (#(#items),*) } + } +} + +/// `op::name(...)` shorthand +pub fn op_constructor_shorthand(instructions: &InstructionList) -> TokenStream { + instructions.map_to_tokens( + |Instruction { + description, + opcode_name, + opcode_fn_name, + args, + .. + }| { + // Arguments that can be easily converted to form user input types, but might + // be incorrect so these need to be checked. + let arguments: TokenStream = args.map_to_tokens(|arg| { + let name = &arg.name; + if arg.is_imm() { + let int_type = arg.type_.smallest_containing_integer_type(); + quote! { #name: #int_type, } + } else { + let check_trait = Ident::new("CheckRegId", Span::call_site()); + quote! { #name: impl crate::#check_trait, } + } + }); + + let check_arguments: TokenStream = comma_separated(args.map(|arg| if arg.is_imm() { + let name = &arg.name; + let type_ = &arg.type_.token(); + quote! 
{ #type_::new_checked(#name).expect("Immediate value overflows") } + } else { + let name = &arg.name; + quote! { #name.check() } + })); + + quote! { + #[doc = #description] + pub fn #opcode_fn_name(#arguments) -> Instruction { + #opcode_name::new(#check_arguments).into() + } + } + }, + ) +} + +pub fn op_fn_new(instructions: &InstructionList) -> TokenStream { + instructions.map_to_tokens( + |Instruction { + opcode_name, args, .. + }| { + let arguments: TokenStream = comma_separated(args.singature_pairs()); + + let pack_strict_arguments: TokenStream = args + .iter() + .enumerate() + .map(|(i, arg)| { + let name = &arg.name; + if arg.is_imm() { + quote! { + packed_integer |= (#name.to_smallest_int() as u32); + } + } else { + let offset = packing::argument_offset(i); + quote! { + packed_integer |= (#name.to_u8() as u32) << #offset; + } + } + }) + .collect(); + + quote! { + impl #opcode_name { + #[doc = "Construct the instruction from its parts."] + pub fn new(#arguments) -> Self { + let mut packed_integer: u32 = 0; + #pack_strict_arguments + let packed = packed_integer.to_be_bytes(); + Self([packed[1], packed[2], packed[3]]) + } + } + } + }, + ) +} + +pub fn op_constructors_typescript(instructions: &InstructionList) -> TokenStream { + instructions.map_to_tokens(|Instruction { + description, + opcode_name, + opcode_fn_name, + args, + .. + }| { + let arguments: TokenStream = comma_separated(args.singature_pairs()); + let pass_arguments: TokenStream = comma_separated(args.names()); + let raw_int_arguments: TokenStream = comma_separated(args + .map(|arg| { + let name = &arg.name; + let inttype = arg.type_.smallest_containing_integer_type(); + quote! { #name: #inttype } + })); + + quote! 
{ + #[cfg(feature = "typescript")] + const _: () = { + use super::*; + #[wasm_bindgen::prelude::wasm_bindgen] + #[doc = #description] + pub fn #opcode_fn_name(#raw_int_arguments) -> typescript::Instruction { + crate::op::#opcode_fn_name(#pass_arguments).into() + } + }; + + #[cfg(feature = "typescript")] + #[wasm_bindgen::prelude::wasm_bindgen] + impl #opcode_name { + #[wasm_bindgen(constructor)] + #[doc = "Construct the instruction from its parts."] + pub fn new_typescript(#arguments) -> Self { + Self::new(#pass_arguments) + } + } + } + }) +} + +pub fn op_fn_unpack(instructions: &InstructionList) -> TokenStream { + instructions.map_to_tokens(|Instruction { + opcode_name, args, .. + }| { + let mut ret_args: Vec<_> = args.regs().enumerate().map(|(i, arg)| { + let type_ = &arg.type_.token(); + let offset = packing::argument_offset(i); + quote! { + #type_::new((integer >> #offset) as u8) + } + }).collect(); + if let Some(imm) = args.imm() { + let type_: &Ident = imm.type_.token(); + ret_args.push(quote! { #type_::new(integer as _) }); + } + let ret_val = tuple_or_single(ret_args); + let arg_types = tuple_or_single(args.types()); + quote! { + impl #opcode_name { + #[doc = "Convert the instruction into its parts, without checking for correctness."] + pub fn unpack(self) -> #arg_types { + let integer = u32::from_be_bytes([0, self.0[0], self.0[1], self.0[2]]); + #ret_val + } + } + } + }) +} + +pub fn op_fn_reserved_part_is_zero(instructions: &InstructionList) -> TokenStream { + instructions.map_to_tokens(|Instruction { + opcode_name, args, .. + }| { + let reserved_bits = args.reserved_bits(); + quote! 
{ + impl #opcode_name { + #[doc = "Verify that the unused bits after the instruction are zero."] + pub(crate) fn reserved_part_is_zero(self) -> bool { + let integer = u32::from_be_bytes([0, self.0[0], self.0[1], self.0[2]]); + let with_zeroed_reserved = (integer >> #reserved_bits) << #reserved_bits; + with_zeroed_reserved == integer + } + } + } + }) +} + +pub fn op_fn_reg_ids(instructions: &InstructionList) -> TokenStream { + instructions.map_to_tokens(|Instruction { + opcode_name, args, .. + }| { + let reg_ids: Vec<_> = args.regs().enumerate().map(|(i, arg)| { + let type_ = &arg.type_.token(); + let offset = packing::argument_offset(i); + quote! { + #type_::new((integer >> #offset) as u8) + } + }).collect(); + + let reg_id_opts = comma_separated((0..4).map(|i| match reg_ids.get(i) { + Some(reg_id) => quote! { Some(#reg_id) }, + None => quote! { None }, + })); + + quote! { + impl #opcode_name { + pub(crate) fn reg_ids(self) -> [Option; 4] { + let integer = u32::from_be_bytes([0, self.0[0], self.0[1], self.0[2]]); + [ #reg_id_opts ] + } + } + } + }) +} + +pub fn op_structs(instructions: &InstructionList) -> TokenStream { + instructions.map_to_tokens(|Instruction { + description, + opcode_name, + .. + }| quote! { + #[doc = #description] + #[derive(Clone, Copy, Eq, Hash, PartialEq)] + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] + #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] + pub struct #opcode_name(pub (super) [u8; 3]); + + impl #opcode_name { + /// The opcode number for this instruction. + pub const OPCODE: Opcode = Opcode::#opcode_name; + } + }) +} + +pub fn op_debug_impl(instructions: &InstructionList) -> TokenStream { + instructions.map_to_tokens(|Instruction { + opcode_name, + args, + .. + }| { + let values: TokenStream = comma_separated(args.names()); + let fields: TokenStream = args.map_to_tokens(|arg| { + let name = &arg.name; + if arg.is_imm() { + quote! 
{ + .field(stringify!(#name), &format_args!("{}", #name.to_smallest_int())) + } + } else { + quote! { + .field(stringify!(#name), &format_args!("{:#02x}", u8::from(#name))) + } + } + }); + + let unpack_if_needed = if args.is_empty() { + quote! {} + } else { + quote! { + let (#values) = self.unpack(); + } + }; + + quote! { + impl core::fmt::Debug for #opcode_name { + #[warn(clippy::unused_unit)] // Simplify code + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + #unpack_if_needed + f.debug_struct(stringify!(#opcode_name)) + #fields + .finish() + } + } + } + }) +} + +pub fn opcode_enum(instructions: &InstructionList) -> TokenStream { + let variants: TokenStream = instructions.map_to_tokens( + |Instruction { + description, + opcode_name, + opcode_number, + .. + }| { + quote! { + #[doc = #description] + #opcode_name = #opcode_number, + } + }, + ); + quote! { + #[doc = "The opcode numbers for each instruction."] + #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] + #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] + pub enum Opcode { + #variants + } + } +} + +pub fn opcode_try_from(instructions: &InstructionList) -> TokenStream { + let arms = instructions.map_to_tokens( + |Instruction { + opcode_number, + opcode_name, + .. + }| { + quote! { + #opcode_number => Ok(Opcode::#opcode_name), + } + }, + ); + quote! { + impl core::convert::TryFrom for Opcode { + type Error = InvalidOpcode; + + fn try_from(value: u8) -> Result { + match value { + #arms + _ => Err(InvalidOpcode), + } + } + } + } +} + +pub fn from_op(instructions: &InstructionList) -> TokenStream { + instructions.map_to_tokens(|Instruction { opcode_name, .. }| { + quote! 
{ + impl From<#opcode_name> for [u8; 3] { + fn from(#opcode_name(arr): #opcode_name) -> Self { + arr + } + } + + impl From<#opcode_name> for [u8; 4] { + fn from(#opcode_name([a, b, c]): #opcode_name) -> Self { + [#opcode_name::OPCODE as u8, a, b, c] + } + } + + impl From<#opcode_name> for u32 { + fn from(op: #opcode_name) -> Self { + u32::from_be_bytes(op.into()) + } + } + + impl From<#opcode_name> for Instruction { + fn from(op: #opcode_name) -> Self { + Instruction::#opcode_name(op) + } + } + + #[cfg(feature = "typescript")] + impl From<#opcode_name> for typescript::Instruction { + fn from(opcode: #opcode_name) -> Self { + typescript::Instruction::new(opcode.into()) + } + } + } + }) +} + +pub fn instruction_enum(instructions: &InstructionList) -> TokenStream { + let variants = instructions.map_to_tokens( + |Instruction { + description, + opcode_name, + .. + }| { + quote! { + #[doc = #description] + #opcode_name(_op::#opcode_name), + } + }, + ); + quote! { + #[doc = r" + Representation of a single instruction for the interpreter. + + The opcode is represented in the tag (variant), or may be retrieved in the form of an + `Opcode` byte using the `opcode` method. + + The register and immediate data associated with the instruction is represented within + an inner unit type wrapper around the 3 remaining bytes. + "] + #[derive(Clone, Copy, Eq, Hash, PartialEq)] + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] + pub enum Instruction { + #variants + } + } +} + +pub fn instruction_enum_fn_opcode(instructions: &InstructionList) -> TokenStream { + let arms = instructions.map_to_tokens(|Instruction { opcode_name, .. }| { + quote! { + Self::#opcode_name(_) => Opcode::#opcode_name, + } + }); + + quote! 
{ + impl Instruction { + #[doc = "This instruction's opcode."] + pub fn opcode(&self) -> Opcode { + match self { + #arms + } + } + } + } +} + +pub fn instruction_enum_fn_reg_ids(instructions: &InstructionList) -> TokenStream { + let variant_reg_ids = + instructions.map_to_tokens(|Instruction { opcode_name, .. }| { + quote! { + Self::#opcode_name(op) => op.reg_ids(), + } + }); + + quote! { + impl Instruction { + #[doc = "Unpacks all register IDs into a slice of options."] + pub fn reg_ids(&self) -> [Option; 4] { + match self { + #variant_reg_ids + } + } + } + } +} + +pub fn instruction_enum_debug(instructions: &InstructionList) -> TokenStream { + let arms = instructions.map_to_tokens(|Instruction { opcode_name, .. }| { + quote! { + Self::#opcode_name(op) => op.fmt(f), + } + }); + + quote! { + impl core::fmt::Debug for Instruction { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match self { + #arms + } + } + } + } +} + +pub fn instruction_try_from_bytes(instructions: &InstructionList) -> TokenStream { + let arms = instructions.map_to_tokens(|Instruction { opcode_name, .. }| { + quote! { + Opcode::#opcode_name => Ok(Self::#opcode_name({ + let op = op::#opcode_name([a, b, c]); + if !op.reserved_part_is_zero() { + return Err(InvalidOpcode); + } + op + })), + } + }); + quote! { + impl core::convert::TryFrom<[u8; 4]> for Instruction { + type Error = InvalidOpcode; + + fn try_from([op, a, b, c]: [u8; 4]) -> Result { + match Opcode::try_from(op)? { + #arms + _ => Err(InvalidOpcode), + } + } + } + } +} + +pub fn bytes_from_instruction(instructions: &InstructionList) -> TokenStream { + let arms = instructions.map_to_tokens(|Instruction { opcode_name, .. }| { + quote! { + Instruction::#opcode_name(op) => op.into(), + } + }); + quote! 
{ + impl core::convert::From for [u8; 4] { + fn from(instruction: Instruction) -> [u8; 4] { + match instruction { + #arms + } + } + } + } +} diff --git a/fuel-asm/derive/src/input.rs b/fuel-asm/derive/src/input.rs new file mode 100644 index 0000000000..647551cc3c --- /dev/null +++ b/fuel-asm/derive/src/input.rs @@ -0,0 +1,299 @@ +//! Input parsing + +use proc_macro2::TokenStream; +use quote::quote; +use syn::parse::Parse; + +#[derive(Debug, Clone)] +pub struct RegType { + pub token: syn::Ident, +} + +impl RegType { + pub fn token(&self) -> &syn::Ident { + &self.token + } + + pub fn smallest_containing_integer_type(&self) -> syn::Ident { + syn::Ident::new("u8", proc_macro2::Span::call_site()) + } + + pub fn size_bits(&self) -> usize { + 6 + } +} +#[derive(Debug, Clone)] +pub enum ImmType { + Imm06 { token: syn::Ident }, + Imm12 { token: syn::Ident }, + Imm18 { token: syn::Ident }, + Imm24 { token: syn::Ident }, +} +impl ImmType { + pub fn token(&self) -> &syn::Ident { + match self { + Self::Imm06 { token } => token, + Self::Imm12 { token } => token, + Self::Imm18 { token } => token, + Self::Imm24 { token } => token, + } + } + + pub fn smallest_containing_integer_type(&self) -> syn::Ident { + match self { + Self::Imm06 { .. } => syn::Ident::new("u8", proc_macro2::Span::call_site()), + Self::Imm12 { .. } => syn::Ident::new("u16", proc_macro2::Span::call_site()), + Self::Imm18 { .. } => syn::Ident::new("u32", proc_macro2::Span::call_site()), + Self::Imm24 { .. } => syn::Ident::new("u32", proc_macro2::Span::call_site()), + } + } + + pub fn size_bits(&self) -> usize { + match self { + Self::Imm06 { .. } => 6, + Self::Imm12 { .. } => 12, + Self::Imm18 { .. } => 18, + Self::Imm24 { .. 
} => 24, + } + } +} + +#[derive(Debug, Clone)] +pub enum AnyInstructionArgument { + Reg(RegType), + Imm(ImmType), +} +impl AnyInstructionArgument { + pub fn token(&self) -> &syn::Ident { + match self { + Self::Reg(a) => a.token(), + Self::Imm(a) => a.token(), + } + } + + pub fn smallest_containing_integer_type(&self) -> syn::Ident { + match self { + Self::Reg(a) => a.smallest_containing_integer_type(), + Self::Imm(a) => a.smallest_containing_integer_type(), + } + } + + pub fn size_bits(&self) -> usize { + match self { + Self::Reg(a) => a.size_bits(), + Self::Imm(a) => a.size_bits(), + } + } +} + +#[derive(Debug, Clone)] +pub struct InstructionArgument { + pub name: syn::Ident, + pub type_: T, +} +impl Parse for InstructionArgument { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let name: syn::Ident = input.parse()?; + let _: syn::Token![:] = input.parse()?; + let type_: syn::Ident = input.parse()?; + + let type_ = match type_.to_string().as_str() { + "RegId" => AnyInstructionArgument::Reg(RegType { token: type_ }), + "Imm06" => AnyInstructionArgument::Imm(ImmType::Imm06 { token: type_ }), + "Imm12" => AnyInstructionArgument::Imm(ImmType::Imm12 { token: type_ }), + "Imm18" => AnyInstructionArgument::Imm(ImmType::Imm18 { token: type_ }), + "Imm24" => AnyInstructionArgument::Imm(ImmType::Imm24 { token: type_ }), + _ => { + return Err(syn::Error::new_spanned( + type_.clone(), + format!("Invalid argument type: {}", type_), + )) + } + }; + + Ok(Self { name, type_ }) + } +} +impl InstructionArgument { + pub fn is_imm(&self) -> bool { + matches!(self.type_, AnyInstructionArgument::Imm(_)) + } +} + +#[derive(Debug, Clone)] +pub struct InstructionArguments(Vec); +impl Parse for InstructionArguments { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let mut args = Vec::new(); + + let content; + let _ = syn::bracketed!(content in input); + let full_span = content.span(); + + while !content.is_empty() { + let item: InstructionArgument = 
content.parse()?; + args.push(item); + } + + // Check argument format + if args.len() > 4 { + return Err(syn::Error::new( + full_span, + format!("Too many arguments: {}", args.len()), + )); + } + + for arg in args.iter().rev().skip(1) { + if arg.is_imm() { + return Err(syn::Error::new_spanned( + arg.type_.token(), + "Immediate argument only allowed as last argument", + )); + } + } + + if args.iter().map(|a| a.type_.size_bits()).sum::() > 24 { + return Err(syn::Error::new(full_span, "Arguments exceed 24 bits")); + } + + Ok(Self(args)) + } +} + +impl InstructionArguments { + pub fn has_imm(&self) -> bool { + self.0.last().map(|arg| arg.is_imm()).unwrap_or(false) + } + + #[allow(clippy::arithmetic_side_effects)] // Checked in opcode construction + pub fn reserved_bits(&self) -> usize { + if self.has_imm() { + 0 + } else { + 24 - self.0.len() * 6 + } + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Immediate argument, if any + pub fn imm(&self) -> Option> { + let last = self.0.last()?; + if let AnyInstructionArgument::Imm(type_) = last.type_.clone() { + Some(InstructionArgument { + name: last.name.clone(), + type_, + }) + } else { + None + } + } + + /// Register arguments + pub fn regs(&self) -> impl Iterator> + '_ { + self.iter().filter_map(|arg| { + if let AnyInstructionArgument::Reg(type_) = arg.type_.clone() { + Some(InstructionArgument { + name: arg.name.clone(), + type_, + }) + } else { + None + } + }) + } + + pub fn map<'a, F: FnMut(&InstructionArgument) -> T + 'a, T>( + &'a self, + f: F, + ) -> impl Iterator + 'a { + self.0.iter().map(f) + } + + pub fn map_to_tokens TokenStream>( + &self, + f: F, + ) -> TokenStream { + self.map(f).collect() + } + + pub fn iter(&self) -> impl Iterator + '_ + Clone { + self.0.iter() + } + + /// `name: type` pairs like in a function signature + pub fn singature_pairs(&self) -> impl Iterator + '_ + Clone { + self.0.iter().map(|arg| { + let name = &arg.name; + let type_ = &arg.type_.token(); + quote! 
{ + #name: #type_ + } + }) + } + + /// Just the names of the arguments as tokens + pub fn names(&self) -> impl Iterator + '_ + Clone { + self.0 + .iter() + .map(|InstructionArgument { name, .. }| quote! { #name }) + } + + /// Just the types of the arguments as tokens + pub fn types(&self) -> impl Iterator + '_ + Clone { + self.0.iter().map(|InstructionArgument { type_, .. }| { + let type_ = &type_.token(); + quote! { #type_ } + }) + } +} + +#[derive(Debug, Clone)] +pub struct Instruction { + pub description: syn::LitStr, + pub opcode_number: syn::LitInt, + pub opcode_name: syn::Ident, + pub opcode_fn_name: syn::Ident, + pub args: InstructionArguments, +} +impl Parse for Instruction { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let description: syn::LitStr = input.parse()?; + let opcode_number: syn::LitInt = input.parse()?; + let opcode_name: syn::Ident = input.parse()?; + let opcode_fn_name: syn::Ident = input.parse()?; + let args: InstructionArguments = input.parse()?; + + Ok(Self { + description, + opcode_number, + opcode_name, + opcode_fn_name, + args, + }) + } +} + +#[derive(Debug, Clone)] +pub struct InstructionList(Vec); +impl Parse for InstructionList { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let mut instructions = Vec::new(); + while !input.is_empty() { + let item: Instruction = input.parse()?; + instructions.push(item); + } + Ok(Self(instructions)) + } +} + +impl InstructionList { + pub fn map_to_tokens TokenStream>( + &self, + f: F, + ) -> TokenStream { + self.0.iter().map(f).collect() + } +} diff --git a/fuel-asm/derive/src/lib.rs b/fuel-asm/derive/src/lib.rs new file mode 100644 index 0000000000..89b5f054b1 --- /dev/null +++ b/fuel-asm/derive/src/lib.rs @@ -0,0 +1,270 @@ +//! # The `impl_instructions!` macro +//! +//! The heart of this crate's implementation is the private `impl_instructions!` macro. +//! This macro is used to generate the `Instruction` and `Opcode` types along with their +//! implementations. 
+//! +//! The intention is to allow for having a single source of truth from which each of the +//! instruction-related types and implementations are derived. +//! +//! Its usage looks like this: +//! +//! ```rust,ignore +//! impl_instructions! { +//! "Adds two registers." +//! 0x10 ADD add [dst: RegId lhs: RegId rhs: RegId] +//! "Bitwise ANDs two registers." +//! 0x11 AND and [dst: RegId lhs: RegId rhs: RegId] +//! // ... +//! } +//! ``` +//! +//! Each instruction's row includes: +//! +//! - A short docstring. +//! - The Opcode byte value. +//! - An uppercase identifier (for generating variants and types). +//! - A lowercase identifier (for generating the shorthand instruction constructor). +//! - The instruction layout as `name: Type` pairs (for the `new` and `unpack` functions). +//! +//! The following sections describe each of the items that are derived from the +//! `impl_instructions!` table in more detail. +//! +//! ## The `Opcode` enum +//! +//! Represents the bytecode portion of an instruction. +//! +//! ```rust,ignore +//! /// Solely the opcode portion of an instruction represented as a single byte. +//! #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +//! #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +//! #[repr(u8)] +//! pub enum Opcode { +//! /// Adds two registers. +//! ADD = 0x10, +//! /// Bitwise ANDs two registers. +//! AND = 0x11, +//! // ... +//! } +//! ``` +//! +//! A `TryFrom<u8>` implementation is also provided, producing an `Err(InvalidOpcode)` in +//! the case that the byte represents a reserved or undefined value. +//! +//! ```rust +//! # use fuel_asm::{InvalidOpcode, Opcode}; +//! assert_eq!(Opcode::try_from(0x10), Ok(Opcode::ADD)); +//! assert_eq!(Opcode::try_from(0x11), Ok(Opcode::AND)); +//! assert_eq!(Opcode::try_from(0), Err(InvalidOpcode)); +//! ``` +//! +//! ## The `Instruction` enum +//! +//! Represents a single, full instruction, discriminated by its `Opcode`. +//! +//! ```rust,ignore +//! /// Representation of a single instruction for the interpreter. 
+//! /// +//! /// The opcode is represented in the tag (variant), or may be retrieved in the form of an +//! /// `Opcode` byte using the `opcode` method. +//! /// +//! /// The register and immediate data associated with the instruction is represented within +//! /// an inner unit type wrapper around the 3 remaining bytes. +//! #[derive(Clone, Copy, Eq, Hash, PartialEq)] +//! #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +//! pub enum Instruction { +//! /// Adds two registers. +//! ADD(op::ADD), +//! /// Bitwise ANDs two registers. +//! AND(op::AND), +//! // ... +//! } +//! ``` +//! +//! The `From for u32` (aka `RawInstruction`) and `TryFrom for +//! Instruction` implementations can be found in the crate root. +//! +//! ## A unique unit type per operation +//! +//! In order to reduce the likelihood of misusing unrelated register IDs or immediate +//! values, we generate a unique unit type for each type of operation (i.e instruction +//! variant) and guard access to the relevant register IDs and immediate values behind +//! each type's unique methods. +//! +//! These unique operation types are generated as follows within a dedicated `op` module: +//! +//! ```rust,ignore +//! pub mod op { +//! //! Definitions and implementations for each unique instruction type, one for each +//! //! unique `Opcode` variant. +//! +//! // A unique type for each operation. +//! +//! /// Adds two registers. +//! pub struct ADD([u8; 3]); +//! +//! /// Bitwise ANDs two registers. +//! pub struct AND([u8; 3]); +//! +//! // ... +//! +//! // An implementation for each unique type. +//! +//! impl ADD { +//! pub const OPCODE: Opcode = Opcode::ADD; +//! +//! /// Construct the instruction from its parts. +//! pub fn new(ra: RegId, rb: RegId, rc: RegId) -> Self { +//! Self(pack::bytes_from_ra_rb_rc(ra, rb, rc)) +//! } +//! +//! /// Convert the instruction into its parts. +//! pub fn unpack(self) -> (RegId, RegId, RegId) { +//! unpack::ra_rb_rc_from_bytes(self.0) +//! 
} +//! } +//! +//! impl AND { +//! // ... +//! } +//! +//! // ... +//! +//! // A short-hand `Instruction` constructor for each operation to make it easier to +//! // hand-write assembly for tests and benchmarking. As these constructors are public and +//! // accept literal values, we check that the values are within range. +//! +//! /// Adds two registers. +//! pub fn add(ra: u8, rb: u8, rc: u8) -> Instruction { +//! ADD::new(check_reg_id(ra), check_reg_id(rb), check_reg_id(rc)).into() +//! } +//! +//! /// Bitwise ANDs two registers. +//! pub fn and(ra: u8, rb: u8, rc: u8) -> Instruction { +//! AND::new(check_reg_id(ra), check_reg_id(rb), check_reg_id(rc)).into() +//! } +//! +//! // ... +//! }; +//! ``` +//! +//! ### Instruction Layout +//! +//! The function signatures of the `new` and `unpack` functions are derived from the +//! instruction's data layout described in the `impl_instructions!` table. +//! +//! For example, the `unpack` method for `ADD` looks like this: +//! +//! ```rust,ignore +//! // 0x10 ADD add [dst: RegId lhs: RegId rhs: RegId] +//! pub fn unpack(self) -> (RegId, RegId, RegId) +//! ``` +//! +//! While the `unpack` method for `ADDI` looks like this: +//! +//! ```rust,ignore +//! // 0x50 ADDI addi [dst: RegId lhs: RegId rhs: Imm12] +//! pub fn unpack(self) -> (RegId, RegId, Imm12) +//! ``` +//! +//! ### Shorthand Constructors +//! +//! The shorthand instruction constructors (e.g. `add`, `and`, etc) are specifically +//! designed to make it easier to handwrite assembly for tests or benchmarking. Unlike the +//! `$OP::new` constructors which require typed register ID or immediate inputs, the +//! instruction constructors allow for constructing `Instruction`s from convenient literal +//! value inputs. E.g. +//! +//! ```rust +//! use fuel_asm::{op, Instruction}; +//! +//! // A sample program to perform ecrecover +//! let program: Vec<Instruction> = vec![ +//! op::move_(0x10, 0x01), // set r[0x10] := $one +//! op::slli(0x20, 0x10, 5), // set r[0x20] := `r[0x10] << 5 == 32` +//! 
op::slli(0x21, 0x10, 6), // set r[0x21] := `r[0x10] << 6 == 64` +//! op::aloc(0x21), // alloc `r[0x21] == 64` to the heap +//! op::addi(0x10, 0x07, 1), // set r[0x10] := `$hp + 1` (allocated heap) +//! op::move_(0x11, 0x04), // set r[0x11] := $ssp +//! op::add(0x12, 0x04, 0x20), // set r[0x12] := `$ssp + r[0x20]` +//! op::eck1(0x10, 0x11, 0x12),// recover public key in memory[r[0x10], 64] +//! op::ret(0x01), // return `1` +//! ]; +//! ``` + +#![deny(unused_must_use, missing_docs)] +#![deny( + clippy::arithmetic_side_effects, + clippy::cast_sign_loss, + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::string_slice +)] + +extern crate proc_macro; + +use input::InstructionList; +use quote::quote; + +mod codegen; +mod input; +mod packing; + +/// Generates implementations for the FuelVM instruction types. +#[proc_macro] +pub fn impl_instructions(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let instructions = syn::parse_macro_input!(input as InstructionList); + + let op_structs = codegen::op_structs(&instructions); + let op_debug_impl = codegen::op_debug_impl(&instructions); + let from_op = codegen::from_op(&instructions); + let op_constructor_shorthand = codegen::op_constructor_shorthand(&instructions); + let op_fn_new = codegen::op_fn_new(&instructions); + let op_constructors_typescript = codegen::op_constructors_typescript(&instructions); + let op_fn_unpack = codegen::op_fn_unpack(&instructions); + let op_fn_reserved_part_is_zero = codegen::op_fn_reserved_part_is_zero(&instructions); + let op_fn_reg_ids = codegen::op_fn_reg_ids(&instructions); + + let opcode_enum = codegen::opcode_enum(&instructions); + let opcode_try_from = codegen::opcode_try_from(&instructions); + let instruction_enum = codegen::instruction_enum(&instructions); + let instruction_enum_debug = codegen::instruction_enum_debug(&instructions); + let instruction_enum_fn_opcode = codegen::instruction_enum_fn_opcode(&instructions); + let 
instruction_enum_fn_reg_ids = codegen::instruction_enum_fn_reg_ids(&instructions); + let instruction_try_from_bytes = codegen::instruction_try_from_bytes(&instructions); + let bytes_from_instruction = codegen::bytes_from_instruction(&instructions); + + (quote! { + #[doc = "Opcode-specific definitions and implementations."] + #[allow(clippy::unused_unit)] // Simplify codegen + pub mod _op { + use super::*; + #op_structs + #op_debug_impl + #from_op + #op_constructor_shorthand + #op_fn_new + #op_constructors_typescript + #op_fn_unpack + #op_fn_reserved_part_is_zero + #op_fn_reg_ids + } + #opcode_enum + #opcode_try_from + #instruction_enum + #instruction_enum_debug + #instruction_enum_fn_opcode + #instruction_enum_fn_reg_ids + #instruction_try_from_bytes + #bytes_from_instruction + + #[cfg(feature = "typescript")] + impl From for typescript::Instruction { + fn from(inst: Instruction) -> Self { + typescript::Instruction::new(inst) + } + } + + }) + .into() +} diff --git a/fuel-asm/derive/src/packing.rs b/fuel-asm/derive/src/packing.rs new file mode 100644 index 0000000000..dbe86485e8 --- /dev/null +++ b/fuel-asm/derive/src/packing.rs @@ -0,0 +1,7 @@ +/// The shift amount for the given argument index, from left to right. +/// The input must be a valid argument index (0..=3). 
+#[allow(clippy::arithmetic_side_effects)] // Contract, double-checked with an assertion +pub fn argument_offset(i: usize) -> usize { + assert!(i <= 3); + 6 * (3 - i) +} diff --git a/fuel-asm/derive/src/serialize.rs b/fuel-asm/derive/src/serialize.rs new file mode 100644 index 0000000000..42b2eefdc1 --- /dev/null +++ b/fuel-asm/derive/src/serialize.rs @@ -0,0 +1,223 @@ +use proc_macro2::TokenStream as TokenStream2; +use quote::quote; + +use crate::attribute::{ + should_skip_field_binding, + StructAttrs, +}; + +fn serialize_struct(s: &synstructure::Structure) -> TokenStream2 { + let attrs = StructAttrs::parse(s); + let mut s = s.clone(); + + assert_eq!(s.variants().len(), 1, "structs must have one variant"); + + let variant: &mut synstructure::VariantInfo = &mut s.variants_mut()[0]; + variant.filter(|binding| !should_skip_field_binding(binding)); + + let encode_static = variant.each(|binding| { + quote! { + ::fuel_types::canonical::Serialize::encode_static(#binding, buffer)?; + } + }); + + let encode_dynamic = variant.each(|binding| { + quote! { + ::fuel_types::canonical::Serialize::encode_dynamic(#binding, buffer)?; + } + }); + + let size_static_code = variant.each(|binding| { + quote! { + size = size.saturating_add(#binding.size_static()); + } + }); + + let initial_size = if attrs.prefix.is_some() { + quote! { let mut size = 8usize; } + } else { + quote! { let mut size = 0usize; } + }; + let size_static_code = quote! { #initial_size match self { #size_static_code}; size }; + + let size_dynamic_code = variant.each(|binding| { + quote! { + size = size.saturating_add(#binding.size_dynamic()); + } + }); + let size_dynamic_code = + quote! { let mut size = 0usize; match self { #size_dynamic_code}; size }; + + let prefix = if let Some(prefix_type) = attrs.prefix.as_ref() { + quote! { + <_ as ::fuel_types::canonical::Serialize>::encode(&#prefix_type, buffer)?; + } + } else { + quote! {} + }; + + s.gen_impl(quote! 
{ + gen impl ::fuel_types::canonical::Serialize for @Self { + #[inline(always)] + fn size_static(&self) -> usize { + #size_static_code + } + + #[inline(always)] + fn size_dynamic(&self) -> usize { + #size_dynamic_code + } + + #[inline(always)] + fn encode_static(&self, buffer: &mut O) -> ::core::result::Result<(), ::fuel_types::canonical::Error> { + #prefix + match self { + #encode_static + }; + + ::core::result::Result::Ok(()) + } + + fn encode_dynamic(&self, buffer: &mut O) -> ::core::result::Result<(), ::fuel_types::canonical::Error> { + match self { + #encode_dynamic + }; + + ::core::result::Result::Ok(()) + } + } + }) +} + +fn serialize_enum(s: &synstructure::Structure) -> TokenStream2 { + assert!(!s.variants().is_empty(), "got invalid empty enum"); + let mut s = s.clone(); + let mut next_discriminant = quote! { { 0u64 } }; + + s.variants_mut().iter_mut().for_each(|v| { + v.filter(|binding| !should_skip_field_binding(binding)); + }); + + let encode_static = s.variants().iter().map(|v| { + let pat = v.pat(); + + let encode_static_iter = v.bindings().iter().map(|binding| { + quote! { + ::fuel_types::canonical::Serialize::encode_static(#binding, buffer)?; + } + }); + + if v.ast().discriminant.is_some() { + let variant_ident = v.ast().ident; + next_discriminant = quote! { { Self::#variant_ident as u64 } }; + } + + let encode_discriminant = quote! { + <::core::primitive::u64 as ::fuel_types::canonical::Serialize>::encode(&#next_discriminant, buffer)?; + }; + next_discriminant = quote! { ( (#next_discriminant) + 1u64 ) }; + + quote! { + #pat => { + #encode_discriminant + #( + { #encode_static_iter } + )* + } + } + }); + let encode_dynamic = s.variants().iter().map(|v| { + let encode_dynamic_iter = v.each(|binding| { + quote! { + ::fuel_types::canonical::Serialize::encode_dynamic(#binding, buffer)?; + } + }); + quote! 
{ + #encode_dynamic_iter + } + }); + + let match_size_static: TokenStream2 = s + .variants() + .iter() + .map(|variant| { + variant.each(|binding| { + quote! { + size = size.saturating_add(#binding.size_static()); + } + }) + }) + .collect(); + let match_size_static = quote! {{ + // `repr(128)` is unstable, so because of that we can use 8 bytes. + let mut size = 8usize; + match self { #match_size_static } size } + }; + + let match_size_dynamic: TokenStream2 = s + .variants() + .iter() + .map(|variant| { + variant.each(|binding| { + quote! { + size = size.saturating_add(#binding.size_dynamic()); + } + }) + }) + .collect(); + let match_size_dynamic = + quote! {{ let mut size = 0usize; match self { #match_size_dynamic } size }}; + + let impl_code = s.gen_impl(quote! { + gen impl ::fuel_types::canonical::Serialize for @Self { + #[inline(always)] + fn size_static(&self) -> usize { + #match_size_static + } + + #[inline(always)] + fn size_dynamic(&self) -> usize { + #match_size_dynamic + } + + #[inline(always)] + fn encode_static(&self, buffer: &mut O) -> ::core::result::Result<(), ::fuel_types::canonical::Error> { + match self { + #( + #encode_static + )*, + _ => return ::core::result::Result::Err(::fuel_types::canonical::Error::UnknownDiscriminant), + }; + + ::core::result::Result::Ok(()) + } + + fn encode_dynamic(&self, buffer: &mut O) -> ::core::result::Result<(), ::fuel_types::canonical::Error> { + match self { + #( + #encode_dynamic + )*, + _ => return ::core::result::Result::Err(::fuel_types::canonical::Error::UnknownDiscriminant), + }; + + ::core::result::Result::Ok(()) + } + } + }); + + quote! { + #impl_code + } +} + +/// Derives `Serialize` trait for the given `struct` or `enum`. 
+pub fn serialize_derive(mut s: synstructure::Structure) -> TokenStream2 { + s.add_bounds(synstructure::AddBounds::Fields) + .underscore_const(true); + + match s.ast().data { + syn::Data::Struct(_) => serialize_struct(&s), + syn::Data::Enum(_) => serialize_enum(&s), + _ => panic!("Can't derive `Serialize` for `union`s"), + } +} diff --git a/fuel-asm/src/encoding_tests.rs b/fuel-asm/src/encoding_tests.rs index e793095c18..84e01bdc65 100644 --- a/fuel-asm/src/encoding_tests.rs +++ b/fuel-asm/src/encoding_tests.rs @@ -2,40 +2,71 @@ use crate::*; use fuel_asm as _; +use proptest::prelude::*; use strum::IntoEnumIterator; -#[test] -#[cfg(test)] -fn opcode() { - // values picked to test edge cases - let r = RegId::new_checked(0x2d).unwrap(); - let imm12 = 0x0bfd; - let imm18 = 0x02fffd; - let imm24 = 0xbffffd; - - let mut instructions = Vec::new(); +proptest! { + #[test] + fn test_instruction_encoding(raw_instruction in 0..=u32::MAX) { + let ins = Instruction::try_from(raw_instruction); + prop_assume!(ins.is_ok()); // Only valid instructions are considered + let ins = ins.unwrap(); - for opcode_int in 0..64 { - let Ok(op) = Opcode::try_from(opcode_int) else { - continue - }; - - instructions.push(op.test_construct(r, r, r, r, imm12)); - instructions.push(op.test_construct(r, r, r, r, imm18)); - instructions.push(op.test_construct(r, r, r, r, imm24)); + assert_eq!(ins, Instruction::try_from(raw_instruction.to_be_bytes()).unwrap()); + assert_eq!(ins.to_bytes(), raw_instruction.to_be_bytes()); + assert_eq!(ins.opcode() as u8, (raw_instruction >> 24) as u8); } +} - for gm_arg in GMArgs::iter() { - instructions.push(op::gm_args(r, gm_arg)); - } +/// Go through all possible opcodes and argument position combinations. +/// Verify that those that can be converted to instructions can be converted back and they +/// stay identical. 
+#[test] +fn validate_all_opcodes() { + let arg_offsets = [0, 6, 12, 18]; - for gtf_arg in GTFArgs::iter() { - instructions.push(op::gtf_args(r, r, gtf_arg)); + let mut instructions = Vec::new(); + for mask_pattern in [ + 0, + u32::MAX, + 0b1010_1010_1010_1010_1010_1010_1010_1010, + 0b0101_0101_0101_0101_0101_0101_0101_0101, + ] { + for opcode_int in 0..=u8::MAX { + // Valid opcodes only + let Ok(op) = Opcode::try_from(opcode_int) else { + continue + }; + for regs_in_use in 0..=3 { + for has_imm in [false, true] { + // Construct the instruction + let mut raw: RawInstruction = (op as u32) << 24u32; + for i in 0..regs_in_use { + raw |= mask_pattern & (0b11_1111) << arg_offsets[i]; + } + + if has_imm { + let imm_bits = 6 * (3 - regs_in_use); + raw |= mask_pattern & ((1 << imm_bits) - 1); + } + + let Ok(ins) = Instruction::try_from(raw) else { + continue + }; + instructions.push(ins); + } + } + } } - // Pad to even length - if instructions.len() % 2 != 0 { - instructions.push(op::noop()); + for r in [0, 1, 0b11_1111] { + for gm_arg in GMArgs::iter() { + instructions.push(op::gm_args(r, gm_arg)); + } + + for gtf_arg in GTFArgs::iter() { + instructions.push(op::gtf_args(r, r, gtf_arg)); + } } let bytes: Vec = instructions.iter().copied().collect(); @@ -54,6 +85,24 @@ fn opcode() { } } +#[test] +fn instruction_try_from_fails_with_invalid_opcode() { + let unused: u8 = 0xff; // Some unused opcode + Opcode::try_from(unused).expect_err("The opcode should be unused"); + Instruction::try_from([unused, 0, 0, 0]).expect_err("Invalid opcode should fail"); +} + +#[test] +fn instruction_try_from_fails_with_reserved_bits_set() { + let op_with_reserved_part = Opcode::NOOP as u8; // This has reserved bits + Instruction::try_from((op_with_reserved_part as u32) << 24) + .expect("Reserved bits zero should succeed"); + for mask in 1..(1 << 24) { + let raw = (op_with_reserved_part as u32) << 24 | mask; + Instruction::try_from(raw).expect_err("Reserved bits set should fail"); + } +} + 
#[test] fn panic_reason_description() { let imm24 = 0xbfffff; diff --git a/fuel-asm/src/lib.rs b/fuel-asm/src/lib.rs index 19d1d6bb17..599c69066d 100644 --- a/fuel-asm/src/lib.rs +++ b/fuel-asm/src/lib.rs @@ -110,7 +110,7 @@ impl CheckRegId for u8 { // Defines the `Instruction` and `Opcode` types, along with an `op` module declaring a // unique type for each opcode's instruction variant. For a detailed explanation of how // this works, see the `fuel_asm::macros` module level documentation. -fuel_derive::impl_instructions! { +fuel_asm_derive::impl_instructions! { "Adds two registers." 0x10 ADD add [dst: RegId lhs: RegId rhs: RegId] "Bitwise ANDs two registers." diff --git a/fuel-derive/src/asm.rs b/fuel-derive/src/asm.rs deleted file mode 100644 index 5016a8d8e8..0000000000 --- a/fuel-derive/src/asm.rs +++ /dev/null @@ -1,1006 +0,0 @@ -//! # The `impl_instructions!` macro -//! -//! The heart of this crate's implementation is the private `impl_instructions!` macro. -//! This macro is used to generate the `Instruction` and `Opcode` types along with their -//! implementations. -//! -//! The intention is to allow for having a single source of truth from which each of the -//! instruction-related types and implementations are derived. -//! -//! Its usage looks like this: -//! -//! ```rust,ignore -//! impl_instructions! { -//! "Adds two registers." -//! 0x10 ADD add [RegId RegId RegId] -//! "Bitwise ANDs two registers." -//! 0x11 AND and [RegId RegId RegId] -//! // ... -//! } -//! ``` -//! -//! Each instruction's row includes: -//! -//! - A short docstring. -//! - The Opcode byte value. -//! - An uppercase identifier (for generating variants and types). -//! - A lowercase identifier (for generating the shorthand instruction constructor). -//! - The instruction layout (for the `new` and `unpack` functions). -//! -//! The following sections describe each of the items that are derived from the -//! `impl_instructions!` table in more detail. -//! -//! 
## The `Opcode` enum -//! -//! Represents the bytecode portion of an instruction. -//! -//! ```rust,ignore -//! /// Solely the opcode portion of an instruction represented as a single byte. -//! #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -//! #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -//! #[repr(u8)] -//! pub enum Opcode { -//! /// Adds two registers. -//! ADD = 0x10, -//! /// Bitwise ANDs two registers. -//! AND = 0x11, -//! // ... -//! } -//! ``` -//! -//! A `TryFrom` implementation is also provided, producing an `Err(InvalidOpcode)` in -//! the case that the byte represents a reserved or undefined value. -//! -//! ```rust -//! # use fuel_asm::{InvalidOpcode, Opcode}; -//! assert_eq!(Opcode::try_from(0x10), Ok(Opcode::ADD)); -//! assert_eq!(Opcode::try_from(0x11), Ok(Opcode::AND)); -//! assert_eq!(Opcode::try_from(0), Err(InvalidOpcode)); -//! ``` -//! -//! ## The `Instruction` enum -//! -//! Represents a single, full instruction, discriminated by its `Opcode`. -//! -//! ```rust,ignore -//! /// Representation of a single instruction for the interpreter. -//! /// -//! /// The opcode is represented in the tag (variant), or may be retrieved in the form of an -//! /// `Opcode` byte using the `opcode` method. -//! /// -//! /// The register and immediate data associated with the instruction is represented within -//! /// an inner unit type wrapper around the 3 remaining bytes. -//! #[derive(Clone, Copy, Eq, Hash, PartialEq)] -//! #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -//! pub enum Instruction { -//! /// Adds two registers. -//! ADD(op::ADD), -//! /// Bitwise ANDs two registers. -//! AND(op::AND), -//! // ... -//! } -//! ``` -//! -//! The `From for u32` (aka `RawInstruction`) and `TryFrom for -//! Instruction` implementations can be found in the crate root. -//! -//! ## A unique unit type per operation -//! -//! 
In order to reduce the likelihood of misusing unrelated register IDs or immediate -//! values, we generate a unique unit type for each type of operation (i.e instruction -//! variant) and guard access to the relevant register IDs and immediate values behind -//! each type's unique methods. -//! -//! These unique operation types are generated as follows within a dedicated `op` module: -//! -//! ```rust,ignore -//! pub mod op { -//! //! Definitions and implementations for each unique instruction type, one for each -//! //! unique `Opcode` variant. -//! -//! // A unique type for each operation. -//! -//! /// Adds two registers. -//! pub struct ADD([u8; 3]); -//! -//! /// Bitwise ANDs two registers. -//! pub struct AND([u8; 3]); -//! -//! // ... -//! -//! // An implementation for each unique type. -//! -//! impl ADD { -//! pub const OPCODE: Opcode = Opcode::ADD; -//! -//! /// Construct the instruction from its parts. -//! pub fn new(ra: RegId, rb: RegId, rc: RegId) -> Self { -//! Self(pack::bytes_from_ra_rb_rc(ra, rb, rc)) -//! } -//! -//! /// Convert the instruction into its parts. -//! pub fn unpack(self) -> (RegId, RegId, RegId) { -//! unpack::ra_rb_rc_from_bytes(self.0) -//! } -//! } -//! -//! impl AND { -//! // ... -//! } -//! -//! // ... -//! -//! // A short-hand `Instruction` constructor for each operation to make it easier to -//! // hand-write assembly for tests and benchmarking. As these constructors are public and -//! // accept literal values, we check that the values are within range. -//! -//! /// Adds two registers. -//! pub fn add(ra: u8, rb: u8, rc: u8) -> Instruction { -//! ADD::new(check_reg_id(ra), check_reg_id(rb), check_reg_id(rc)).into() -//! } -//! -//! /// Bitwise ANDs two registers. -//! pub fn and(ra: u8, rb: u8, rc: u8) -> Instruction { -//! AND::new(check_reg_id(ra), check_reg_id(rb), check_reg_id(rc)).into() -//! } -//! -//! // ... -//! }; -//! ``` -//! -//! ### Instruction Layout -//! -//! 
The function signatures of the `new` and `unpack` functions are derived from the -//! instruction's data layout described in the `impl_instructions!` table. -//! -//! For example, the `unpack` method for `ADD` looks like this: -//! -//! ```rust,ignore -//! // 0x10 ADD add [RegId RegId RegId] -//! pub fn unpack(self) -> (RegId, RegId, RegId) -//! ``` -//! -//! While the `unpack` method for `ADDI` looks like this: -//! -//! ```rust,ignore -//! // 0x50 ADDI addi [RegId RegId Imm12] -//! pub fn unpack(self) -> (RegId, RegId, Imm12) -//! ``` -//! -//! ### Shorthand Constructors -//! -//! The shorthand instruction constructors (e.g. `add`, `and`, etc) are specifically -//! designed to make it easier to handwrite assembly for tests or benchmarking. Unlike the -//! `$OP::new` constructors which require typed register ID or immediate inputs, the -//! instruction constructors allow for constructing `Instruction`s from convenient literal -//! value inputs. E.g. -//! -//! ```rust -//! use fuel_asm::{op, Instruction}; -//! -//! // A sample program to perform ecrecover -//! let program: Vec = vec![ -//! op::move_(0x10, 0x01), // set r[0x10] := $one -//! op::slli(0x20, 0x10, 5), // set r[0x20] := `r[0x10] << 5 == 32` -//! op::slli(0x21, 0x10, 6), // set r[0x21] := `r[0x10] << 6 == 64` -//! op::aloc(0x21), // alloc `r[0x21] == 64` to the heap -//! op::addi(0x10, 0x07, 1), // set r[0x10] := `$hp + 1` (allocated heap) -//! op::move_(0x11, 0x04), // set r[0x11] := $ssp -//! op::add(0x12, 0x04, 0x20), // set r[0x12] := `$ssp + r[0x20]` -//! op::eck1(0x10, 0x11, 0x12),// recover public key in memory[r[0x10], 64] -//! op::ret(0x01), // return `1` -//! ]; -//! 
``` - -use proc_macro2::{ - Ident, - Span, - TokenStream, -}; -use quote::quote; -use syn::parse::Parse; - -const IMM_TYPES: &[&str] = &["Imm06", "Imm12", "Imm18", "Imm24"]; - -enum ArgType { - Reg, - Imm(usize), -} -impl ArgType { - fn size_bits(&self) -> usize { - match self { - ArgType::Reg => 6, - ArgType::Imm(bits) => *bits, - } - } - - fn smallest_containing_integer_type(&self) -> syn::Ident { - match self { - Self::Reg => syn::Ident::new("u8", Span::call_site()), - Self::Imm(6) => syn::Ident::new("u8", Span::call_site()), - Self::Imm(12) => syn::Ident::new("u16", Span::call_site()), - Self::Imm(18) => syn::Ident::new("u32", Span::call_site()), - Self::Imm(24) => syn::Ident::new("u32", Span::call_site()), - _ => panic!("Invalid immediate size"), - } - } -} - -struct InstructionArgument { - name: syn::Ident, - type_: syn::Ident, -} -impl Parse for InstructionArgument { - fn parse(input: syn::parse::ParseStream) -> syn::Result { - let name: syn::Ident = input.parse()?; - let _: syn::Token![:] = input.parse()?; - let type_: syn::Ident = input.parse()?; - - let tn = type_.to_string(); - if !(tn == "RegId" || IMM_TYPES.contains(&tn.as_str())) { - return Err(syn::Error::new_spanned( - &type_, - format!("Invalid argument type: {}", tn), - )); - } - - Ok(Self { name, type_ }) - } -} -impl InstructionArgument { - fn is_imm(&self) -> bool { - self.type_.to_string().starts_with("Imm") - } - - fn typeinfo(&self) -> ArgType { - if self.is_imm() { - let imm_size = self - .type_ - .to_string() - .trim_start_matches("Imm") - .parse() - .unwrap(); - ArgType::Imm(imm_size) - } else { - ArgType::Reg - } - } -} - -struct Instruction { - description: syn::LitStr, - opcode_number: syn::LitInt, - opcode_name: syn::Ident, - opcode_fn_name: syn::Ident, - args: Vec, -} -impl Parse for Instruction { - fn parse(input: syn::parse::ParseStream) -> syn::Result { - let description: syn::LitStr = input.parse()?; - let opcode_number: syn::LitInt = input.parse()?; - let opcode_name: syn::Ident 
= input.parse()?; - let opcode_fn_name: syn::Ident = input.parse()?; - let mut args = Vec::new(); - - let content; - let _bracket_token = syn::bracketed!(content in input); - - while !content.is_empty() { - let item: InstructionArgument = content.parse()?; - args.push(item); - } - - // Check argument format - if args.len() > 4 { - return Err(syn::Error::new_spanned( - &opcode_name, - format!("Too many arguments: {}", args.len()), - )); - } - - for arg in args.iter().rev().skip(1) { - if arg.is_imm() { - return Err(syn::Error::new_spanned( - &arg.type_, - "Immediate argument only allowed as last argument", - )); - } - } - - if args.iter().map(|a| a.typeinfo().size_bits()).sum::() > 24 { - return Err(syn::Error::new_spanned( - &opcode_name, - "Arguments exceed 24 bits", - )); - } - - Ok(Self { - description, - opcode_number, - opcode_name, - opcode_fn_name, - args, - }) - } -} -impl Instruction { - fn has_imm(&self) -> bool { - self.args.last().map(|arg| arg.is_imm()).unwrap_or(false) - } - - #[allow(clippy::arithmetic_side_effects)] // Checked in opcode construction - fn reserved_bits(&self) -> usize { - if self.has_imm() { - 0 - } else { - 24 - self.args.len() * 6 - } - } -} - -struct InstructionList(Vec); -impl Parse for InstructionList { - fn parse(input: syn::parse::ParseStream) -> syn::Result { - let mut instructions = Vec::new(); - while !input.is_empty() { - let item: Instruction = input.parse()?; - instructions.push(item); - } - Ok(Self(instructions)) - } -} - -/// Constructor functions and theirs shorthands -fn make_constructors(instructions: &InstructionList) -> TokenStream { - instructions - .0 - .iter() - .map( - |Instruction { - description, - opcode_name, - opcode_fn_name, - args, - .. - }| { - let strict_arguments: TokenStream = args - .iter() - .map(|arg| { - let name = &arg.name; - let type_ = &arg.type_; - quote! 
{ #name: #type_, } - }) - .collect(); - - let pack_strict_arguments: TokenStream = args - .iter() - .enumerate() - .map(|(i, arg)| { - let name = &arg.name; - if arg.is_imm() { - quote! { - packed_integer |= (#name.to_smallest_int() as u32); - } - } else { - quote! { - packed_integer |= (#name.to_u8() as u32) << (6 * (3 - #i)); - } - } - }) - .collect(); - - let pack_test_arguments: TokenStream = args - .iter() - .enumerate() - .map(|(i, arg)| { - let reg_name = Ident::new(&format!("reg{i}"), Span::call_site()); - match arg.typeinfo() { - ArgType::Imm(bits) =>{ - let bits: u32 = bits.try_into().expect("Type size is checked"); - quote! { - packed_integer |= imm & ((#bits << 1u32) -1); - } - }, - ArgType::Reg => quote! { - packed_integer |= (#reg_name.to_u8() as u32) << (6 * (3 - #i)); - } - } - }) - .collect(); - - let flexible_arguments: TokenStream = args - .iter() - .map(|arg| { - let name = &arg.name; - let type_ = &arg.type_; - if arg.is_imm() { - let int_type = arg.typeinfo().smallest_containing_integer_type(); - quote! { #name: #int_type, } - } else { - let check_trait = Ident::new( - &format!("Check{type_}"), - Span::call_site(), - ); - quote! { #name: impl crate::#check_trait, } - } - }) - .collect(); - - let check_flexible_arguments: TokenStream = args - .iter() - .map(|arg| if arg.is_imm() { - let name = &arg.name; - let type_ = &arg.type_; - quote! { #type_::new_checked(#name).expect("Immediate value overflows"), } - } else { - let name = &arg.name; - quote! { #name.check(), } - }) - .collect(); - - let pass_arguments: TokenStream = args - .iter() - .map(|InstructionArgument { name, .. }| quote! { #name, }) - .collect(); - - let typescript_arguments: TokenStream = args - .iter() - .map(|arg| { - let name = &arg.name; - let inttype = arg.typeinfo().smallest_containing_integer_type(); - quote! { #name: #inttype, } - }) - .collect(); - - quote! 
{ - #[doc = #description] - pub fn #opcode_fn_name(#flexible_arguments) -> Instruction { - #opcode_name::new(#check_flexible_arguments).into() - } - - #[cfg(feature = "typescript")] - const _: () = { - use super::*; - #[wasm_bindgen::prelude::wasm_bindgen] - #[doc = #description] - pub fn #opcode_fn_name(#typescript_arguments) -> typescript::Instruction { - crate::op::#opcode_fn_name(#pass_arguments).into() - } - }; - - impl #opcode_name { - #[doc = "Construct the instruction from its parts."] - pub fn new(#strict_arguments) -> Self { - let mut packed_integer: u32 = 0; - #pack_strict_arguments - let packed = packed_integer.to_be_bytes(); - Self([packed[1], packed[2], packed[3]]) - } - - #[doc = "Construct the instruction from all possible raw fields, ignoring inapplicable ones."] - #[cfg(test)] - pub fn test_construct( - reg0: RegId, - reg1: RegId, - reg2: RegId, - reg3: RegId, - imm: u32, - ) -> Self { - let mut packed_integer: u32 = 0; - #pack_test_arguments - let packed = packed_integer.to_be_bytes(); - Self([packed[1], packed[2], packed[3]]) - } - } - - - #[cfg(feature = "typescript")] - #[wasm_bindgen::prelude::wasm_bindgen] - impl #opcode_name { - #[wasm_bindgen(constructor)] - #[doc = "Construct the instruction from its parts."] - pub fn new_typescript(#strict_arguments) -> Self { - Self::new(#pass_arguments) - } - } - } - }, - ) - .collect() -} - -fn make_op_unpacks(instructions: &InstructionList) -> TokenStream { - instructions - .0 - .iter() - .map( - |instr| { - let Instruction { - opcode_name, args, .. - } = instr; - let arg_types: Vec<_> = args - .iter() - .map(|InstructionArgument { type_, .. }| type_) - .collect(); - let convert_reg_args: Vec<_> = args - .iter() - .enumerate() - .filter_map( - |(i, arg)| { - let type_ = &arg.type_; - if arg.is_imm() { - None - } else { - Some(quote! 
{ - #type_::new((integer >> (6 * (3 - #i))) as u8) - }) - } - }, - ) - .collect(); - let reserved_bits = instr.reserved_bits(); - - let mut ret_args = convert_reg_args; - if let Some(convert_imm_arg) = args.last().and_then(|arg| { - let type_: &Ident = &arg.type_; - if arg.is_imm() { - Some(quote! { #type_::new(integer as _) }) - } else {None}} - ) { - ret_args.push(convert_imm_arg); - } - - - // Return value for unpack. If there is only one argument, doesn't wrap it in a tuple. - let retval = if ret_args.len() == 1 { - let ra = &ret_args[0]; - quote! { #ra } - } else { - let ra: TokenStream = itertools::Itertools::intersperse( - ret_args.iter().cloned(), - quote!{,} - ) - .collect(); - quote! { ( #ra ) } - }; - let arg_types = if arg_types.len() == 1 { - let at = arg_types[0]; - quote! { #at } - } else { - let ats: TokenStream = arg_types.iter().map(|at| quote! {#at,} ).collect(); - quote! { (#ats) } - }; - - // Like above but always tuple-wraps - let raw_regs = { - let ra: TokenStream = - ret_args.iter().map(|a| quote! {#a,}) - .collect(); - quote! { ( #ra ) } - }; - - let reg_ids: TokenStream = (0..4).map(|i| { - if let Some(arg) = args.get(i) { - let tuple_index = proc_macro2::Literal::usize_unsuffixed(i); - if !arg.is_imm() { - return quote! { Some(fields.#tuple_index), }; - } - } - quote![ None, ] - }).collect(); - - quote! 
{ - impl #opcode_name { - #[doc = "Convert the instruction into its parts, without checking for correctness."] - pub fn unpack(self) -> #arg_types { - let integer = u32::from_be_bytes([0, self.0[0], self.0[1], self.0[2]]); - #retval - } - - #[doc = "Verify that the unused bits after the instruction are zero."] - pub(crate) fn reserved_part_is_zero(self) -> bool { - let integer = u32::from_be_bytes([0, self.0[0], self.0[1], self.0[2]]); - let mask = (1u32 << #reserved_bits) - 1; - (integer & mask) == 0 - } - - pub(crate) fn reg_ids(self) -> [Option; 4] { - let integer = u32::from_be_bytes([0, self.0[0], self.0[1], self.0[2]]); - let fields = #raw_regs; - [ #reg_ids ] - } - } - } - }, - ) - .collect() -} - -/// Make a struct for each opcode -fn make_op_structs(instructions: &InstructionList) -> TokenStream { - instructions - .0 - .iter() - .map( - |Instruction { - description, - opcode_name, - .. - }| { - quote! { - #[doc = #description] - #[derive(Clone, Copy, Eq, Hash, PartialEq)] - #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] - #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] - pub struct #opcode_name(pub (super) [u8; 3]); - - impl #opcode_name { - /// The opcode number for this instruction. - pub const OPCODE: Opcode = Opcode::#opcode_name; - } - } - }) - .collect() -} - -fn make_op_debug_impl(instructions: &InstructionList) -> TokenStream { - instructions - .0 - .iter() - .map( - |Instruction { - opcode_name, - args, - .. - }| { - let values: TokenStream = itertools::Itertools::intersperse(args.iter().map(|arg| { - let name = &arg.name; - quote! { - #name - } - }), quote!{,}).collect(); - let fields: TokenStream = args.iter().map(|arg| { - let name = &arg.name; - if arg.is_imm() { - quote! { - .field(stringify!(#name), &format_args!("{}", #name.to_smallest_int())) - } - } else { - quote! 
{ - .field(stringify!(#name), &format_args!("{:#02x}", u8::from(#name))) - } - } - }).collect(); - - let unpack_if_needed = if args.is_empty() { - quote! {} - } else { - quote! { - let (#values) = self.unpack(); - } - }; - - quote! { - impl core::fmt::Debug for #opcode_name { - #[warn(clippy::unused_unit)] // Simplify code - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - #unpack_if_needed - f.debug_struct(stringify!(#opcode_name)) - #fields - .finish() - } - } - } - }, - ) - .collect() -} - -fn make_opcode_enum(instructions: &InstructionList) -> TokenStream { - let variants: TokenStream = instructions - .0 - .iter() - .map( - |Instruction { - description, - opcode_name, - opcode_number, - .. - }| { - quote! { - #[doc = #description] - #opcode_name = #opcode_number, - } - }, - ) - .collect(); - let variants_test_construct: TokenStream = instructions - .0 - .iter() - .map( - |Instruction { - description, - opcode_name, - .. - }| { - quote! { - #[doc = #description] - Self::#opcode_name => Instruction::#opcode_name( - crate::_op::#opcode_name::test_construct(ra, rb, rc, rd, imm) - ), - } - }, - ) - .collect(); - quote! { - #[doc = "The opcode numbers for each instruction."] - #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] - #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] - #[cfg_attr(feature = "typescript", wasm_bindgen::prelude::wasm_bindgen)] - pub enum Opcode { - #variants - } - - impl Opcode { - #[doc = "Construct the instruction from all possible raw fields, ignoring inapplicable ones."] - #[cfg(test)] - pub fn test_construct(self, ra: RegId, rb: RegId, rc: RegId, rd: RegId, imm: u32) -> Instruction { - match self { - #variants_test_construct - } - } - } - } -} - -fn make_opcode_try_from(instructions: &InstructionList) -> TokenStream { - let arms: TokenStream = instructions - .0 - .iter() - .map( - |Instruction { - opcode_number, - opcode_name, - .. - }| { - quote! 
{ - #opcode_number => Ok(Opcode::#opcode_name), - } - }, - ) - .collect(); - quote! { - impl core::convert::TryFrom for Opcode { - type Error = InvalidOpcode; - - fn try_from(value: u8) -> Result { - match value { - #arms - _ => Err(InvalidOpcode), - } - } - } - } -} - -fn make_from_op(instructions: &InstructionList) -> TokenStream { - instructions - .0 - .iter() - .map(|Instruction { opcode_name, .. }| { - quote! { - impl From<#opcode_name> for [u8; 3] { - fn from(#opcode_name(arr): #opcode_name) -> Self { - arr - } - } - - impl From<#opcode_name> for [u8; 4] { - fn from(#opcode_name([a, b, c]): #opcode_name) -> Self { - [#opcode_name::OPCODE as u8, a, b, c] - } - } - - impl From<#opcode_name> for u32 { - fn from(op: #opcode_name) -> Self { - u32::from_be_bytes(op.into()) - } - } - - impl From<#opcode_name> for Instruction { - fn from(op: #opcode_name) -> Self { - Instruction::#opcode_name(op) - } - } - - #[cfg(feature = "typescript")] - impl From<#opcode_name> for typescript::Instruction { - fn from(opcode: #opcode_name) -> Self { - typescript::Instruction::new(opcode.into()) - } - } - } - }) - .collect() -} - -fn make_instruction_enum(instructions: &InstructionList) -> TokenStream { - let variants: TokenStream = instructions - .0 - .iter() - .map( - |Instruction { - description, - opcode_name, - .. - }| { - quote! { - #[doc = #description] - #opcode_name(_op::#opcode_name), - } - }, - ) - .collect(); - let variant_opcodes: TokenStream = instructions - .0 - .iter() - .map(|Instruction { opcode_name, .. }| { - quote! { - Self::#opcode_name(_) => Opcode::#opcode_name, - } - }) - .collect(); - let variant_reg_ids: TokenStream = instructions - .0 - .iter() - .map(|Instruction { opcode_name, .. }| { - quote! { - Self::#opcode_name(op) => op.reg_ids(), - } - }) - .collect(); - - let variant_debug: TokenStream = instructions - .0 - .iter() - .map(|Instruction { opcode_name, .. }| { - quote! { - Self::#opcode_name(op) => op.fmt(f), - } - }) - .collect(); - - quote! 
{ - #[doc = r" - Representation of a single instruction for the interpreter. - - The opcode is represented in the tag (variant), or may be retrieved in the form of an - `Opcode` byte using the `opcode` method. - - The register and immediate data associated with the instruction is represented within - an inner unit type wrapper around the 3 remaining bytes. - "] - #[derive(Clone, Copy, Eq, Hash, PartialEq)] - #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] - pub enum Instruction { - #variants - } - - impl Instruction { - #[doc = "This instruction's opcode."] - pub fn opcode(&self) -> Opcode { - match self { - #variant_opcodes - } - } - - #[doc = "Unpacks all register IDs into a slice of options."] - pub fn reg_ids(&self) -> [Option; 4] { - match self { - #variant_reg_ids - } - } - } - - impl core::fmt::Debug for Instruction { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - match self { - #variant_debug - } - } - } - } -} - -fn make_instruction_try_from_bytes(instructions: &InstructionList) -> TokenStream { - let arms: TokenStream = instructions - .0 - .iter() - .map(|Instruction { opcode_name, .. }| { - quote! { - Opcode::#opcode_name => Ok(Self::#opcode_name({ - let op = op::#opcode_name([a, b, c]); - if !op.reserved_part_is_zero() { - return Err(InvalidOpcode); - } - op - })), - } - }) - .collect(); - quote! { - impl core::convert::TryFrom<[u8; 4]> for Instruction { - type Error = InvalidOpcode; - - fn try_from([op, a, b, c]: [u8; 4]) -> Result { - match Opcode::try_from(op)? { - #arms - _ => Err(InvalidOpcode), - } - } - } - } -} - -fn make_bytes_from_instruction(instructions: &InstructionList) -> TokenStream { - let arms: TokenStream = instructions - .0 - .iter() - .map(|Instruction { opcode_name, .. }| { - quote! { - Instruction::#opcode_name(op) => op.into(), - } - }) - .collect(); - quote! 
{ - impl core::convert::From for [u8; 4] { - fn from(instruction: Instruction) -> [u8; 4] { - match instruction { - #arms - } - } - } - } -} - -/// Generates implementations for the FuelVM instruction types. -pub fn impl_instructions(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - let instructions: InstructionList = syn::parse_macro_input!(input as InstructionList); - - let op_structs = make_op_structs(&instructions); - let op_debug_impl = make_op_debug_impl(&instructions); - let from_op = make_from_op(&instructions); - let constructors = make_constructors(&instructions); - let op_unpacks = make_op_unpacks(&instructions); - let opcode_enum = make_opcode_enum(&instructions); - let opcode_try_from = make_opcode_try_from(&instructions); - let instruction_enum = make_instruction_enum(&instructions); - let instruction_try_from_bytes = make_instruction_try_from_bytes(&instructions); - let bytes_from_instruction = make_bytes_from_instruction(&instructions); - (quote! { - /// Opcode-specific definitions and implementations. - #[allow(clippy::unused_unit)] // Simplify codegen - pub mod _op { - use super::*; - #op_structs - #op_debug_impl - #from_op - #constructors - #op_unpacks - } - #opcode_enum - #opcode_try_from - #instruction_enum - #instruction_try_from_bytes - #bytes_from_instruction - - #[cfg(feature = "typescript")] - impl From for typescript::Instruction { - fn from(inst: Instruction) -> Self { - typescript::Instruction::new(inst) - } - } - - }) - .into() -} diff --git a/fuel-derive/src/lib.rs b/fuel-derive/src/lib.rs index fdf10e3911..758a7d4a07 100644 --- a/fuel-derive/src/lib.rs +++ b/fuel-derive/src/lib.rs @@ -10,7 +10,6 @@ )] extern crate proc_macro; -mod asm; mod attribute; mod deserialize; mod serialize; @@ -29,9 +28,3 @@ synstructure::decl_derive!( /// Derives `Serialize` trait for the given `struct` or `enum`. serialize_derive ); - -/// Generates implementations for the FuelVM instruction types. 
-#[proc_macro] -pub fn impl_instructions(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - asm::impl_instructions(input) -} From e124edfc3f7feb39f08355f456f454d2b28a8a61 Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Fri, 23 Aug 2024 01:23:25 +0200 Subject: [PATCH 10/15] Fix derive crate descriptions --- fuel-asm/derive/Cargo.toml | 2 +- fuel-derive/Cargo.toml | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/fuel-asm/derive/Cargo.toml b/fuel-asm/derive/Cargo.toml index 2f1379b292..4cde78315d 100644 --- a/fuel-asm/derive/Cargo.toml +++ b/fuel-asm/derive/Cargo.toml @@ -8,7 +8,7 @@ homepage = { workspace = true } keywords = ["blockchain", "cryptocurrencies", "fuel-vm", "vm"] license = { workspace = true } repository = { workspace = true } -description = "FuelVM (de)serialization derive macros for `fuel-vm` data structures." +description = "FuelVM asm instruction helper macros." [lib] proc-macro = true diff --git a/fuel-derive/Cargo.toml b/fuel-derive/Cargo.toml index 1d8fe30f05..6bddd0d857 100644 --- a/fuel-derive/Cargo.toml +++ b/fuel-derive/Cargo.toml @@ -18,7 +18,3 @@ quote = "1" syn = { version = "2", features = ["full"] } proc-macro2 = "1" synstructure = "0.13" -itertools = "0.13" - -[dev-dependencies] -fuel-asm = { workspace = true } From c277ad4b953f74dbcb3f2f83e6b3ac0cbbc3a45f Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Fri, 23 Aug 2024 01:25:07 +0200 Subject: [PATCH 11/15] Remove extra file --- fuel-asm/derive/src/serialize.rs | 223 ------------------------------- 1 file changed, 223 deletions(-) delete mode 100644 fuel-asm/derive/src/serialize.rs diff --git a/fuel-asm/derive/src/serialize.rs b/fuel-asm/derive/src/serialize.rs deleted file mode 100644 index 42b2eefdc1..0000000000 --- a/fuel-asm/derive/src/serialize.rs +++ /dev/null @@ -1,223 +0,0 @@ -use proc_macro2::TokenStream as TokenStream2; -use quote::quote; - -use crate::attribute::{ - should_skip_field_binding, - StructAttrs, -}; - -fn 
serialize_struct(s: &synstructure::Structure) -> TokenStream2 { - let attrs = StructAttrs::parse(s); - let mut s = s.clone(); - - assert_eq!(s.variants().len(), 1, "structs must have one variant"); - - let variant: &mut synstructure::VariantInfo = &mut s.variants_mut()[0]; - variant.filter(|binding| !should_skip_field_binding(binding)); - - let encode_static = variant.each(|binding| { - quote! { - ::fuel_types::canonical::Serialize::encode_static(#binding, buffer)?; - } - }); - - let encode_dynamic = variant.each(|binding| { - quote! { - ::fuel_types::canonical::Serialize::encode_dynamic(#binding, buffer)?; - } - }); - - let size_static_code = variant.each(|binding| { - quote! { - size = size.saturating_add(#binding.size_static()); - } - }); - - let initial_size = if attrs.prefix.is_some() { - quote! { let mut size = 8usize; } - } else { - quote! { let mut size = 0usize; } - }; - let size_static_code = quote! { #initial_size match self { #size_static_code}; size }; - - let size_dynamic_code = variant.each(|binding| { - quote! { - size = size.saturating_add(#binding.size_dynamic()); - } - }); - let size_dynamic_code = - quote! { let mut size = 0usize; match self { #size_dynamic_code}; size }; - - let prefix = if let Some(prefix_type) = attrs.prefix.as_ref() { - quote! { - <_ as ::fuel_types::canonical::Serialize>::encode(&#prefix_type, buffer)?; - } - } else { - quote! {} - }; - - s.gen_impl(quote! 
{ - gen impl ::fuel_types::canonical::Serialize for @Self { - #[inline(always)] - fn size_static(&self) -> usize { - #size_static_code - } - - #[inline(always)] - fn size_dynamic(&self) -> usize { - #size_dynamic_code - } - - #[inline(always)] - fn encode_static(&self, buffer: &mut O) -> ::core::result::Result<(), ::fuel_types::canonical::Error> { - #prefix - match self { - #encode_static - }; - - ::core::result::Result::Ok(()) - } - - fn encode_dynamic(&self, buffer: &mut O) -> ::core::result::Result<(), ::fuel_types::canonical::Error> { - match self { - #encode_dynamic - }; - - ::core::result::Result::Ok(()) - } - } - }) -} - -fn serialize_enum(s: &synstructure::Structure) -> TokenStream2 { - assert!(!s.variants().is_empty(), "got invalid empty enum"); - let mut s = s.clone(); - let mut next_discriminant = quote! { { 0u64 } }; - - s.variants_mut().iter_mut().for_each(|v| { - v.filter(|binding| !should_skip_field_binding(binding)); - }); - - let encode_static = s.variants().iter().map(|v| { - let pat = v.pat(); - - let encode_static_iter = v.bindings().iter().map(|binding| { - quote! { - ::fuel_types::canonical::Serialize::encode_static(#binding, buffer)?; - } - }); - - if v.ast().discriminant.is_some() { - let variant_ident = v.ast().ident; - next_discriminant = quote! { { Self::#variant_ident as u64 } }; - } - - let encode_discriminant = quote! { - <::core::primitive::u64 as ::fuel_types::canonical::Serialize>::encode(&#next_discriminant, buffer)?; - }; - next_discriminant = quote! { ( (#next_discriminant) + 1u64 ) }; - - quote! { - #pat => { - #encode_discriminant - #( - { #encode_static_iter } - )* - } - } - }); - let encode_dynamic = s.variants().iter().map(|v| { - let encode_dynamic_iter = v.each(|binding| { - quote! { - ::fuel_types::canonical::Serialize::encode_dynamic(#binding, buffer)?; - } - }); - quote! 
{ - #encode_dynamic_iter - } - }); - - let match_size_static: TokenStream2 = s - .variants() - .iter() - .map(|variant| { - variant.each(|binding| { - quote! { - size = size.saturating_add(#binding.size_static()); - } - }) - }) - .collect(); - let match_size_static = quote! {{ - // `repr(128)` is unstable, so because of that we can use 8 bytes. - let mut size = 8usize; - match self { #match_size_static } size } - }; - - let match_size_dynamic: TokenStream2 = s - .variants() - .iter() - .map(|variant| { - variant.each(|binding| { - quote! { - size = size.saturating_add(#binding.size_dynamic()); - } - }) - }) - .collect(); - let match_size_dynamic = - quote! {{ let mut size = 0usize; match self { #match_size_dynamic } size }}; - - let impl_code = s.gen_impl(quote! { - gen impl ::fuel_types::canonical::Serialize for @Self { - #[inline(always)] - fn size_static(&self) -> usize { - #match_size_static - } - - #[inline(always)] - fn size_dynamic(&self) -> usize { - #match_size_dynamic - } - - #[inline(always)] - fn encode_static(&self, buffer: &mut O) -> ::core::result::Result<(), ::fuel_types::canonical::Error> { - match self { - #( - #encode_static - )*, - _ => return ::core::result::Result::Err(::fuel_types::canonical::Error::UnknownDiscriminant), - }; - - ::core::result::Result::Ok(()) - } - - fn encode_dynamic(&self, buffer: &mut O) -> ::core::result::Result<(), ::fuel_types::canonical::Error> { - match self { - #( - #encode_dynamic - )*, - _ => return ::core::result::Result::Err(::fuel_types::canonical::Error::UnknownDiscriminant), - }; - - ::core::result::Result::Ok(()) - } - } - }); - - quote! { - #impl_code - } -} - -/// Derives `Serialize` trait for the given `struct` or `enum`. 
-pub fn serialize_derive(mut s: synstructure::Structure) -> TokenStream2 { - s.add_bounds(synstructure::AddBounds::Fields) - .underscore_const(true); - - match s.ast().data { - syn::Data::Struct(_) => serialize_struct(&s), - syn::Data::Enum(_) => serialize_enum(&s), - _ => panic!("Can't derive `Serialize` for `union`s"), - } -} From d7288af0af75228f4e284a0f55e152c6a2561de2 Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Fri, 23 Aug 2024 04:50:26 +0200 Subject: [PATCH 12/15] add doc comments --- fuel-asm/derive/Cargo.toml | 4 ++-- fuel-asm/derive/src/input.rs | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fuel-asm/derive/Cargo.toml b/fuel-asm/derive/Cargo.toml index 4cde78315d..f9876b0a03 100644 --- a/fuel-asm/derive/Cargo.toml +++ b/fuel-asm/derive/Cargo.toml @@ -14,11 +14,11 @@ description = "FuelVM asm instruction helper macros." proc-macro = true [dependencies] +itertools = "0.13" +proc-macro2 = "1" quote = "1" syn = { version = "2", features = ["full"] } -proc-macro2 = "1" synstructure = "0.13" -itertools = "0.13" [dev-dependencies] fuel-asm = { workspace = true } diff --git a/fuel-asm/derive/src/input.rs b/fuel-asm/derive/src/input.rs index 647551cc3c..515d786aeb 100644 --- a/fuel-asm/derive/src/input.rs +++ b/fuel-asm/derive/src/input.rs @@ -161,10 +161,12 @@ impl Parse for InstructionArguments { } impl InstructionArguments { + /// Returns true if the instruction has an immediate argument pub fn has_imm(&self) -> bool { self.0.last().map(|arg| arg.is_imm()).unwrap_or(false) } + /// Returns the number (unused) reserved bits at the end of the instruction #[allow(clippy::arithmetic_side_effects)] // Checked in opcode construction pub fn reserved_bits(&self) -> usize { if self.has_imm() { @@ -174,6 +176,7 @@ impl InstructionArguments { } } + /// Returns true if the instruction has no arguments pub fn is_empty(&self) -> bool { self.0.is_empty() } @@ -212,6 +215,7 @@ impl InstructionArguments { self.0.iter().map(f) } + /// Shortcut for 
`map` that collects the results into a `TokenStream` pub fn map_to_tokens TokenStream>( &self, f: F, From 2758c7403fe33dbc51f99430ada62163469fa47b Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Fri, 23 Aug 2024 04:50:38 +0200 Subject: [PATCH 13/15] cargo sort --- fuel-asm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuel-asm/Cargo.toml b/fuel-asm/Cargo.toml index faafbb49a6..32c5473310 100644 --- a/fuel-asm/Cargo.toml +++ b/fuel-asm/Cargo.toml @@ -22,8 +22,8 @@ wasm-bindgen = { version = "0.2.88", optional = true } [dev-dependencies] bincode = { workspace = true } fuel-asm = { path = ".", features = ["serde"] } -rstest = "0.16" proptest = "1.5" +rstest = "0.16" [features] default = ["std"] From 8364d5a6ee28c07b825ed8c59da7917848d10e6f Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Fri, 23 Aug 2024 04:52:49 +0200 Subject: [PATCH 14/15] clippy --- fuel-asm/src/encoding_tests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fuel-asm/src/encoding_tests.rs b/fuel-asm/src/encoding_tests.rs index 84e01bdc65..4f044cf0c3 100644 --- a/fuel-asm/src/encoding_tests.rs +++ b/fuel-asm/src/encoding_tests.rs @@ -41,8 +41,8 @@ fn validate_all_opcodes() { for has_imm in [false, true] { // Construct the instruction let mut raw: RawInstruction = (op as u32) << 24u32; - for i in 0..regs_in_use { - raw |= mask_pattern & (0b11_1111) << arg_offsets[i]; + for offset in arg_offsets.iter().take(regs_in_use) { + raw |= (mask_pattern & 0b11_1111) << offset; } if has_imm { From 43172b1c4638145ed6664727650843c0f3aa5086 Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Mon, 26 Aug 2024 14:20:06 +0300 Subject: [PATCH 15/15] PR comments on naming --- fuel-asm/derive/src/codegen.rs | 2 +- fuel-asm/derive/src/lib.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fuel-asm/derive/src/codegen.rs b/fuel-asm/derive/src/codegen.rs index 05b685a5fd..509e6f3fd5 100644 --- a/fuel-asm/derive/src/codegen.rs +++ 
b/fuel-asm/derive/src/codegen.rs @@ -338,7 +338,7 @@ pub fn opcode_try_from(instructions: &InstructionList) -> TokenStream { } } -pub fn from_op(instructions: &InstructionList) -> TokenStream { +pub fn op_conversions(instructions: &InstructionList) -> TokenStream { instructions.map_to_tokens(|Instruction { opcode_name, .. }| { quote! { impl From<#opcode_name> for [u8; 3] { diff --git a/fuel-asm/derive/src/lib.rs b/fuel-asm/derive/src/lib.rs index 89b5f054b1..d4deedfe10 100644 --- a/fuel-asm/derive/src/lib.rs +++ b/fuel-asm/derive/src/lib.rs @@ -217,7 +217,7 @@ pub fn impl_instructions(input: proc_macro::TokenStream) -> proc_macro::TokenStr let op_structs = codegen::op_structs(&instructions); let op_debug_impl = codegen::op_debug_impl(&instructions); - let from_op = codegen::from_op(&instructions); + let op_conversions = codegen::op_conversions(&instructions); let op_constructor_shorthand = codegen::op_constructor_shorthand(&instructions); let op_fn_new = codegen::op_fn_new(&instructions); let op_constructors_typescript = codegen::op_constructors_typescript(&instructions); @@ -241,7 +241,7 @@ pub fn impl_instructions(input: proc_macro::TokenStream) -> proc_macro::TokenStr use super::*; #op_structs #op_debug_impl - #from_op + #op_conversions #op_constructor_shorthand #op_fn_new #op_constructors_typescript