From 04e71b7f8b8e1fda5f95c812a94cb38e82ef884e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kr=C3=B6ning?= Date: Thu, 21 Sep 2023 01:23:37 +0200 Subject: [PATCH] feat: remove NASM dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This migrates the x86-64 application processor boot code from NASM to GNU assembly language. `boot.asm` is replaced by `boot.s`. Instead of NASM directives, their GNU assembler (`as`) equivalents are used. This assembly file is embedded in an LLVM assembly language file with only one module level (global) assembly statement. Rust bundles `llvm-as`, which we use to translate the LLVM assembly to LLVM bitcode. The resulting file is linked to binary using the bundled `rust-lld`. The final binary is exactly the same as before, bit for bit. This finally removes NASM as the last non-Rust toolchain dependency. Signed-off-by: Martin Kröning --- .github/workflows/ci.yml | 28 +---- .github/workflows/publish_docs.yml | 4 - Cargo.lock | 83 +------------- Cargo.toml | 7 +- README.md | 1 - build.rs | 88 +++++++++++++++ src/arch/x86_64/kernel/apic.rs | 4 +- src/arch/x86_64/kernel/boot.asm | 151 ------------------------- src/arch/x86_64/kernel/boot.s | 173 +++++++++++++++++++++++++++++ 9 files changed, 275 insertions(+), 264 deletions(-) create mode 100644 build.rs delete mode 100644 src/arch/x86_64/kernel/boot.asm create mode 100644 src/arch/x86_64/kernel/boot.s diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b0bda7e1f6..e70cb3cfa2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,10 +22,6 @@ jobs: steps: - uses: actions/checkout@v4 - uses: taiki-e/install-action@cargo-hack - - name: Install NASM - run: | - sudo apt-get update - sudo apt-get install nasm - uses: mkroening/rust-toolchain-toml@main - uses: Swatinem/rust-cache@v2 - name: Check each feature @@ -38,10 +34,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Install 
NASM - run: | - sudo apt-get update - sudo apt-get install nasm - uses: mkroening/rust-toolchain-toml@main - run: rustup component add clippy - uses: Swatinem/rust-cache@v2 @@ -61,10 +53,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Install NASM - run: | - sudo apt-get update - sudo apt-get install nasm - uses: mkroening/rust-toolchain-toml@main - run: rustup target add aarch64-unknown-none-softfloat - uses: Swatinem/rust-cache@v2 @@ -78,10 +66,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Install NASM - run: | - sudo apt-get update - sudo apt-get install nasm - uses: mkroening/rust-toolchain-toml@main - uses: Swatinem/rust-cache@v2 - name: Build minimal kernel @@ -94,10 +78,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Install QEMU, NASM + - name: Install QEMU run: | sudo apt-get update - sudo apt-get install qemu-system-x86 nasm + sudo apt-get install qemu-system-x86 - uses: mkroening/rust-toolchain-toml@main - uses: Swatinem/rust-cache@v2 - name: Unit tests @@ -133,10 +117,10 @@ jobs: uses: actions/checkout@v4 with: path: kernel - - name: Install QEMU, NASM + - name: Install QEMU run: | sudo apt-get update - sudo apt-get install qemu-system-x86 nasm libcap-ng-dev libseccomp-dev socat + sudo apt-get install qemu-system-x86 libcap-ng-dev libseccomp-dev socat - uses: mkroening/rust-toolchain-toml@main - uses: mkroening/rust-toolchain-toml@main with: @@ -238,10 +222,10 @@ jobs: uses: actions/checkout@v4 with: path: kernel - - name: Install QEMU, NASM + - name: Install QEMU run: | sudo apt-get update - sudo apt-get install -y --no-install-recommends qemu-system-x86 nasm + sudo apt-get install -y --no-install-recommends qemu-system-x86 - name: Check KVM availability shell: bash run: | diff --git a/.github/workflows/publish_docs.yml b/.github/workflows/publish_docs.yml index c967c14a60..9b1864e94a 100644 --- a/.github/workflows/publish_docs.yml +++ 
b/.github/workflows/publish_docs.yml @@ -16,10 +16,6 @@ jobs: uses: actions/checkout@v4 - name: Rustup (apply rust-toolchain.toml) run: rustup show - - name: Install NASM - run: | - sudo apt-get update - sudo apt-get install nasm - name: Generate documentation run: cargo doc --target x86_64-unknown-none --package hermit-kernel - name: Generate index.html diff --git a/Cargo.lock b/Cargo.lock index fb749ade17..e0da110f12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -339,39 +339,12 @@ version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" -[[package]] -name = "errno" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" -dependencies = [ - "errno-dragonfly", - "libc", - "windows-sys", -] - -[[package]] -name = "errno-dragonfly" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" -dependencies = [ - "cc", - "libc", -] - [[package]] name = "exclusive_cell" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5b9e9908e50b47ebbc3d6fd66ed295b997c270e8d2312a035bcc62722a160ef" -[[package]] -name = "fastrand" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" - [[package]] name = "flate2" version = "1.0.27" @@ -483,6 +456,7 @@ dependencies = [ "aarch64", "ahash", "align-address", + "anyhow", "arm-gic", "bit_field", "bitflags 2.4.0", @@ -493,7 +467,7 @@ dependencies = [ "hermit-dtb", "hermit-entry", "hermit-sync", - "include-transformed", + "llvm-tools", "lock_api", "log", "multiboot", @@ -540,18 +514,6 @@ dependencies = [ "unicode-normalization", ] -[[package]] -name = "include-transformed" -version = 
"0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fdd0ac3785f538abb2b8221505e8244726756de924f670d06f4518c9a70a13d" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.37", - "tempfile", -] - [[package]] name = "interrupts" version = "0.1.0" @@ -573,12 +535,6 @@ version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" -[[package]] -name = "linux-raw-sys" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" - [[package]] name = "llvm-tools" version = "0.1.1" @@ -952,15 +908,6 @@ dependencies = [ "num_cpus", ] -[[package]] -name = "redox_syscall" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "ring" version = "0.16.20" @@ -985,19 +932,6 @@ dependencies = [ "semver", ] -[[package]] -name = "rustix" -version = "0.38.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "172891ebdceb05aa0005f533a6cbfca599ddd7d966f6f5d4d9b2e70478e70399" -dependencies = [ - "bitflags 2.4.0", - "errno", - "libc", - "linux-raw-sys", - "windows-sys", -] - [[package]] name = "rustls" version = "0.21.7" @@ -1191,19 +1125,6 @@ dependencies = [ "lock_api", ] -[[package]] -name = "tempfile" -version = "3.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc02fddf48964c42031a0b3fe0428320ecf3a73c401040fc0096f97794310651" -dependencies = [ - "cfg-if", - "fastrand", - "redox_syscall", - "rustix", - "windows-sys", -] - [[package]] name = "thiserror" version = "1.0.44" diff --git a/Cargo.toml b/Cargo.toml index 0bdc4355de..ac83ee066e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,7 +57,7 @@ fsgsbase = [] newlib = 
[] pci = [] rtl8139 = ["tcp", "pci"] -smp = ["include-transformed"] +smp = [] tcp = ["smoltcp", "smoltcp/socket-tcp"] udp = ["smoltcp", "smoltcp/socket-udp"] trace = [] @@ -73,7 +73,6 @@ dyn-clone = "1.0" hashbrown = { version = "0.14", default-features = false } hermit-entry = { version = "0.9", features = ["kernel"] } hermit-sync = "0.1" -include-transformed = { version = "0.2", optional = true } lock_api = "0.4" log = { version = "0.4", default-features = false } num = { version = "0.4", default-features = false } @@ -130,6 +129,10 @@ float-cmp = "0.9" num-traits = { version = "0.2", default-features = false } x86 = { version = "0.52", default-features = false } +[build-dependencies] +anyhow = "1" +llvm-tools = "0.1" + [workspace] members = [ "xtask", diff --git a/README.md b/README.md index caa1fa3eba..064d12c082 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,6 @@ This is the kernel of the [Hermit](https://github.com/hermit-os) unikernel proje ## Requirements * [`rustup`](https://www.rust-lang.org/tools/install) -* [NASM](https://nasm.us/) (only for SMP on x86_64) ## Building the kernel diff --git a/build.rs b/build.rs new file mode 100644 index 0000000000..1b3b25b1ac --- /dev/null +++ b/build.rs @@ -0,0 +1,88 @@ +use std::fs::File; +use std::io::Write; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::{env, fs}; + +use anyhow::{anyhow, ensure, Context, Result}; +use llvm_tools::LlvmTools; + +fn main() -> Result<()> { // The AP boot blob is only needed for x86_64 builds with the `smp` feature. + if env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "x86_64" + && env::var_os("CARGO_FEATURE_SMP").is_some() + { + assemble_x86_64_smp_boot()?; + } + + Ok(()) +} + +fn assemble_x86_64_smp_boot() -> Result<()> { // Wraps boot.s in LLVM IR, assembles it with llvm-as and links a flat binary to OUT_DIR/boot.bin. + let out_dir = PathBuf::from(env::var_os("OUT_DIR").unwrap()); + + let boot_s = Path::new("src/arch/x86_64/kernel/boot.s"); + let boot_ll = out_dir.join("boot.ll"); + let boot_bc = out_dir.join("boot.bc"); + let boot_bin = out_dir.join("boot.bin"); + + let llvm_as = binutil("llvm-as")?; + let rust_lld = 
binutil("rust-lld")?; + + let assembly = fs::read_to_string(boot_s)?; + + let mut llvm_file = File::create(&boot_ll)?; + writeln!( + &mut llvm_file, + r#" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-none-elf" + +module asm " +{assembly} +" +"# + )?; + llvm_file.flush()?; + drop(llvm_file); + + let status = Command::new(&llvm_as) + .arg("-o") + .arg(&boot_bc) + .arg(boot_ll) + .status() + .with_context(|| format!("Failed to run llvm-as from {}", llvm_as.display()))?; + ensure!(status.success(), "llvm-as failed with {status}"); + + let status = Command::new(&rust_lld) + .arg("-flavor") + .arg("gnu") + .arg("--section-start=.text=0x8000") // boot.s states the code is loaded at 0x8000 + .arg("--oformat=binary") + .arg("-o") + .arg(&boot_bin) + .arg(&boot_bc) + .status() + .with_context(|| format!("Failed to run rust-lld from {}", rust_lld.display()))?; + ensure!(status.success(), "rust-lld failed with {status}"); + + println!("cargo:rerun-if-changed={}", boot_s.display()); + Ok(()) +} + +fn binutil(name: &str) -> Result<PathBuf> { // Resolves the path of tool `name` from the rustup `llvm-tools` component. + let exe = format!("{name}{}", env::consts::EXE_SUFFIX); + + let path = LlvmTools::new() + .map_err(|err| match err { + llvm_tools::Error::NotFound => anyhow!( + "Could not find llvm-tools component\n\ + \n\ + Maybe the rustup component `llvm-tools` is missing? Install it through: `rustup component add llvm-tools`" + ), + err => anyhow!("{err:?}"), + })? 
+ .tool(&exe) + .ok_or_else(|| anyhow!("could not find {exe}"))?; + + Ok(path) +} diff --git a/src/arch/x86_64/kernel/apic.rs b/src/arch/x86_64/kernel/apic.rs index 59e24b69f1..3d66c15804 100644 --- a/src/arch/x86_64/kernel/apic.rs +++ b/src/arch/x86_64/kernel/apic.rs @@ -682,11 +682,9 @@ pub fn init_next_processor_variables() { pub fn boot_application_processors() { use core::hint; - use include_transformed::include_nasm_bin; - use super::{raw_boot_info, start}; - let smp_boot_code = include_nasm_bin!("boot.asm"); + let smp_boot_code = include_bytes!(concat!(core::env!("OUT_DIR"), "/boot.bin")); // We shouldn't have any problems fitting the boot code into a single page, but let's better be sure. assert!( diff --git a/src/arch/x86_64/kernel/boot.asm b/src/arch/x86_64/kernel/boot.asm deleted file mode 100644 index 681cbb30d5..0000000000 --- a/src/arch/x86_64/kernel/boot.asm +++ /dev/null @@ -1,151 +0,0 @@ -; This is the entry point for the application processors. -; It is loaded at 0x8000 by Hermit and filled with parameters. -; It does the switch from Real Mode -> Protected Mode -> Long Mode, -; sets up CR3 for this CPU, and then calls into _start. -; -; In contrast to this self-contained entry point, _start is linked -; to the rest of Hermit and thus has access to all exported symbols -; (like the actual Rust entry point). 
- - -CR0_PG equ (1 << 31) -CR4_PAE equ (1 << 5) -MSR_EFER equ 0xC0000080 -EFER_LME equ (1 << 8) -EFER_NXE equ (1 << 11) - -[BITS 16] -SECTION .text -GLOBAL _start -ORG 0x8000 -_start: - jmp _rmstart - -; PARAMETERS -align 8 - entry_point dq 0xDEADC0DE - cpu_id dd 0xC0DECAFE - boot_info dq 0xBEEFBEEF - pml4 dd 0xDEADBEEF - pad dd 0; - -_rmstart: - cli - lgdt [gdtr] - - ; switch to protected mode by setting PE bit - mov eax, cr0 - or al, 0x1 - mov cr0, eax - - ; far jump to the 32bit code - jmp dword codesel : _pmstart - -[BITS 32] -ALIGN 4 -_pmstart: - xor eax, eax - mov ax, datasel - mov ds, ax - mov es, ax - mov fs, ax - mov gs, ax - mov ss, ax - - jmp short stublet - jmp $ - -; GDT for the protected mode -ALIGN 4 -gdtr: ; descritor table - dw gdt_end-gdt-1 ; limit - dd gdt ; base address -gdt: - dd 0,0 ; null descriptor -codesel equ $-gdt - dw 0xFFFF ; segment size 0..15 - dw 0x0000 ; segment address 0..15 - db 0x00 ; segment address 16..23 - db 0x9A ; access permissions und type - db 0xCF ; additional information and segment size 16...19 - db 0x00 ; segment address 24..31 -datasel equ $-gdt - dw 0xFFFF ; segment size 0..15 - dw 0x0000 ; segment address 0..15 - db 0x00 ; segment address 16..23 - db 0x92 ; access permissions and type - db 0xCF ; additional informationen and degment size 16...19 - db 0x00 ; segment address 24..31 -gdt_end: - -ALIGN 4 -GDTR64: - dw GDT64_end - GDT64 - 1 ; Limit. - dq GDT64 ; Base. - -; we need a new GDT to switch in the 64bit modus -GDT64: ; Global Descriptor Table (64-bit). - .Null: equ $ - GDT64 ; The null descriptor. - dw 0 ; Limit (low). - dw 0 ; Base (low). - db 0 ; Base (middle) - db 0 ; Access. - db 0 ; Granularity. - db 0 ; Base (high). - .Code: equ $ - GDT64 ; The code descriptor. - dw 0 ; Limit (low). - dw 0 ; Base (low). - db 0 ; Base (middle) - db 10011010b ; Access. - db 00100000b ; Granularity. - db 0 ; Base (high). - .Data: equ $ - GDT64 ; The data descriptor. - dw 0 ; Limit (low). - dw 0 ; Base (low). 
- db 0 ; Base (middle) - db 10010010b ; Access. - db 00000000b ; Granularity. - db 0 ; Base (high). -GDT64_end: - -ALIGN 4 -stublet: - ; Enable PAE mode. - mov eax, cr4 - or eax, CR4_PAE - mov cr4, eax - - ; Set the address to PML4 in CR3. - mov eax, dword [pml4] - mov cr3, eax - - ; Enable x86-64 Compatibility Mode by setting EFER_LME. - ; Also enable early access to NO_EXECUTE-protected memory through EFER_NXE. - mov ecx, MSR_EFER - rdmsr - or eax, EFER_LME | EFER_NXE - wrmsr - - ; Enable Paging. - mov eax, cr0 - or eax, CR0_PG - mov cr0, eax - - ; Load the 64-bit global descriptor table. - lgdt [GDTR64] - mov ax, GDT64.Data - mov ss, ax - mov ds, ax - mov es, ax - - ; Set the code segment and enter 64-bit long mode. - jmp GDT64.Code:start64 - -[BITS 64] -ALIGN 8 -start64: - ; forward address to boot info - mov rdi, qword [boot_info] - mov esi, dword [cpu_id] - ; Jump to _start - jmp qword [entry_point] diff --git a/src/arch/x86_64/kernel/boot.s b/src/arch/x86_64/kernel/boot.s new file mode 100644 index 0000000000..29d53b010b --- /dev/null +++ b/src/arch/x86_64/kernel/boot.s @@ -0,0 +1,173 @@ +# This is the entry point for the application processors. +# It is loaded at 0x8000 by Hermit and filled with parameters. +# It does the switch from Real Mode -> Protected Mode -> Long Mode, +# sets up CR3 for this CPU, and then calls into _start. +# +# In contrast to this self-contained entry point, _start is linked +# to the rest of Hermit and thus has access to all exported symbols +# (like the actual Rust entry point). 
+ +.intel_syntax noprefix + +.set CR0_PG, 1 << 31 +.set CR4_PAE, 1 << 5 +.set MSR_EFER, 0xC0000080 +.set EFER_LME, 1 << 8 +.set EFER_NXE, 1 << 11 + +.code16 +.section .text +.global _start +_start: + jmp _rmstart + + # Fill padding with `nop` to generate exact same binary as NASM + .rept 5 + nop + .endr + +# PARAMETERS (placeholders; filled in by Hermit) +.align 8 + entry_point: .8byte 0xDEADC0DE + cpu_id: .4byte 0xC0DECAFE + boot_info: .8byte 0xBEEFBEEF + pml4: .4byte 0xDEADBEEF + pad: .4byte 0 + +_rmstart: + cli + lgdt [gdtr] + + # switch to protected mode by setting PE bit + mov eax, cr0 + or al, 0x1 + mov cr0, eax + + # The far jump is written in AT&T syntax, see https://github.com/llvm/llvm-project/issues/46048 + .att_syntax prefix + # far jump to the 32bit code + ljmpl $codesel, $_pmstart + .intel_syntax noprefix + + # Fill padding with `nop` to generate exact same binary as NASM + .rept 2 + nop + .endr + +.code32 +.align 4 +_pmstart: + xor eax, eax + mov ax, OFFSET datasel + mov ds, eax + mov es, eax + mov fs, eax + mov gs, eax + mov ss, eax + + jmp short stublet +2: + jmp 2b # spin forever; not reached, the jump above is unconditional + +# GDT for the protected mode +.align 4 +gdtr: # descriptor table + .2byte gdt_end - gdt - 1 # limit + .4byte gdt # base address +gdt: + .8byte 0 # null descriptor +.set codesel, . - gdt + .2byte 0xFFFF # segment size 0..15 + .2byte 0 # segment address 0..15 + .byte 0 # segment address 16..23 + .byte 0x9A # access permissions and type + .byte 0xCF # additional information and segment size 16...19 + .byte 0 # segment address 24..31 +.set datasel, . - gdt + .2byte 0xFFFF # segment size 0..15 + .2byte 0 # segment address 0..15 + .byte 0 # segment address 16..23 + .byte 0x92 # access permissions and type + .byte 0xCF # additional information and segment size 16...19 + .byte 0 # segment address 24..31 +gdt_end: + +.align 4 +GDTR64: + .2byte GDT64_end - GDT64 - 1 # Limit. + .8byte GDT64 # Base. + +# we need a new GDT to switch into 64-bit mode +GDT64: # Global Descriptor Table (64-bit). +.set GDT64.Null, . - GDT64 # The null descriptor. + .2byte 0 # Limit (low). 
+ .2byte 0 # Base (low). + .byte 0 # Base (middle) + .byte 0 # Access. + .byte 0 # Granularity. + .byte 0 # Base (high). +.set GDT64.Code, . - GDT64 # The code descriptor. + .2byte 0 # Limit (low). + .2byte 0 # Base (low). + .byte 0 # Base (middle) + .byte 0b10011010 # Access. + .byte 0b00100000 # Granularity. + .byte 0 # Base (high). +.set GDT64.Data, . - GDT64 # The data descriptor. + .2byte 0 # Limit (low). + .2byte 0 # Base (low). + .byte 0 # Base (middle) + .byte 0b10010010 # Access. + .byte 0b00000000 # Granularity. + .byte 0 # Base (high). +GDT64_end: + +.align 4 +stublet: + # Enable PAE mode. + mov eax, cr4 + or eax, CR4_PAE + mov cr4, eax + + # Set the address to PML4 in CR3. + mov eax, [pml4] + mov cr3, eax + + # Enable x86-64 Compatibility Mode by setting EFER_LME. + # Also enable early access to NO_EXECUTE-protected memory through EFER_NXE. + mov ecx, MSR_EFER + rdmsr + or eax, EFER_LME | EFER_NXE + wrmsr + + # Enable Paging. + mov eax, cr0 + or eax, CR0_PG + mov cr0, eax + + # Load the 64-bit global descriptor table. + lgdt [GDTR64] + mov ax, OFFSET GDT64.Data + mov ss, eax + mov ds, eax + mov es, eax + + # The far jump is written in AT&T syntax, see https://github.com/llvm/llvm-project/issues/46048 + .att_syntax prefix + # Set the code segment and enter 64-bit long mode. + ljmpl $GDT64.Code, $start64 + .intel_syntax noprefix + + # Fill padding with `nop` to generate exact same binary as NASM + .rept 1 + nop + .endr + +.code64 +.align 8 +start64: + # Load the boot info address into rdi and the CPU id into esi + mov rdi, [boot_info] + mov esi, [cpu_id] + # Jump to _start (through the address stored in entry_point) + jmp [entry_point]