From a3362a6359d79a0a68f4f81fe5c6c741f0e1380d Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Fri, 16 Feb 2024 14:11:45 +0100 Subject: [PATCH 01/10] prepare kernel to use it as common monolithic kernel - currently, only x86 is supported - add system call interface and system call table, which includes function pointer to the implementations - save GS register during a context switch - swap GS register, if the user space is interrupted - introduce privilege level for the user-space - create for every process an own page table --- src/arch/mod.rs | 69 +++++++++------- src/arch/x86_64/kernel/apic.rs | 11 ++- src/arch/x86_64/kernel/gdt.rs | 19 ++++- src/arch/x86_64/kernel/interrupts.rs | 58 +++++++------ src/arch/x86_64/kernel/mod.rs | 119 ++++++++++++++++++++++++++- src/arch/x86_64/kernel/pic.rs | 10 ++- src/arch/x86_64/kernel/processor.rs | 32 +++++++ src/arch/x86_64/kernel/scheduler.rs | 14 +++- src/arch/x86_64/kernel/switch.rs | 70 +++++++++++++++- src/arch/x86_64/kernel/syscall.rs | 49 +++++++++++ src/arch/x86_64/mm/mod.rs | 61 +++++++++++++- src/arch/x86_64/mm/paging.rs | 79 +++++++++++++++--- src/arch/x86_64/mm/virtualmem.rs | 10 ++- src/arch/x86_64/mod.rs | 19 +++++ src/config.rs | 2 +- src/drivers/net/mod.rs | 4 +- src/env.rs | 2 +- src/lib.rs | 13 +-- src/mm/mod.rs | 57 ++++++++++++- src/scheduler/mod.rs | 47 +++++++++-- src/scheduler/task.rs | 29 +++++-- src/syscalls/entropy.rs | 1 + src/syscalls/mod.rs | 10 ++- src/syscalls/table.rs | 88 ++++++++++++++++++++ src/syscalls/tasks.rs | 16 +--- 25 files changed, 761 insertions(+), 128 deletions(-) create mode 100644 src/arch/x86_64/kernel/syscall.rs create mode 100644 src/syscalls/table.rs diff --git a/src/arch/mod.rs b/src/arch/mod.rs index c093434eda..81b75b3c33 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -2,64 +2,70 @@ cfg_if::cfg_if!
{ if #[cfg(target_arch = "aarch64")] { - pub mod aarch64; - pub use self::aarch64::*; + pub(crate) mod aarch64; + pub(crate) use self::aarch64::*; #[cfg(target_os = "none")] - pub use self::aarch64::kernel::boot_processor_init; - pub use self::aarch64::kernel::core_local; - pub use self::aarch64::kernel::interrupts; - pub use self::aarch64::kernel::interrupts::wakeup_core; + pub(crate) use self::aarch64::kernel::boot_processor_init; + pub(crate) use self::aarch64::kernel::core_local; + pub(crate) use self::aarch64::kernel::interrupts; + pub(crate) use self::aarch64::kernel::interrupts::wakeup_core; #[cfg(feature = "pci")] - pub use self::aarch64::kernel::pci; - pub use self::aarch64::kernel::processor; - pub use self::aarch64::kernel::processor::set_oneshot_timer; - pub use self::aarch64::kernel::scheduler; - pub use self::aarch64::kernel::switch; + pub(crate) use self::aarch64::kernel::pci; + pub(crate) use self::aarch64::kernel::processor; + pub(crate) use self::aarch64::kernel::processor::set_oneshot_timer; + pub(crate) use self::aarch64::kernel::scheduler; + pub(crate) use self::aarch64::kernel::switch; #[cfg(feature = "smp")] - pub use self::aarch64::kernel::application_processor_init; - pub use self::aarch64::kernel::{ + pub(crate) use self::aarch64::kernel::application_processor_init; + pub(crate) use self::aarch64::kernel::{ boot_application_processors, get_processor_count, message_output_init, output_message_buf, }; + pub use self::aarch64::mm::paging::{BasePageSize, PageSize}; } else if #[cfg(target_arch = "x86_64")] { - pub mod x86_64; - pub use self::x86_64::*; + pub(crate) mod x86_64; + pub(crate) use self::x86_64::*; - pub use self::x86_64::kernel::apic::{ + pub(crate) use self::x86_64::kernel::apic::{ set_oneshot_timer, wakeup_core, }; #[cfg(all(target_os = "none", feature = "smp"))] - pub use self::x86_64::kernel::application_processor_init; - pub use self::x86_64::kernel::core_local; - pub use self::x86_64::kernel::gdt::set_current_kernel_stack; - 
pub use self::x86_64::kernel::interrupts; + pub(crate) use self::x86_64::kernel::application_processor_init; + pub(crate) use self::x86_64::kernel::core_local; + pub(crate) use self::x86_64::kernel::gdt::set_current_kernel_stack; + pub(crate) use self::x86_64::kernel::interrupts; #[cfg(feature = "pci")] - pub use self::x86_64::kernel::pci; - pub use self::x86_64::kernel::processor; - pub use self::x86_64::kernel::scheduler; - pub use self::x86_64::kernel::switch; + pub(crate) use self::x86_64::kernel::pci; + pub(crate) use self::x86_64::kernel::processor; + pub(crate) use self::x86_64::kernel::scheduler; + pub(crate) use self::x86_64::kernel::switch; #[cfg(target_os = "none")] - pub use self::x86_64::kernel::{ + pub(crate) use self::x86_64::kernel::{ boot_application_processors, boot_processor_init, }; - pub use self::x86_64::kernel::{ + pub(crate) use self::x86_64::kernel::{ get_processor_count, message_output_init, output_message_buf, }; + pub use self::x86_64::mm::paging::{BasePageSize, PageSize}; + #[cfg(feature = "common-os")] + pub use self::x86_64::mm::create_new_root_page_table; + #[cfg(feature = "common-os")] + pub use self::x86_64::kernel::{load_application, jump_to_user_land}; } else if #[cfg(target_arch = "riscv64")] { - pub mod riscv64; - pub use self::riscv64::*; + pub(crate) mod riscv64; + pub(crate) use self::riscv64::*; #[cfg(feature = "smp")] - pub use self::riscv64::kernel::application_processor_init; - pub use self::riscv64::kernel::processor::{self, set_oneshot_timer, wakeup_core}; - pub use self::riscv64::kernel::{ + pub(crate) use self::riscv64::kernel::application_processor_init; + pub(crate) use self::riscv64::kernel::processor::{self, set_oneshot_timer, wakeup_core}; + pub(crate) use self::riscv64::kernel::{ boot_application_processors, boot_processor_init, core_local, @@ -70,5 +76,6 @@ cfg_if::cfg_if! 
{ scheduler, switch, }; + pub use self::riscv64::mm::paging::{BasePageSize, PageSize}; } } diff --git a/src/arch/x86_64/kernel/apic.rs b/src/arch/x86_64/kernel/apic.rs index adc3ee1498..e16cfe16ec 100644 --- a/src/arch/x86_64/kernel/apic.rs +++ b/src/arch/x86_64/kernel/apic.rs @@ -22,6 +22,7 @@ use crate::arch::x86_64::mm::paging::{ BasePageSize, PageSize, PageTableEntryFlags, PageTableEntryFlagsExt, }; use crate::arch::x86_64::mm::{paging, virtualmem, PhysAddr, VirtAddr}; +use crate::arch::x86_64::swapgs; use crate::config::*; use crate::scheduler::CoreId; use crate::{arch, env, mm, scheduler}; @@ -199,16 +200,19 @@ impl fmt::Display for IoApicRecord { } #[cfg(feature = "smp")] -extern "x86-interrupt" fn tlb_flush_handler(_stack_frame: interrupts::ExceptionStackFrame) { +extern "x86-interrupt" fn tlb_flush_handler(stack_frame: interrupts::ExceptionStackFrame) { + swapgs(&stack_frame); debug!("Received TLB Flush Interrupt"); increment_irq_counter(TLB_FLUSH_INTERRUPT_NUMBER); unsafe { cr3_write(cr3()); } eoi(); + swapgs(&stack_frame); } extern "x86-interrupt" fn error_interrupt_handler(stack_frame: interrupts::ExceptionStackFrame) { + swapgs(&stack_frame); error!("APIC LVT Error Interrupt"); error!("ESR: {:#X}", local_apic_read(IA32_X2APIC_ESR)); error!("{:#?}", stack_frame); @@ -217,12 +221,14 @@ extern "x86-interrupt" fn error_interrupt_handler(stack_frame: interrupts::Excep } extern "x86-interrupt" fn spurious_interrupt_handler(stack_frame: interrupts::ExceptionStackFrame) { + swapgs(&stack_frame); error!("Spurious Interrupt: {:#?}", stack_frame); scheduler::abort(); } #[cfg(feature = "smp")] -extern "x86-interrupt" fn wakeup_handler(_stack_frame: interrupts::ExceptionStackFrame) { +extern "x86-interrupt" fn wakeup_handler(stack_frame: interrupts::ExceptionStackFrame) { + swapgs(&stack_frame); use crate::scheduler::PerCoreSchedulerExt; debug!("Received Wakeup Interrupt"); @@ -233,6 +239,7 @@ extern "x86-interrupt" fn wakeup_handler(_stack_frame: 
interrupts::ExceptionStac if core_scheduler.is_scheduling() { core_scheduler.reschedule(); } + swapgs(&stack_frame); } #[inline] diff --git a/src/arch/x86_64/kernel/gdt.rs b/src/arch/x86_64/kernel/gdt.rs index e81908d6cf..bd0a6dfdfe 100644 --- a/src/arch/x86_64/kernel/gdt.rs +++ b/src/arch/x86_64/kernel/gdt.rs @@ -3,6 +3,8 @@ use core::sync::atomic::Ordering; use x86_64::instructions::tables; use x86_64::registers::segmentation::{Segment, CS, DS, ES, SS}; +#[cfg(feature = "common-os")] +use x86_64::structures::gdt::DescriptorFlags; use x86_64::structures::gdt::{Descriptor, GlobalDescriptorTable}; use x86_64::structures::tss::TaskStateSegment; use x86_64::VirtAddr; @@ -15,9 +17,16 @@ use crate::arch::x86_64::mm::paging::{BasePageSize, PageSize}; use crate::config::KERNEL_STACK_SIZE; pub fn add_current_core() { - let gdt = Box::leak(Box::new(GlobalDescriptorTable::new())); + let gdt: &mut GlobalDescriptorTable = Box::leak(Box::new(GlobalDescriptorTable::new())); let kernel_code_selector = gdt.add_entry(Descriptor::kernel_code_segment()); let kernel_data_selector = gdt.add_entry(Descriptor::kernel_data_segment()); + #[cfg(feature = "common-os")] + { + let _user_code32_selector = + gdt.add_entry(Descriptor::UserSegment(DescriptorFlags::USER_CODE32.bits())); + let _user_data64_selector = gdt.add_entry(Descriptor::user_data_segment()); + let _user_code64_selector = gdt.add_entry(Descriptor::user_code_segment()); + } // Dynamically allocate memory for a Task-State Segment (TSS) for this core. 
let tss = Box::leak(Box::new(TaskStateSegment::new())); @@ -60,5 +69,13 @@ pub fn add_current_core() { } pub extern "C" fn set_current_kernel_stack() { + #[cfg(feature = "common-os")] + unsafe { + let root = crate::scheduler::get_root_page_table(); + if root != x86::controlregs::cr3().try_into().unwrap() { + x86::controlregs::cr3_write(root.try_into().unwrap()); + } + } + core_scheduler().set_current_kernel_stack(); } diff --git a/src/arch/x86_64/kernel/interrupts.rs b/src/arch/x86_64/kernel/interrupts.rs index d3381a2535..7bd3b15de6 100644 --- a/src/arch/x86_64/kernel/interrupts.rs +++ b/src/arch/x86_64/kernel/interrupts.rs @@ -6,23 +6,23 @@ use ahash::RandomState; use hashbrown::HashMap; use hermit_sync::{InterruptSpinMutex, InterruptTicketMutex}; pub use x86_64::instructions::interrupts::{disable, enable, enable_and_hlt as enable_and_wait}; -use x86_64::registers::control::Cr2; use x86_64::set_general_handler; pub use x86_64::structures::idt::InterruptStackFrame as ExceptionStackFrame; -use x86_64::structures::idt::{InterruptDescriptorTable, InterruptStackFrame, PageFaultErrorCode}; +use x86_64::structures::idt::{InterruptDescriptorTable, InterruptStackFrame}; use crate::arch::x86_64::kernel::core_local::{core_scheduler, increment_irq_counter}; use crate::arch::x86_64::kernel::{apic, processor}; -use crate::arch::x86_64::mm::paging::{BasePageSize, PageSize}; +use crate::arch::x86_64::mm::paging::{page_fault_handler, BasePageSize, PageSize}; +use crate::arch::x86_64::swapgs; use crate::scheduler::{self, CoreId}; -pub const IST_ENTRIES: usize = 4; -pub const IST_SIZE: usize = 8 * BasePageSize::SIZE as usize; +pub(crate) const IST_ENTRIES: usize = 4; +pub(crate) const IST_SIZE: usize = 8 * BasePageSize::SIZE as usize; -pub static IDT: InterruptSpinMutex = +pub(crate) static IDT: InterruptSpinMutex = InterruptSpinMutex::new(InterruptDescriptorTable::new()); -pub fn load_idt() { +pub(crate) fn load_idt() { // FIXME: This is not sound! 
For this to be sound, the table must never be // modified or destroyed while in use. This is _not_ the case here. Instead, we // disable interrupts on the current core when modifying the table and hope for @@ -32,7 +32,7 @@ pub fn load_idt() { } } -pub fn install() { +pub(crate) fn install() { let mut idt = IDT.lock(); set_general_handler!(&mut *idt, abort, 0..32); @@ -144,41 +144,49 @@ fn unknown(_stack_frame: ExceptionStackFrame, index: u8, _error_code: Option ! { + swapgs(&stack_frame); error!( "Double Fault (#DF) Exception: {:#?}, error {:#X}", stack_frame, error_code @@ -245,39 +259,31 @@ extern "x86-interrupt" fn double_fault_exception( } extern "x86-interrupt" fn floating_point_exception(stack_frame: ExceptionStackFrame) { + swapgs(&stack_frame); error!("Floating-Point Error (#MF) Exception: {:#?}", stack_frame); scheduler::abort(); } extern "x86-interrupt" fn alignment_check_exception(stack_frame: ExceptionStackFrame, _code: u64) { + swapgs(&stack_frame); error!("Alignment Check (#AC) Exception: {:#?}", stack_frame); scheduler::abort(); } -pub extern "x86-interrupt" fn page_fault_handler( - stack_frame: ExceptionStackFrame, - error_code: PageFaultErrorCode, -) { - error!("Page fault (#PF)!"); - error!("page_fault_linear_address = {:p}", Cr2::read()); - error!("error_code = {error_code:?}"); - error!("fs = {:#X}", processor::readfs()); - error!("gs = {:#X}", processor::readgs()); - error!("stack_frame = {stack_frame:#?}"); - scheduler::abort(); -} - extern "x86-interrupt" fn machine_check_exception(stack_frame: ExceptionStackFrame) -> ! 
{ + swapgs(&stack_frame); error!("Machine Check (#MC) Exception: {:#?}", stack_frame); scheduler::abort() } extern "x86-interrupt" fn simd_floating_point_exception(stack_frame: ExceptionStackFrame) { + swapgs(&stack_frame); error!("SIMD Floating-Point (#XM) Exception: {:#?}", stack_frame); scheduler::abort(); } extern "x86-interrupt" fn virtualization_exception(stack_frame: ExceptionStackFrame) { + swapgs(&stack_frame); error!("Virtualization (#VE) Exception: {:#?}", stack_frame); scheduler::abort(); } @@ -285,7 +291,7 @@ extern "x86-interrupt" fn virtualization_exception(stack_frame: ExceptionStackFr static IRQ_NAMES: InterruptTicketMutex> = InterruptTicketMutex::new(HashMap::with_hasher(RandomState::with_seeds(0, 0, 0, 0))); -pub fn add_irq_name(irq_number: u8, name: &'static str) { +pub(crate) fn add_irq_name(irq_number: u8, name: &'static str) { debug!("Register name \"{}\" for interrupt {}", name, irq_number); IRQ_NAMES.lock().insert(32 + irq_number, name); } @@ -294,10 +300,10 @@ fn get_irq_name(irq_number: u8) -> Option<&'static str> { IRQ_NAMES.lock().get(&irq_number).copied() } -pub static IRQ_COUNTERS: InterruptSpinMutex> = +pub(crate) static IRQ_COUNTERS: InterruptSpinMutex> = InterruptSpinMutex::new(BTreeMap::new()); -pub struct IrqStatistics { +pub(crate) struct IrqStatistics { pub counters: [AtomicU64; 256], } @@ -315,7 +321,7 @@ impl IrqStatistics { } } -pub fn print_statistics() { +pub(crate) fn print_statistics() { info!("Number of interrupts"); for (core_id, irg_statistics) in IRQ_COUNTERS.lock().iter() { for (i, counter) in irg_statistics.counters.iter().enumerate() { diff --git a/src/arch/x86_64/kernel/mod.rs b/src/arch/x86_64/kernel/mod.rs index a5a34c68b1..23f1a58a11 100644 --- a/src/arch/x86_64/kernel/mod.rs +++ b/src/arch/x86_64/kernel/mod.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "common-os")] +use core::arch::asm; #[cfg(feature = "newlib")] use core::slice; use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; @@ -29,7 +31,9 @@ pub mod 
serial; #[cfg(target_os = "none")] mod start; pub mod switch; -pub mod systemtime; +#[cfg(feature = "common-os")] +mod syscall; +pub(crate) mod systemtime; #[cfg(feature = "vga")] mod vga; @@ -269,3 +273,116 @@ unsafe extern "C" fn pre_init(boot_info: &'static RawBootInfo, cpu_id: u32) -> ! crate::application_processor_main(); } } + +#[cfg(feature = "common-os")] +const LOADER_START: usize = 0x10000000000; +#[cfg(feature = "common-os")] +const LOADER_STACK_SIZE: usize = 0x8000; + +#[cfg(feature = "common-os")] +pub fn load_application(code_size: u64, tls_size: u64, func: F) -> Result<(), ()> +where + F: FnOnce(&'static mut [u8], Option<&'static mut [u8]>) -> Result<(), ()>, +{ + use core::ptr::slice_from_raw_parts_mut; + + use align_address::Align; + use x86_64::structures::paging::{PageSize, Size4KiB as BasePageSize}; + + use crate::arch::x86_64::mm::paging::{self, PageTableEntryFlags, PageTableEntryFlagsExt}; + use crate::arch::x86_64::mm::physicalmem; + + let code_size = (code_size as usize + LOADER_STACK_SIZE).align_up(BasePageSize::SIZE as usize); + let physaddr = + physicalmem::allocate_aligned(code_size as usize, BasePageSize::SIZE as usize).unwrap(); + + let mut flags = PageTableEntryFlags::empty(); + flags.normal().writable().user().execute_enable(); + paging::map::( + VirtAddr::from(LOADER_START), + physaddr, + code_size / BasePageSize::SIZE as usize, + flags, + ); + + let code_slice = unsafe { &mut *slice_from_raw_parts_mut(LOADER_START as *mut u8, code_size) }; + + if tls_size > 0 { + // To access TLS blocks on x86-64, TLS offsets are *subtracted* from the thread register value. + // So the thread pointer needs to be `block_ptr + tls_offset`. + // GNU style TLS requires `gs:0` to represent the same address as the thread pointer. + // Since the thread pointer points to the end of the TLS blocks, we need to store it there. 
+ let tcb_size = core::mem::size_of::<*mut ()>(); + let tls_offset = tls_size as usize; + + let tls_memsz = (tls_offset + tcb_size).align_up(BasePageSize::SIZE as usize); + let physaddr = + physicalmem::allocate_aligned(tls_memsz, BasePageSize::SIZE as usize).unwrap(); + + let mut flags = PageTableEntryFlags::empty(); + flags.normal().writable().user().execute_disable(); + let tls_virt = VirtAddr::from(LOADER_START + code_size + BasePageSize::SIZE as usize); + paging::map::( + tls_virt, + physaddr, + tls_memsz / BasePageSize::SIZE as usize, + flags, + ); + let block = unsafe { + &mut *slice_from_raw_parts_mut(tls_virt.as_mut_ptr() as *mut u8, tls_offset + tcb_size) + }; + for elem in block.iter_mut() { + *elem = 0; + } + + // thread_ptr = block_ptr + tls_offset + let thread_ptr = block[tls_offset..].as_mut_ptr().cast::<()>(); + unsafe { + thread_ptr.cast::<*mut ()>().write(thread_ptr); + } + crate::arch::x86_64::kernel::processor::writefs(thread_ptr as usize); + + func(code_slice, Some(block)) + } else { + func(code_slice, None) + } +} + +#[cfg(feature = "common-os")] +pub unsafe fn jump_to_user_land(entry_point: u64, code_size: u64) -> ! 
{ + use align_address::Align; + use x86_64::structures::paging::{PageSize, Size4KiB as BasePageSize}; + + use crate::arch::x86_64::kernel::scheduler::TaskStacks; + + let ds = 0x23u64; + let cs = 0x2bu64; + let entry_point: u64 = (LOADER_START as u64) | entry_point; + let stack_pointer: u64 = LOADER_START as u64 + + (code_size + LOADER_STACK_SIZE as u64).align_up(BasePageSize::SIZE) + - 128 /* red zone */ - 8; + + debug!( + "Jump to user space at 0x{:x}, stack pointer 0x{:x}", + entry_point, stack_pointer + ); + unsafe { + asm!( + "and rsp, {0}", + "swapgs", + "push {1}", + "push {2}", + "push {3}", + "push {4}", + "push {5}", + "iretq", + const u64::MAX - (TaskStacks::MARKER_SIZE as u64 - 1), + in(reg) ds, + in(reg) stack_pointer, + const 0x1202u64, + in(reg) cs, + in(reg) entry_point, + options(nostack, noreturn) + ); + } +} diff --git a/src/arch/x86_64/kernel/pic.rs b/src/arch/x86_64/kernel/pic.rs index 70c79ed744..96c9503e84 100644 --- a/src/arch/x86_64/kernel/pic.rs +++ b/src/arch/x86_64/kernel/pic.rs @@ -2,6 +2,8 @@ use x86::io::*; use super::interrupts::IDT; use crate::arch::x86_64::kernel::interrupts::ExceptionStackFrame; +use crate::arch::x86_64::swapgs; +use crate::scheduler; const PIC1_COMMAND_PORT: u16 = 0x20; const PIC1_DATA_PORT: u16 = 0x21; @@ -72,11 +74,14 @@ pub fn init() { } } -extern "x86-interrupt" fn spurious_interrupt_on_master(_stack_frame: ExceptionStackFrame) { +extern "x86-interrupt" fn spurious_interrupt_on_master(stack_frame: ExceptionStackFrame) { + swapgs(&stack_frame); debug!("Spurious Interrupt on Master PIC (IRQ7)"); + scheduler::abort(); } -extern "x86-interrupt" fn spurious_interrupt_on_slave(_stack_frame: ExceptionStackFrame) { +extern "x86-interrupt" fn spurious_interrupt_on_slave(stack_frame: ExceptionStackFrame) { + swapgs(&stack_frame); debug!("Spurious Interrupt on Slave PIC (IRQ15)"); // As this is an interrupt forwarded by the master, we have to acknowledge it on the master @@ -84,6 +89,7 @@ extern "x86-interrupt" fn 
spurious_interrupt_on_slave(_stack_frame: ExceptionSta unsafe { outb(PIC1_COMMAND_PORT, PIC_EOI_COMMAND); } + scheduler::abort(); } fn edit_mask(int_no: u8, insert: bool) { diff --git a/src/arch/x86_64/kernel/processor.rs b/src/arch/x86_64/kernel/processor.rs index 711bedd537..32d2605385 100644 --- a/src/arch/x86_64/kernel/processor.rs +++ b/src/arch/x86_64/kernel/processor.rs @@ -778,6 +778,8 @@ pub fn detect_features() { } pub fn configure() { + let cpuid = CpuId::new(); + // setup MSR EFER unsafe { wrmsr(IA32_EFER, rdmsr(IA32_EFER) | EFER_LMA | EFER_SCE | EFER_NXE); @@ -808,6 +810,15 @@ pub fn configure() { // let mut cr4 = unsafe { cr4() }; + let has_pge = match cpuid.get_feature_info() { + Some(finfo) => finfo.has_pge(), + None => false, + }; + + if has_pge { + cr4 |= Cr4::CR4_ENABLE_GLOBAL_PAGES; + } + // Enable Machine Check Exceptions. // No need to check for support here, all x86-64 CPUs support it. cr4.insert(Cr4::CR4_ENABLE_MACHINE_CHECK); @@ -862,6 +873,27 @@ pub fn configure() { } } + // enable support of syscall and sysret + #[cfg(feature = "common-os")] + unsafe { + let has_syscall = match cpuid.get_extended_processor_and_feature_identifiers() { + Some(finfo) => finfo.has_syscall_sysret(), + None => false, + }; + + if has_syscall { + info!("Enable SYSCALL support"); + } else { + panic!("Syscall support is missing"); + } + wrmsr(IA32_STAR, (0x1Bu64 << 48) | (0x08u64 << 32)); + wrmsr( + IA32_LSTAR, + crate::arch::x86_64::kernel::syscall::syscall_handler as u64, + ); + wrmsr(IA32_FMASK, 1 << 9); // clear IF flag during system call + } + // Initialize the FS register, which is later used for Thread-Local Storage. writefs(0); diff --git a/src/arch/x86_64/kernel/scheduler.rs b/src/arch/x86_64/kernel/scheduler.rs index 9f4e96098e..369b7512f3 100644 --- a/src/arch/x86_64/kernel/scheduler.rs +++ b/src/arch/x86_64/kernel/scheduler.rs @@ -1,9 +1,13 @@ //! 
Architecture dependent interface to initialize a task +#[cfg(not(feature = "common-os"))] use alloc::boxed::Box; use core::arch::asm; +#[cfg(not(feature = "common-os"))] use core::mem::MaybeUninit; -use core::{mem, ptr, slice}; +#[cfg(not(feature = "common-os"))] +use core::slice; +use core::{mem, ptr}; use align_address::Align; @@ -15,12 +19,16 @@ use crate::arch::x86_64::mm::paging::{ }; use crate::arch::x86_64::mm::{PhysAddr, VirtAddr}; use crate::config::*; +#[cfg(not(feature = "common-os"))] use crate::kernel; use crate::scheduler::task::{Task, TaskFrame}; use crate::scheduler::PerCoreSchedulerExt; #[repr(C, packed)] struct State { + #[cfg(feature = "common-os")] + /// GS register + gs: u64, /// FS register for TLS support fs: u64, /// R15 register @@ -231,11 +239,13 @@ impl Drop for TaskStacks { } } +#[cfg(not(feature = "common-os"))] pub struct TaskTLS { _block: Box<[MaybeUninit]>, thread_ptr: *mut (), } +#[cfg(not(feature = "common-os"))] impl TaskTLS { // For details on thread-local storage data structures see // @@ -328,6 +338,7 @@ extern "C" fn task_entry(func: extern "C" fn(usize), arg: usize) -> ! { impl TaskFrame for Task { fn create_stack_frame(&mut self, func: extern "C" fn(usize), arg: usize) { // Check if TLS is allocated already and if the task uses thread-local storage. 
+ #[cfg(not(feature = "common-os"))] if self.tls.is_none() { self.tls = TaskTLS::from_environment(); } @@ -344,6 +355,7 @@ impl TaskFrame for Task { let state = stack.as_mut_ptr::(); ptr::write_bytes(stack.as_mut_ptr::(), 0, mem::size_of::()); + #[cfg(not(feature = "common-os"))] if let Some(tls) = &self.tls { (*state).fs = tls.thread_ptr().addr() as u64; } diff --git a/src/arch/x86_64/kernel/switch.rs b/src/arch/x86_64/kernel/switch.rs index eb9b19f7f5..71c276de48 100644 --- a/src/arch/x86_64/kernel/switch.rs +++ b/src/arch/x86_64/kernel/switch.rs @@ -4,6 +4,68 @@ use core::{mem, ptr}; use crate::core_local::CoreLocal; use crate::set_current_kernel_stack; +#[cfg(not(feature = "common-os"))] +macro_rules! push_gs { + () => { + r#" + "# + }; +} + +#[cfg(not(feature = "common-os"))] +macro_rules! pop_gs { + () => { + r#" + "# + }; +} + +#[cfg(all(feature = "fsgsbase", feature = "common-os"))] +macro_rules! push_gs { + () => { + r#" + rdfsbase rax + push rax + "# + }; +} + +#[cfg(all(feature = "fsgsbase", feature = "common-os"))] +macro_rules! pop_gs { + () => { + r#" + pop rax + wrfsbase rax + "# + }; +} + +#[cfg(all(not(feature = "fsgsbase"), feature = "common-os"))] +macro_rules! push_gs { + () => { + r#" + mov ecx, 0xc0000101 // Kernel GS.Base Model Specific Register + rdmsr + sub rsp, 8 + mov [rsp+4], edx + mov [rsp], eax + "# + }; +} + +#[cfg(all(not(feature = "fsgsbase"), feature = "common-os"))] +macro_rules! pop_gs { + () => { + r#" + mov ecx, 0xc0000101 // Kernel GS.Base Model Specific Register + mov edx, [rsp+4] + mov eax, [rsp] + add rsp, 8 + wrmsr + "# + }; +} + #[cfg(feature = "fsgsbase")] macro_rules! push_fs { () => { @@ -71,7 +133,8 @@ macro_rules! save_context { push r14 push r15 "#, - push_fs!() + push_fs!(), + push_gs!() ) }; } @@ -79,6 +142,7 @@ macro_rules! save_context { macro_rules! restore_context { () => { concat!( + pop_gs!(), pop_fs!(), r#" pop r15 @@ -104,7 +168,7 @@ macro_rules! 
restore_context { } #[naked] -pub unsafe extern "C" fn switch_to_task(_old_stack: *mut usize, _new_stack: usize) { +pub(crate) unsafe extern "C" fn switch_to_task(_old_stack: *mut usize, _new_stack: usize) { // `old_stack` is in `rdi` register // `new_stack` is in `rsi` register @@ -131,7 +195,7 @@ pub unsafe extern "C" fn switch_to_task(_old_stack: *mut usize, _new_stack: usiz /// Performa a context switch to an idle task or a task, which already is owner /// of the FPU. #[naked] -pub unsafe extern "C" fn switch_to_fpu_owner(_old_stack: *mut usize, _new_stack: usize) { +pub(crate) unsafe extern "C" fn switch_to_fpu_owner(_old_stack: *mut usize, _new_stack: usize) { // `old_stack` is in `rdi` register // `new_stack` is in `rsi` register diff --git a/src/arch/x86_64/kernel/syscall.rs b/src/arch/x86_64/kernel/syscall.rs new file mode 100644 index 0000000000..2c99d0c27c --- /dev/null +++ b/src/arch/x86_64/kernel/syscall.rs @@ -0,0 +1,49 @@ +use core::arch::asm; + +use crate::syscalls::table::SYSHANDLER_TABLE; + +#[no_mangle] +#[naked] +pub(crate) unsafe extern "C" fn syscall_handler() -> ! 
{ + unsafe { + asm!( + // save context, see x86_64 ABI + "push rcx", + "push rdx", + "push rsi", + "push rdi", + "push r8", + "push r9", + "push r10", + "push r11", + // switch to kernel stack + "swapgs", + "mov rcx, rsp", + "mov rsp, gs:32", + // save user stack pointer + "push rcx", + // copy 4th argument to rcx to adhere x86_64 ABI + "mov rcx, r10", + "sti", + "mov r10, qword ptr [rip + {table}@GOTPCREL]", + "call [r10 + 8*rax]", + "cli", + // restore user stack pointer + "pop rcx", + "mov rsp, rcx", + "swapgs", + // restore context, see x86_64 ABI + "pop r11", + "pop r10", + "pop r9", + "pop r8", + "pop rdi", + "pop rsi", + "pop rdx", + "pop rcx", + "sysretq", + table = sym SYSHANDLER_TABLE, + options(noreturn) + ); + } +} diff --git a/src/arch/x86_64/mm/mod.rs b/src/arch/x86_64/mm/mod.rs index 1a8b356438..b4e606ee09 100644 --- a/src/arch/x86_64/mm/mod.rs +++ b/src/arch/x86_64/mm/mod.rs @@ -1,12 +1,18 @@ -pub mod paging; -pub mod physicalmem; -pub mod virtualmem; +pub(crate) mod paging; +pub(crate) mod physicalmem; +pub(crate) mod virtualmem; use core::slice; +#[cfg(feature = "common-os")] +use align_address::Align; pub use x86::bits64::paging::{PAddr as PhysAddr, VAddr as VirtAddr}; +#[cfg(feature = "common-os")] +use x86_64::structures::paging::{PageSize, Size4KiB as BasePageSize}; pub use self::paging::init_page_tables; +#[cfg(feature = "common-os")] +use crate::arch::mm::paging::{PageTableEntryFlags, PageTableEntryFlagsExt}; /// Memory translation, allocation and deallocation for MultibootInformation struct MultibootMemory; @@ -34,8 +40,57 @@ impl multiboot::information::MemoryManagement for MultibootMemory { } } +#[cfg(feature = "common-os")] +pub fn create_new_root_page_table() -> usize { + let physaddr = + physicalmem::allocate_aligned(BasePageSize::SIZE as usize, BasePageSize::SIZE as usize) + .unwrap(); + let virtaddr = + virtualmem::allocate_aligned(2 * BasePageSize::SIZE as usize, BasePageSize::SIZE as usize) + .unwrap(); + let mut flags = 
PageTableEntryFlags::empty(); + flags.normal().writable(); + + let entry: u64 = unsafe { + let cr3 = x86::controlregs::cr3().align_down(BasePageSize::SIZE); + paging::map::(virtaddr, PhysAddr(cr3), 1, flags); + let entry: &u64 = &*virtaddr.as_ptr(); + + *entry + }; + + let slice_addr = virtaddr + BasePageSize::SIZE; + paging::map::(slice_addr, physaddr, 1, flags); + + unsafe { + let pml4 = core::slice::from_raw_parts_mut(slice_addr.as_mut_ptr() as *mut u64, 512); + + // clear PML4 + for elem in pml4.iter_mut() { + *elem = 0; + } + + // copy first element and the self reference + pml4[0] = entry; + // create self reference + pml4[511] = physaddr.as_u64() + 0x3; // PG_PRESENT | PG_RW + }; + + paging::unmap::(virtaddr, 2); + virtualmem::deallocate(virtaddr, 2 * BasePageSize::SIZE as usize); + + physaddr.as_usize() +} + pub fn init() { paging::init(); physicalmem::init(); virtualmem::init(); + + #[cfg(feature = "common-os")] + unsafe { + crate::scheduler::BOOT_ROOT_PAGE_TABLE + .set(x86::controlregs::cr3().try_into().unwrap()) + .unwrap(); + } } diff --git a/src/arch/x86_64/mm/paging.rs b/src/arch/x86_64/mm/paging.rs index 1f44845a81..fcff993c71 100644 --- a/src/arch/x86_64/mm/paging.rs +++ b/src/arch/x86_64/mm/paging.rs @@ -2,15 +2,18 @@ use core::fmt::Debug; use core::ptr; use x86_64::instructions::tlb; -use x86_64::registers::control::Cr3; +use x86_64::registers::control::Cr2; +pub use x86_64::structures::idt::InterruptStackFrame as ExceptionStackFrame; +use x86_64::structures::idt::PageFaultErrorCode; use x86_64::structures::paging::mapper::{TranslateResult, UnmapError}; pub use x86_64::structures::paging::PageTableFlags as PageTableEntryFlags; use x86_64::structures::paging::{ - Mapper, Page, PageTable, PageTableIndex, PhysFrame, RecursivePageTable, Size2MiB, Translate, + Mapper, Page, PageTableIndex, PhysFrame, RecursivePageTable, Size2MiB, Translate, }; +use crate::arch::x86_64::kernel::processor; use crate::arch::x86_64::mm::{physicalmem, PhysAddr, VirtAddr}; 
-use crate::{env, mm}; +use crate::{env, mm, scheduler}; pub trait PageTableEntryFlagsExt { fn device(&mut self) -> &mut Self; @@ -22,6 +25,12 @@ pub trait PageTableEntryFlagsExt { fn writable(&mut self) -> &mut Self; fn execute_disable(&mut self) -> &mut Self; + + fn execute_enable(&mut self) -> &mut Self; + + fn user(&mut self) -> &mut Self; + + fn kernel(&mut self) -> &mut Self; } impl PageTableEntryFlagsExt for PageTableEntryFlags { @@ -49,6 +58,21 @@ impl PageTableEntryFlagsExt for PageTableEntryFlags { self.insert(PageTableEntryFlags::NO_EXECUTE); self } + + fn execute_enable(&mut self) -> &mut Self { + self.remove(PageTableEntryFlags::NO_EXECUTE); + self + } + + fn user(&mut self) -> &mut Self { + self.insert(PageTableEntryFlags::USER_ACCESSIBLE); + self + } + + fn kernel(&mut self) -> &mut Self { + self.remove(PageTableEntryFlags::USER_ACCESSIBLE); + self + } } pub use x86_64::structures::paging::{ @@ -218,7 +242,7 @@ where // FIXME: Some sentinel pages around stacks are supposed to be unmapped. // We should handle this case there instead of here. 
Err(UnmapError::PageNotMapped) => { - debug!("Tried to unmap {page:?}, which was not mapped.") + info!("Tried to unmap {page:?}, which was not mapped.") } Err(err) => panic!("{err:?}"), } @@ -230,6 +254,39 @@ pub fn get_application_page_size() -> usize { LargePageSize::SIZE as usize } +#[cfg(not(feature = "common-os"))] +pub(crate) extern "x86-interrupt" fn page_fault_handler( + stack_frame: ExceptionStackFrame, + error_code: PageFaultErrorCode, +) { + error!("Page fault (#PF)!"); + error!("page_fault_linear_address = {:p}", Cr2::read()); + error!("error_code = {error_code:?}"); + error!("fs = {:#X}", processor::readfs()); + error!("gs = {:#X}", processor::readgs()); + error!("stack_frame = {stack_frame:#?}"); + scheduler::abort(); +} + +#[cfg(feature = "common-os")] +pub(crate) extern "x86-interrupt" fn page_fault_handler( + mut stack_frame: ExceptionStackFrame, + error_code: PageFaultErrorCode, +) { + unsafe { + if stack_frame.as_mut().read().code_segment != 0x08 { + core::arch::asm!("swapgs", options(nostack)); + } + } + error!("Page fault (#PF)!"); + error!("page_fault_linear_address = {:p}", Cr2::read()); + error!("error_code = {error_code:?}"); + error!("fs = {:#X}", processor::readfs()); + error!("gs = {:#X}", processor::readgs()); + error!("stack_frame = {stack_frame:#?}"); + scheduler::abort(); +} + pub fn init() {} pub fn init_page_tables() { @@ -306,7 +363,7 @@ unsafe fn disect(pt: PT, virt_addr: x86_64::VirtAddr) { } #[allow(dead_code)] -unsafe fn print_page_tables(levels: usize) { +pub(crate) unsafe fn print_page_tables(levels: usize) { assert!((1..=4).contains(&levels)); fn print(table: &x86_64::structures::paging::PageTable, level: usize, min_level: usize) { @@ -332,14 +389,14 @@ unsafe fn print_page_tables(levels: usize) { } // Recursive - // let mut recursive_page_table = unsafe { recursive_page_table() }; - // let pt = recursive_page_table.level_4_table(); + let mut recursive_page_table = unsafe { recursive_page_table() }; + let pt = 
recursive_page_table.level_4_table(); // Identity mapped - let level_4_table_addr = Cr3::read().0.start_address().as_u64(); - let level_4_table_ptr = - ptr::from_exposed_addr::(level_4_table_addr.try_into().unwrap()); - let pt = unsafe { &*level_4_table_ptr }; + //let level_4_table_addr = Cr3::read().0.start_address().as_u64(); + //let level_4_table_ptr = + // ptr::from_exposed_addr::(level_4_table_addr.try_into().unwrap()); + //let pt = unsafe { &*level_4_table_ptr }; print(pt, 4, 5 - levels); } diff --git a/src/arch/x86_64/mm/virtualmem.rs b/src/arch/x86_64/mm/virtualmem.rs index 2645d7bd43..fd849b4c59 100644 --- a/src/arch/x86_64/mm/virtualmem.rs +++ b/src/arch/x86_64/mm/virtualmem.rs @@ -140,13 +140,19 @@ pub fn print_information() { /// End of the virtual memory address space reserved for kernel memory. /// This also marks the start of the virtual memory address space reserved for the task heap. /// In case of pure rust applications, we don't have a task heap. -#[cfg(not(feature = "newlib"))] +#[cfg(all(not(feature = "common-os"), not(feature = "newlib")))] #[inline] pub const fn kernel_heap_end() -> VirtAddr { VirtAddr(0x8000_0000_0000u64) } -#[cfg(feature = "newlib")] +#[cfg(all(feature = "common-os", not(feature = "newlib")))] +#[inline] +pub const fn kernel_heap_end() -> VirtAddr { + VirtAddr(0x200_0000_0000u64) +} + +#[cfg(all(not(featur = "common-os"), feature = "newlib"))] #[inline] pub const fn kernel_heap_end() -> VirtAddr { VirtAddr(0x1_0000_0000u64) diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 7809b7212c..e4ccd0eb34 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -1,6 +1,25 @@ pub mod kernel; pub mod mm; +use crate::arch::mm::paging::ExceptionStackFrame; + +/// Helper function to swap the GS register, if the user-space is +/// is interrupted. 
+#[cfg(feature = "common-os")] +#[inline(always)] +pub(crate) fn swapgs(stack_frame: &ExceptionStackFrame) { + use core::arch::asm; + if stack_frame.code_segment != 8 { + unsafe { + asm!("swapgs", options(nomem, nostack, preserves_flags)); + } + } +} + +#[cfg(not(feature = "common-os"))] +#[inline(always)] +pub(crate) fn swapgs(_stack_frame: &ExceptionStackFrame) {} + /// Force strict CPU ordering, serializes load and store operations. #[allow(dead_code)] #[inline(always)] diff --git a/src/config.rs b/src/config.rs index 9acbcd848f..8b32b25ca8 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,6 +1,6 @@ pub(crate) const KERNEL_STACK_SIZE: usize = 32_768; -pub(crate) const DEFAULT_STACK_SIZE: usize = 65_536; +pub const DEFAULT_STACK_SIZE: usize = 65_536; pub(crate) const USER_STACK_SIZE: usize = 1_048_576; diff --git a/src/drivers/net/mod.rs b/src/drivers/net/mod.rs index d56e73e5b6..66ab04aa93 100644 --- a/src/drivers/net/mod.rs +++ b/src/drivers/net/mod.rs @@ -71,7 +71,8 @@ pub(crate) fn network_irqhandler(_state: &State) -> bool { } #[cfg(target_arch = "x86_64")] -pub(crate) extern "x86-interrupt" fn network_irqhandler(_stack_frame: ExceptionStackFrame) { +pub(crate) extern "x86-interrupt" fn network_irqhandler(stack_frame: ExceptionStackFrame) { + crate::arch::x86_64::swapgs(&stack_frame); use crate::scheduler::PerCoreSchedulerExt; debug!("Receive network interrupt"); @@ -79,6 +80,7 @@ pub(crate) extern "x86-interrupt" fn network_irqhandler(_stack_frame: ExceptionS let _ = _irqhandler(); core_scheduler().reschedule(); + crate::arch::x86_64::swapgs(&stack_frame); } #[cfg(target_arch = "riscv64")] diff --git a/src/env.rs b/src/env.rs index 09d6c0a5ba..ccbc37892b 100644 --- a/src/env.rs +++ b/src/env.rs @@ -10,7 +10,7 @@ use hashbrown::HashMap; use hermit_entry::boot_info::PlatformInfo; use hermit_sync::OnceCell; -pub use crate::arch::kernel::{self, get_base_address, get_image_size, get_ram_address}; +pub(crate) use crate::arch::kernel::{self, get_base_address, 
get_image_size, get_ram_address}; use crate::kernel::boot_info; static CLI: OnceCell = OnceCell::new(); diff --git a/src/lib.rs b/src/lib.rs index 2626d9d374..cb128aed56 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -50,7 +50,9 @@ extern crate std; #[macro_use] extern crate num_derive; +#[cfg(not(feature = "common-os"))] use alloc::alloc::Layout; +#[cfg(not(feature = "common-os"))] use core::alloc::GlobalAlloc; #[cfg(feature = "smp")] use core::hint::spin_loop; @@ -64,6 +66,7 @@ pub use env::is_uhyve as _is_uhyve; use mm::allocator::LockedAllocator; pub(crate) use crate::arch::*; +pub use crate::config::DEFAULT_STACK_SIZE; pub(crate) use crate::config::*; pub use crate::fs::create_file; use crate::kernel::is_uhyve_with_pci; @@ -76,7 +79,7 @@ mod macros; #[macro_use] mod logging; -mod arch; +pub mod arch; mod config; pub mod console; mod drivers; @@ -88,7 +91,7 @@ pub mod fd; pub mod fs; pub mod io; mod mm; -mod scheduler; +pub mod scheduler; mod synch; pub mod syscalls; pub mod time; @@ -132,7 +135,7 @@ static ALLOCATOR: LockedAllocator = LockedAllocator::new(); /// Returning a null pointer indicates that either memory is exhausted or /// `size` and `align` do not meet this allocator's size or alignment constraints. /// -#[cfg(target_os = "none")] +#[cfg(all(target_os = "none", not(feature = "common-os")))] pub(crate) extern "C" fn __sys_malloc(size: usize, align: usize) -> *mut u8 { let layout_res = Layout::from_size_align(size, align); if layout_res.is_err() || size == 0 { @@ -174,7 +177,7 @@ pub(crate) extern "C" fn __sys_malloc(size: usize, align: usize) -> *mut u8 { /// # Errors /// Returns null if the new layout does not meet the size and alignment constraints of the /// allocator, or if reallocation otherwise fails. 
-#[cfg(target_os = "none")] +#[cfg(all(target_os = "none", not(feature = "common-os")))] pub(crate) extern "C" fn __sys_realloc( ptr: *mut u8, size: usize, @@ -219,7 +222,7 @@ pub(crate) extern "C" fn __sys_realloc( /// /// # Errors /// May panic if debug assertions are enabled and invalid parameters `size` or `align` where passed. -#[cfg(target_os = "none")] +#[cfg(all(target_os = "none", not(feature = "common-os")))] pub(crate) extern "C" fn __sys_free(ptr: *mut u8, size: usize, align: usize) { unsafe { let layout_res = Layout::from_size_align(size, align); diff --git a/src/mm/mod.rs b/src/mm/mod.rs index ca6bdfc385..815960c350 100644 --- a/src/mm/mod.rs +++ b/src/mm/mod.rs @@ -104,12 +104,13 @@ pub(crate) fn init() { - reserved_space) .align_down(LargePageSize::SIZE as usize); - // we reserve 10% of the memory for stack allocations - let stack_reserve: usize = (available_memory * 10) / 100; let heap_start_addr; - #[cfg(feature = "newlib")] + #[cfg(all(feature = "newlib", not(feature = "common-os")))] { + // we reserve 10% of the memory for stack allocations + let stack_reserve: usize = (available_memory * 10) / 100; + info!("An application with a C-based runtime is running on top of Hermit!"); let kernel_heap_size = 10 * LargePageSize::SIZE as usize; @@ -131,8 +132,56 @@ pub(crate) fn init() { heap_start_addr = map_addr; } - #[cfg(not(feature = "newlib"))] + #[cfg(all(not(feature = "newlib"), feature = "common-os"))] + { + info!("Using HermitOS as common OS!"); + + // we reserve at least 75% of the memory for the user space + let reserve: usize = (available_memory * 75) / 100; + // 64 MB is enough as kernel heap + let reserve = core::cmp::min(reserve, 0x4000000); + + let virt_size: usize = reserve.align_down(LargePageSize::SIZE as usize); + let virt_addr = + arch::mm::virtualmem::allocate_aligned(virt_size, LargePageSize::SIZE as usize) + .unwrap(); + heap_start_addr = virt_addr; + + info!( + "Heap: size {} MB, start address {:p}", + virt_size >> 20, + virt_addr 
+ ); + + #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))] + if has_1gib_pages && virt_size > HugePageSize::SIZE as usize { + // Mount large pages to the next huge page boundary + let npages = (virt_addr.align_up_to_huge_page().as_usize() - virt_addr.as_usize()) + / LargePageSize::SIZE as usize; + if let Err(n) = paging::map_heap::(virt_addr, npages) { + map_addr = virt_addr + n * LargePageSize::SIZE as usize; + map_size = virt_size - (map_addr - virt_addr).as_usize(); + } else { + map_addr = virt_addr.align_up_to_huge_page(); + map_size = virt_size - (map_addr - virt_addr).as_usize(); + } + } else { + map_addr = virt_addr; + map_size = virt_size; + } + + #[cfg(not(any(target_arch = "x86_64", target_arch = "riscv64")))] + { + map_addr = virt_addr; + map_size = virt_size; + } + } + + #[cfg(all(not(feature = "newlib"), not(feature = "common-os")))] { + // we reserve 10% of the memory for stack allocations + let stack_reserve: usize = (available_memory * 10) / 100; + info!("A pure Rust application is running on top of Hermit!"); // At first, we map only a small part into the heap. 
diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index 6637c31e20..22a5d8648f 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -14,15 +14,15 @@ use riscv::register::sstatus; use crate::arch; use crate::arch::core_local::*; -use crate::arch::interrupts; #[cfg(target_arch = "riscv64")] use crate::arch::switch::switch_to_task; #[cfg(target_arch = "x86_64")] use crate::arch::switch::{switch_to_fpu_owner, switch_to_task}; +use crate::arch::{get_processor_count, interrupts}; use crate::kernel::scheduler::TaskStacks; use crate::scheduler::task::*; -pub(crate) mod task; +pub mod task; static NO_TASKS: AtomicU32 = AtomicU32::new(0); /// Map between Core ID and per-core scheduler @@ -40,7 +40,7 @@ static TASKS: InterruptTicketMutex> = pub type CoreId = u32; #[cfg(feature = "smp")] -pub struct SchedulerInput { +pub(crate) struct SchedulerInput { /// Queue of new tasks new_tasks: VecDeque, /// Queue of task, which are wakeup by another core @@ -62,7 +62,7 @@ impl SchedulerInput { not(any(target_arch = "x86_64", target_arch = "aarch64")), repr(align(64)) )] -pub struct PerCoreScheduler { +pub(crate) struct PerCoreScheduler { /// Core ID of this per-core scheduler #[cfg(feature = "smp")] core_id: CoreId, @@ -81,7 +81,7 @@ pub struct PerCoreScheduler { blocked_tasks: BlockedTaskQueue, } -pub trait PerCoreSchedulerExt { +pub(crate) trait PerCoreSchedulerExt { /// Triggers the scheduler to reschedule the tasks. /// Interrupt flag will be cleared during the reschedule fn reschedule(self); @@ -708,12 +708,12 @@ fn get_tid() -> TaskId { } #[inline] -pub fn abort() -> ! { +pub(crate) fn abort() -> ! { core_scheduler().exit(-1) } /// Add a per-core scheduler for the current core. -pub fn add_current_core() { +pub(crate) fn add_current_core() { // Create an idle task for this core. 
let core_id = core_id(); let tid = get_tid(); @@ -764,6 +764,30 @@ fn get_scheduler_input(core_id: CoreId) -> &'static InterruptTicketMutex TaskId { + static CORE_COUNTER: AtomicU32 = AtomicU32::new(1); + + let core_id = if selector < 0 { + // use Round Robin to schedule the cores + CORE_COUNTER.fetch_add(1, Ordering::SeqCst) % get_processor_count() + } else { + selector as u32 + }; + + PerCoreScheduler::spawn(func, arg, prio, core_id, stack_size) +} + +pub fn getpid() -> TaskId { + core_scheduler().get_current_task_id() +} + +#[allow(clippy::result_unit_err)] pub fn join(id: TaskId) -> Result<(), ()> { let core_scheduler = core_scheduler(); @@ -792,3 +816,12 @@ pub fn join(id: TaskId) -> Result<(), ()> { fn get_task_handle(id: TaskId) -> Option { TASKS.lock().get(&id).copied() } + +#[cfg(all(target_arch = "x86_64", feature = "common-os"))] +pub(crate) static BOOT_ROOT_PAGE_TABLE: OnceCell = OnceCell::new(); + +#[cfg(all(target_arch = "x86_64", feature = "common-os"))] +pub(crate) fn get_root_page_table() -> usize { + let current_task_borrowed = core_scheduler().current_task.borrow_mut(); + current_task_borrowed.root_page_table +} diff --git a/src/scheduler/task.rs b/src/scheduler/task.rs index 735169ac0d..681b435313 100644 --- a/src/scheduler/task.rs +++ b/src/scheduler/task.rs @@ -1,3 +1,4 @@ +#[cfg(not(feature = "common-os"))] use alloc::boxed::Box; use alloc::collections::{LinkedList, VecDeque}; use alloc::rc::Rc; @@ -12,7 +13,9 @@ use core::ops::DerefMut; use crate::arch; use crate::arch::core_local::*; use crate::arch::mm::VirtAddr; -use crate::arch::scheduler::{TaskStacks, TaskTLS}; +use crate::arch::scheduler::TaskStacks; +#[cfg(not(feature = "common-os"))] +use crate::arch::scheduler::TaskTLS; use crate::scheduler::CoreId; /// Returns the most significant bit. 
@@ -31,7 +34,7 @@ fn msb(n: u64) -> Option { /// The status of the task - used for scheduling #[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub enum TaskStatus { +pub(crate) enum TaskStatus { Invalid, Ready, Running, @@ -91,7 +94,7 @@ pub const IDLE_PRIO: Priority = Priority::from(0); pub const NO_PRIORITIES: usize = 31; #[derive(Copy, Clone, Debug)] -pub struct TaskHandle { +pub(crate) struct TaskHandle { id: TaskId, priority: Priority, #[cfg(feature = "smp")] @@ -144,7 +147,7 @@ impl Eq for TaskHandle {} /// Realize a priority queue for task handles #[derive(Default)] -pub struct TaskHandlePriorityQueue { +pub(crate) struct TaskHandlePriorityQueue { queues: [Option>; NO_PRIORITIES], prio_bitmap: u64, } @@ -240,7 +243,7 @@ impl TaskHandlePriorityQueue { } /// Realize a priority queue for tasks -pub struct PriorityTaskQueue { +pub(crate) struct PriorityTaskQueue { queues: [LinkedList>>; NO_PRIORITIES], prio_bitmap: u64, } @@ -358,7 +361,7 @@ impl PriorityTaskQueue { not(any(target_arch = "x86_64", target_arch = "aarch64")), repr(align(64)) )] -pub struct Task { +pub(crate) struct Task { /// The ID of this context pub id: TaskId, /// Status of a task, e.g. 
if the task is ready or blocked @@ -376,13 +379,17 @@ pub struct Task { /// Stack of the task pub stacks: TaskStacks, /// Task Thread-Local-Storage (TLS) + #[cfg(not(feature = "common-os"))] pub tls: Option>, + // Physical address of the 1st level page table + #[cfg(all(target_arch = "x86_64", feature = "common-os"))] + pub root_page_table: usize, /// lwIP error code for this task #[cfg(feature = "newlib")] pub lwip_errno: i32, } -pub trait TaskFrame { +pub(crate) trait TaskFrame { /// Create the initial stack frame for a new task fn create_stack_frame(&mut self, func: extern "C" fn(usize), arg: usize); } @@ -406,7 +413,10 @@ impl Task { last_fpu_state: arch::processor::FPUState::new(), core_id, stacks, + #[cfg(not(feature = "common-os"))] tls: None, + #[cfg(all(target_arch = "x86_64", feature = "common-os"))] + root_page_table: arch::create_new_root_page_table(), #[cfg(feature = "newlib")] lwip_errno: 0, } @@ -424,7 +434,10 @@ impl Task { last_fpu_state: arch::processor::FPUState::new(), core_id, stacks: TaskStacks::from_boot_stacks(), + #[cfg(not(feature = "common-os"))] tls: None, + #[cfg(all(target_arch = "x86_64", feature = "common-os"))] + root_page_table: *crate::scheduler::BOOT_ROOT_PAGE_TABLE.get().unwrap(), #[cfg(feature = "newlib")] lwip_errno: 0, } @@ -448,7 +461,7 @@ impl BlockedTask { } } -pub struct BlockedTaskQueue { +pub(crate) struct BlockedTaskQueue { list: LinkedList, #[cfg(any(feature = "tcp", feature = "udp"))] network_wakeup_time: Option, diff --git a/src/syscalls/entropy.rs b/src/syscalls/entropy.rs index 32db7fc73e..e93ae6e564 100644 --- a/src/syscalls/entropy.rs +++ b/src/syscalls/entropy.rs @@ -49,6 +49,7 @@ unsafe extern "C" fn __sys_read_entropy(buf: *mut u8, len: usize, flags: u32) -> /// Returns either the number of bytes written to buf (a positive value) or /// * `-EINVAL` if `flags` contains unknown flags. /// * `-ENOSYS` if the system does not support random data generation. 
+#[allow(unsafe_op_in_unsafe_fn)] #[no_mangle] #[cfg_attr(target_arch = "riscv64", allow(unsafe_op_in_unsafe_fn))] // FIXME pub unsafe extern "C" fn sys_read_entropy(buf: *mut u8, len: usize, flags: u32) -> isize { diff --git a/src/syscalls/mod.rs b/src/syscalls/mod.rs index 04f7fba4e1..b8ca9e79ad 100644 --- a/src/syscalls/mod.rs +++ b/src/syscalls/mod.rs @@ -25,7 +25,7 @@ use crate::fd::{ }; use crate::fs::{self, FileAttr}; use crate::syscalls::interfaces::SyscallInterface; -#[cfg(target_os = "none")] +#[cfg(all(target_os = "none", not(feature = "common-os")))] use crate::{__sys_free, __sys_malloc, __sys_realloc}; mod condvar; @@ -42,6 +42,8 @@ mod recmutex; mod semaphore; mod spinlock; mod system; +#[cfg(feature = "common-os")] +pub(crate) mod table; mod tasks; mod timer; @@ -70,19 +72,19 @@ pub(crate) fn init() { sbrk_init(); } -#[cfg(target_os = "none")] +#[cfg(all(target_os = "none", not(feature = "common-os")))] #[no_mangle] pub extern "C" fn sys_malloc(size: usize, align: usize) -> *mut u8 { kernel_function!(__sys_malloc(size, align)) } -#[cfg(target_os = "none")] +#[cfg(all(target_os = "none", not(feature = "common-os")))] #[no_mangle] pub extern "C" fn sys_realloc(ptr: *mut u8, size: usize, align: usize, new_size: usize) -> *mut u8 { kernel_function!(__sys_realloc(ptr, size, align, new_size)) } -#[cfg(target_os = "none")] +#[cfg(all(target_os = "none", not(feature = "common-os")))] #[no_mangle] pub extern "C" fn sys_free(ptr: *mut u8, size: usize, align: usize) { kernel_function!(__sys_free(ptr, size, align)) diff --git a/src/syscalls/table.rs b/src/syscalls/table.rs new file mode 100644 index 0000000000..c0b1152598 --- /dev/null +++ b/src/syscalls/table.rs @@ -0,0 +1,88 @@ +use core::arch::asm; + +use crate::syscalls::*; + +/// number of the system call `exit` +const SYSNO_EXIT: usize = 0; +/// number of the system call `write` +const SYSNO_WRITE: usize = 1; +/// number of the system call `read` +const SYSNO_READ: usize = 2; +/// number of the system call 
`abort` +const SYSNO_ABORT: usize = 3; +/// number of the system call `usleep` +const SYSNO_USLEEP: usize = 4; +/// number of the system call `getpid` +const SYSNO_GETPID: usize = 5; +/// number of the system call `yield` +const SYSNO_YIELD: usize = 6; +/// number of the system call `read_entropy` +const SYSNO_READ_ENTROPY: usize = 7; +/// number of the system call `get_processor_count` +const SYSNO_GET_PROCESSOR_COUNT: usize = 8; +/// number of the system call `close` +const SYSNO_CLOSE: usize = 9; +/// number of the system call `futex_wait` +const SYSNO_FUTEX_WAIT: usize = 10; +/// number of the system call `futex_wake` +const SYSNO_FUTEX_WAKE: usize = 11; +/// number of the system call `open` +const SYSNO_OPEN: usize = 12; + +/// total number of system calls +const NO_SYSCALLS: usize = 32; + +extern "C" fn invalid_syscall(sys_no: u64) -> ! { + error!("Invalid syscall {}", sys_no); + sys_exit(1); +} + +#[allow(unused_assignments)] +#[no_mangle] +#[naked] +pub(crate) unsafe extern "C" fn sys_invalid() { + unsafe { + asm!( + "mov rdi, rax", + "call {}", + sym invalid_syscall, + options(noreturn) + ); + } +} + +#[repr(align(64))] +#[repr(C)] +pub(crate) struct SyscallTable { + handle: [*const usize; NO_SYSCALLS], +} + +impl SyscallTable { + pub const fn new() -> Self { + let mut table = SyscallTable { + handle: [sys_invalid as *const _; NO_SYSCALLS], + }; + + table.handle[SYSNO_EXIT] = sys_exit as *const _; + table.handle[SYSNO_WRITE] = sys_write as *const _; + table.handle[SYSNO_READ] = sys_read as *const _; + table.handle[SYSNO_ABORT] = sys_abort as *const _; + table.handle[SYSNO_USLEEP] = sys_usleep as *const _; + table.handle[SYSNO_GETPID] = sys_getpid as *const _; + table.handle[SYSNO_YIELD] = sys_yield as *const _; + table.handle[SYSNO_READ_ENTROPY] = sys_read_entropy as *const _; + table.handle[SYSNO_GET_PROCESSOR_COUNT] = sys_get_processor_count as *const _; + table.handle[SYSNO_CLOSE] = sys_close as *const _; + table.handle[SYSNO_FUTEX_WAIT] = 
sys_futex_wait as *const _; + table.handle[SYSNO_FUTEX_WAKE] = sys_futex_wake as *const _; + table.handle[SYSNO_OPEN] = sys_open as *const _; + + table + } +} + +unsafe impl Send for SyscallTable {} +unsafe impl Sync for SyscallTable {} + +#[no_mangle] +pub(crate) static SYSHANDLER_TABLE: SyscallTable = SyscallTable::new(); diff --git a/src/syscalls/tasks.rs b/src/syscalls/tasks.rs index 62ec673715..dfe07097d7 100644 --- a/src/syscalls/tasks.rs +++ b/src/syscalls/tasks.rs @@ -1,13 +1,11 @@ use alloc::collections::BTreeMap; use core::isize; #[cfg(feature = "newlib")] -use core::sync::atomic::AtomicUsize; -use core::sync::atomic::{AtomicU32, Ordering}; +use core::sync::atomic::{AtomicUsize, Ordering}; use hermit_sync::InterruptTicketMutex; use crate::arch::core_local::*; -use crate::arch::get_processor_count; use crate::arch::processor::{get_frequency, get_timestamp}; use crate::config::USER_STACK_SIZE; use crate::errno::*; @@ -229,17 +227,7 @@ extern "C" fn __sys_spawn2( stack_size: usize, selector: isize, ) -> Tid { - static CORE_COUNTER: AtomicU32 = AtomicU32::new(1); - - let core_id = if selector < 0 { - // use Round Robin to schedule the cores - CORE_COUNTER.fetch_add(1, Ordering::SeqCst) % get_processor_count() - } else { - selector as u32 - }; - - scheduler::PerCoreScheduler::spawn(func, arg, Priority::from(prio), core_id, stack_size).into() - as Tid + scheduler::spawn(func, arg, Priority::from(prio), stack_size, selector).into() } #[no_mangle] From 5cd50f9b556200bcc5da07673f670f70625ea394 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Fri, 16 Feb 2024 15:44:13 +0100 Subject: [PATCH 02/10] remove clippy warnings --- src/scheduler/task.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/scheduler/task.rs b/src/scheduler/task.rs index 681b435313..081116562c 100644 --- a/src/scheduler/task.rs +++ b/src/scheduler/task.rs @@ -373,6 +373,7 @@ pub(crate) struct Task { /// Last stack pointer on the user stack before jumping to kernel space pub 
user_stack_pointer: VirtAddr, /// Last FPU state before a context switch to another task using the FPU + #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))] pub last_fpu_state: arch::processor::FPUState, /// ID of the core this task is running on pub core_id: CoreId, @@ -410,6 +411,7 @@ impl Task { prio: task_prio, last_stack_pointer: VirtAddr(0u64), user_stack_pointer: VirtAddr(0u64), + #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))] last_fpu_state: arch::processor::FPUState::new(), core_id, stacks, @@ -431,6 +433,7 @@ impl Task { prio: IDLE_PRIO, last_stack_pointer: VirtAddr(0u64), user_stack_pointer: VirtAddr(0u64), + #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))] last_fpu_state: arch::processor::FPUState::new(), core_id, stacks: TaskStacks::from_boot_stacks(), From 492f6ef78ff2a96a577227a0a4c827e48b091df2 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Fri, 16 Feb 2024 15:48:23 +0100 Subject: [PATCH 03/10] remove dead code --- src/arch/aarch64/kernel/processor.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/arch/aarch64/kernel/processor.rs b/src/arch/aarch64/kernel/processor.rs index 50faf312f9..3ba97d4b2b 100644 --- a/src/arch/aarch64/kernel/processor.rs +++ b/src/arch/aarch64/kernel/processor.rs @@ -91,14 +91,6 @@ impl fmt::Display for CpuFrequency { } } -pub struct FPUState; - -impl FPUState { - pub fn new() -> Self { - Self {} - } -} - pub fn seed_entropy() -> Option<[u8; 32]> { None } From f7ab5c85e10be4843fc5f5756ca38a8d95b97132 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Fri, 16 Feb 2024 16:11:30 +0100 Subject: [PATCH 04/10] remove some typos --- src/arch/x86_64/mm/virtualmem.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arch/x86_64/mm/virtualmem.rs b/src/arch/x86_64/mm/virtualmem.rs index fd849b4c59..3b751cedc6 100644 --- a/src/arch/x86_64/mm/virtualmem.rs +++ b/src/arch/x86_64/mm/virtualmem.rs @@ -149,10 +149,10 @@ pub const fn kernel_heap_end() -> VirtAddr { 
#[cfg(all(feature = "common-os", not(feature = "newlib")))] #[inline] pub const fn kernel_heap_end() -> VirtAddr { - VirtAddr(0x200_0000_0000u64) + VirtAddr(0x100_0000_0000u64) } -#[cfg(all(not(featur = "common-os"), feature = "newlib"))] +#[cfg(all(not(feature = "common-os"), feature = "newlib"))] #[inline] pub const fn kernel_heap_end() -> VirtAddr { VirtAddr(0x1_0000_0000u64) From 05379adf8b07ec13e98f7b2d619dd5741b99c6f7 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Sat, 17 Feb 2024 15:24:13 +0100 Subject: [PATCH 05/10] move file descriptor map to the task control block Currently, all tasks / threads share the same hash map. In the future, a process will be able to create its own hash map. --- src/executor/mod.rs | 4 +- src/fd/mod.rs | 136 ++++++------------------------------ src/fs/mod.rs | 11 +--- src/lib.rs | 1 - src/scheduler/mod.rs | 125 +++++++++++++++++++++++++++++++++- src/scheduler/task.rs | 74 +++++++++++++++++++++-- src/syscalls/net.rs | 14 ++--- 7 files changed, 219 insertions(+), 146 deletions(-) diff --git a/src/executor/mod.rs b/src/executor/mod.rs index 5025965b63..3105e76caf 100644 --- a/src/executor/mod.rs +++ b/src/executor/mod.rs @@ -134,7 +134,7 @@ where // run background tasks run(); - if let Poll::Ready(t) = future.as_mut().poll(&mut cx) { + if let Poll::Ready(t) = without_interrupts(|| future.as_mut().poll(&mut cx)) { #[cfg(any(feature = "tcp", feature = "udp"))] if !no_retransmission { let wakeup_time = @@ -205,7 +205,7 @@ where run(); let now = now(); - if let Poll::Ready(t) = future.as_mut().poll(&mut cx) { + if let Poll::Ready(t) = without_interrupts(|| future.as_mut().poll(&mut cx)) { #[cfg(any(feature = "tcp", feature = "udp"))] if !no_retransmission { let network_timer = diff --git a/src/fd/mod.rs b/src/fd/mod.rs index e871d38426..c5417e7737 100644 --- a/src/fd/mod.rs +++ b/src/fd/mod.rs @@ -2,30 +2,26 @@ use alloc::boxed::Box; use alloc::sync::Arc; use alloc::vec::Vec; use core::future::{self, Future}; -use 
core::sync::atomic::{AtomicI32, Ordering}; use core::task::Poll::{Pending, Ready}; use core::time::Duration; -use ahash::RandomState; use async_trait::async_trait; use dyn_clone::DynClone; -use hashbrown::HashMap; #[cfg(all(any(feature = "tcp", feature = "udp"), not(feature = "newlib")))] use smoltcp::wire::{IpEndpoint, IpListenEndpoint}; -use crate::env; +use crate::arch::kernel::core_local::core_scheduler; use crate::executor::{block_on, poll_on}; -use crate::fd::stdio::*; use crate::fs::{self, DirectoryEntry, FileAttr, SeekWhence}; mod eventfd; #[cfg(all(any(feature = "tcp", feature = "udp"), not(feature = "newlib")))] pub(crate) mod socket; -mod stdio; +pub(crate) mod stdio; -const STDIN_FILENO: FileDescriptor = 0; -const STDOUT_FILENO: FileDescriptor = 1; -const STDERR_FILENO: FileDescriptor = 2; +pub(crate) const STDIN_FILENO: FileDescriptor = 0; +pub(crate) const STDOUT_FILENO: FileDescriptor = 1; +pub(crate) const STDERR_FILENO: FileDescriptor = 2; // TODO: Integrate with src/errno.rs ? #[allow(clippy::upper_case_acronyms)] @@ -46,6 +42,7 @@ pub enum IoError { EMFILE = crate::errno::EMFILE as isize, EEXIST = crate::errno::EEXIST as isize, EADDRINUSE = crate::errno::EADDRINUSE as isize, + EOVERFLOW = crate::errno::EOVERFLOW as isize, } #[allow(dead_code)] @@ -62,17 +59,6 @@ pub(crate) enum IoCtl { pub(crate) type FileDescriptor = i32; -/// Mapping between file descriptor and the referenced object -static OBJECT_MAP: async_lock::RwLock< - HashMap, RandomState>, -> = async_lock::RwLock::new(HashMap::< - FileDescriptor, - Arc, - RandomState, ->::with_hasher(RandomState::with_seeds(0, 0, 0, 0))); -/// Atomic counter to determine the next unused file descriptor -pub(crate) static FD_COUNTER: AtomicI32 = AtomicI32::new(3); - bitflags! 
{ /// Options for opening files #[derive(Debug, Copy, Clone, Default)] @@ -309,17 +295,8 @@ pub(crate) fn open( let fs = fs::FILESYSTEM.get().unwrap(); if let Ok(file) = fs.open(name, flags, mode) { - let fd = FD_COUNTER.fetch_add(1, Ordering::SeqCst); - block_on( - async { - if OBJECT_MAP.write().await.try_insert(fd, file).is_err() { - Err(IoError::EINVAL) - } else { - Ok(fd as FileDescriptor) - } - }, - None, - ) + let fd = insert_object(file)?; + Ok(fd) } else { Err(IoError::EINVAL) } @@ -384,7 +361,7 @@ async fn poll_fds(fds: &mut [PollFd]) -> Result { for i in &mut *fds { let fd = i.fd; i.revents = PollEvent::empty(); - let mut pinned_obj = core::pin::pin!(async_get_object(fd)); + let mut pinned_obj = core::pin::pin!(core_scheduler().get_object(fd)); if let Ready(Ok(obj)) = pinned_obj.as_mut().poll(cx) { let mut pinned = core::pin::pin!(obj.poll(i.events)); if let Ready(Ok(e)) = pinned.as_mut().poll(cx) { @@ -431,36 +408,25 @@ pub fn poll(fds: &mut [PollFd], timeout: Option) -> Result Result { let obj = self::eventfd::EventFd::new(initval, flags); - let fd = FD_COUNTER.fetch_add(1, Ordering::SeqCst); - block_on(async_insert_object(fd, Arc::new(obj)), None)?; + let fd = block_on(core_scheduler().insert_object(Arc::new(obj)), None)?; Ok(fd) } -#[inline] -async fn async_get_object(fd: FileDescriptor) -> Result, IoError> { - Ok((*(OBJECT_MAP.read().await.get(&fd).ok_or(IoError::EINVAL)?)).clone()) -} - pub(crate) fn get_object(fd: FileDescriptor) -> Result, IoError> { - block_on(async_get_object(fd), None) + block_on(core_scheduler().get_object(fd), None) } -#[inline] -async fn async_insert_object( - fd: FileDescriptor, - obj: Arc, -) -> Result<(), IoError> { - let _ = OBJECT_MAP.write().await.insert(fd, obj); - Ok(()) +pub(crate) fn insert_object(obj: Arc) -> Result { + block_on(core_scheduler().insert_object(obj), None) } -pub(crate) fn insert_object( +pub(crate) fn replace_object( fd: FileDescriptor, obj: Arc, ) -> Result<(), IoError> { - 
block_on(async_insert_object(fd, obj), None) + block_on(core_scheduler().replace_object(fd, obj), None) } // The dup system call allocates a new file descriptor that refers @@ -468,77 +434,9 @@ pub(crate) fn insert_object( // file descriptor number is guaranteed to be the lowest-numbered // file descriptor that was unused in the calling process. pub(crate) fn dup_object(fd: FileDescriptor) -> Result { - block_on( - async { - let mut guard = OBJECT_MAP.write().await; - let obj = (*(guard.get(&fd).ok_or(IoError::EINVAL)?)).clone(); - - let new_fd = || -> i32 { - for i in 3..FD_COUNTER.load(Ordering::SeqCst) { - if !guard.contains_key(&i) { - return i; - } - } - FD_COUNTER.fetch_add(1, Ordering::SeqCst) - }; - - let fd = new_fd(); - if guard.try_insert(fd, obj).is_err() { - Err(IoError::EMFILE) - } else { - Ok(fd as FileDescriptor) - } - }, - None, - ) + block_on(core_scheduler().dup_object(fd), None) } pub(crate) fn remove_object(fd: FileDescriptor) -> Result, IoError> { - block_on( - async { - if fd <= 2 { - Err(IoError::EINVAL) - } else { - let obj = OBJECT_MAP - .write() - .await - .remove(&fd) - .ok_or(IoError::EINVAL)?; - Ok(obj) - } - }, - None, - ) -} - -pub(crate) fn init() -> Result<(), IoError> { - block_on( - async { - let mut guard = OBJECT_MAP.write().await; - if env::is_uhyve() { - guard - .try_insert(STDIN_FILENO, Arc::new(UhyveStdin::new())) - .map_err(|_| IoError::EIO)?; - guard - .try_insert(STDOUT_FILENO, Arc::new(UhyveStdout::new())) - .map_err(|_| IoError::EIO)?; - guard - .try_insert(STDERR_FILENO, Arc::new(UhyveStderr::new())) - .map_err(|_| IoError::EIO)?; - } else { - guard - .try_insert(STDIN_FILENO, Arc::new(GenericStdin::new())) - .map_err(|_| IoError::EIO)?; - guard - .try_insert(STDOUT_FILENO, Arc::new(GenericStdout::new())) - .map_err(|_| IoError::EIO)?; - guard - .try_insert(STDERR_FILENO, Arc::new(GenericStderr::new())) - .map_err(|_| IoError::EIO)?; - } - - Ok(()) - }, - None, - ) + block_on(core_scheduler().remove_object(fd), None) 
} diff --git a/src/fs/mod.rs b/src/fs/mod.rs index c9ce5382ce..de86fa3a0d 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -8,14 +8,11 @@ use alloc::boxed::Box; use alloc::string::{String, ToString}; use alloc::sync::Arc; use alloc::vec::Vec; -use core::sync::atomic::Ordering; use hermit_sync::OnceCell; use mem::MemDirectory; -use crate::fd::{ - insert_object, AccessPermission, IoError, ObjectInterface, OpenOption, FD_COUNTER, -}; +use crate::fd::{insert_object, AccessPermission, IoError, ObjectInterface, OpenOption}; use crate::io::Write; use crate::time::{timespec, SystemTime}; @@ -384,11 +381,7 @@ pub fn readdir(name: &str) -> Result, IoError> { /// Open a directory to read the directory entries pub(crate) fn opendir(name: &str) -> Result { let obj = FILESYSTEM.get().unwrap().opendir(name)?; - let fd = FD_COUNTER.fetch_add(1, Ordering::SeqCst); - - let _ = insert_object(fd, obj); - - Ok(fd) + Ok(insert_object(obj)?) } use crate::fd::{self, FileDescriptor}; diff --git a/src/lib.rs b/src/lib.rs index cb128aed56..cc16dc8452 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -281,7 +281,6 @@ extern "C" fn initd(_arg: usize) { riscv64::kernel::init_drivers(); syscalls::init(); - fd::init().expect("Unable to initialized standard file descriptors"); fs::init(); // Get the application arguments and environment variables. 
diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index 22a5d8648f..c8484bf002 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -1,13 +1,16 @@ use alloc::boxed::Box; use alloc::collections::{BTreeMap, VecDeque}; use alloc::rc::Rc; +use alloc::sync::Arc; #[cfg(feature = "smp")] use alloc::vec::Vec; use core::cell::RefCell; use core::ptr; use core::sync::atomic::{AtomicU32, Ordering}; +use ahash::RandomState; use crossbeam_utils::Backoff; +use hashbrown::HashMap; use hermit_sync::{without_interrupts, *}; #[cfg(target_arch = "riscv64")] use riscv::register::sstatus; @@ -19,6 +22,7 @@ use crate::arch::switch::switch_to_task; #[cfg(target_arch = "x86_64")] use crate::arch::switch::{switch_to_fpu_owner, switch_to_task}; use crate::arch::{get_processor_count, interrupts}; +use crate::fd::{FileDescriptor, IoError, ObjectInterface}; use crate::kernel::scheduler::TaskStacks; use crate::scheduler::task::*; @@ -199,6 +203,8 @@ struct NewTask { prio: Priority, core_id: CoreId, stacks: TaskStacks, + object_map: + Arc, RandomState>>>, } impl From for Task { @@ -210,8 +216,9 @@ impl From for Task { prio, core_id, stacks, + object_map, } = value; - let mut task = Self::new(tid, core_id, TaskStatus::Ready, prio, stacks); + let mut task = Self::new(tid, core_id, TaskStatus::Ready, prio, stacks, object_map); task.create_stack_frame(func, arg); task } @@ -236,6 +243,7 @@ impl PerCoreScheduler { prio, core_id, stacks, + object_map: core_scheduler().get_current_task_object_map(), }; // Add it to the task lists. 
@@ -445,6 +453,121 @@ impl PerCoreScheduler { without_interrupts(|| self.current_task.borrow().id) } + #[inline] + pub fn get_current_task_object_map( + &self, + ) -> Arc, RandomState>>> { + without_interrupts(|| self.current_task.borrow().object_map.clone()) + } + + #[inline] + pub async fn get_object( + &self, + fd: FileDescriptor, + ) -> Result, IoError> { + // executor disables the interrupts during the polling of the futures + // => Borrowing of the current_task is not interrupted by the scheduler + Ok((*(self + .current_task + .borrow() + .object_map + .read() + .await + .get(&fd) + .ok_or(IoError::EINVAL)?)) + .clone()) + } + + #[inline] + pub async fn insert_object( + &self, + obj: Arc, + ) -> Result { + // executor disables the interrupts during the polling of the futures + // => Borrowing of the current_task is not interrupted by the scheduler + let borrowed = self.current_task.borrow(); + let mut guard = borrowed.object_map.write().await; + + let new_fd = || -> Result { + let mut fd: FileDescriptor = 0; + loop { + if !guard.contains_key(&fd) { + break Ok(fd); + } else if fd == FileDescriptor::MAX { + break Err(IoError::EOVERFLOW); + } + + fd = fd.saturating_add(1); + } + }; + + let fd = new_fd()?; + let _ = guard.insert(fd, obj.clone()); + + Ok(fd) + } + + #[inline] + pub async fn replace_object( + &self, + fd: FileDescriptor, + obj: Arc, + ) -> Result<(), IoError> { + // executor disables the interrupts during the polling of the futures + // => Borrowing of the current_task is not interrupted by the scheduler + let _ = self + .current_task + .borrow() + .object_map + .write() + .await + .insert(fd, obj); + Ok(()) + } + + pub async fn dup_object(&self, fd: FileDescriptor) -> Result { + // executor disables the interrupts during the polling of the futures + // => Borrowing of the current_task is not interrupted by the scheduler + let borrowed = self.current_task.borrow(); + let mut guard = borrowed.object_map.write().await; + let obj = 
(*(guard.get(&fd).ok_or(IoError::EINVAL)?)).clone(); + + let new_fd = || -> Result { + let mut fd: FileDescriptor = 0; + loop { + if !guard.contains_key(&fd) { + break Ok(fd); + } else if fd == FileDescriptor::MAX { + break Err(IoError::EOVERFLOW); + } + + fd = fd.saturating_add(1); + } + }; + + let fd = new_fd()?; + if guard.try_insert(fd, obj).is_err() { + Err(IoError::EMFILE) + } else { + Ok(fd as FileDescriptor) + } + } + + pub async fn remove_object( + &self, + fd: FileDescriptor, + ) -> Result, IoError> { + // executor disables the interrupts during the polling of the futures + // => Borrowing of the current_task is not interrupted by the scheduler + self.current_task + .borrow() + .object_map + .write() + .await + .remove(&fd) + .ok_or(IoError::EINVAL) + } + #[inline] pub fn get_current_task_prio(&self) -> Priority { without_interrupts(|| self.current_task.borrow().prio) diff --git a/src/scheduler/task.rs b/src/scheduler/task.rs index 081116562c..af56accfda 100644 --- a/src/scheduler/task.rs +++ b/src/scheduler/task.rs @@ -2,21 +2,30 @@ use alloc::boxed::Box; use alloc::collections::{LinkedList, VecDeque}; use alloc::rc::Rc; +use alloc::sync::Arc; use alloc::vec::Vec; use core::cell::RefCell; -use core::cmp::Ordering; -use core::fmt; use core::num::NonZeroU64; #[cfg(any(feature = "tcp", feature = "udp"))] use core::ops::DerefMut; +use core::{cmp, fmt}; + +use ahash::RandomState; +use hashbrown::HashMap; +use hermit_sync::OnceCell; -use crate::arch; use crate::arch::core_local::*; use crate::arch::mm::VirtAddr; use crate::arch::scheduler::TaskStacks; #[cfg(not(feature = "common-os"))] use crate::arch::scheduler::TaskTLS; +use crate::executor::poll_on; +use crate::fd::stdio::*; +use crate::fd::{ + FileDescriptor, IoError, ObjectInterface, STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO, +}; use crate::scheduler::CoreId; +use crate::{arch, env}; /// Returns the most significant bit. 
/// @@ -126,13 +135,13 @@ impl TaskHandle { } impl Ord for TaskHandle { - fn cmp(&self, other: &Self) -> Ordering { + fn cmp(&self, other: &Self) -> cmp::Ordering { self.id.cmp(&other.id) } } impl PartialOrd for TaskHandle { - fn partial_cmp(&self, other: &Self) -> Option { + fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } @@ -379,6 +388,9 @@ pub(crate) struct Task { pub core_id: CoreId, /// Stack of the task pub stacks: TaskStacks, + /// Mapping between file descriptor and the referenced object + pub object_map: + Arc, RandomState>>>, /// Task Thread-Local-Storage (TLS) #[cfg(not(feature = "common-os"))] pub tls: Option>, @@ -402,6 +414,9 @@ impl Task { task_status: TaskStatus, task_prio: Priority, stacks: TaskStacks, + object_map: Arc< + async_lock::RwLock, RandomState>>, + >, ) -> Task { debug!("Creating new task {} on core {}", tid, core_id); @@ -415,6 +430,7 @@ impl Task { last_fpu_state: arch::processor::FPUState::new(), core_id, stacks, + object_map, #[cfg(not(feature = "common-os"))] tls: None, #[cfg(all(target_arch = "x86_64", feature = "common-os"))] @@ -427,6 +443,53 @@ impl Task { pub fn new_idle(tid: TaskId, core_id: CoreId) -> Task { debug!("Creating idle task {}", tid); + /// All cores use the same mapping between file descriptor and the referenced object + static OBJECT_MAP: OnceCell< + Arc, RandomState>>>, + > = OnceCell::new(); + + if core_id == 0 { + OBJECT_MAP + .set(Arc::new(async_lock::RwLock::new(HashMap::< + FileDescriptor, + Arc, + RandomState, + >::with_hasher( + RandomState::with_seeds(0, 0, 0, 0), + )))) + .unwrap(); + let objmap = OBJECT_MAP.get().unwrap().clone(); + let _ = poll_on( + async { + let mut guard = objmap.write().await; + if env::is_uhyve() { + guard + .try_insert(STDIN_FILENO, Arc::new(UhyveStdin::new())) + .map_err(|_| IoError::EIO)?; + guard + .try_insert(STDOUT_FILENO, Arc::new(UhyveStdout::new())) + .map_err(|_| IoError::EIO)?; + guard + .try_insert(STDERR_FILENO, Arc::new(UhyveStderr::new())) 
+ .map_err(|_| IoError::EIO)?; + } else { + guard + .try_insert(STDIN_FILENO, Arc::new(GenericStdin::new())) + .map_err(|_| IoError::EIO)?; + guard + .try_insert(STDOUT_FILENO, Arc::new(GenericStdout::new())) + .map_err(|_| IoError::EIO)?; + guard + .try_insert(STDERR_FILENO, Arc::new(GenericStderr::new())) + .map_err(|_| IoError::EIO)?; + } + + Ok(()) + }, + None, + ); + } + Task { id: tid, status: TaskStatus::Idle, @@ -437,6 +500,7 @@ impl Task { last_fpu_state: arch::processor::FPUState::new(), core_id, stacks: TaskStacks::from_boot_stacks(), + object_map: OBJECT_MAP.get().unwrap().clone(), #[cfg(not(feature = "common-os"))] tls: None, #[cfg(all(target_arch = "x86_64", feature = "common-os"))] diff --git a/src/syscalls/net.rs b/src/syscalls/net.rs index cc105dc2d1..6692d12b17 100644 --- a/src/syscalls/net.rs +++ b/src/syscalls/net.rs @@ -4,7 +4,6 @@ use alloc::sync::Arc; use core::ffi::c_void; use core::mem::size_of; use core::ops::DerefMut; -use core::sync::atomic::Ordering; #[cfg(all(any(feature = "tcp", feature = "udp"), not(feature = "newlib")))] use smoltcp::wire::{IpAddress, IpEndpoint, IpListenEndpoint}; @@ -15,7 +14,7 @@ use crate::executor::network::{NetworkState, NIC}; use crate::fd::socket::tcp; #[cfg(feature = "udp")] use crate::fd::socket::udp; -use crate::fd::{get_object, insert_object, ObjectInterface, SocketOption, FD_COUNTER}; +use crate::fd::{get_object, insert_object, replace_object, ObjectInterface, SocketOption}; use crate::syscalls::{IoCtl, __sys_write}; pub const AF_INET: i32 = 0; @@ -271,8 +270,6 @@ extern "C" fn __sys_socket(domain: i32, type_: SockType, protocol: i32) -> i32 { let mut guard = NIC.lock(); if let NetworkState::Initialized(nic) = guard.deref_mut() { - let fd = FD_COUNTER.fetch_add(1, Ordering::SeqCst); - #[cfg(feature = "udp")] if type_.contains(SockType::SOCK_DGRAM) { let handle = nic.create_udp_handle().unwrap(); @@ -283,7 +280,7 @@ extern "C" fn __sys_socket(domain: i32, type_: SockType, protocol: i32) -> i32 { 
socket.ioctl(IoCtl::NonBlocking, true).unwrap(); } - insert_object(fd, Arc::new(socket)).expect("FD is already used"); + let fd = insert_object(Arc::new(socket)).expect("FD is already used"); return fd; } @@ -298,7 +295,7 @@ extern "C" fn __sys_socket(domain: i32, type_: SockType, protocol: i32) -> i32 { socket.ioctl(IoCtl::NonBlocking, true).unwrap(); } - insert_object(fd, Arc::new(socket)).expect("FD is already used"); + let fd = insert_object(Arc::new(socket)).expect("FD is already used"); return fd; } @@ -319,9 +316,8 @@ extern "C" fn __sys_accept(fd: i32, addr: *mut sockaddr, addrlen: *mut socklen_t |e| -num::ToPrimitive::to_i32(&e).unwrap(), |endpoint| { let new_obj = dyn_clone::clone_box(&*v); - insert_object(fd, Arc::from(new_obj)).unwrap(); - let new_fd = FD_COUNTER.fetch_add(1, Ordering::SeqCst); - insert_object(new_fd, v).expect("FD is already used"); + replace_object(fd, Arc::from(new_obj)).unwrap(); + let new_fd = insert_object(v).unwrap(); if !addr.is_null() && !addrlen.is_null() { let addrlen = unsafe { &mut *addrlen }; From b7009f0c5cba3bca9a5758b7fef39beddf3cdba0 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Sat, 17 Feb 2024 16:20:13 +0100 Subject: [PATCH 06/10] remove clippy warnings --- src/fd/mod.rs | 1 + src/fs/mod.rs | 2 +- src/scheduler/mod.rs | 165 ++++++++++++++++++++++++++---------------- src/scheduler/task.rs | 2 + 4 files changed, 106 insertions(+), 64 deletions(-) diff --git a/src/fd/mod.rs b/src/fd/mod.rs index c5417e7737..ea35941165 100644 --- a/src/fd/mod.rs +++ b/src/fd/mod.rs @@ -422,6 +422,7 @@ pub(crate) fn insert_object(obj: Arc) -> Result, diff --git a/src/fs/mod.rs b/src/fs/mod.rs index de86fa3a0d..16b03ecf23 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -381,7 +381,7 @@ pub fn readdir(name: &str) -> Result, IoError> { /// Open a directory to read the directory entries pub(crate) fn opendir(name: &str) -> Result { let obj = FILESYSTEM.get().unwrap().opendir(name)?; - Ok(insert_object(obj)?) 
+ insert_object(obj) } use crate::fd::{self, FileDescriptor}; diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index c8484bf002..45985c25c6 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -1,3 +1,5 @@ +#![allow(clippy::type_complexity)] + use alloc::boxed::Box; use alloc::collections::{BTreeMap, VecDeque}; use alloc::rc::Rc; @@ -5,8 +7,10 @@ use alloc::sync::Arc; #[cfg(feature = "smp")] use alloc::vec::Vec; use core::cell::RefCell; +use core::future::{self, Future}; use core::ptr; use core::sync::atomic::{AtomicU32, Ordering}; +use core::task::Poll::{Pending, Ready}; use ahash::RandomState; use crossbeam_utils::Backoff; @@ -323,6 +327,7 @@ impl PerCoreScheduler { prio: current_task_borrowed.prio, core_id, stacks: TaskStacks::new(current_task_borrowed.stacks.get_user_stack_size()), + object_map: current_task_borrowed.object_map.clone(), }; // Add it to the task lists. @@ -467,15 +472,19 @@ impl PerCoreScheduler { ) -> Result, IoError> { // executor disables the interrupts during the polling of the futures // => Borrowing of the current_task is not interrupted by the scheduler - Ok((*(self - .current_task - .borrow() - .object_map - .read() - .await - .get(&fd) - .ok_or(IoError::EINVAL)?)) - .clone()) + future::poll_fn(|cx| { + let borrowed = self.current_task.borrow(); + let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); + + let x = if let Ready(guard) = pinned_obj.as_mut().poll(cx) { + Ready(guard.get(&fd).cloned().ok_or(IoError::EINVAL)) + } else { + Pending + }; + + x + }) + .await } #[inline] @@ -485,28 +494,37 @@ impl PerCoreScheduler { ) -> Result { // executor disables the interrupts during the polling of the futures // => Borrowing of the current_task is not interrupted by the scheduler - let borrowed = self.current_task.borrow(); - let mut guard = borrowed.object_map.write().await; - - let new_fd = || -> Result { - let mut fd: FileDescriptor = 0; - loop { - if !guard.contains_key(&fd) { - break Ok(fd); - } else if fd 
== FileDescriptor::MAX { - break Err(IoError::EOVERFLOW); - } - - fd = fd.saturating_add(1); - } - }; + future::poll_fn(|cx| { + let borrowed = self.current_task.borrow(); + let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); + + let x = if let Ready(mut guard) = pinned_obj.as_mut().poll(cx) { + let new_fd = || -> Result { + let mut fd: FileDescriptor = 0; + loop { + if !guard.contains_key(&fd) { + break Ok(fd); + } else if fd == FileDescriptor::MAX { + break Err(IoError::EOVERFLOW); + } + + fd = fd.saturating_add(1); + } + }; - let fd = new_fd()?; - let _ = guard.insert(fd, obj.clone()); + let fd = new_fd()?; + let _ = guard.insert(fd, obj.clone()); + Ready(Ok(fd)) + } else { + Pending + }; - Ok(fd) + x + }) + .await } + #[allow(dead_code)] #[inline] pub async fn replace_object( &self, @@ -515,42 +533,58 @@ impl PerCoreScheduler { ) -> Result<(), IoError> { // executor disables the interrupts during the polling of the futures // => Borrowing of the current_task is not interrupted by the scheduler - let _ = self - .current_task - .borrow() - .object_map - .write() - .await - .insert(fd, obj); - Ok(()) + future::poll_fn(|cx| { + let borrowed = self.current_task.borrow(); + let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); + + let x = if let Ready(mut guard) = pinned_obj.as_mut().poll(cx) { + guard.insert(fd, obj.clone()); + Ready(Ok(())) + } else { + Pending + }; + + x + }) + .await } pub async fn dup_object(&self, fd: FileDescriptor) -> Result { // executor disables the interrupts during the polling of the futures // => Borrowing of the current_task is not interrupted by the scheduler - let borrowed = self.current_task.borrow(); - let mut guard = borrowed.object_map.write().await; - let obj = (*(guard.get(&fd).ok_or(IoError::EINVAL)?)).clone(); - - let new_fd = || -> Result { - let mut fd: FileDescriptor = 0; - loop { - if !guard.contains_key(&fd) { - break Ok(fd); - } else if fd == FileDescriptor::MAX { - break 
Err(IoError::EOVERFLOW); - } + future::poll_fn(|cx| { + let borrowed = self.current_task.borrow(); + let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); + + let x = if let Ready(mut guard) = pinned_obj.as_mut().poll(cx) { + let obj = (*(guard.get(&fd).ok_or(IoError::EINVAL)?)).clone(); + + let new_fd = || -> Result { + let mut fd: FileDescriptor = 0; + loop { + if !guard.contains_key(&fd) { + break Ok(fd); + } else if fd == FileDescriptor::MAX { + break Err(IoError::EOVERFLOW); + } + + fd = fd.saturating_add(1); + } + }; - fd = fd.saturating_add(1); - } - }; + let fd = new_fd()?; + if guard.try_insert(fd, obj).is_err() { + Ready(Err(IoError::EMFILE)) + } else { + Ready(Ok(fd as FileDescriptor)) + } + } else { + Pending + }; - let fd = new_fd()?; - if guard.try_insert(fd, obj).is_err() { - Err(IoError::EMFILE) - } else { - Ok(fd as FileDescriptor) - } + x + }) + .await } pub async fn remove_object( @@ -559,13 +593,18 @@ impl PerCoreScheduler { ) -> Result, IoError> { // executor disables the interrupts during the polling of the futures // => Borrowing of the current_task is not interrupted by the scheduler - self.current_task - .borrow() - .object_map - .write() - .await - .remove(&fd) - .ok_or(IoError::EINVAL) + future::poll_fn(|cx| { + let borrowed = self.current_task.borrow(); + let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); + let x = if let Ready(mut guard) = pinned_obj.as_mut().poll(cx) { + Ready(guard.remove(&fd).ok_or(IoError::EINVAL)) + } else { + Pending + }; + + x + }) + .await } #[inline] diff --git a/src/scheduler/task.rs b/src/scheduler/task.rs index af56accfda..a777dba692 100644 --- a/src/scheduler/task.rs +++ b/src/scheduler/task.rs @@ -1,3 +1,5 @@ +#![allow(clippy::type_complexity)] + #[cfg(not(feature = "common-os"))] use alloc::boxed::Box; use alloc::collections::{LinkedList, VecDeque}; From 1404a29784c31fe8e7b13f00cd577c50eca53c42 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Sat, 17 Feb 2024 16:35:41 +0100 
Subject: [PATCH 07/10] disable interrupts within asynchronous functions => executor has not to disable interrupts --- src/executor/mod.rs | 4 +- src/scheduler/mod.rs | 158 +++++++++++++++++++++++-------------------- 2 files changed, 86 insertions(+), 76 deletions(-) diff --git a/src/executor/mod.rs b/src/executor/mod.rs index 3105e76caf..5025965b63 100644 --- a/src/executor/mod.rs +++ b/src/executor/mod.rs @@ -134,7 +134,7 @@ where // run background tasks run(); - if let Poll::Ready(t) = without_interrupts(|| future.as_mut().poll(&mut cx)) { + if let Poll::Ready(t) = future.as_mut().poll(&mut cx) { #[cfg(any(feature = "tcp", feature = "udp"))] if !no_retransmission { let wakeup_time = @@ -205,7 +205,7 @@ where run(); let now = now(); - if let Poll::Ready(t) = without_interrupts(|| future.as_mut().poll(&mut cx)) { + if let Poll::Ready(t) = future.as_mut().poll(&mut cx) { #[cfg(any(feature = "tcp", feature = "udp"))] if !no_retransmission { let network_timer = diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index 45985c25c6..613c07697d 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -470,17 +470,19 @@ impl PerCoreScheduler { &self, fd: FileDescriptor, ) -> Result, IoError> { - // executor disables the interrupts during the polling of the futures - // => Borrowing of the current_task is not interrupted by the scheduler future::poll_fn(|cx| { - let borrowed = self.current_task.borrow(); - let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); + let x = without_interrupts(|| { + let borrowed = self.current_task.borrow(); + let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); - let x = if let Ready(guard) = pinned_obj.as_mut().poll(cx) { - Ready(guard.get(&fd).cloned().ok_or(IoError::EINVAL)) - } else { - Pending - }; + let x = if let Ready(guard) = pinned_obj.as_mut().poll(cx) { + Ready(guard.get(&fd).cloned().ok_or(IoError::EINVAL)) + } else { + Pending + }; + + x + }); x }) @@ -492,32 +494,34 @@ impl PerCoreScheduler 
{ &self, obj: Arc, ) -> Result { - // executor disables the interrupts during the polling of the futures - // => Borrowing of the current_task is not interrupted by the scheduler future::poll_fn(|cx| { - let borrowed = self.current_task.borrow(); - let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); - - let x = if let Ready(mut guard) = pinned_obj.as_mut().poll(cx) { - let new_fd = || -> Result { - let mut fd: FileDescriptor = 0; - loop { - if !guard.contains_key(&fd) { - break Ok(fd); - } else if fd == FileDescriptor::MAX { - break Err(IoError::EOVERFLOW); + let x = without_interrupts(|| { + let borrowed = self.current_task.borrow(); + let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); + + let x = if let Ready(mut guard) = pinned_obj.as_mut().poll(cx) { + let new_fd = || -> Result { + let mut fd: FileDescriptor = 0; + loop { + if !guard.contains_key(&fd) { + break Ok(fd); + } else if fd == FileDescriptor::MAX { + break Err(IoError::EOVERFLOW); + } + + fd = fd.saturating_add(1); } + }; - fd = fd.saturating_add(1); - } + let fd = new_fd()?; + let _ = guard.insert(fd, obj.clone()); + Ready(Ok(fd)) + } else { + Pending }; - let fd = new_fd()?; - let _ = guard.insert(fd, obj.clone()); - Ready(Ok(fd)) - } else { - Pending - }; + x + }); x }) @@ -531,18 +535,20 @@ impl PerCoreScheduler { fd: FileDescriptor, obj: Arc, ) -> Result<(), IoError> { - // executor disables the interrupts during the polling of the futures - // => Borrowing of the current_task is not interrupted by the scheduler future::poll_fn(|cx| { - let borrowed = self.current_task.borrow(); - let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); + let x = without_interrupts(|| { + let borrowed = self.current_task.borrow(); + let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); - let x = if let Ready(mut guard) = pinned_obj.as_mut().poll(cx) { - guard.insert(fd, obj.clone()); - Ready(Ok(())) - } else { - Pending - }; + let x = if let Ready(mut guard) = 
pinned_obj.as_mut().poll(cx) { + guard.insert(fd, obj.clone()); + Ready(Ok(())) + } else { + Pending + }; + + x + }); x }) @@ -550,37 +556,39 @@ impl PerCoreScheduler { } pub async fn dup_object(&self, fd: FileDescriptor) -> Result { - // executor disables the interrupts during the polling of the futures - // => Borrowing of the current_task is not interrupted by the scheduler future::poll_fn(|cx| { - let borrowed = self.current_task.borrow(); - let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); - - let x = if let Ready(mut guard) = pinned_obj.as_mut().poll(cx) { - let obj = (*(guard.get(&fd).ok_or(IoError::EINVAL)?)).clone(); - - let new_fd = || -> Result { - let mut fd: FileDescriptor = 0; - loop { - if !guard.contains_key(&fd) { - break Ok(fd); - } else if fd == FileDescriptor::MAX { - break Err(IoError::EOVERFLOW); + let x = without_interrupts(|| { + let borrowed = self.current_task.borrow(); + let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); + + let x = if let Ready(mut guard) = pinned_obj.as_mut().poll(cx) { + let obj = (*(guard.get(&fd).ok_or(IoError::EINVAL)?)).clone(); + + let new_fd = || -> Result { + let mut fd: FileDescriptor = 0; + loop { + if !guard.contains_key(&fd) { + break Ok(fd); + } else if fd == FileDescriptor::MAX { + break Err(IoError::EOVERFLOW); + } + + fd = fd.saturating_add(1); } + }; - fd = fd.saturating_add(1); + let fd = new_fd()?; + if guard.try_insert(fd, obj).is_err() { + Ready(Err(IoError::EMFILE)) + } else { + Ready(Ok(fd as FileDescriptor)) } + } else { + Pending }; - let fd = new_fd()?; - if guard.try_insert(fd, obj).is_err() { - Ready(Err(IoError::EMFILE)) - } else { - Ready(Ok(fd as FileDescriptor)) - } - } else { - Pending - }; + x + }); x }) @@ -591,16 +599,18 @@ impl PerCoreScheduler { &self, fd: FileDescriptor, ) -> Result, IoError> { - // executor disables the interrupts during the polling of the futures - // => Borrowing of the current_task is not interrupted by the scheduler 
future::poll_fn(|cx| { - let borrowed = self.current_task.borrow(); - let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); - let x = if let Ready(mut guard) = pinned_obj.as_mut().poll(cx) { - Ready(guard.remove(&fd).ok_or(IoError::EINVAL)) - } else { - Pending - }; + let x = without_interrupts(|| { + let borrowed = self.current_task.borrow(); + let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); + let x = if let Ready(mut guard) = pinned_obj.as_mut().poll(cx) { + Ready(guard.remove(&fd).ok_or(IoError::EINVAL)) + } else { + Pending + }; + + x + }); x }) From f4861a85a3db881526e347cdaf9d9483201e46af Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Sat, 17 Feb 2024 17:47:21 +0100 Subject: [PATCH 08/10] improve type handling --- src/scheduler/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index 613c07697d..a9b43a4eab 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -500,7 +500,7 @@ impl PerCoreScheduler { let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); let x = if let Ready(mut guard) = pinned_obj.as_mut().poll(cx) { - let new_fd = || -> Result { + let new_fd = || -> Result { let mut fd: FileDescriptor = 0; loop { if !guard.contains_key(&fd) { @@ -564,7 +564,7 @@ impl PerCoreScheduler { let x = if let Ready(mut guard) = pinned_obj.as_mut().poll(cx) { let obj = (*(guard.get(&fd).ok_or(IoError::EINVAL)?)).clone(); - let new_fd = || -> Result { + let new_fd = || -> Result { let mut fd: FileDescriptor = 0; loop { if !guard.contains_key(&fd) { @@ -581,7 +581,7 @@ impl PerCoreScheduler { if guard.try_insert(fd, obj).is_err() { Ready(Err(IoError::EMFILE)) } else { - Ready(Ok(fd as FileDescriptor)) + Ready(Ok(fd)) } } else { Pending From d374f28244c87dcaef68db7e2b4477d558c97703 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Sat, 17 Feb 2024 23:25:41 +0100 Subject: [PATCH 09/10] add option to create a file descriptor hash map per task 
--- src/arch/x86_64/kernel/mod.rs | 4 +++ src/scheduler/mod.rs | 52 +++++++++++++++++++++++++++++++++-- src/scheduler/task.rs | 2 +- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/arch/x86_64/kernel/mod.rs b/src/arch/x86_64/kernel/mod.rs index 23f1a58a11..5fa19f5fb8 100644 --- a/src/arch/x86_64/kernel/mod.rs +++ b/src/arch/x86_64/kernel/mod.rs @@ -354,6 +354,10 @@ pub unsafe fn jump_to_user_land(entry_point: u64, code_size: u64) -> ! { use x86_64::structures::paging::{PageSize, Size4KiB as BasePageSize}; use crate::arch::x86_64::kernel::scheduler::TaskStacks; + use crate::executor::block_on; + + info!("Create new file descriptor table"); + block_on(core_scheduler().recreate_objmap(), None).unwrap(); let ds = 0x23u64; let cs = 0x2bu64; diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index a9b43a4eab..c9522baf7c 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -465,6 +465,8 @@ impl PerCoreScheduler { without_interrupts(|| self.current_task.borrow().object_map.clone()) } + /// Map a file descriptor to their IO interface and returns + /// the shared reference #[inline] pub async fn get_object( &self, @@ -473,7 +475,7 @@ impl PerCoreScheduler { future::poll_fn(|cx| { let x = without_interrupts(|| { let borrowed = self.current_task.borrow(); - let mut pinned_obj = core::pin::pin!(borrowed.object_map.write()); + let mut pinned_obj = core::pin::pin!(borrowed.object_map.read()); let x = if let Ready(guard) = pinned_obj.as_mut().poll(cx) { Ready(guard.get(&fd).cloned().ok_or(IoError::EINVAL)) @@ -489,7 +491,48 @@ impl PerCoreScheduler { .await } - #[inline] + /// Creates a new map between file descriptor and their IO interface and + /// clone the standard descriptors. 
+ #[allow(dead_code)] + pub async fn recreate_objmap(&self) -> Result<(), IoError> { + let mut map = HashMap::, RandomState>::with_hasher( + RandomState::with_seeds(0, 0, 0, 0), + ); + + future::poll_fn(|cx| { + let x = without_interrupts(|| { + let borrowed = self.current_task.borrow(); + let mut pinned_obj = core::pin::pin!(borrowed.object_map.read()); + + let x = if let Ready(guard) = pinned_obj.as_mut().poll(cx) { + // clone standard file descriptors + for i in 0..3 { + if let Some(obj) = guard.get(&i) { + map.insert(i, obj.clone()); + } + } + + Ready(Ok(())) + } else { + Pending + }; + + x + }); + + x + }) + .await?; + + without_interrupts(|| { + self.current_task.borrow_mut().object_map = Arc::new(async_lock::RwLock::new(map)); + }); + + Ok(()) + } + + /// Insert a new IO interface and returns a file descriptor as + /// identifier to this object pub async fn insert_object( &self, obj: Arc, @@ -528,8 +571,8 @@ impl PerCoreScheduler { .await } + /// Replace an existing IO interface by a new one #[allow(dead_code)] - #[inline] pub async fn replace_object( &self, fd: FileDescriptor, @@ -555,6 +598,8 @@ impl PerCoreScheduler { .await } + /// Duplicate a IO interface and returns a new file descriptor as + /// identifier to the new copy pub async fn dup_object(&self, fd: FileDescriptor) -> Result { future::poll_fn(|cx| { let x = without_interrupts(|| { @@ -595,6 +640,7 @@ impl PerCoreScheduler { .await } + /// Remove a IO interface, which is named by the file descriptor pub async fn remove_object( &self, fd: FileDescriptor, diff --git a/src/scheduler/task.rs b/src/scheduler/task.rs index a777dba692..6b31a97956 100644 --- a/src/scheduler/task.rs +++ b/src/scheduler/task.rs @@ -390,7 +390,7 @@ pub(crate) struct Task { pub core_id: CoreId, /// Stack of the task pub stacks: TaskStacks, - /// Mapping between file descriptor and the referenced object + /// Mapping between file descriptor and the referenced IO interface pub object_map: Arc, RandomState>>>, /// Task 
Thread-Local-Storage (TLS) From 55c620ea4d38b553fdcf894b9cf133cec1b170e8 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Sat, 17 Feb 2024 23:36:26 +0100 Subject: [PATCH 10/10] use more valid file name for the socket interface --- src/fd/socket/tcp.rs | 8 +++++++- src/syscalls/mod.rs | 4 ++-- src/syscalls/{net.rs => socket.rs} | 3 --- 3 files changed, 9 insertions(+), 6 deletions(-) rename src/syscalls/{net.rs => socket.rs} (99%) diff --git a/src/fd/socket/tcp.rs b/src/fd/socket/tcp.rs index f01229dfc5..f4c33df3bc 100644 --- a/src/fd/socket/tcp.rs +++ b/src/fd/socket/tcp.rs @@ -13,9 +13,15 @@ use smoltcp::wire::{IpEndpoint, IpListenEndpoint}; use crate::executor::block_on; use crate::executor::network::{now, Handle, NetworkState, NIC}; use crate::fd::{IoCtl, IoError, ObjectInterface, PollEvent, SocketOption}; -use crate::syscalls::net::*; use crate::DEFAULT_KEEP_ALIVE_INTERVAL; +/// further receives will be disallowed +pub const SHUT_RD: i32 = 0; +/// further sends will be disallowed +pub const SHUT_WR: i32 = 1; +/// further sends and receives will be disallowed +pub const SHUT_RDWR: i32 = 2; + fn get_ephemeral_port() -> u16 { static LOCAL_ENDPOINT: AtomicU16 = AtomicU16::new(49152); diff --git a/src/syscalls/mod.rs b/src/syscalls/mod.rs index b8ca9e79ad..3118b342f4 100644 --- a/src/syscalls/mod.rs +++ b/src/syscalls/mod.rs @@ -34,12 +34,12 @@ mod futex; mod interfaces; #[cfg(feature = "newlib")] mod lwip; -#[cfg(all(any(feature = "tcp", feature = "udp"), not(feature = "newlib")))] -pub mod net; mod processor; #[cfg(feature = "newlib")] mod recmutex; mod semaphore; +#[cfg(all(any(feature = "tcp", feature = "udp"), not(feature = "newlib")))] +pub mod socket; mod spinlock; mod system; #[cfg(feature = "common-os")] diff --git a/src/syscalls/net.rs b/src/syscalls/socket.rs similarity index 99% rename from src/syscalls/net.rs rename to src/syscalls/socket.rs index 6692d12b17..4a20bc0133 100644 --- a/src/syscalls/net.rs +++ b/src/syscalls/socket.rs @@ -32,9 +32,6 @@ 
pub const IP_MULTICAST_TTL: i32 = 5; pub const IP_MULTICAST_LOOP: i32 = 7; pub const IP_ADD_MEMBERSHIP: i32 = 3; pub const IP_DROP_MEMBERSHIP: i32 = 4; -pub const SHUT_RD: i32 = 0; -pub const SHUT_WR: i32 = 1; -pub const SHUT_RDWR: i32 = 2; pub const SOL_SOCKET: i32 = 4095; pub const SO_BROADCAST: i32 = 32; pub const SO_ERROR: i32 = 4103;