From 571714d3a7ea6e75bb8359d74fd31ed9a7bb9809 Mon Sep 17 00:00:00 2001
From: Eugene Rodionov
Date: Wed, 25 Dec 2024 18:38:20 +0000
Subject: [PATCH 1/6] Implement MMU support in LKL arch.

In its current implementation, MMU support is limited to 64-bit LKL
configurations; however, nothing prevents it from being extended to
32-bit configurations. It is largely inspired by the MMU implementation
in UML.

Currently, we implement 3-level page tables: a page global directory, a
page middle directory and page table entries (p4ds and puds are folded
into the pgd level). This is enough to translate 39-bit virtual
addresses (9 + 9 + 9 bits of table index plus a 12-bit page offset) to
physical addresses in a flat memory configuration, with kernel virtual
addresses identity-mapped to the corresponding physical addresses (with
the exception of vmalloc-allocated virtual memory).

To build LKL with MMU enabled run:
```
make -C tools/lkl MMU=1 clean-conf all
```

Signed-off-by: Eugene Rodionov
---
 arch/lkl/Kconfig                          |  31 +-
 arch/lkl/include/asm/Kbuild               |   1 +
 arch/lkl/include/asm/elf.h                |   7 +
 arch/lkl/include/asm/io.h                 |   8 +
 arch/lkl/include/asm/mmu_context.h        |  12 +
 arch/lkl/include/asm/page-mmu.h           |  68 ++++
 arch/lkl/include/asm/page.h               |   8 +-
 arch/lkl/include/asm/pgalloc.h            |  38 ++
 arch/lkl/include/asm/pgtable-mmu-3level.h | 435 ++++++++++++++++++++++
 arch/lkl/include/asm/pgtable.h            |   6 +
 arch/lkl/include/asm/processor.h          |   9 +
 arch/lkl/include/asm/tlb.h                |   8 +
 arch/lkl/include/asm/tlbflush.h           |  17 +
 arch/lkl/include/uapi/asm/host_ops.h      |   4 +
 arch/lkl/kernel/setup.c                   |   1 +
 arch/lkl/mm/Makefile                      |   2 +
 arch/lkl/mm/bootmem.c                     |  19 +-
 arch/lkl/mm/mmu_mem.c                     |  57 +++
 tools/lkl/Makefile.autoconf               |  15 +
 tools/lkl/lib/posix-host.c                |  35 ++
 20 files changed, 773 insertions(+), 8 deletions(-)
 create mode 100644 arch/lkl/include/asm/page-mmu.h
 create mode 100644 arch/lkl/include/asm/pgalloc.h
 create mode 100644 arch/lkl/include/asm/pgtable-mmu-3level.h
 create mode 100644 arch/lkl/include/asm/tlbflush.h
 create mode 100644 arch/lkl/mm/mmu_mem.c

diff --git a/arch/lkl/Kconfig b/arch/lkl/Kconfig
index 8ee71d181df9fc..47733c24eaa6f0 100644
--- a/arch/lkl/Kconfig
+++ b/arch/lkl/Kconfig
@@ -1,6 +1,6 @@
 config LKL
 	def_bool y
-	depends on !SMP && !MMU && !COREDUMP && !SECCOMP && !UPROBES && !COMPAT && !USER_RETURN_NOTIFIER
+	depends on !SMP && !COREDUMP && !SECCOMP && !UPROBES && !COMPAT && !USER_RETURN_NOTIFIER
 	select ARCH_THREAD_STACK_ALLOCATOR
 	select RWSEM_GENERIC_SPINLOCK
 	select GENERIC_ATOMIC64
@@ -77,6 +77,35 @@ config 64BIT
 	bool "64bit kernel"
 	default n
 
+config MMU
+	bool "LKL MMU implementation"
+	depends on 64BIT
+	default n
+
+if MMU
+config PGTABLE_LEVELS
+	int
+	default 3
+
+config LKL_MEMORY_START
+	hex
+	default 0x50000000
+	help
+	  Base address of the kernel virtual address space for LKL. There is no
+	  particular reason for picking this specific value. It is assumed that
+	  the host mmap is able to map the requested amount of memory at this
+	  address. The STACK_TOP and VMALLOC_START values depend on it.
+
+config LKL_TASK_UNMAPPED_BASE
+	hex
+	default 0x100000
+	help
+	  Starting virtual address for LKL user-space mmap. It is assumed that
+	  the host mmap is able to map the requested amount of memory starting
+	  from this address.
+endif + config COREDUMP def_bool n diff --git a/arch/lkl/include/asm/Kbuild b/arch/lkl/include/asm/Kbuild index cfd85497034e79..267b80c7300862 100644 --- a/arch/lkl/include/asm/Kbuild +++ b/arch/lkl/include/asm/Kbuild @@ -75,3 +75,4 @@ generic-y += vga.h generic-y += word-at-a-time.h generic-y += kprobes.h generic-y += uaccess.h +generic-y += mman.h diff --git a/arch/lkl/include/asm/elf.h b/arch/lkl/include/asm/elf.h index 8357ec89542e68..721dfb5e4e8976 100644 --- a/arch/lkl/include/asm/elf.h +++ b/arch/lkl/include/asm/elf.h @@ -9,6 +9,13 @@ #define ELF_CLASS ELFCLASS32 #endif +#ifdef CONFIG_MMU +#define ELF_EXEC_PAGESIZE 4096 +#define ELF_PLATFORM "i586" +#define ELF_HWCAP 0L +#define ELF_ET_DYN_BASE (TASK_SIZE) +#endif // CONFIG_MMU + #define elf_gregset_t long #define elf_fpregset_t double #endif diff --git a/arch/lkl/include/asm/io.h b/arch/lkl/include/asm/io.h index fd6f4afa961c14..4a2aba31d8f5e2 100644 --- a/arch/lkl/include/asm/io.h +++ b/arch/lkl/include/asm/io.h @@ -98,6 +98,14 @@ static inline void __iomem *ioremap(phys_addr_t offset, size_t size) return (void __iomem *)lkl_ops->ioremap(offset, size); } +#ifdef CONFIG_MMU +static inline void __iounmap(void __iomem *addr) +{ + // No-op according to ioremap implementation. +} +#define iounmap __iounmap +#endif + #include #endif /* _ASM_LKL_IO_H */ diff --git a/arch/lkl/include/asm/mmu_context.h b/arch/lkl/include/asm/mmu_context.h index 53ce94e7929150..64a2e9c46684f5 100644 --- a/arch/lkl/include/asm/mmu_context.h +++ b/arch/lkl/include/asm/mmu_context.h @@ -2,6 +2,18 @@ #ifndef _ASM_LKL_MMU_CONTEXT_H #define _ASM_LKL_MMU_CONTEXT_H +#ifdef CONFIG_MMU +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) { + // No-op for LKL as it doesn't support multiple user-mode address spaces. 
+} + +/* Generic hooks for arch_dup_mmap and arch_exit_mmap */ +#include +#include + +#else #include +#endif #endif /* _ASM_LKL_MMU_CONTEXT_H */ diff --git a/arch/lkl/include/asm/page-mmu.h b/arch/lkl/include/asm/page-mmu.h new file mode 100644 index 00000000000000..a1a54ef82dd95e --- /dev/null +++ b/arch/lkl/include/asm/page-mmu.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LKL_PAGE_MMU_H +#define _ASM_LKL_PAGE_MMU_H + +#include + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PFN_PTE_SHIFT PAGE_SHIFT + +#ifndef __ASSEMBLY__ + +struct page; +#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) +#define copy_page(to, from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +typedef struct {unsigned long pte; } pte_t; +typedef struct {unsigned long pmd; } pmd_t; +typedef struct {unsigned long pgd; } pgd_t; +typedef struct {unsigned long pgprot; } pgprot_t; +typedef struct page *pgtable_t; + +#define pte_val(p) ((p).pte) +#define pgd_val(x) ((x).pgd) +#define pgprot_val(x) ((x).pgprot) + +#define pmd_val(x) ((x).pmd) +#define __pmd(x) ((pmd_t) { (x) }) + +#define __pte(x) ((pte_t) { (x) }) +#define __pgd(x) ((pgd_t) { (x) }) +#define __pgprot(x) ((pgprot_t) { (x) }) + +#define pte_get_bits(p, bits) ((p).pte & (bits)) +#define pte_set_bits(p, bits) ((p).pte |= (bits)) +#define pte_clear_bits(p, bits) ((p).pte &= ~(bits)) +#define pte_copy(to, from) ({ (to).pte = (from).pte; }) +#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE)) +#define pte_set_val(p, phys, prot) ({ (p).pte = (phys) | pgprot_val(prot); }) + +extern unsigned long memory_start; +extern unsigned long memory_end; + +#define PAGE_OFFSET (memory_start) +// Define ARCH_PFN_OFFSET so that kernel virtual addresses are the same as +// the corresponding physical addresses (i.e. 1-to-1 mapping). 
+#define ARCH_PFN_OFFSET (memory_start >> PAGE_SHIFT) + +#define __pa(virt) ((unsigned long) (virt)) +#define __va(phys) ((void *)(phys)) + +#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) + +#define phys_to_pfn(p) PHYS_PFN(p) +#define pfn_to_phys(pfn) PFN_PHYS(pfn) + +#define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v))) + +#endif // __ASSEMBLY__ + +#include +#include + +#endif // _ASM_LKL_PAGE_MMU_H diff --git a/arch/lkl/include/asm/page.h b/arch/lkl/include/asm/page.h index 00fb33367d990f..1c5189d8679ea7 100644 --- a/arch/lkl/include/asm/page.h +++ b/arch/lkl/include/asm/page.h @@ -1,15 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_LKL_PAGE_H #define _ASM_LKL_PAGE_H +#ifndef CONFIG_MMU #define ARCH_PFN_OFFSET (memory_start >> PAGE_SHIFT) +#include +#else // CONFIG_MMU +#include +#endif // CONFIG_MMU #ifndef __ASSEMBLY__ void free_mem(void); void bootmem_init(unsigned long mem_size); #endif -#include - #undef PAGE_OFFSET #define PAGE_OFFSET memory_start diff --git a/arch/lkl/include/asm/pgalloc.h b/arch/lkl/include/asm/pgalloc.h new file mode 100644 index 00000000000000..b23cce097e32d3 --- /dev/null +++ b/arch/lkl/include/asm/pgalloc.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LKL_PGALLOC_H +#define _LKL_PGALLOC_H + +#include +#include + +#include + +#ifdef CONFIG_MMU + +static inline void pmd_populate_kernel(struct mm_struct *mm, + pmd_t *pmd, pte_t *pte) +{ + set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte))); +} + +static inline void pmd_populate(struct mm_struct *mm, + pmd_t *pmd, pgtable_t pte) +{ + set_pmd(pmd, __pmd(_PAGE_TABLE + (page_to_pfn(pte) << PAGE_SHIFT))); +} + +#define pmd_pgtable(pmd) pmd_page(pmd) + +extern pgd_t *pgd_alloc(struct mm_struct *mm); + +#define __pte_free_tlb(tlb, pte, address) tlb_remove_page((tlb), (pte)) + +#define __pmd_free_tlb(tlb, pmd, address) \ +do { \ + pagetable_pmd_dtor(virt_to_ptdesc(pmd)); \ + tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ +} while (0) + +#endif // CONFIG_MMU + +#endif /* _LKL_PGALLOC_H */ diff --git a/arch/lkl/include/asm/pgtable-mmu-3level.h b/arch/lkl/include/asm/pgtable-mmu-3level.h new file mode 100644 index 00000000000000..232b6cee13078c --- /dev/null +++ b/arch/lkl/include/asm/pgtable-mmu-3level.h @@ -0,0 +1,435 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Derived from arch/um/include/asm/pgtable.h + */ +#ifndef _LKL_PGTABLE_MMU_3_LEVEL_H +#define _LKL_PGTABLE_MMU_3_LEVEL_H + +#include +#include +#include + +#ifndef CONFIG_64BIT +#error Not supported bittness for LKL MMU. 
+#endif + +#define _PAGE_PRESENT 0x001 +#define _PAGE_NEWPAGE 0x002 +#define _PAGE_NEWPROT 0x004 +#define _PAGE_RW 0x020 +#define _PAGE_USER 0x040 +#define _PAGE_ACCESSED 0x080 +#define _PAGE_DIRTY 0x100 +/* If _PAGE_PRESENT is clear, we use these: */ +#define _PAGE_PROTNONE 0x010 /* if the user mapped it with PROT_NONE; pte_present gives true */ +#define _PAGE_SWP_EXCLUSIVE 0x400 + +/* + * although we don't distinguish between user space and kernel space + * reserver half of PGD for user space + */ +#define USER_PTRS_PER_PGD 256 +#define FIRST_USER_ADDRESS 0UL + + +#define PGDIR_SHIFT 30 +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +#define PMD_SHIFT 21 +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* + * entries per page directory level + */ +#define PTRS_PER_PTE 512 +#define PTRS_PER_PMD 512 +#define PTRS_PER_PGD 512 + +#define pte_ERROR(e) \ + pr_err("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), \ + pte_val(e)) +#define pmd_ERROR(e) \ + pr_err("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), \ + pmd_val(e)) +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), \ + pgd_val(e)) + +#define pud_none(x) (!(pud_val(x) & ~_PAGE_NEWPAGE)) +#define pud_bad(x) ((pud_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) +#define pud_present(x) (pud_val(x) & _PAGE_PRESENT) +#define pud_populate(mm, pud, pmd) \ + set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd))) + +#define set_pud(pudptr, pudval) (*(pudptr) = (pudval)) + +static inline int pgd_newpage(pgd_t pgd) +{ + return(pgd_val(pgd) & _PAGE_NEWPAGE); +} + +static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; } + +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) + +static inline void pud_clear(pud_t *pud) +{ + set_pud(pud, __pud(_PAGE_NEWPAGE)); +} + +#define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK) +#define pud_pgtable(pud) ((pmd_t *) __va(pud_val(pud) & PAGE_MASK)) + +static inline unsigned long pte_pfn(pte_t pte) +{ + return phys_to_pfn(pte_val(pte)); +} + +typedef unsigned long phys_t; + +static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) +{ + pte_t pte; + phys_t phys = pfn_to_phys(page_nr); + + pte_set_val(pte, phys, pgprot); + return pte; +} + +static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) +{ + return __pmd((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)); +} + +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) + +#define pte_pfn(x) phys_to_pfn(pte_val(x)) +#define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot)) +#define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot)) + +#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT) + +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +/* zero page used for uninitialized stuff */ +extern void *empty_zero_page; + +/* Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. 
;) + */ + +extern unsigned long memory_end; + +#define __va_space (8*1024*1024) + +#define VMALLOC_OFFSET (__va_space) +#define VMALLOC_START ((memory_end + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) +// TODO: +#define PKMAP_BASE (((TASK_SIZE - 2 * PAGE_SIZE) - LAST_PKMAP * PAGE_SIZE) & PMD_MASK) +#define VMALLOC_END ((TASK_SIZE - 4 * PAGE_SIZE)) +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END +#define MODULES_LEN (MODULES_VADDR - MODULES_END) + +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) +#define __PAGE_KERNEL_EXEC \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) + + +#define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEWPAGE)) +#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) + +#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) +#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0) + +#define pmd_newpage(x) (pmd_val(x) & _PAGE_NEWPAGE) +#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEWPAGE) + +#define pud_newpage(x) (pud_val(x) & _PAGE_NEWPAGE) +#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE) + +#define p4d_newpage(x) (p4d_val(x) & _PAGE_NEWPAGE) +#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEWPAGE) + +#define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK) + +#define pte_page(x) pfn_to_page(pte_pfn(x)) + +#define pte_present(x) pte_get_bits(x, (_PAGE_PRESENT | _PAGE_PROTNONE)) + +void mmap_pages_for_ptes(unsigned long va, unsigned int nr, pte_t pte); +void munmap_page_for_pte(unsigned long addr, pte_t *xp); + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp) +{ + if (pte_present(*xp)) + munmap_page_for_pte(addr, xp); + + pte_set_val(*(xp), 0, __pgprot(_PAGE_NEWPAGE)); +} + +/* + * ================================= + * Flags checking section. + * ================================= + */ + +static inline int pte_none(pte_t pte) +{ + return pte_is_zero(pte); +} + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. 
+ */ +static inline int pte_read(pte_t pte) +{ + return ((pte_get_bits(pte, _PAGE_USER)) && + !(pte_get_bits(pte, _PAGE_PROTNONE))); +} + +static inline int pte_exec(pte_t pte) +{ + return ((pte_get_bits(pte, _PAGE_USER)) && + !(pte_get_bits(pte, _PAGE_PROTNONE))); +} + +static inline int pte_write(pte_t pte) +{ + return ((pte_get_bits(pte, _PAGE_RW)) && + !(pte_get_bits(pte, _PAGE_PROTNONE))); +} + +static inline int pte_dirty(pte_t pte) +{ + return pte_get_bits(pte, _PAGE_DIRTY); +} + +static inline int pte_young(pte_t pte) +{ + return pte_get_bits(pte, _PAGE_ACCESSED); +} + +static inline int pte_newpage(pte_t pte) +{ + return pte_get_bits(pte, _PAGE_NEWPAGE); +} + +static inline int pte_newprot(pte_t pte) +{ + return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT))); +} + +/* + * ================================= + * Flags setting section. + * ================================= + */ + +static inline pte_t pte_mknewprot(pte_t pte) +{ + pte_set_bits(pte, _PAGE_NEWPROT); + return pte; +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + pte_clear_bits(pte, _PAGE_DIRTY); + return pte; +} + +static inline pte_t pte_mkold(pte_t pte) +{ + pte_clear_bits(pte, _PAGE_ACCESSED); + return pte; +} + +static inline pte_t pte_wrprotect(pte_t pte) +{ + if (likely(pte_get_bits(pte, _PAGE_RW))) + pte_clear_bits(pte, _PAGE_RW); + else + return pte; + return pte_mknewprot(pte); +} + +static inline pte_t pte_mkread(pte_t pte) +{ + if (unlikely(pte_get_bits(pte, _PAGE_USER))) + return pte; + pte_set_bits(pte, _PAGE_USER); + return pte_mknewprot(pte); +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + pte_set_bits(pte, _PAGE_DIRTY); + return pte; +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + pte_set_bits(pte, _PAGE_ACCESSED); + return pte; +} + +static inline pte_t pte_mkwrited(pte_t pte, struct vm_area_struct *vma) +{ + if (unlikely(pte_get_bits(pte, _PAGE_RW))) + return pte; + pte_set_bits(pte, _PAGE_RW); + return pte_mknewprot(pte); +} + +static inline pte_t pte_mkwrite_novma(pte_t pte) +{ + if (unlikely(pte_get_bits(pte, _PAGE_RW))) + return pte; + pte_set_bits(pte, _PAGE_RW); + return pte_mknewprot(pte); +} + +static inline int pte_swp_exclusive(pte_t pte) +{ + return pte_get_bits(pte, _PAGE_SWP_EXCLUSIVE); +} + +static inline pte_t pte_swp_mkexclusive(pte_t pte) +{ + pte_set_bits(pte, _PAGE_SWP_EXCLUSIVE); + return pte; +} + +static inline pte_t pte_swp_clear_exclusive(pte_t pte) +{ + pte_clear_bits(pte, _PAGE_SWP_EXCLUSIVE); + return pte; +} + +static inline void update_mmu_cache_range(struct vm_fault *vmf, + struct vm_area_struct *vma, unsigned long address, + pte_t *ptep, unsigned int nr) +{ +} + +static inline pte_t pte_mkuptodate(pte_t pte) +{ + pte_clear_bits(pte, _PAGE_NEWPAGE); + if (pte_present(pte)) + pte_clear_bits(pte, _PAGE_NEWPROT); + return pte; +} + +static inline pte_t pte_mknewpage(pte_t pte) +{ + pte_set_bits(pte, _PAGE_NEWPAGE); + return pte; +} + +static inline void __set_pte(pte_t *pteptr, pte_t pteval) +{ + pte_copy(*pteptr, pteval); + + /* If it's a swap entry, it needs to be marked _PAGE_NEWPAGE so + * fix_range knows to unmap it. _PAGE_NEWPROT is specific to + * mapped pages. 
+ */ + + *pteptr = pte_mknewpage(*pteptr); + if (pte_present(*pteptr)) + *pteptr = pte_mknewprot(*pteptr); +} + +static inline pte_t __pte_next_pfn(pte_t pte) +{ + return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); +} + +static inline void __set_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned int nr) +{ + if (pte_present(pte)) + mmap_pages_for_ptes(addr, nr, pte); + + for (;;) { + __set_pte(ptep, pte); + if (--nr == 0) + break; + ptep++; + pte = __pte_next_pfn(pte); + } +} +#define set_ptes(mm, addr, ptep, pte, nr) __set_ptes(mm, addr, ptep, pte, nr) + +#define __HAVE_ARCH_PTE_SAME +static inline int pte_same(pte_t pte_a, pte_t pte_b) +{ + return !((pte_val(pte_a) ^ pte_val(pte_b)) & ~_PAGE_NEWPAGE); +} + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ + +#define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys)) +#define __virt_to_page(virt) phys_to_page(__pa(virt)) +#define page_to_phys(page) pfn_to_phys(page_to_pfn(page)) +#define virt_to_page(addr) __virt_to_page((const unsigned long) addr) + +#define mk_pte(page, pgprot) \ + ({ pte_t pte; \ + \ + pte_set_val(pte, page_to_phys(page), (pgprot)); \ + if (pte_present(pte)) \ + pte_mknewprot(pte_mknewpage(pte)); \ + pte; }) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pte_set_val(pte, (pte_val(pte) & _PAGE_CHG_MASK), newprot); + return pte; +} + +/* + * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] + * + * this macro returns the index of the entry in the pmd page which would + * control the given virtual address + */ +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) + +#define update_mmu_cache(vma, address, ptep) do { } while (0) + +/* Encode and de-code a swap entry */ +#define __swp_type(x) (((x).val >> 5) & 0x1f) +#define __swp_offset(x) ((x).val >> 11) + +#define __swp_entry(type, offset) \ + ((swp_entry_t) { ((type) << 5) | ((offset) << 11) }) +#define __pte_to_swp_entry(pte) \ + ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +#endif // _LKL_PGTABLE_MMU_3_LEVEL_H diff --git a/arch/lkl/include/asm/pgtable.h b/arch/lkl/include/asm/pgtable.h index 81cf20b64af2ad..18d14c1f04630e 100644 --- a/arch/lkl/include/asm/pgtable.h +++ b/arch/lkl/include/asm/pgtable.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LKL_PGTABLE_H #define _LKL_PGTABLE_H @@ -5,6 +6,7 @@ * (C) Copyright 2000-2002, Greg Ungerer */ +#ifndef CONFIG_MMU #include #include #include @@ -52,4 +54,8 @@ extern void *empty_zero_page; #define PTRS_PER_PTE 0 #define PTRS_PER_PMD 0 +#else +#include +#endif /* CONFIG_MMU */ + #endif diff --git a/arch/lkl/include/asm/processor.h b/arch/lkl/include/asm/processor.h index 2761f1fe54e842..d3a79f41231941 100644 --- a/arch/lkl/include/asm/processor.h +++ b/arch/lkl/include/asm/processor.h @@ -42,7 +42,16 @@ struct thread_struct { }; /* We don't have strict user/kernel spaces */ #define TASK_SIZE ((unsigned long)-1) +#ifndef CONFIG_MMU #define TASK_UNMAPPED_BASE 0 +#else +#define TASK_UNMAPPED_BASE CONFIG_LKL_TASK_UNMAPPED_BASE +#define STACK_TOP (CONFIG_LKL_MEMORY_START - 16 * PAGE_SIZE) +#define STACK_TOP_MAX STACK_TOP + +static inline void start_thread(struct pt_regs *regs, unsigned long entry, + unsigned long stack) {} +#endif // CONFIG_MMU #define KSTK_EIP(tsk) (0) #define KSTK_ESP(tsk) (0) diff --git a/arch/lkl/include/asm/tlb.h 
b/arch/lkl/include/asm/tlb.h index d474890d317d61..6133d8f7e4ea8c 100644 --- a/arch/lkl/include/asm/tlb.h +++ b/arch/lkl/include/asm/tlb.h @@ -2,6 +2,7 @@ #ifndef _ASM_LKL_TLB_H #define _ASM_LKL_TLB_H +#ifndef CONFIG_MMU #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) #define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0) @@ -9,4 +10,11 @@ #include +#else +#include +#include +#include +#include +#endif // CONFIG_MMU + #endif /* _ASM_LKL_TLB_H */ diff --git a/arch/lkl/include/asm/tlbflush.h b/arch/lkl/include/asm/tlbflush.h new file mode 100644 index 00000000000000..494a0bee4c075b --- /dev/null +++ b/arch/lkl/include/asm/tlbflush.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __UM_TLBFLUSH_H +#define __UM_TLBFLUSH_H + +#include + +// No-op implementation of TLB flushing for LKL arch. +static inline void flush_tlb_mm(struct mm_struct *mm) {} +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) {} +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long address) {} +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) {} + +#endif diff --git a/arch/lkl/include/uapi/asm/host_ops.h b/arch/lkl/include/uapi/asm/host_ops.h index 5d2735322cec6c..1ce742caeaa85b 100644 --- a/arch/lkl/include/uapi/asm/host_ops.h +++ b/arch/lkl/include/uapi/asm/host_ops.h @@ -188,6 +188,10 @@ struct lkl_host_operations { void* (*mmap)(void *addr, unsigned long size, enum lkl_prot prot); int (*munmap)(void *addr, unsigned long size); + void (*shmem_init)(unsigned long size); + void *(*shmem_mmap)(void *addr, unsigned long pg_off, unsigned long size, + enum lkl_prot prot); + struct lkl_dev_pci_ops *pci_ops; }; diff --git a/arch/lkl/kernel/setup.c b/arch/lkl/kernel/setup.c index 50da71decae1c3..143ec27962bc09 100644 --- a/arch/lkl/kernel/setup.c +++ b/arch/lkl/kernel/setup.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/lkl/mm/Makefile b/arch/lkl/mm/Makefile index dc9caf3075b8c0..a8c4cba1a88885 100644 --- a/arch/lkl/mm/Makefile +++ b/arch/lkl/mm/Makefile @@ -3,3 +3,5 @@ KASAN_SANITIZE_kasan.o := n obj-y = bootmem.o obj-$(CONFIG_KASAN) += kasan.o + +obj-$(CONFIG_MMU) += mmu_mem.o \ No newline at end of file diff --git a/arch/lkl/mm/bootmem.c b/arch/lkl/mm/bootmem.c index bef66ec7d65b35..d912d7accfee92 100644 --- a/arch/lkl/mm/bootmem.c +++ b/arch/lkl/mm/bootmem.c @@ -2,6 +2,7 @@ #include #include #include +#include unsigned long memory_start, memory_end; static void *_memory_start; @@ -15,13 +16,21 @@ void __init bootmem_init(unsigned long mem_sz) mem_size = mem_sz; +#ifdef CONFIG_MMU + mem_size = PAGE_ALIGN(mem_size); + lkl_ops->shmem_init(mem_size); + void *lkl_va_base = (void *)CONFIG_LKL_MEMORY_START; + enum lkl_prot shmem_prot = LKL_PROT_READ | LKL_PROT_WRITE; + + _memory_start = lkl_ops->shmem_mmap(lkl_va_base, 0, mem_size, shmem_prot); +#else if (lkl_ops->page_alloc) { mem_size = PAGE_ALIGN(mem_size); _memory_start = lkl_ops->page_alloc(mem_size); } else { _memory_start = lkl_ops->mem_alloc(mem_size); } - +#endif memory_start = (unsigned long)_memory_start; BUG_ON(!memory_start); memory_end = memory_start + mem_size; @@ -39,7 +48,7 @@ void __init bootmem_init(unsigned long mem_sz) */ max_low_pfn = virt_to_pfn((void *)memory_end); min_low_pfn = virt_to_pfn((void *)memory_start); - memblock_add(memory_start, mem_size); + memblock_add(__pa(memory_start), mem_size); empty_zero_page 
= memblock_alloc(PAGE_SIZE, PAGE_SIZE); memset(empty_zero_page, 0, PAGE_SIZE); @@ -51,9 +60,9 @@ void __init bootmem_init(unsigned long mem_sz) void __init mem_init(void) { memblock_free_all(); - max_low_pfn = totalram_pages(); - max_pfn = max_low_pfn; - max_mapnr = max_pfn; + max_mapnr = totalram_pages(); + max_low_pfn = max_mapnr + ARCH_PFN_OFFSET; + max_pfn = max_mapnr + ARCH_PFN_OFFSET; } /* diff --git a/arch/lkl/mm/mmu_mem.c b/arch/lkl/mm/mmu_mem.c new file mode 100644 index 00000000000000..a113586692a8bc --- /dev/null +++ b/arch/lkl/mm/mmu_mem.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +static const pgprot_t protection_map[16] = { + [VM_NONE] = PAGE_NONE, + [VM_READ] = PAGE_READONLY, + [VM_WRITE] = PAGE_COPY, + [VM_WRITE | VM_READ] = PAGE_COPY, + [VM_EXEC] = PAGE_READONLY, + [VM_EXEC | VM_READ] = PAGE_READONLY, + [VM_EXEC | VM_WRITE] = PAGE_COPY, + [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY, + [VM_SHARED] = PAGE_NONE, + [VM_SHARED | VM_READ] = PAGE_READONLY, + [VM_SHARED | VM_WRITE] = PAGE_SHARED, + [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED, + [VM_SHARED | VM_EXEC] = PAGE_READONLY, + [VM_SHARED | VM_EXEC | VM_READ] = PAGE_READONLY, + [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED, + [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED +}; +DECLARE_VM_GET_PAGE_PROT + + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); + + // There is no user-space & kernel-space virtual memory boundary for LKL. + if (pgd) + memcpy(pgd, swapper_pg_dir, sizeof(swapper_pg_dir)); + + return pgd; +} + +void mmap_pages_for_ptes(unsigned long va, unsigned int nr, pte_t pte) +{ + // TODO: At the moment we mmap memory as RWX. However, we should mmap pages + // with proper access flags (read-only, read-write, etc) + enum lkl_prot prot = LKL_PROT_READ | LKL_PROT_WRITE | LKL_PROT_EXEC; + unsigned long pa = pte.pte & PAGE_MASK; + unsigned long pg_off = pa - (ARCH_PFN_OFFSET << PAGE_SHIFT); + + void *res = lkl_ops->shmem_mmap((void *)va, pg_off, PAGE_SIZE * nr, prot); + + BUG_ON(res != (void *)va); +} + +void munmap_page_for_pte(unsigned long addr, pte_t *xp) +{ + BUG_ON(lkl_ops->munmap((void *)addr, PAGE_SIZE) != 0); +} diff --git a/tools/lkl/Makefile.autoconf b/tools/lkl/Makefile.autoconf index 48e58540779b89..3c908f6031f3a4 100644 --- a/tools/lkl/Makefile.autoconf +++ b/tools/lkl/Makefile.autoconf @@ -187,6 +187,20 @@ define do_autoconf_fuzzing $(if $(LKL_LINE_COV),$(call set_kernel_config,LKL_LINE_COV,y)) endef +define do_autoconf_mmu + $(call set_autoconf_var,MMU,y) + $(call set_kernel_config,MMU,y) + # Explicitly set LKL MMU configuration parameters for Kconfig + $(call set_autoconf_var,LKL_MEMORY_START,0x50000000) + $(call set_kernel_config,LKL_MEMORY_START,0x50000000) + $(call set_autoconf_var,LKL_TASK_UNMAPPED_BASE,0x100000) + $(call set_kernel_config,LKL_TASK_UNMAPPED_BASE,0x100000) + # Don't need swap in LKL. CONFIG_SHMEM generates a compile-time assertion + # when build with gcc, however, works fine with clang. The issues is in macro + # HPAGE_PMD_SIZE which should be optimized out but is not in case of gcc. 
+ $(call set_kernel_config,SHMEM,n) +endef + define do_autoconf $(if $(LKL_FUZZING),$(call do_autoconf_fuzzing)) $(if $(LLVM),$(call do_autoconf_llvm),$(call do_autoconf_gnu)) @@ -197,6 +211,7 @@ define do_autoconf $(if $(filter $(EXEC_FMT),$(POSIX_HOSTS)),$(call posix_host,$(LD_FMT))) $(if $(filter $(EXEC_FMT),$(NT_HOSTS)),$(call nt_host,$(LD_FMT))) $(if $(and $(filter yes,$(kasan)),$(filter $(LD_FMT),$(KASAN_HOSTS))),$(call kasan_enable,$(LD_FMT))) + $(if $(MMU),$(call do_autoconf_mmu)) endef export do_autoconf diff --git a/tools/lkl/lib/posix-host.c b/tools/lkl/lib/posix-host.c index a667de02efa123..5b45924d470c65 100644 --- a/tools/lkl/lib/posix-host.c +++ b/tools/lkl/lib/posix-host.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0-only #define _GNU_SOURCE #include +#include #include #include #include #include #include +#include #include #include #include @@ -490,6 +492,35 @@ static int lkl_munmap(void *addr, unsigned long size) extern struct lkl_dev_pci_ops vfio_pci_ops; #endif +#ifdef LKL_HOST_CONFIG_MMU +static int shared_mem_fd = -1; +static unsigned long shared_mem_size; + +static void shmem_init(unsigned long size) +{ + int rwx = 00700; + char lkl_shmem_id[NAME_MAX]; + + snprintf(lkl_shmem_id, sizeof(lkl_shmem_id), "/lkl_phys_mem_%d", getpid()); + shared_mem_fd = shm_open(lkl_shmem_id, O_RDWR | O_CREAT, rwx); + assert(shared_mem_fd >= 0); + shared_mem_size = size; + assert(ftruncate(shared_mem_fd, shared_mem_size) == 0); +} + +static void *lkl_shmem_mmap(void *addr, unsigned long pg_off, + unsigned long size, enum lkl_prot mem_prot_flags) +{ + int prot = get_prot(mem_prot_flags); + int flags = MAP_SHARED | MAP_FIXED_NOREPLACE; + void *ret = mmap(addr, size, prot, flags, shared_mem_fd, pg_off); + + if (ret == MAP_FAILED) + return NULL; + return ret; +} +#endif // LKL_HOST_CONFIG_MMU + struct lkl_host_operations lkl_host_ops = { .panic = panic, .thread_create = thread_create, @@ -530,6 +561,10 @@ struct lkl_host_operations lkl_host_ops = { .memset = memset, .mmap = lkl_mmap, .munmap = lkl_munmap, +#ifdef LKL_HOST_CONFIG_MMU + .shmem_init = shmem_init, + .shmem_mmap = lkl_shmem_mmap, +#endif #ifdef LKL_HOST_CONFIG_VFIO_PCI .pci_ops = &vfio_pci_ops, #endif From c54ac0adc3e40eae255f7b6a62bead276921e769 Mon Sep 17 00:00:00 2001 From: Eugene Rodionov Date: Wed, 25 Dec 2024 18:49:06 +0000 Subject: [PATCH 2/6] Add mrproper to clean-conf LKL make target. Running `make -C tools/lkl clean-conf` doesn't remove generated kernel configuration (.config and .config.old) as well as generated kernel header files. As a result, subsequent `make -C tools/lkl all` might use .config from the previous build even if arch/lkl/Kconfig is changed. Adding mrproper target to clean-conf fixes this issue. Signed-off-by: Eugene Rodionov --- tools/lkl/Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/lkl/Makefile b/tools/lkl/Makefile index 777f97e8aefabe..a31f05bde3a74b 100644 --- a/tools/lkl/Makefile +++ b/tools/lkl/Makefile @@ -115,7 +115,10 @@ clean: $(call QUIET_CLEAN, liblkl.a)$(RM) $(OUTPUT)/liblkl.a $(call QUIET_CLEAN, targets)$(RM) $(TARGETS) bin/stat -clean-conf: clean +mrproper: clean + $(call QUIET_CLEAN, vmlinux)$(MAKE) -C ../.. 
ARCH=lkl $(KOPT) mrproper
+
+clean-conf: mrproper
 	$(call QUIET_CLEAN, Makefile.conf)$(RM) $(OUTPUT)/Makefile.conf
 	$(call QUIET_CLEAN, kernel_config.h)$(RM) $(OUTPUT)/include/kernel_config.h
 	$(call QUIET_CLEAN, kernel.config)$(RM) $(OUTPUT)/kernel.config
@@ -150,7 +153,7 @@ FUZZ_TARGETS := $(fuzzers-y:%=$(OUTPUT)%$(EXESUF))
 fuzzers: $(FUZZ_TARGETS)
 
 FORCE: ;
-.PHONY: all clean FORCE run-tests
+.PHONY: all clean clean-conf mrproper FORCE run-tests
 .PHONY: headers_install libraries_install programs_install install
 .NOTPARALLEL : lib/lkl.o
 .SECONDARY:
From 3b8a3911df568ab46f236ae233181951b7644a63 Mon Sep 17 00:00:00 2001
From: Eugene Rodionov
Date: Wed, 25 Dec 2024 19:08:55 +0000
Subject: [PATCH 3/6] Add MMU tests to LKL boot test suite.

The two test cases verify that `mmap` works for both `MAP_SHARED` and
`MAP_PRIVATE` mappings. It is important to pass the `MAP_POPULATE` flag
when calling the `lkl_sys_mmap` syscall: this flag populates the mapping
with pages up front so that no page fault happens when the mapping is
accessed.

```
make -C tools/lkl MMU=1 clean-conf all
tools/lkl/tests/boot
```

Signed-off-by: Eugene Rodionov
---
 arch/lkl/scripts/headers_install.py |  1 +
 tools/lkl/tests/boot.c              | 79 +++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)

diff --git a/arch/lkl/scripts/headers_install.py b/arch/lkl/scripts/headers_install.py
index e676566f52aa85..258a4f5006dce9 100755
--- a/arch/lkl/scripts/headers_install.py
+++ b/arch/lkl/scripts/headers_install.py
@@ -133,6 +133,7 @@ def replace(h):
 find_headers("arch/lkl/include/uapi/asm/syscalls.h")
 headers.add("arch/lkl/include/uapi/asm/host_ops.h")
 find_headers("include/uapi/linux/uhid.h")
+find_headers("include/uapi/linux/mman.h")
 find_headers("include/uapi/linux/input-event-codes.h")
 
 if 'LKL_INSTALL_ADDITIONAL_HEADERS' in os.environ:
diff --git a/tools/lkl/tests/boot.c b/tools/lkl/tests/boot.c
index 118903dc68bdfc..f4d739bb2e9528 100644
--- a/tools/lkl/tests/boot.c
+++ b/tools/lkl/tests/boot.c
@@ -19,6 +19,8 @@
 #include 
 #endif
 
+#include 
+
 #include "test.h"
 
 #ifndef __MINGW32__
@@ -528,6 +530,79 @@ static int lkl_test_kasan(void)
 #define KASAN_CMD_LINE
 #endif
 
+#ifdef LKL_HOST_CONFIG_MMU
+
+// TODO: for some reason doesn't contain definition of
+// LKL_MAP_FAILED macro. Thus, temporarily define it here.
+#ifndef LKL_MAP_FAILED
+#define LKL_MAP_FAILED ((void *)-1)
+#endif
+
+static int lkl_test_shared_mmap(void)
+{
+	int fd = lkl_sys_open("/tmp_file_", LKL_O_RDWR | LKL_O_CREAT, LKL_S_IRWXU);
+
+	if (fd < 0)
+		return TEST_FAILURE;
+
+	unsigned long mem_size = 15 * 4096;
+
+	if (lkl_sys_ftruncate(fd, mem_size) < 0)
+		return TEST_FAILURE;
+
+	int mem_prot = LKL_PROT_WRITE | LKL_PROT_READ;
+	// Super important to use LKL_MAP_POPULATE to force populating pages in the
+	// mapping as in the LKL context we don't have a way to fault in the pages.
+ int mem_flags = LKL_MAP_SHARED | LKL_MAP_POPULATE; + void *mem1 = lkl_sys_mmap(0, mem_size, mem_prot, mem_flags, fd, 0); + + if (mem1 == LKL_MAP_FAILED) + return TEST_FAILURE; + + memset(mem1, 0x17, mem_size); + + void *mem2 = lkl_sys_mmap(0, mem_size, mem_prot, mem_flags, fd, 0); + + if (mem2 == LKL_MAP_FAILED) + return TEST_FAILURE; + + // This should not happen + if (mem1 == mem2) + return TEST_FAILURE; + + if (memcmp(mem1, mem2, mem_size) != 0) + return TEST_FAILURE; + + if (lkl_sys_munmap((unsigned long)mem1, mem_size) != 0) + return TEST_FAILURE; + + if (lkl_sys_munmap((unsigned long)mem2, mem_size) != 0) + return TEST_FAILURE; + + return TEST_SUCCESS; +} + +static int lkl_test_private_mmap(void) +{ + unsigned long mem_size = 3 * 4096; + int mem_prot = LKL_PROT_WRITE | LKL_PROT_READ; + // Super important to use LKL_MAP_POPULATE to force populating pages in the + // mapping as in the LKL context we don't have a way to fault in the pages. + int mem_flags = LKL_MAP_ANONYMOUS | LKL_MAP_PRIVATE | LKL_MAP_POPULATE; + void *mem = lkl_sys_mmap(0, mem_size, mem_prot, mem_flags, -1, 0); + + if (mem == LKL_MAP_FAILED) + return TEST_FAILURE; + + *(unsigned int *)mem = 13; + + if (lkl_sys_munmap((unsigned long)mem, mem_size) < 0) + return TEST_FAILURE; + + return TEST_SUCCESS; +} +#endif // LKL_HOST_CONFIG_MMU + #define CMD_LINE "mem=32M loglevel=8 " KASAN_CMD_LINE static int lkl_test_start_kernel(void) @@ -590,6 +665,10 @@ struct lkl_test tests[] = { */ #ifndef __MINGW32__ LKL_TEST(many_syscall_threads), +#endif +#ifdef LKL_HOST_CONFIG_MMU + LKL_TEST(shared_mmap), + LKL_TEST(private_mmap), #endif LKL_TEST(stop_kernel), }; From 440d1906790f4b7ef468b6cde0f5fbaed933262f Mon Sep 17 00:00:00 2001 From: Eugene Rodionov Date: Wed, 25 Dec 2024 19:16:03 +0000 Subject: [PATCH 4/6] Implement LKL MMU Kunit test suite. The test suite implements just a single test case verifying that `vmalloc` works as expected. ``` make -C tools/lkl MMU=1 MMU_KUNIT=1 clean-conf all tools/lkl/tests/boot ``` Signed-off-by: Eugene Rodionov --- arch/lkl/Kconfig | 8 ++++++ arch/lkl/mm/Makefile | 4 +-- arch/lkl/mm/mmu_test.c | 54 +++++++++++++++++++++++++++++++++++++ tools/lkl/Makefile.autoconf | 7 +++++ tools/lkl/tests/boot.c | 34 ++++++++++++++++++++++- 5 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 arch/lkl/mm/mmu_test.c diff --git a/arch/lkl/Kconfig b/arch/lkl/Kconfig index 47733c24eaa6f0..25c228533887c7 100644 --- a/arch/lkl/Kconfig +++ b/arch/lkl/Kconfig @@ -104,6 +104,14 @@ config LKL_TASK_UNMAPPED_BASE Starting virtual address for LKL user-space mmap. It is assumed that the host mmap is able to map requested amount of memory starting from this address. + +config LKL_MMU_KUNIT_TEST + bool "Kunit tests for LKL MMU" + default n + depends on KUNIT + help + Kunit tests to test correctness of MMU-related kernel interfaces (such + as vmalloc, for example) which are difficult to test in user-space. 
endif config COREDUMP diff --git a/arch/lkl/mm/Makefile b/arch/lkl/mm/Makefile index a8c4cba1a88885..1e92fe7c6b17ae 100644 --- a/arch/lkl/mm/Makefile +++ b/arch/lkl/mm/Makefile @@ -3,5 +3,5 @@ KASAN_SANITIZE_kasan.o := n obj-y = bootmem.o obj-$(CONFIG_KASAN) += kasan.o - -obj-$(CONFIG_MMU) += mmu_mem.o \ No newline at end of file +obj-$(CONFIG_MMU) += mmu_mem.o +obj-$(CONFIG_LKL_MMU_KUNIT_TEST) += mmu_test.o \ No newline at end of file diff --git a/arch/lkl/mm/mmu_test.c b/arch/lkl/mm/mmu_test.c new file mode 100644 index 00000000000000..2409fd69b09776 --- /dev/null +++ b/arch/lkl/mm/mmu_test.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include +#include +#include +#include +#include +#include + +#include + +static void vmalloc_test(struct kunit *test) +{ + unsigned long nr_pages = 255; + void *ptr = vmalloc(nr_pages * PAGE_SIZE); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + + for (int i = 0; i < nr_pages; i++) + memset(ptr + i * PAGE_SIZE, i, PAGE_SIZE); + + for (int i = 0; i < nr_pages; i++) { + struct page *pg = vmalloc_to_page(ptr + i * PAGE_SIZE); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pg); + + unsigned char *va = (unsigned char *)kmap_local_page(pg); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, va); + KUNIT_ASSERT_PTR_NE(test, va, ptr + i * PAGE_SIZE); + + KUNIT_ASSERT_TRUE(test, va[0] == (unsigned char)i); + + kunmap_local(va); + } + + vfree(ptr); +} + +static struct kunit_case mmu_kunit_test_cases[] = { + KUNIT_CASE(vmalloc_test), + {} +}; + +static struct kunit_suite lkl_mmu_kunit_test_suite = { + .name = "lkl_mmu", + .test_cases = mmu_kunit_test_cases, +}; + +kunit_test_suite(lkl_mmu_kunit_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/tools/lkl/Makefile.autoconf b/tools/lkl/Makefile.autoconf index 3c908f6031f3a4..e884f364521912 100644 --- a/tools/lkl/Makefile.autoconf +++ b/tools/lkl/Makefile.autoconf @@ -187,6 +187,12 @@ define do_autoconf_fuzzing $(if $(LKL_LINE_COV),$(call set_kernel_config,LKL_LINE_COV,y)) endef +define mmu_test_enable + $(call set_autoconf_var,LKL_MMU_TEST,y) + $(call set_kernel_config,KUNIT,y) + $(call set_kernel_config,LKL_MMU_KUNIT_TEST,y) +endef + define do_autoconf_mmu $(call set_autoconf_var,MMU,y) $(call set_kernel_config,MMU,y) @@ -199,6 +205,7 @@ define do_autoconf_mmu # when build with gcc, however, works fine with clang. The issues is in macro # HPAGE_PMD_SIZE which should be optimized out but is not in case of gcc. 
 	$(call set_kernel_config,SHMEM,n)
+	$(if $(MMU_KUNIT), $(call mmu_test_enable))
 endef
 
 define do_autoconf
diff --git a/tools/lkl/tests/boot.c b/tools/lkl/tests/boot.c
index f4d739bb2e9528..86c11e6c788d7d 100644
--- a/tools/lkl/tests/boot.c
+++ b/tools/lkl/tests/boot.c
@@ -603,7 +603,36 @@ static int lkl_test_private_mmap(void)
 }
 #endif // LKL_HOST_CONFIG_MMU
 
-#define CMD_LINE "mem=32M loglevel=8 " KASAN_CMD_LINE
+#ifdef LKL_HOST_CONFIG_LKL_MMU_TEST
+static int lkl_test_kunit_mmu(void)
+{
+	char *log = strdup(boot_log);
+	char *line = NULL;
+	int n;
+	char c, d;
+
+	line = strtok(log, "\n");
+	while (line) {
+		if (sscanf(line, "[ %*f] ok %d lkl_m%c%c", &n, &c, &d) == 3 &&
+		    c == 'm' && d == 'u') {
+			lkl_test_logf("%s", line);
+			return TEST_SUCCESS;
+		}
+
+		line = strtok(NULL, "\n");
+	}
+
+	free(log);
+
+	return TEST_FAILURE;
+}
+
+#define LKL_MMU_TEST_CMD_LINE "kunit.filter_glob=lkl_mmu "
+#else
+#define LKL_MMU_TEST_CMD_LINE
+#endif // LKL_HOST_CONFIG_LKL_MMU_TEST
+
+#define CMD_LINE "mem=32M loglevel=8 " KASAN_CMD_LINE LKL_MMU_TEST_CMD_LINE
 
 static int lkl_test_start_kernel(void)
 {
@@ -669,6 +698,9 @@ struct lkl_test tests[] = {
 #ifdef LKL_HOST_CONFIG_MMU
 	LKL_TEST(shared_mmap),
 	LKL_TEST(private_mmap),
+#endif
+#ifdef LKL_HOST_CONFIG_LKL_MMU_TEST
+	LKL_TEST(kunit_mmu),
 #endif
 	LKL_TEST(stop_kernel),
 };
From a01f3ceceaa127fe5922f76c537fcdb8bdc205ee Mon Sep 17 00:00:00 2001
From: Eugene Rodionov
Date: Wed, 25 Dec 2024 18:20:25 -0800
Subject: [PATCH 5/6] Fix makefile to handle PATH variable with whitespace.

If the PATH variable contains paths with whitespace, this can lead to
build breakage. This change fixes the issue by quoting PATH when it is
extended.

Signed-off-by: Eugene Rodionov
---
 tools/lkl/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lkl/Makefile b/tools/lkl/Makefile
index a31f05bde3a74b..17f105ddecb5cf 100644
--- a/tools/lkl/Makefile
+++ b/tools/lkl/Makefile
@@ -81,7 +81,7 @@ $(ASM_CONFIG): $(OUTPUT)include/kernel_config.h
 # rule to build lkl.o
 $(OUTPUT)lib/lkl.o: bin/stat $(ASM_CONFIG) $(DOT_CONFIG)
 	# this workaround is for arm32 linker (ld.gold)
-	$(Q)export PATH=$(srctree)/tools/lkl/bin/:${PATH} ;\
+	$(Q)export PATH="$(srctree)/tools/lkl/bin/:${PATH}" ;\
 	$(MAKE) -C ../.. ARCH=lkl $(KOPT)
 	$(MAKE) -C ../.. ARCH=lkl $(KOPT) install INSTALL_PATH=$(OUTPUT)
From 1ee079e00a92bccd153ab16ace2bf26a10a9f212 Mon Sep 17 00:00:00 2001
From: Eugene Rodionov
Date: Thu, 2 Jan 2025 13:51:05 -0800
Subject: [PATCH 6/6] Add LKL test suite with MMU for github CI.

Run the LKL tests (boot, disk, network) for LKL built with the MMU
configuration and KASan enabled.

Signed-off-by: Eugene Rodionov
---
 .github/workflows/ci.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2b5114ca55ac05..c41f3e84db57f9 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -22,6 +22,11 @@ jobs:
             runs_on: ubuntu-22.04
             shell: bash
             build_options: "kasan=yes kasan_test=yes"
+          - displayTargetName: mmu_kasan
+            os: unix
+            runs_on: ubuntu-22.04
+            shell: bash
+            build_options: "MMU=1 MMU_KUNIT=1 kasan=yes"
           - displayTargetName: windows-2019
             os: windows
             runs_on: windows-2019
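---

For reviewers: a small standalone sketch (not part of the series) of how the 3-level layout introduced in PATCH 1/6 decomposes a virtual address. The shift and table-size constants mirror PGDIR_SHIFT, PMD_SHIFT, PAGE_SHIFT and PTRS_PER_* from pgtable-mmu-3level.h; the sample address is arbitrary (CONFIG_LKL_MEMORY_START plus a small offset).

```c
/*
 * Standalone sketch: split a virtual address into pgd/pmd/pte indices
 * and a page offset, using the constants from the LKL 3-level layout.
 */
#include <stdio.h>

#define PAGE_SHIFT   12
#define PMD_SHIFT    21
#define PGDIR_SHIFT  30
#define PTRS_PER_PTE 512UL
#define PTRS_PER_PMD 512UL
#define PTRS_PER_PGD 512UL

int main(void)
{
	/* Arbitrary sample address: CONFIG_LKL_MEMORY_START + 0x1234. */
	unsigned long va = 0x50000000UL + 0x1234UL;

	/* 9 + 9 + 9 index bits plus a 12-bit page offset cover 39 bits. */
	unsigned long pgd_idx = (va >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
	unsigned long pmd_idx = (va >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
	unsigned long pte_idx = (va >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
	unsigned long offset  = va & ((1UL << PAGE_SHIFT) - 1);

	printf("va=%#lx -> pgd=%lu pmd=%lu pte=%lu offset=%#lx\n",
	       va, pgd_idx, pmd_idx, pte_idx, offset);
	return 0;
}
```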