From 886176c618e1ba700fefc6e1bfecb6e5ce69cae7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 07:38:43 +0000 Subject: [PATCH 1/3] Replace the `nm` symbol check with a Rust implementation --- Cargo.toml | 1 + ci/run.sh | 179 +++++++++++++++++--------------- crates/symbol-check/Cargo.toml | 10 ++ crates/symbol-check/src/main.rs | 154 +++++++++++++++++++++++++++ 4 files changed, 263 insertions(+), 81 deletions(-) create mode 100644 crates/symbol-check/Cargo.toml create mode 100644 crates/symbol-check/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index 75bb81ec1..91ae662c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "crates/libm-macros", "crates/musl-math-sys", "crates/panic-handler", + "crates/symbol-check", "crates/util", "libm", "libm-test", diff --git a/ci/run.sh b/ci/run.sh index 68d13c130..e7ebba644 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -48,20 +48,26 @@ else fi -declare -a rlib_paths - -# Set the `rlib_paths` global array to a list of all compiler-builtins rlibs -update_rlib_paths() { +# Run a command for each `compiler_builtins` rlib file +for_each_rlib() { if [ -d /builtins-target ]; then rlib_paths=( /builtins-target/"${target}"/debug/deps/libcompiler_builtins-*.rlib ) else rlib_paths=( target/"${target}"/debug/deps/libcompiler_builtins-*.rlib ) fi + + if [ "${#rlib_paths[@]}" -lt 1 ]; then + echo "rlibs expected but not found" + exit 1 + fi + + "$@" "${rlib_paths[@]}" } + # Remove any existing artifacts from previous tests that don't set #![compiler_builtins] update_rlib_paths -rm -f "${rlib_paths[@]}" +for_each_rlib rm -f cargo build -p compiler_builtins --target "$target" cargo build -p compiler_builtins --target "$target" --release @@ -72,57 +78,64 @@ cargo build -p compiler_builtins --target "$target" --features no-asm --release cargo build -p compiler_builtins --target "$target" --features no-f16-f128 cargo build -p compiler_builtins --target "$target" --features no-f16-f128 --release -PREFIX=${target//unknown-/}- -case "$target" in - armv7-*) - PREFIX=arm-linux-gnueabihf- - ;; - thumb*) - PREFIX=arm-none-eabi- - ;; - *86*-*) - PREFIX= - ;; -esac - -NM=$(find "$(rustc --print sysroot)" \( -name llvm-nm -o -name llvm-nm.exe \) ) -if [ "$NM" = "" ]; then - NM="${PREFIX}nm" -fi - -# i686-pc-windows-gnu tools have a dependency on some DLLs, so run it with -# rustup run to ensure that those are in PATH. -TOOLCHAIN="$(rustup show active-toolchain | sed 's/ (default)//')" -if [[ "$TOOLCHAIN" == *i686-pc-windows-gnu ]]; then - NM="rustup run $TOOLCHAIN $NM" -fi +symcheck=(cargo run -p symbol-check) +[[ "$target" = "wasm"* ]] && symcheck+=(--features wasm) # Look out for duplicated symbols when we include the compiler-rt (C) implementation -update_rlib_paths -for rlib in "${rlib_paths[@]}"; do - set +x - echo "================================================================" - echo "checking $rlib for duplicate symbols" - echo "================================================================" - set -x +for_each_rlib "${symcheck[@]}" -- check-duplicates +for_each_rlib rm -f + +# PREFIX=${target//unknown-/}- +# case "$target" in +# armv7-*) +# PREFIX=arm-linux-gnueabihf- +# ;; +# thumb*) +# PREFIX=arm-none-eabi- +# ;; +# *86*-*) +# PREFIX= +# ;; +# esac + +# NM=$(find "$(rustc --print sysroot)" \( -name llvm-nm -o -name llvm-nm.exe \) ) +# if [ "$NM" = "" ]; then +# NM="${PREFIX}nm" +# fi + +# # i686-pc-windows-gnu tools have a dependency on some DLLs, so run it with +# # rustup run to ensure that those are in PATH. +# TOOLCHAIN="$(rustup show active-toolchain | sed 's/ (default)//')" +# if [[ "$TOOLCHAIN" == *i686-pc-windows-gnu ]]; then +# NM="rustup run $TOOLCHAIN $NM" +# fi + +# # Look out for duplicated symbols when we include the compiler-rt (C) implementation +# update_rlib_paths +# for rlib in "${rlib_paths[@]}"; do +# set +x +# echo "================================================================" +# echo "checking $rlib for duplicate symbols" +# echo "================================================================" +# set -x - duplicates_found=0 - - # NOTE On i586, It's normal that the get_pc_thunk symbol appears several - # times so ignore it - $NM -g --defined-only "$rlib" 2>&1 | - sort | - uniq -d | - grep -v __x86.get_pc_thunk --quiet | - grep 'T __' && duplicates_found=1 - - if [ "$duplicates_found" != 0 ]; then - echo "error: found duplicate symbols" - exit 1 - else - echo "success; no duplicate symbols found" - fi -done +# duplicates_found=0 + +# # NOTE On i586, It's normal that the get_pc_thunk symbol appears several +# # times so ignore it +# $NM -g --defined-only "$rlib" 2>&1 | +# sort | +# uniq -d | +# grep -v __x86.get_pc_thunk --quiet | +# grep 'T __' && duplicates_found=1 + +# if [ "$duplicates_found" != 0 ]; then +# echo "error: found duplicate symbols" +# exit 1 +# else +# echo "success; no duplicate symbols found" +# fi +# done rm -f "${rlib_paths[@]}" @@ -143,34 +156,38 @@ build_intrinsics_test --features c --release CARGO_PROFILE_DEV_LTO=true build_intrinsics_test CARGO_PROFILE_RELEASE_LTO=true build_intrinsics_test --release -# Ensure no references to any symbols from core -update_rlib_paths -for rlib in "${rlib_paths[@]}"; do - set +x - echo "================================================================" - echo "checking $rlib for references to core" - echo "================================================================" - set -x - - tmpdir="${CARGO_TARGET_DIR:-target}/tmp" - test -d "$tmpdir" || mkdir "$tmpdir" - defined="$tmpdir/defined_symbols.txt" - undefined="$tmpdir/defined_symbols.txt" - - $NM --quiet -U "$rlib" | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > "$defined" - $NM --quiet -u "$rlib" | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > "$undefined" - grep_has_results=0 - grep -v -F -x -f "$defined" "$undefined" && grep_has_results=1 - - if [ "$target" = "powerpc64-unknown-linux-gnu" ]; then - echo "FIXME: powerpc64 fails these tests" - elif [ "$grep_has_results" != 0 ]; then - echo "error: found unexpected references to core" - exit 1 - else - echo "success; no references to core found" - fi -done +for_each_rlib "${symcheck[@]}" -- check-core-syms + +# # Ensure no references to any symbols from core +# update_rlib_paths +# for rlib in "${rlib_paths[@]}"; do +# set +x +# echo "================================================================" +# echo "checking $rlib for references to core" +# echo "================================================================" +# set -x + +# tmpdir="${CARGO_TARGET_DIR:-target}/tmp" +# test -d "$tmpdir" || mkdir "$tmpdir" +# defined="$tmpdir/defined_symbols.txt" +# undefined="$tmpdir/defined_symbols.txt" + +# $NM --quiet -U "$rlib" | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > "$defined" +# $NM --quiet -u "$rlib" | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > "$undefined" +# grep_has_results=0 +# grep -v -F -x -f "$defined" "$undefined" && grep_has_results=1 + +# if [ "$target" = "powerpc64-unknown-linux-gnu" ]; then +# echo "FIXME: powerpc64 fails these tests" +# elif [ "$grep_has_results" != 0 ]; then +# echo "error: found unexpected references to core" +# exit 1 +# else +# echo "success; no references to core found" +# fi +# done + + # Test libm diff --git a/crates/symbol-check/Cargo.toml b/crates/symbol-check/Cargo.toml new file mode 100644 index 000000000..31c2f4bad --- /dev/null +++ b/crates/symbol-check/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "symbol-check" +version = "0.1.0" +edition = "2024" + +[dependencies] +object = "0.36.7" + +[features] +wasm = ["object/wasm"] diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs new file mode 100644 index 000000000..c1d6f12b1 --- /dev/null +++ b/crates/symbol-check/src/main.rs @@ -0,0 +1,154 @@ +//! Tool used by CI to inspect compiler-builtins archives and help ensure we won't run into any +//! linking errors. + +use object::read::archive::{ArchiveFile, ArchiveMember}; +use object::{Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection}; +use std::collections::{BTreeMap, BTreeSet}; +use std::{fs, path::Path}; + +const USAGE: &str = "Usage: + + symbol-check check-duplicates ARCHIVE ... + symbol-check check-core-syms ARCHIVE ... + +Note that multiple archives may be specified but they are checked independently +rather than as a group."; + +fn main() { + // Create a `&str` vec so we can match on it. + let args = std::env::args().collect::>(); + let args_ref = args.iter().map(String::as_str).collect::>(); + + match &args_ref[1..] { + ["check-duplicates", rest @ ..] if !rest.is_empty() => { + rest.iter().for_each(verify_no_duplicates); + } + ["check-core-syms", rest @ ..] if !rest.is_empty() => { + rest.iter().for_each(verify_core_symbols); + } + _ => { + println!("{USAGE}"); + std::process::exit(1); + } + } +} + +#[expect(unused)] // only for printing +#[derive(Clone, Debug)] +struct SymInfo { + name: String, + kind: SymbolKind, + scope: SymbolScope, + section: SymbolSection, + is_undefined: bool, + is_global: bool, + is_local: bool, + is_weak: bool, + is_common: bool, + address: u64, + object: String, +} + +impl SymInfo { + fn new(sym: &Symbol, member: &ArchiveMember) -> Self { + Self { + name: sym.name().expect("missing name").to_owned(), + kind: sym.kind(), + scope: sym.scope(), + section: sym.section(), + is_undefined: sym.is_undefined(), + is_global: sym.is_global(), + is_local: sym.is_local(), + is_weak: sym.is_weak(), + is_common: sym.is_common(), + address: sym.address(), + object: String::from_utf8_lossy(member.name()).into_owned(), + } + } +} + +/// Ensure that the same global symbol isn't defined in multiple object files within an archive. +fn verify_no_duplicates(path: impl AsRef) { + println!("Checking `{}` for duplicates", path.as_ref().display()); + + let mut syms = BTreeMap::::new(); + let mut dups = Vec::new(); + + for_each_symbol(path, |sym, member| { + // Only check defined globals, exclude wasm file symbols + if !sym.is_global() || sym.is_undefined() || sym.kind() == SymbolKind::File { + return; + } + + let info = SymInfo::new(&sym, member); + match syms.get(&info.name) { + Some(existing) => { + dups.push(info); + dups.push(existing.clone()); + } + None => { + syms.insert(info.name.clone(), info); + } + } + }); + + if cfg!(windows) { + // Ignore literal constants + let allowed_dup_pfx = ["__real@", "__xmm@"]; + dups.retain(|sym| !allowed_dup_pfx.iter().any(|pfx| sym.name.starts_with(pfx))); + } + + if !dups.is_empty() { + dups.sort_unstable_by(|a, b| a.name.cmp(&b.name)); + panic!("found duplicate symbols: {dups:#?}"); + } + + println!("success: no duplicate symbols found"); +} + +/// Ensure that there are no references to symbols from `core` that aren't also (somehow) defined. +fn verify_core_symbols(path: impl AsRef) { + println!( + "Checking `{}` for references to core", + path.as_ref().display() + ); + + let mut defined = BTreeSet::new(); + let mut undefined = Vec::new(); + + for_each_symbol(path, |sym, member| { + // Find only symbols from `core` + if !sym.name().unwrap().contains("_ZN4core") { + return; + } + + let info = SymInfo::new(&sym, member); + if info.is_undefined { + undefined.push(info); + } else { + defined.insert(info.name); + } + }); + + // Discard any symbols that are defined somewhere in the archive + undefined.retain(|sym| !defined.contains(&sym.name)); + + if !undefined.is_empty() { + undefined.sort_unstable_by(|a, b| a.name.cmp(&b.name)); + panic!("found undefined symbols from core: {undefined:#?}"); + } + + println!("success: no undefined references to core found"); +} + +/// For a given archive path, do something with each symbol. +fn for_each_symbol(path: impl AsRef, mut f: impl FnMut(Symbol, &ArchiveMember)) { + let data = fs::read(path).expect("reading file failed"); + let archive = ArchiveFile::parse(data.as_slice()).expect("archive parse failed"); + for member in archive.members() { + let member = member.expect("failed to access member"); + let obj_data = member.data(&*data).expect("failed to access object"); + let obj = object::File::parse(obj_data).expect("failed to parse object"); + obj.symbols().for_each(|sym| f(sym, &member)); + } +} From 9646d92392cb09d4af546c7e8e98d2ce49215c62 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 22 Apr 2025 06:58:30 +0000 Subject: [PATCH 2/3] test with patched object --- crates/symbol-check/Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/symbol-check/Cargo.toml b/crates/symbol-check/Cargo.toml index 31c2f4bad..436e126c3 100644 --- a/crates/symbol-check/Cargo.toml +++ b/crates/symbol-check/Cargo.toml @@ -4,7 +4,8 @@ version = "0.1.0" edition = "2024" [dependencies] -object = "0.36.7" +# object = "0.36.7" +object = { git = "https://github.com/philipc/object.git", branch = "issue-471" } [features] wasm = ["object/wasm"] From 7360137cd3dae9e61f5a77539b7582f314f00443 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 22 Apr 2025 07:02:15 +0000 Subject: [PATCH 3/3] fixup! Replace the `nm` symbol check with a Rust implementation --- ci/run.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/run.sh b/ci/run.sh index e7ebba644..0f3da8160 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -66,7 +66,6 @@ for_each_rlib() { # Remove any existing artifacts from previous tests that don't set #![compiler_builtins] -update_rlib_paths for_each_rlib rm -f cargo build -p compiler_builtins --target "$target"