diff --git a/elf/arch-arm32.cc b/elf/arch-arm32.cc index 65a9dede1a..cb7a960282 100644 --- a/elf/arch-arm32.cc +++ b/elf/arch-arm32.cc @@ -316,7 +316,7 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { u8 *loc = base + rel.r_offset; if (!sym.file) { - record_undef_error(ctx, rel); + add_undef(ctx, file, sym, this, rel.r_offset); continue; } @@ -370,7 +370,7 @@ void InputSection::scan_relocations(Context &ctx) { Symbol &sym = *file.symbols[rel.r_sym]; if (!sym.file) { - record_undef_error(ctx, rel); + add_undef(ctx, file, sym, this, rel.r_offset); continue; } diff --git a/elf/arch-arm64.cc b/elf/arch-arm64.cc index 46312bd365..4e17fa5e4a 100644 --- a/elf/arch-arm64.cc +++ b/elf/arch-arm64.cc @@ -358,7 +358,7 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { u8 *loc = base + rel.r_offset; if (!sym.file) { - record_undef_error(ctx, rel); + add_undef(ctx, file, sym, this, rel.r_offset); continue; } @@ -409,7 +409,7 @@ void InputSection::scan_relocations(Context &ctx) { Symbol &sym = *file.symbols[rel.r_sym]; if (!sym.file) { - record_undef_error(ctx, rel); + add_undef(ctx, file, sym, this, rel.r_offset); continue; } diff --git a/elf/arch-i386.cc b/elf/arch-i386.cc index 86526fc823..069e172bd5 100644 --- a/elf/arch-i386.cc +++ b/elf/arch-i386.cc @@ -336,7 +336,7 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { u8 *loc = base + rel.r_offset; if (!sym.file) { - record_undef_error(ctx, rel); + add_undef(ctx, file, sym, this, rel.r_offset); continue; } @@ -442,7 +442,7 @@ void InputSection::scan_relocations(Context &ctx) { Symbol &sym = *file.symbols[rel.r_sym]; if (!sym.file) { - record_undef_error(ctx, rel); + add_undef(ctx, file, sym, this, rel.r_offset); continue; } diff --git a/elf/arch-riscv64.cc b/elf/arch-riscv64.cc index 2eef19fbd0..87dc7fb466 100644 --- a/elf/arch-riscv64.cc +++ b/elf/arch-riscv64.cc @@ -405,7 +405,7 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { u8 *loc = base + 
rel.r_offset; if (!sym.file) { - record_undef_error(ctx, rel); + add_undef(ctx, file, sym, this, rel.r_offset); continue; } @@ -526,7 +526,7 @@ void InputSection::scan_relocations(Context &ctx) { Symbol &sym = *file.symbols[rel.r_sym]; if (!sym.file) { - record_undef_error(ctx, rel); + add_undef(ctx, file, sym, this, rel.r_offset); continue; } diff --git a/elf/arch-x86-64.cc b/elf/arch-x86-64.cc index 06fde383b9..bd0763346a 100644 --- a/elf/arch-x86-64.cc +++ b/elf/arch-x86-64.cc @@ -556,7 +556,7 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { u8 *loc = base + rel.r_offset; if (!sym.file) { - record_undef_error(ctx, rel); + add_undef(ctx, file, sym, this, rel.r_offset); continue; } @@ -667,7 +667,7 @@ void InputSection::scan_relocations(Context &ctx) { u8 *loc = (u8 *)(contents.data() + rel.r_offset); if (!sym.file) { - record_undef_error(ctx, rel); + add_undef(ctx, file, sym, this, rel.r_offset); continue; } diff --git a/elf/dwarf.cc b/elf/dwarf.cc index da6db0877d..0bab3f1bf8 100644 --- a/elf/dwarf.cc +++ b/elf/dwarf.cc @@ -495,6 +495,276 @@ read_address_areas(Context &ctx, ObjectFile &file, i64 offset) { return {}; } +// Read the DWARFv2 file and directory info from .debug_line (i.e. from the include_directories +// and file_names fields in the header). +static +std::pair +read_line_file_v2(const u8 *file_data, const u8 *end, u32 file) { + const u8 *data = file_data; + // First skip include_directories to read file and find out which directory is needed + // (include_directories ends with an empty item containing only null). + for (;;) { + if (*data++ == '\0') + break; + data = (u8 *)memchr(data, '\0', end - data); + if (data == nullptr || end - data < 2) + return {}; + ++data; + } + + // Skip file entries before the one we want. 
+ for (int i = 1; i < file; ++i) { + data = (u8 *)memchr(data, '\0', end - data); + if (data == nullptr) + return {}; + ++data; + read_uleb(data); + read_uleb(data); + read_uleb(data); + if (*data == '\0') + return {}; + } + + std::string_view name((const char*)data); + data += name.size() + 1; + u32 directory_index = read_uleb(data); + std::string_view directory; + if (directory_index > 0) { + data = file_data; + // Skip directory entries before the one we want. + for (int i = 1; i < directory_index; ++i) { + data = (u8 *)memchr(data, '\0', end - data); + if (data == nullptr) + return {}; + ++data; + if (*data == '\0') + return {}; + } + directory = std::string_view((const char*)data); + } + return {name, directory}; +} + +// Process .debug_line for the given compilation unit and find the source location +// for the given address. +// The .debug_line section is instructions for a state machine that builds a list +// of addresses and their source information. +template +static +std::tuple +find_source_location_cu(Context &ctx, ObjectFile &object_file, i64 offset, u64 addr) { + const u8 *data = get_buffer(ctx, ctx.debug_line) + offset; + + ul32 len = *(ul32 *)data; + if (len == 0xffffffff) + return {}; // DWARF64 + data += 4; + const u8 *end = data + len; + + u32 dwarf_version = *(ul16 *)data; + if (dwarf_version < 2 || dwarf_version > 5) + return {}; // unknown DWARF version + data += 2; + + if (dwarf_version == 5) { + if (u32 address_size = *data; address_size != E::word_size) + return {}; // unsupported address size + data += 2; + } + u32 header_length = *(ul32 *)data; + if (header_length == 0xffffffff) + return {}; // DWARF64 + data += 4; + const u8 *data_after_header = data + header_length; + u8 minimum_instruction_length = *data++; + u8 maximum_operations_per_instruction = 1; + if (dwarf_version >= 4) + maximum_operations_per_instruction = *data++; + ++data; // default_is_stmt + i8 line_base = *(i8 *)data++; + u8 line_range = *data++; + u8 opcode_base = 
*data++; + std::span standard_opcode_lengths = std::span(data, opcode_base - 1); + data += opcode_base - 1; + const u8 *file_data = data; + data = data_after_header; + + // This is a partial interpreter for the .debug_line instructions for the state + // machine (DWARF spec section 6.2). We only care about the address, file, line + // and column data (and additionally op_index, since that one is needed for address). + u64 address = 0; + u32 op_index = 0; + u32 file = 1; + u32 line = 1; + u32 column = 0; + u64 last_address; + u32 last_file; + u32 last_line; + u32 last_column; + bool last_valid = false; + + auto advance = [&](i32 operation_advance) { + address += minimum_instruction_length * ((op_index + operation_advance) + / maximum_operations_per_instruction); + op_index = (op_index + operation_advance) % maximum_operations_per_instruction; + }; + auto advance_opcode = [&](u8 opcode) { + i32 adjusted_opcode = opcode - (i16)opcode_base; + line += line_base + (adjusted_opcode % line_range); + i32 operation_advance = adjusted_opcode / line_range; + return advance(operation_advance); + }; + + while (data < end) { + bool check_address = false; + bool end_sequence = false; + u8 opcode = *data; + ++data; + if (opcode < opcode_base) { + // standard opcodes (including extended opcodes) + switch (opcode) { + case DW_LNS_copy: + check_address = true; + break; + case DW_LNS_advance_pc: + advance(read_uleb(data)); + check_address = true; + break; + case DW_LNS_advance_line: + line += read_sleb(data); + break; + case DW_LNS_set_file: + file = read_uleb(data); + break; + case DW_LNS_set_column: + column = read_uleb(data); + break; + case DW_LNS_const_add_pc: + advance((255 - opcode_base) / line_range); // same address step as special opcode 255, but line must not change + break; + case DW_LNS_fixed_advance_pc: + address += *(ul16*)data; + data += 2; + op_index = 0; + break; + case 0: { + // extended opcodes + u32 bytes = read_uleb(data); + u8 extended_opcode = *data; + ++data; + switch (extended_opcode) { + case DW_LNE_end_sequence: + check_address = true; + 
end_sequence = true; + break; + case DW_LNE_set_address: + address = *(typename E::WordTy *)data; + data += E::word_size; + op_index = 0; + break; + case DW_LNE_set_discriminator: + read_uleb(data); + break; + case DW_LNE_define_file: + return {}; // deprecated + default: + data += bytes - 1; // the length counts the opcode byte, which was already consumed + break; + } + break; + } + default: + // All the unhandled standard opcodes, including unknown (vendor + // extensions), skip their arguments. + for (u8 i = 0; i < standard_opcode_lengths[opcode - 1]; ++i) + read_uleb(data); + break; + } + } else { + // special opcodes + advance_opcode(opcode); + check_address = true; + } + + if (check_address) { + // Check since the last (valid) address until before the current one. + // If found, the last location is the location of the asked for address. + if (last_valid && addr >= last_address && addr < address) { + std::string_view filename; + std::string_view directory; + if (dwarf_version <= 4) + std::tie(filename, directory) = read_line_file_v2(file_data, data_after_header, last_file); // the match is the previous row, so use its file + else + return {}; // TODO + if (filename.empty()) + return {}; + return {filename, directory, last_line, last_column}; + } + last_address = address; + last_file = file; + last_line = line; + last_column = column; + last_valid = true; + } + if (end_sequence) { + address = 0; + op_index = 0; + file = 1; + line = 1; + column = 0; + last_valid = false; // the end row closes its sequence; never pair it with the next sequence's first row + } + } + + return {}; +} + +// Return filename, line and column as source location for the address +// in the given object file, by finding it in .debug_line. +// +// It is necessary to find the compilation unit for the given address, +// and then process the relevant part of .debug_line for that unit. +template +std::tuple +find_source_location(Context &ctx, ObjectFile &file, u64 address) { + if (!file.debug_info) + return {}; + + // Find the compilation unit that contains the given address. 
+ u64 offset = file.debug_info->offset; + + for (i64 i = 0; i < file.compunits.size(); i++) { + std::vector addrs = read_address_areas(ctx, file, offset); + for (i64 j = 0; j < addrs.size(); j += 2) { + if (address >= addrs[j] && address < addrs[j + 1]) { + return find_source_location_cu(ctx, file, offset, address); // NOTE(review): offset is a .debug_info position, yet the callee adds it to the .debug_line buffer - confirm + } + } + offset += file.compunits[i].size(); // step to the next unit once per compunit, not once per address range + } + + return {}; +} + +template +void setup_context_debuginfo(Context &ctx) { + for (Chunk *chunk : ctx.chunks) { + std::string_view name = chunk->name; + if (name == ".debug_info" || name == ".zdebug_info") + ctx.debug_info = chunk; + if (name == ".debug_abbrev" || name == ".zdebug_abbrev") + ctx.debug_abbrev = chunk; + if (name == ".debug_ranges" || name == ".zdebug_ranges") + ctx.debug_ranges = chunk; + if (name == ".debug_addr" || name == ".zdebug_addr") + ctx.debug_addr = chunk; + if (name == ".debug_rnglists" || name == ".zdebug_rnglists") + ctx.debug_rnglists = chunk; + if (name == ".debug_line" || name == ".zdebug_line") + ctx.debug_line = chunk; + } +} + #define INSTANTIATE(E) \ template std::vector \ read_compunits(Context &, ObjectFile &); \ @@ -503,7 +773,11 @@ read_address_areas(Context &ctx, ObjectFile &file, i64 offset) { template i64 \ estimate_address_areas(Context &, ObjectFile &); \ template std::vector \ - read_address_areas(Context &, ObjectFile &, i64) + read_address_areas(Context &, ObjectFile &, i64); \ + template std::tuple \ + find_source_location(Context &ctx, ObjectFile &file, u64 address); \ + template void \ + setup_context_debuginfo(Context &ctx) INSTANTIATE_ALL; diff --git a/elf/elf.h b/elf/elf.h index 4f037b7e65..6aa3d481c4 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -1123,6 +1123,34 @@ static constexpr u32 DW_RLE_base_address = 0x05; static constexpr u32 DW_RLE_start_end = 0x06; static constexpr u32 DW_RLE_start_length = 0x07; +static constexpr u8 DW_LNS_copy = 0x01; +static constexpr u8 DW_LNS_advance_pc = 0x02; +static constexpr u8 DW_LNS_advance_line = 0x03; +static 
constexpr u8 DW_LNS_set_file = 0x04; +static constexpr u8 DW_LNS_set_column = 0x05; +static constexpr u8 DW_LNS_negate_stmt = 0x06; +static constexpr u8 DW_LNS_set_basic_block = 0x07; +static constexpr u8 DW_LNS_const_add_pc = 0x08; +static constexpr u8 DW_LNS_fixed_advance_pc = 0x09; +static constexpr u8 DW_LNS_set_prologue_end = 0x0a; +static constexpr u8 DW_LNS_set_prologue_begin = 0x0b; +static constexpr u8 DW_LNS_set_isa = 0x0c; + +static constexpr u8 DW_LNE_end_sequence = 0x01; +static constexpr u8 DW_LNE_set_address = 0x02; +static constexpr u8 DW_LNE_define_file = 0x03; +static constexpr u8 DW_LNE_set_discriminator = 0x04; +static constexpr u8 DW_LNE_lo_user = 0x80; +static constexpr u8 DW_LNE_hi_user = 0xff; + +static constexpr u16 DW_LNCT_path = 0x01; +static constexpr u16 DW_LNCT_directory = 0x02; +static constexpr u16 DW_LNCT_timestamp = 0x03; +static constexpr u16 DW_LNCT_size = 0x04; +static constexpr u16 DW_LNCT_MD5 = 0x05; +static constexpr u16 DW_LNCT_lo_user = 0x2000; +static constexpr u16 DW_LNCT_hi_user = 0x3fff; + struct Elf64Sym { bool is_defined() const { return !is_undef(); } bool is_undef() const { return st_shndx == SHN_UNDEF; } diff --git a/elf/input-files.cc b/elf/input-files.cc index 6681610115..49ad9d84fe 100644 --- a/elf/input-files.cc +++ b/elf/input-files.cc @@ -63,15 +63,6 @@ void InputFile::clear_symbols() { } } -// Find the source filename. It should be listed in symtab as STT_FILE. 
-template -std::string_view InputFile::get_source_name() const { - for (i64 i = 0; i < first_global; i++) - if (Symbol *sym = symbols[i]; sym->get_type() == STT_FILE) - return sym->name(); - return ""; -} - template ObjectFile::ObjectFile(Context &ctx, MappedFile> *mf, std::string archive_name, bool is_in_lib) @@ -131,8 +122,7 @@ void ObjectFile::initialize_sections(Context &ctx) { for (i64 i = 0; i < this->elf_sections.size(); i++) { const ElfShdr &shdr = this->elf_sections[i]; - if ((shdr.sh_flags & SHF_EXCLUDE) && !(shdr.sh_flags & SHF_ALLOC) && - shdr.sh_type != SHT_LLVM_ADDRSIG) + if ((shdr.sh_flags & SHF_EXCLUDE) && !(shdr.sh_flags & SHF_ALLOC) && shdr.sh_type != SHT_LLVM_ADDRSIG) continue; switch (shdr.sh_type) { @@ -141,7 +131,7 @@ void ObjectFile::initialize_sections(Context &ctx) { if (shdr.sh_info >= this->elf_syms.size()) Fatal(ctx) << *this << ": invalid symbol index"; const ElfSym &sym = this->elf_syms[shdr.sh_info]; - std::string_view signature = this->symbol_strtab.data() + sym.st_name; + std::string_view signature = this->symbol_strtab_name(sym.st_name); // Ignore a broken comdat group GCC emits for .debug_macros. // https://github.com/rui314/mold/issues/438 @@ -223,12 +213,14 @@ void ObjectFile::initialize_sections(Context &ctx) { llvm_addrsig = this->sections[i].get(); } + // Save debug sections for undefined symbol reporting. + InputSection *isec = this->sections[i].get(); + + if (name == ".debug_info") + debug_info = isec; + // Save debug sections for --gdb-index. 
if (ctx.arg.gdb_index) { - InputSection *isec = this->sections[i].get(); - - if (name == ".debug_info") - debug_info = isec; if (name == ".debug_ranges") debug_ranges = isec; if (name == ".debug_rnglists") @@ -442,7 +434,7 @@ void ObjectFile::initialize_symbols(Context &ctx) { if (esym.is_common()) Fatal(ctx) << *this << ": common local symbol?"; - std::string_view name = this->symbol_strtab.data() + esym.st_name; + std::string_view name = this->symbol_strtab_name(esym.st_name); if (name.empty() && esym.st_type == STT_SECTION) if (InputSection *sec = get_section(esym)) name = sec->name(); @@ -471,7 +463,7 @@ void ObjectFile::initialize_symbols(Context &ctx) { const ElfSym &esym = this->elf_syms[i]; // Get a symbol name - std::string_view key = this->symbol_strtab.data() + esym.st_name; + std::string_view key = this->symbol_strtab_name(esym.st_name); std::string_view name = key; // Parse symbol version after atsign @@ -993,18 +985,6 @@ void ObjectFile::claim_unresolved_symbols(Context &ctx) { if (!this->is_alive) return; - auto report_undef = [&](Symbol &sym) { - std::stringstream ss; - if (std::string_view source = this->get_source_name(); !source.empty()) - ss << ">>> referenced by " << source << "\n"; - else - ss << ">>> referenced by " << *this << "\n"; - - typename decltype(ctx.undef_errors)::accessor acc; - ctx.undef_errors.insert(acc, {sym.name(), {}}); - acc->second.push_back(ss.str()); - }; - for (i64 i = this->first_global; i < this->symbols.size(); i++) { const ElfSym &esym = this->elf_syms[i]; Symbol &sym = *this->symbols[i]; @@ -1017,7 +997,7 @@ void ObjectFile::claim_unresolved_symbols(Context &ctx) { // imported symbol, it's handled as if no symbols were found. 
if (sym.file && sym.file->is_dso && (sym.visibility == STV_PROTECTED || sym.visibility == STV_HIDDEN)) { - report_undef(sym); + add_undef(ctx, *this, sym, (InputSection *)nullptr, 0); continue; } @@ -1027,7 +1007,7 @@ void ObjectFile::claim_unresolved_symbols(Context &ctx) { // If a symbol name is in the form of "foo@version", search for // symbol "foo" and check if the symbol has version "version". - std::string_view key = this->symbol_strtab.data() + esym.st_name; + std::string_view key = this->symbol_strtab_name(esym.st_name); if (i64 pos = key.find('@'); pos != key.npos) { Symbol *sym2 = get_symbol(ctx, key.substr(0, pos)); if (sym2->file && sym2->file->is_dso && @@ -1047,7 +1027,7 @@ void ObjectFile::claim_unresolved_symbols(Context &ctx) { }; if (ctx.arg.unresolved_symbols == UNRESOLVED_WARN) - report_undef(sym); + add_undef(ctx, *this, sym, (InputSection *)nullptr, 0); // Convert remaining undefined symbols to dynamic symbols. if (ctx.arg.shared) { @@ -1370,7 +1350,7 @@ void SharedFile::parse(Context &ctx) { if (ver == VER_NDX_LOCAL) continue; - std::string_view name = this->symbol_strtab.data() + esyms[i].st_name; + std::string_view name = this->symbol_strtab_name(esyms[i].st_name); bool is_hidden = (!vers.empty() && (vers[i] & VERSYM_HIDDEN)); this->elf_syms2.push_back(esyms[i]); diff --git a/elf/input-sections.cc b/elf/input-sections.cc index 213fa1f88c..f99c7b6932 100644 --- a/elf/input-sections.cc +++ b/elf/input-sections.cc @@ -234,76 +234,130 @@ void InputSection::write_to(Context &ctx, u8 *buf) { apply_reloc_nonalloc(ctx, buf); } -// Get the name of a function containin a given offset. 
template -std::string_view InputSection::get_func_name(Context &ctx, i64 offset) { - for (const ElfSym &esym : file.elf_syms) { - if (esym.st_shndx == shndx && esym.st_type == STT_FUNC && - esym.st_value <= offset && offset < esym.st_value + esym.st_size) { - std::string_view name = file.symbol_strtab.data() + esym.st_name; - if (ctx.arg.demangle) - return demangle(name); - return name; - } - } - return ""; -} - -// Record an undefined symbol error which will be displayed all at -// once by report_undef_errors(). -template -void InputSection::record_undef_error(Context &ctx, const ElfRel &rel) { - std::stringstream ss; - if (std::string_view source = file.get_source_name(); !source.empty()) - ss << ">>> referenced by " << source << "\n"; - else - ss << ">>> referenced by " << *this << "\n"; - - ss << ">>> " << file; - if (std::string_view func = get_func_name(ctx, rel.r_offset); !func.empty()) - ss << ":(" << func << ")"; - - Symbol &sym = *file.symbols[rel.r_sym]; - - typename decltype(ctx.undef_errors)::accessor acc; - ctx.undef_errors.insert(acc, {sym.name(), {}}); - acc->second.push_back(ss.str()); +void add_undef(Context &ctx, InputFile &file, Symbol &sym, + InputSection *section, typename E::WordTy r_offset) { + assert(!ctx.undefined_done); + ctx.undefined.push_back({file, sym, section, r_offset}); } -// Report all undefined symbols, grouped by symbol. template -void report_undef_errors(Context &ctx) { - constexpr i64 max_errors = 3; - - for (auto &pair : ctx.undef_errors) { - std::string_view sym_name = pair.first; - std::span errors = pair.second; - - if (ctx.arg.demangle) - sym_name = demangle(sym_name); +void report_undef(Context &ctx) { + setup_context_debuginfo(ctx); + + // Report all undefined symbols, grouped by symbol. 
+ std::unordered_set*> handled; + for (const typename Context::Undefined &group : ctx.undefined) { + if (handled.contains(&group.sym)) + continue; + handled.emplace(&group.sym); + + std::stringstream report; + report << "undefined symbol: " << group.sym << "\n"; + + int count = 0; + constexpr int max_reported_count = 3; + for (const typename Context::Undefined &undef : ctx.undefined) { + if (&undef.sym != &group.sym) + continue; + if (++count > max_reported_count) + continue; + + InputFile &file = undef.file; + + // Find the source file which references the symbol. First try debuginfo, + // as that one provides also source location. Debuginfo needs to be relocated, + // so this uses the resulting debuginfo rather than debuginfo in the object file. + std::string_view source_name; + std::string_view directory; + i32 line = 0; + i32 column = 0; + bool line_valid = false; + ObjectFile * object_file = dynamic_cast *>(&file); + if (object_file != nullptr && object_file->debug_info && undef.section != nullptr) { + if (object_file->compunits.empty()) + object_file->compunits = read_compunits(ctx, *object_file); + std::tie(source_name, directory, line, column) = find_source_location(ctx, + *object_file, undef.r_offset + undef.section->get_addr()); + line_valid = !source_name.empty(); + } - std::stringstream ss; - ss << "undefined symbol: " << sym_name << "\n"; + if (source_name.empty()) { + // If using debuginfo fails, find the source file from symtab. It should be listed + // in symtab as STT_FILE, the closest one before the undefined entry. 
+ auto sym_pos = std::find(file.symbols.begin(), file.symbols.end(), &undef.sym); + if (sym_pos != file.symbols.end()) { + while (sym_pos != file.symbols.begin()) { + --sym_pos; + Symbol *tmp = *sym_pos; + if (tmp->file && tmp->get_type() == STT_FILE) { + source_name = tmp->name(); + break; + } + } + } + } - for (i64 i = 0; i < errors.size() && i < max_errors; i++) - ss << errors[i]; + // Find the function that references the symbol by trying to find the relocation offset + // inside the section in one of the function ranges given by symtab. + std::string function_name; + if (undef.section != nullptr) { + for (const ElfSym & elfsym : file.elf_syms) { + if (elfsym.st_shndx == undef.section->shndx && elfsym.st_type == STT_FUNC + && undef.r_offset >= elfsym.st_value && undef.r_offset < elfsym.st_value + elfsym.st_size) { + function_name = file.symbol_strtab_name(elfsym.st_name); + if (ctx.arg.demangle) + function_name = demangle(function_name); + break; + } + } + } - if (errors.size() > max_errors) - ss << ">>> referenced " << (errors.size() - max_errors) << " more times\n"; + if (!source_name.empty()) { + std::string location(source_name); + if (line != 0) + location += ":" + std::to_string(line); + if (column != 0) + location += ":" + std::to_string(column); + if (!directory.empty()) + report << ">>> referenced by " << location << " (" << directory << "/" << location << ")\n"; + else + report << ">>> referenced by " << location << "\n"; + } else + report << ">>> referenced by " << file << "\n"; + report << ">>> " << file; + if (!function_name.empty()) + report << ":(" << function_name << ")"; + report << "\n"; + + if (ctx.arg.warn_once) + break; + } - if (ctx.arg.unresolved_symbols == UNRESOLVED_ERROR) - Error(ctx) << ss.str(); - else if (ctx.arg.unresolved_symbols == UNRESOLVED_WARN) - Warn(ctx) << ss.str(); + if (count > max_reported_count) + report << ">>> referenced " << (count - max_reported_count) << " more times\n"; + + switch (ctx.arg.unresolved_symbols) { + 
case UNRESOLVED_ERROR: + Error(ctx) << report.str(); + break; + case UNRESOLVED_WARN: + Warn(ctx) << report.str(); + break; + case UNRESOLVED_IGNORE: + break; + } } - ctx.checkpoint(); + ctx.undefined_done = true; } #define INSTANTIATE(E) \ template struct CieRecord; \ template class InputSection; \ - template void report_undef_errors(Context &) + template void add_undef(Context &, InputFile &, Symbol &, \ + InputSection *section, typename E::WordTy r_offset); \ + template void report_undef(Context &) INSTANTIATE_ALL; diff --git a/elf/main.cc b/elf/main.cc index 7ebdc3777c..8566ca2363 100644 --- a/elf/main.cc +++ b/elf/main.cc @@ -588,10 +588,6 @@ static int elf_main(int argc, char **argv) { // be added to .dynsym. ctx.dynsym->finalize(ctx); - // Print reports about undefined symbols, if needed. - if (ctx.arg.unresolved_symbols == UNRESOLVED_ERROR) - report_undef_errors(ctx); - // Fill .gnu.version_d section contents. if (ctx.verdef) ctx.verdef->construct(ctx); @@ -678,9 +674,14 @@ static int elf_main(int argc, char **argv) { chunk->copy_buf(ctx); }); - report_undef_errors(ctx); + ctx.checkpoint(); } + // Print warnings about undefined symbols, if needed. + // This requires relocated debuginfo, so it's done only here. 
+ // TODO move up + report_undef(ctx); + if constexpr (std::is_same_v) sort_arm_exidx(ctx); diff --git a/elf/mold.h b/elf/mold.h index 18e3f73c29..56986c5cb7 100644 --- a/elf/mold.h +++ b/elf/mold.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -276,9 +277,6 @@ class InputSection { const ElfShdr &shdr() const; std::span> get_rels(Context &ctx) const; std::span> get_fdes() const; - std::string_view get_func_name(Context &ctx, i64 offset); - - void record_undef_error(Context &ctx, const ElfRel &rel); ObjectFile &file; OutputSection *output_section = nullptr; @@ -331,7 +329,10 @@ class InputSection { }; template -void report_undef_errors(Context &ctx); +void add_undef(Context &ctx, InputFile &file, Symbol &sym, + InputSection *section, typename E::WordTy r_offset); +template +void report_undef(Context &ctx); // // output-chunks.cc @@ -999,6 +1000,10 @@ template std::vector read_address_areas(Context &ctx, ObjectFile &file, i64 offset); +template +std::tuple +find_source_location(Context &ctx, ObjectFile &file, u64 address); + // // input-files.cc // @@ -1064,7 +1069,10 @@ class InputFile { std::function *)> feeder) = 0; std::span *> get_global_syms(); - std::string_view get_source_name() const; + + std::string_view symbol_strtab_name( ul32 st_name ) const { + return symbol_strtab.data() + st_name; + } MappedFile> *mf = nullptr; std::span> elf_sections; @@ -1077,8 +1085,6 @@ class InputFile { u32 priority; std::atomic_bool is_alive = false; std::string_view shstrtab; - std::unique_ptr[]> local_syms; - std::string_view symbol_strtab; // To create an output .symtab u64 local_symtab_idx = 0; @@ -1090,6 +1096,11 @@ class InputFile { // For --emit-relocs std::vector output_sym_indices; + +protected: + std::unique_ptr[]> local_syms; + + std::string_view symbol_strtab; }; // ObjectFile represents an input .o file. 
@@ -1631,7 +1642,16 @@ struct Context { std::atomic_bool has_gottp_rel = false; std::atomic_bool has_textrel = false; - tbb::concurrent_hash_map> undef_errors; + // Undefined symbols + struct Undefined + { + InputFile &file; + Symbol &sym; + InputSection *section; + typename E::WordTy r_offset; + }; + tbb::concurrent_vector undefined; + std::atomic_bool undefined_done = false; // Output chunks std::unique_ptr> ehdr; @@ -1667,12 +1687,13 @@ struct Context { std::unique_ptr thumb_to_arm; std::unique_ptr tls_trampoline; - // For --gdb-index + // For undefined symbol reports and for --gdb-index Chunk *debug_info = nullptr; Chunk *debug_abbrev = nullptr; Chunk *debug_ranges = nullptr; Chunk *debug_addr = nullptr; Chunk *debug_rnglists = nullptr; + Chunk *debug_line = nullptr; // For --relocatable std::vector *> r_chunks; @@ -1727,6 +1748,9 @@ int main(int argc, char **argv); template std::ostream &operator<<(std::ostream &out, const InputFile &file); +template +void setup_context_debuginfo(Context &ctx); + // // Symbol // diff --git a/elf/output-chunks.cc b/elf/output-chunks.cc index 92bb11669c..c41f10e3c7 100644 --- a/elf/output-chunks.cc +++ b/elf/output-chunks.cc @@ -2339,19 +2339,7 @@ void GdbIndexSection::write_address_areas(Context &ctx) { u8 *base = ctx.buf + this->shdr.sh_offset; - for (Chunk *chunk : ctx.chunks) { - std::string_view name = chunk->name; - if (name == ".debug_info" || name == ".zdebug_info") - ctx.debug_info = chunk; - if (name == ".debug_abbrev" || name == ".zdebug_abbrev") - ctx.debug_abbrev = chunk; - if (name == ".debug_ranges" || name == ".zdebug_ranges") - ctx.debug_ranges = chunk; - if (name == ".debug_addr" || name == ".zdebug_addr") - ctx.debug_addr = chunk; - if (name == ".debug_rnglists" || name == ".zdebug_rnglists") - ctx.debug_rnglists = chunk; - } + setup_context_debuginfo(ctx); assert(ctx.debug_info); assert(ctx.debug_abbrev); diff --git a/mold.h b/mold.h index bb16226dbe..cb49628d4d 100644 --- a/mold.h +++ b/mold.h @@ -318,6 
+318,26 @@ inline i64 uleb_size(u64 val) { return 9; } +inline i64 read_sleb(u8 *&buf) { + i64 val = 0; + u8 shift = 0; + u8 byte; + for(;;) { + byte = *buf++; + val |= (u64)(byte & 0x7f) << shift; // widen first: the int-promoted operand is shifted out of range once shift >= 32 + shift += 7; + if ((byte & 0x80) == 0) { + if (shift < 64 && (byte & 0x40) != 0) + return val | (i64(-1) << shift); // sign-extend + return val; + } + } +} + +inline i64 read_sleb(u8 const*&buf) { + return read_sleb(const_cast(buf)); +} + template std::string_view save_string(C &ctx, const std::string &str) { u8 *buf = new u8[str.size() + 1]; diff --git a/test/elf/hidden-undef.sh b/test/elf/hidden-undef.sh index 038ea058fd..2ac68118bc 100755 --- a/test/elf/hidden-undef.sh +++ b/test/elf/hidden-undef.sh @@ -24,5 +24,6 @@ EOF ! $CC -B. -o $t/exe $t/a.so $t/b.o >& $t/log grep -q 'undefined symbol: foo' $t/log +grep -q '>>> .*b.o' $t/log echo OK