Skip to content

Commit

Permalink
[ELF] Fix unnecessary inclusion of unreferenced provide symbols
Browse files Browse the repository at this point in the history
Previously, linker was unnecessarily including a PROVIDE symbol which
was referenced by another unused PROVIDE symbol. For example, if a
linker script contained the below code and 'not_used_sym' provide symbol
is not included, then linker was still unnecessarily including 'foo' PROVIDE
symbol because it was referenced by 'not_used_sym'. This commit fixes
this behavior.

PROVIDE(not_used_sym = foo)
PROVIDE(foo = 0x1000)

This commit fixes this behavior by using dfs-like algorithm to find
all the symbols referenced in provide expressions of included provide
symbols.

Closes llvm#74771
  • Loading branch information
partaror committed Mar 8, 2024
1 parent 6a8e6c9 commit 1b58d0d
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 11 deletions.
60 changes: 53 additions & 7 deletions lld/ELF/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2659,6 +2659,58 @@ static void postParseObjectFile(ELFFileBase *file) {
}
}

// Returns true if the provide symbol should be added to the link.
bool shouldAddProvideSym(StringRef symName) {
Symbol *b = symtab.find(symName);
return b && !b->isDefined() && !b->isCommon();
}

// Add symbols referred by the provide symbol to the symbol table.
// This function must only be called for provide symbols that should be added
// to the link.
static void
addProvideSymReferences(StringRef provideSym,
llvm::StringSet<> &addedRefsFromProvideSym) {

if (addedRefsFromProvideSym.count(provideSym))
return;
assert(shouldAddProvideSym(provideSym) &&
"This function must only be called for provide symbols that should be "
"added to the link.");
addedRefsFromProvideSym.insert(provideSym);
for (StringRef name : script->provideMap[provideSym].keys()) {
Symbol *sym = addUnusedUndefined(name);
sym->isUsedInRegularObj = true;
sym->referenced = true;
script->referencedSymbols.push_back(name);
if (script->provideMap.count(name) && shouldAddProvideSym(name) &&
!addedRefsFromProvideSym.count(name))
addProvideSymReferences(name, addedRefsFromProvideSym);
}
}

// Add symbols that are referenced in the linker script.
// Symbols referenced in a PROVIDE command are only added to the symbol table if
// the PROVIDE command actually provides the symbol.
static void addScriptReferencedSymbols() {
// Some symbols (such as __ehdr_start) are defined lazily only when there
// are undefined symbols for them, so we add these to trigger that logic.
for (StringRef name : script->referencedSymbols) {
Symbol *sym = addUnusedUndefined(name);
sym->isUsedInRegularObj = true;
sym->referenced = true;
}

// Keeps track of references from which PROVIDE symbols have been added to the
// symbol table.
llvm::StringSet<> addedRefsFromProvideSym;
for (StringRef provideSym : script->provideMap.keys()) {
if (!addedRefsFromProvideSym.count(provideSym) &&
shouldAddProvideSym(provideSym))
addProvideSymReferences(provideSym, addedRefsFromProvideSym);
}
}

// Do actual linking. Note that when this function is called,
// all linker scripts have already been parsed.
void LinkerDriver::link(opt::InputArgList &args) {
Expand Down Expand Up @@ -2735,13 +2787,7 @@ void LinkerDriver::link(opt::InputArgList &args) {
config->hasDynSymTab =
!ctx.sharedFiles.empty() || config->isPic || config->exportDynamic;

// Some symbols (such as __ehdr_start) are defined lazily only when there
// are undefined symbols for them, so we add these to trigger that logic.
for (StringRef name : script->referencedSymbols) {
Symbol *sym = addUnusedUndefined(name);
sym->isUsedInRegularObj = true;
sym->referenced = true;
}
addScriptReferencedSymbols();

// Prevent LTO from removing any definition referenced by -u.
for (StringRef name : config->undefined)
Expand Down
10 changes: 10 additions & 0 deletions lld/ELF/LinkerScript.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Compiler.h"
#include <cstddef>
#include <cstdint>
Expand Down Expand Up @@ -379,6 +381,14 @@ class LinkerScript final {

// Sections that will be warned/errored by --orphan-handling.
SmallVector<const InputSectionBase *, 0> orphanSections;

// Stores the mapping: provide symbol -> symbols referred in the provide
// expression. For example, if the PROVIDE command is:
//
// PROVIDE(v = a + b + c);
//
// then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c']
llvm::StringMap<llvm::StringSet<>> provideMap;
};

LLVM_LIBRARY_VISIBILITY extern std::unique_ptr<LinkerScript> script;
Expand Down
13 changes: 12 additions & 1 deletion lld/ELF/ScriptParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "llvm/Support/TimeProfiler.h"
#include <cassert>
#include <limits>
#include <optional>
#include <vector>

using namespace llvm;
Expand Down Expand Up @@ -138,6 +139,10 @@ class ScriptParser final : ScriptLexer {

// A set to detect an INCLUDE() cycle.
StringSet<> seen;

// If we are currently parsing a PROVIDE|PROVIDE_HIDDEN command,
// then this member is set to the provide symbol name.
std::optional<llvm::StringRef> activeProvideSym;
};
} // namespace

Expand Down Expand Up @@ -1055,6 +1060,9 @@ SymbolAssignment *ScriptParser::readProvideHidden(bool provide, bool hidden) {
;
return nullptr;
}
llvm::SaveAndRestore saveActiveProvideSym(activeProvideSym);
if (provide)
activeProvideSym = name;
SymbolAssignment *cmd = readSymbolAssignment(name);
cmd->provide = provide;
cmd->hidden = hidden;
Expand Down Expand Up @@ -1570,7 +1578,10 @@ Expr ScriptParser::readPrimary() {
tok = unquote(tok);
else if (!isValidSymbolName(tok))
setError("malformed number: " + tok);
script->referencedSymbols.push_back(tok);
if (activeProvideSym)
script->provideMap[activeProvideSym.value()].insert(tok);
else
script->referencedSymbols.push_back(tok);
return [=] { return script->getSymbolValue(tok, location); };
}

Expand Down
27 changes: 24 additions & 3 deletions lld/test/ELF/linkerscript/symbolreferenced.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,20 @@

# CHECK: 0000000000001000 a f1
# CHECK-NEXT: 0000000000001000 A f2
# CHECK-NEXT: 0000000000001000 a g1
# CHECK-NEXT: 0000000000001000 A g2
# CHECK-NEXT: 0000000000001000 A f3
# CHECK-NEXT: 0000000000001000 A f4
# CHECK-NEXT: 0000000000001000 A f5
# CHECK-NEXT: 0000000000001000 A f6
# CHECK-NEXT: 0000000000001000 A f7
# CHECK-NEXT: 0000000000001000 A newsym
# CHECK: 0000000000002000 A u
# CHECK-NEXT: 0000000000002000 A v
# CHECK-NEXT: 0000000000002000 A w

# CHECK-NOT: g1
# CHECK-NOT: g2
# CHECK-NOT: unused
# CHECK-NOT: another_unused

# RUN: not ld.lld -T chain2.t a.o 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:
# ERR-COUNT-3: error: chain2.t:1: symbol not found: undef
Expand All @@ -40,13 +51,23 @@ patatino:
movl newsym, %eax

#--- chain.t
PROVIDE(f2 = 0x1000);
PROVIDE(f7 = 0x1000);
PROVIDE(f5 = f6);
PROVIDE(f6 = f7);
PROVIDE(f4 = f5);
PROVIDE(f3 = f4);
PROVIDE(f2 = f3);
PROVIDE_HIDDEN(f1 = f2);
PROVIDE(newsym = f1);

u = v;
PROVIDE(w = 0x2000);
PROVIDE(v = w);

PROVIDE(g2 = 0x1000);
PROVIDE_HIDDEN(g1 = g2);
PROVIDE(unused = g1);
PROVIDE_HIDDEN(another_unused = g1);

#--- chain2.t
PROVIDE(f2 = undef);
Expand Down

0 comments on commit 1b58d0d

Please sign in to comment.