Skip to content

Commit

Permalink
Compute base addresses from program headers while reading /proc/self/…
Browse files Browse the repository at this point in the history
…maps.

We previously had logic to compute the base address from program
headers as part of symbolization. The problem is that we need a correct
base address earlier in order to adjust a PC into the image's address
space, as these addresses can appear in unsymbolized output.

There was previously an assumption that only the mapping that
was lowest in the address space did not need to be adjusted. This
assumption is not guaranteed (for example, the kernel may choose to
map an ET_DYN lowest) and in fact turned out to be wrong in binaries
linked with lld because the first mapping is read-only.

The solution is to move the program header reading logic into the
code that reads /proc/self/maps.
  • Loading branch information
pcc committed Nov 3, 2017
1 parent d72e3be commit 4f95b15
Showing 1 changed file with 58 additions and 52 deletions.
110 changes: 58 additions & 52 deletions src/symbolize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@

#if defined(HAVE_SYMBOLIZE)

#include <string.h>

#include <limits>

#include "symbolize.h"
Expand Down Expand Up @@ -325,41 +327,17 @@ FindSymbol(uint64_t pc, const int fd, char *out, int out_size,
// both regular and dynamic symbol tables if necessary. On success,
// write the symbol name to "out" and return true. Otherwise, return
// false.
static bool GetSymbolFromObjectFile(const int fd, uint64_t pc,
char *out, int out_size,
uint64_t map_base_address) {
static bool GetSymbolFromObjectFile(const int fd,
uint64_t pc,
char* out,
int out_size,
uint64_t base_address) {
// Read the ELF header.
ElfW(Ehdr) elf_header;
if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
return false;
}

uint64_t symbol_offset = 0;
if (elf_header.e_type == ET_DYN) { // DSO needs offset adjustment.
ElfW(Phdr) phdr;
// We need to find the PT_LOAD segment corresponding to the read-execute
// file mapping in order to correctly perform the offset adjustment.
for (unsigned i = 0; i != elf_header.e_phnum; ++i) {
if (!ReadFromOffsetExact(fd, &phdr, sizeof(phdr),
elf_header.e_phoff + i * sizeof(phdr)))
return false;
if (phdr.p_type == PT_LOAD &&
(phdr.p_flags & (PF_R | PF_X)) == (PF_R | PF_X)) {
// Find the mapped address corresponding to virtual address zero. We do
// this by first adding p_offset. This gives us the mapped address of
// the start of the segment, or in other words the mapped address
// corresponding to the virtual address of the segment. (Note that this
// is distinct from the start address, as p_offset is not guaranteed to
// be page aligned.) We then subtract p_vaddr, which takes us to virtual
// address zero.
symbol_offset = map_base_address + phdr.p_offset - phdr.p_vaddr;
break;
}
}
if (symbol_offset == 0)
return false;
}

ElfW(Shdr) symtab, strtab;

// Consult a regular symbol table first.
Expand All @@ -369,8 +347,7 @@ static bool GetSymbolFromObjectFile(const int fd, uint64_t pc,
symtab.sh_link * sizeof(symtab))) {
return false;
}
if (FindSymbol(pc, fd, out, out_size, symbol_offset,
&strtab, &symtab)) {
if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) {
return true; // Found the symbol in a regular symbol table.
}
}
Expand All @@ -382,8 +359,7 @@ static bool GetSymbolFromObjectFile(const int fd, uint64_t pc,
symtab.sh_link * sizeof(symtab))) {
return false;
}
if (FindSymbol(pc, fd, out, out_size, symbol_offset,
&strtab, &symtab)) {
if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) {
return true; // Found the symbol in a dynamic symbol table.
}
}
Expand Down Expand Up @@ -532,14 +508,20 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
int out_file_name_size) {
int object_fd;

// Open /proc/self/maps.
int maps_fd;
NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY));
FileDescriptor wrapped_maps_fd(maps_fd);
if (wrapped_maps_fd.get() < 0) {
return -1;
}

int mem_fd;
NO_INTR(mem_fd = open("/proc/self/mem", O_RDONLY));
FileDescriptor wrapped_mem_fd(mem_fd);
if (wrapped_mem_fd.get() < 0) {
return -1;
}

// Iterate over maps and look for the map containing the pc. Then
// look into the symbol tables inside.
char buf[1024]; // Big enough for line of sane /proc/self/maps
Expand Down Expand Up @@ -575,11 +557,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
}
++cursor; // Skip ' '.

// Check start and end addresses.
if (!(start_address <= pc && pc < end_address)) {
continue; // We skip this map. PC isn't in this map.
}

// Read flags. Skip flags until we encounter a space or eol.
const char * const flags_start = cursor;
while (cursor < eol && *cursor != ' ') {
Expand All @@ -590,6 +567,48 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
return -1; // Malformed line.
}

// Determine the base address by reading ELF headers in process memory.
ElfW(Ehdr) ehdr;
if (flags_start[0] == 'r' &&
ReadFromOffsetExact(mem_fd, &ehdr, sizeof(ElfW(Ehdr)), start_address) &&
memcmp(ehdr.e_ident, ELFMAG, SELFMAG) == 0) {
switch (ehdr.e_type) {
case ET_EXEC:
base_address = 0;
break;
case ET_DYN:
// Find the segment containing file offset 0. This will correspond
// to the ELF header that we just read. Normally this will have
// virtual address 0, but this is not guaranteed. We must subtract
// the virtual address from the address where the ELF header was
// mapped to get the base address.
//
// If we fail to find a segment for file offset 0, use the address
// of the ELF header as the base address.
base_address = start_address;
for (unsigned i = 0; i != ehdr.e_phnum; ++i) {
ElfW(Phdr) phdr;
if (ReadFromOffsetExact(
mem_fd, &phdr, sizeof(phdr),
start_address + ehdr.e_phoff + i * sizeof(phdr)) &&
phdr.p_type == PT_LOAD && phdr.p_offset == 0) {
base_address = start_address - phdr.p_vaddr;
break;
}
}
break;
default:
// ET_REL or ET_CORE. These aren't directly executable, so they don't
// affect the base address.
break;
}
}

// Check start and end addresses.
if (!(start_address <= pc && pc < end_address)) {
continue; // We skip this map. PC isn't in this map.
}

// Check flags. We are only interested in "r*x" maps.
if (flags_start[0] != 'r' || flags_start[2] != 'x') {
continue; // We skip this map.
Expand All @@ -604,19 +623,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
}
++cursor; // Skip ' '.

// Don't subtract 'start_address' from the first entry:
// * If a binary is compiled w/o -pie, then the first entry in
// process maps is likely the binary itself (all dynamic libs
// are mapped higher in address space). For such a binary,
// instruction offset in binary coincides with the actual
// instruction address in virtual memory (as code section
// is mapped to a fixed memory range).
// * If a binary is compiled with -pie, all the modules are
// mapped high at address space (in particular, higher than
// shadow memory of the tool), so the module can't be the
// first entry.
base_address = ((num_maps == 1) ? 0U : start_address) - file_offset;

// Skip to file name. "cursor" now points to dev. We need to
// skip at least two spaces for dev and inode.
int num_spaces = 0;
Expand Down

0 comments on commit 4f95b15

Please sign in to comment.