diff --git a/scripts/cog.sh b/scripts/cog.sh index 4d9a352b..5f412c7d 100755 --- a/scripts/cog.sh +++ b/scripts/cog.sh @@ -11,4 +11,5 @@ cog -P $args \ README.md \ src/austin.h \ src/argparse.c \ + src/linux/py_proc.h \ snap/snapcraft.yaml diff --git a/src/hints.h b/src/hints.h index 0aa59137..b1fde25c 100644 --- a/src/hints.h +++ b/src/hints.h @@ -29,8 +29,6 @@ #define TRUE 1 #define FALSE 0 -#define NOVERSION 0 - #define success(x) (!(x)) #define fail(x) (x) #define sfree(x) {if ((x) != NULL) {free(x); x = NULL;}} diff --git a/src/linux/analyze_elf.h b/src/linux/analyze_elf.h new file mode 100644 index 00000000..0f74454f --- /dev/null +++ b/src/linux/analyze_elf.h @@ -0,0 +1,90 @@ +// ---------------------------------------------------------------------------- +static Elf64_Addr +_get_base_64(Elf64_Ehdr * ehdr, void * elf_map) +{ + for (int i = 0; i < ehdr->e_phnum; ++i) { + Elf64_Phdr * phdr = (Elf64_Phdr *) (elf_map + ehdr->e_phoff + i * ehdr->e_phentsize); + if (phdr->p_type == PT_LOAD) + return phdr->p_vaddr - phdr->p_vaddr % phdr->p_align; + } + return UINT64_MAX; +} /* _get_base_64 */ + +static int +_py_proc__analyze_elf64(py_proc_t * self, void * elf_map, void * elf_base) { + register int symbols = 0; + + Elf64_Ehdr * ehdr = elf_map; + + // Section header must be read from binary as it is not loaded into memory + Elf64_Xword sht_size = ehdr->e_shnum * ehdr->e_shentsize; + Elf64_Off elf_map_size = ehdr->e_shoff + sht_size; + Elf64_Shdr * p_shdr; + + Elf64_Shdr * p_shstrtab = elf_map + ELF_SH_OFF(ehdr, ehdr->e_shstrndx); + char * sh_name_base = elf_map + p_shstrtab->sh_offset; + Elf64_Shdr * p_dynsym = NULL; + Elf64_Addr base = _get_base_64(ehdr, elf_map); + + void * bss_base = NULL; + size_t bss_size = 0; + + if (base != UINT64_MAX) { + log_d("Base @ %p", base); + + for (Elf64_Off sh_off = ehdr->e_shoff; \ + sh_off < elf_map_size; \ + sh_off += ehdr->e_shentsize \ + ) { + p_shdr = (Elf64_Shdr *) (elf_map + sh_off); + + if ( + p_shdr->sh_type == SHT_DYNSYM 
&& \ + strcmp(sh_name_base + p_shdr->sh_name, ".dynsym") == 0 + ) { + p_dynsym = p_shdr; + } + else if (strcmp(sh_name_base + p_shdr->sh_name, ".bss") == 0) { + bss_base = elf_base + (p_shdr->sh_addr - base); + bss_size = p_shdr->sh_size; + } + else if (strcmp(sh_name_base + p_shdr->sh_name, ".PyRuntime") == 0) { + self->map.runtime.base = elf_base + (p_shdr->sh_addr - base); + self->map.runtime.size = p_shdr->sh_size; + } + } + + if (p_dynsym != NULL) { + if (p_dynsym->sh_offset != 0) { + Elf64_Shdr * p_strtabsh = (Elf64_Shdr *) (elf_map + ELF_SH_OFF(ehdr, p_dynsym->sh_link)); + + // Search for dynamic symbols + for (Elf64_Off tab_off = p_dynsym->sh_offset; \ + tab_off < p_dynsym->sh_offset + p_dynsym->sh_size; \ + tab_off += p_dynsym->sh_entsize + ) { + Elf64_Sym * sym = (Elf64_Sym *) (elf_map + tab_off); + char * sym_name = (char *) (elf_map + p_strtabsh->sh_offset + sym->st_name); + void * value = elf_base + (sym->st_value - base); + if ((symbols += _py_proc__check_sym(self, sym_name, value)) >= DYNSYM_COUNT) { + // We have found all the symbols. 
No need to look further + break; + } + } + } + } + } + + if (symbols < DYNSYM_MANDATORY) { + log_e("ELF binary has not all the mandatory Python symbols"); + set_error(ESYM); + FAIL; + } + + // Communicate BSS data back to the caller + self->map.bss.base = bss_base; + self->map.bss.size = bss_size; + log_d("BSS @ %p (size %x, offset %x)", self->map.bss.base, self->map.bss.size, self->map.bss.base - elf_base); + + SUCCESS; +} /* _py_proc__analyze_elf64 */ diff --git a/src/linux/py_proc.h b/src/linux/py_proc.h index 7a5464d1..93f0b066 100644 --- a/src/linux/py_proc.h +++ b/src/linux/py_proc.h @@ -86,6 +86,12 @@ _file_size(char * file) { } +/*[[[cog +from pathlib import Path +analyze_elf = Path("src/linux/analyze_elf.h").read_text() +print(analyze_elf) +print(analyze_elf.replace("64", "32")) +]]]*/ // ---------------------------------------------------------------------------- static Elf64_Addr _get_base_64(Elf64_Ehdr * ehdr, void * elf_map) @@ -98,7 +104,6 @@ _get_base_64(Elf64_Ehdr * ehdr, void * elf_map) return UINT64_MAX; } /* _get_base_64 */ - static int _py_proc__analyze_elf64(py_proc_t * self, void * elf_map, void * elf_base) { register int symbols = 0; @@ -137,6 +142,10 @@ _py_proc__analyze_elf64(py_proc_t * self, void * elf_map, void * elf_base) { bss_base = elf_base + (p_shdr->sh_addr - base); bss_size = p_shdr->sh_size; } + else if (strcmp(sh_name_base + p_shdr->sh_name, ".PyRuntime") == 0) { + self->map.runtime.base = elf_base + (p_shdr->sh_addr - base); + self->map.runtime.size = p_shdr->sh_size; + } } if (p_dynsym != NULL) { @@ -174,7 +183,6 @@ _py_proc__analyze_elf64(py_proc_t * self, void * elf_map, void * elf_base) { SUCCESS; } /* _py_proc__analyze_elf64 */ - // ---------------------------------------------------------------------------- static Elf32_Addr _get_base_32(Elf32_Ehdr * ehdr, void * elf_map) @@ -187,7 +195,6 @@ _get_base_32(Elf32_Ehdr * ehdr, void * elf_map) return UINT32_MAX; } /* _get_base_32 */ - static int 
_py_proc__analyze_elf32(py_proc_t * self, void * elf_map, void * elf_base) { register int symbols = 0; @@ -226,6 +233,10 @@ _py_proc__analyze_elf32(py_proc_t * self, void * elf_map, void * elf_base) { bss_base = elf_base + (p_shdr->sh_addr - base); bss_size = p_shdr->sh_size; } + else if (strcmp(sh_name_base + p_shdr->sh_name, ".PyRuntime") == 0) { + self->map.runtime.base = elf_base + (p_shdr->sh_addr - base); + self->map.runtime.size = p_shdr->sh_size; + } } if (p_dynsym != NULL) { @@ -263,6 +274,7 @@ _py_proc__analyze_elf32(py_proc_t * self, void * elf_map, void * elf_base) { SUCCESS; } /* _py_proc__analyze_elf32 */ +//[[[end]]] // ---------------------------------------------------------------------------- static int @@ -391,8 +403,8 @@ _py_proc__parse_maps_file(py_proc_t * self) { while (getline(&line, &len, fp) != -1) { ssize_t lower, upper; - char pathname[1024]; - char perms[5]; + char pathname[1024] = {0}; + char perms[5] = {0}; int field_count = sscanf(line, ADDR_FMT "-" ADDR_FMT " %s %*x %*x:%*x %*x %s\n", &lower, &upper, // Map bounds @@ -408,12 +420,20 @@ _py_proc__parse_maps_file(py_proc_t * self) { size_t page_size = getpagesize(); map->bss_base = (void *) lower - page_size; map->bss_size = upper - lower + page_size; - log_d("Inferred BSS for %s: %lx-%lx", map->path, lower, upper); + log_d("BSS section inferred from VM maps for %s: %lx-%lx", map->path, lower, upper); } if (field_count <= 0) continue; + if (!isvalid(self->map.runtime.base) && strcmp(perms, "rw-p") == 0 && strcmp(map->path, pathname) == 0) { + // This is likely the PyRuntime section. + size_t page_size = getpagesize(); + self->map.runtime.base = (void *) lower - page_size; + self->map.runtime.size = upper - lower + page_size; + log_d("PyRuntime section inferred from VM maps for %s: %lx-%lx", map->path, lower, upper); + } + if (field_count == 0 || strstr(pathname, "[v") == NULL) { // Skip meaningless addresses like [vsyscall] which would give // ridiculous values. 
diff --git a/src/mac/py_proc.h b/src/mac/py_proc.h index 95429935..a3d1744d 100644 --- a/src/mac/py_proc.h +++ b/src/mac/py_proc.h @@ -156,7 +156,13 @@ _py_proc__analyze_macho64(py_proc_t * self, void * base, void * map) { self->map.bss.base = base + sec[j].addr; self->map.bss.size = sec[j].size; bin_attrs |= B_BSS; - break; + continue; + } + // This section was added in Python 3.11 + if (strcmp(sec[j].sectname, "PyRuntime") == 0) { + self->map.runtime.base = base + sec[j].addr; + self->map.runtime.size = sec[j].size; + continue; } } cmd_cnt++; @@ -257,7 +263,13 @@ _py_proc__analyze_macho32(py_proc_t * self, void * base, void * map) { self->map.bss.base = base + sec[j].addr; self->map.bss.size = sec[j].size; bin_attrs |= B_BSS; - break; + continue; + } + // This section was added in Python 3.11 + if (strcmp(sec[j].sectname, "PyRuntime") == 0) { + self->map.runtime.base = base + sec[j].addr; + self->map.runtime.size = sec[j].size; + continue; } } cmd_cnt++; diff --git a/src/py_proc.c b/src/py_proc.c index 04f5857d..72ff111b 100644 --- a/src/py_proc.c +++ b/src/py_proc.c @@ -175,7 +175,7 @@ _get_version_from_filename(char * filename, const char * needle, int * major, in #if defined PL_MACOS static int -_find_version_in_binary(char * path) { +_find_version_in_binary(char * path, int * version) { size_t binary_size = 0; struct stat s; @@ -203,26 +203,26 @@ _find_version_in_binary(char * path) { FAIL; } - for (char m = '3'; m >= '2'; --m) { - char needle[3] = {0x00, m, '.'}; - size_t current_size = binary_size; - char * current_pos = binary_map->addr; - int major, minor, patch; - major = 0; - while (TRUE) { - char * p = memmem(current_pos, current_size, needle, sizeof(needle)); - if (!isvalid(p)) break; - if (sscanf(++p, "%d.%d.%d", &major, &minor, &patch) == 3) break; - current_size -= p - current_pos + sizeof(needle); - current_pos = p + sizeof(needle); - } + char needle[3] = {0x00, '3', '.'}; + size_t current_size = binary_size; + char * current_pos = 
binary_map->addr; + int major, minor, patch; + major = 0; + while (TRUE) { + char * p = memmem(current_pos, current_size, needle, sizeof(needle)); + if (!isvalid(p)) break; + if (sscanf(++p, "%d.%d.%d", &major, &minor, &patch) == 3) break; + current_size -= p - current_pos + sizeof(needle); + current_pos = p + sizeof(needle); + } - if (major > 0) { - return PYVERSION(major, minor, patch); - } + if (major >= 3) { + *version = PYVERSION(major, minor, patch); + SUCCESS; } - return NOVERSION; + set_error(EPROC); + FAIL; } /* _find_version_in_binary */ #endif @@ -248,7 +248,7 @@ _py_proc__infer_python_version(py_proc_t * self) { if (fail(py_proc__memcpy(self, self->symbols[DYNSYM_HEX_VERSION], sizeof(py_version), &py_version))) { log_e("Failed to dereference remote Py_Version symbol"); - return NOVERSION; + FAIL; } major = (py_version>>24) & 0xFF; @@ -293,8 +293,8 @@ _py_proc__infer_python_version(py_proc_t * self) { if (major == 0) { // We still haven't found a Python version so we look at the binary // content for clues - int version = NOVERSION; - if (isvalid(self->bin_path) && (version = _find_version_in_binary(self->bin_path))) { - log_d("Python version (from binary content): %d.%d.%d", major, minor, patch); + int version; + if (isvalid(self->bin_path) && (success(_find_version_in_binary(self->bin_path, &version)))) { + log_d("Python version (from binary content): %d.%d.%d", MAJOR(version), MINOR(version), PATCH(version)); self->py_v = get_version_descriptor(MAJOR(version), MINOR(version), PATCH(version)); SUCCESS; @@ -337,10 +337,12 @@ _py_proc__check_interp_state(py_proc_t * self, void * raddr) { FAIL; } - if (fail(py_proc__get_type(self, V_FIELD(void *, is, py_is, o_tstate_head), tstate_head))) { + void * tstate_head_addr = V_FIELD(void *, is, py_is, o_tstate_head); + + if (fail(py_proc__get_type(self, tstate_head_addr, tstate_head))) { log_e( "Cannot copy PyThreadState head at %p from PyInterpreterState instance", - V_FIELD(void *, is, py_is, o_tstate_head) + tstate_head_addr ); FAIL; } @@ -434,12 +436,9 @@ _py_proc__scan_bss(py_proc_t * self) { FAIL; } - int shift = 
0; - size_t step = self->map.bss.size > 0x10000 ? 0x10000 : self->map.bss.size; + size_t step = self->map.bss.size > 0x10000 ? 0x10000 : self->map.bss.size; - V_DESC(self->py_v); - - while (!(shift && V_MAX(3, 10))) { + for (int shift = 0; shift < 1; shift++) { void * base = self->map.bss.base - (shift * step); if (fail(py_proc__memcpy(self, base, self->map.bss.size, bss))) { log_ie("Failed to copy BSS section"); @@ -475,7 +474,6 @@ _py_proc__scan_bss(py_proc_t * self) { #if defined PL_WIN break; #endif - shift++; } set_error(EPROC); @@ -486,34 +484,63 @@ _py_proc__scan_bss(py_proc_t * self) { // ---------------------------------------------------------------------------- static int _py_proc__deref_interp_head(py_proc_t * self) { - if (!isvalid(self) || !isvalid(self->symbols[DYNSYM_RUNTIME])) { + if ( + !isvalid(self) + || !(isvalid(self->symbols[DYNSYM_RUNTIME]) || isvalid(self->map.runtime.base)) + ) { set_error(EPROC); FAIL; } V_DESC(self->py_v); - void * interp_head_raddr; + void * interp_head_raddr = NULL; _PyRuntimeState py_runtime; - if (py_proc__get_type(self, self->symbols[DYNSYM_RUNTIME], py_runtime)) { - log_d( - "Cannot copy _PyRuntimeState structure from remote address %p", - self->symbols[DYNSYM_RUNTIME] - ); - FAIL; + void * runtime_addr = self->symbols[DYNSYM_RUNTIME]; + #if defined PL_LINUX + const size_t size = getpagesize(); + #else + const size_t size = 0; + #endif + + void * lower = isvalid(runtime_addr) ? runtime_addr : self->map.runtime.base; + void * upper = isvalid(runtime_addr) ? 
runtime_addr : lower + size; + + #ifdef DEBUG + if (isvalid(runtime_addr)) { + log_d("Using runtime state symbol @ %p", runtime_addr); } - - interp_head_raddr = V_FIELD(void *, py_runtime, py_runtime, o_interp_head); - if (V_MAX(3, 8)) { - self->gc_state_raddr = self->symbols[DYNSYM_RUNTIME] + py_v->py_runtime.o_gc; - log_d("GC runtime state @ %p", self->gc_state_raddr); + else { + log_d("Using runtime state section @ %p-%p", lower, upper); } + #endif + + for (void * current_addr = lower; current_addr <= upper; current_addr += sizeof(void *)) { + if (py_proc__get_type(self, current_addr, py_runtime)) { + log_d( + "Cannot copy runtime state structure from remote address %p", + current_addr + ); + continue; + } + + interp_head_raddr = V_FIELD(void *, py_runtime, py_runtime, o_interp_head); + if (V_MAX(3, 8)) { + self->gc_state_raddr = current_addr + py_v->py_runtime.o_gc; + log_d("GC runtime state @ %p", self->gc_state_raddr); + } - log_d("Found possible interpreter state from runtime symbol at %p", interp_head_raddr); + if (fail(_py_proc__check_interp_state(self, interp_head_raddr))) { + log_d("Interpreter state check failed while dereferencing runtime state"); + interp_head_raddr = NULL; + continue; + } + break; + } - if (fail(_py_proc__check_interp_state(self, interp_head_raddr))) { - log_d("Interpreter state check failed while dereferencing symbol"); + if (!isvalid(interp_head_raddr)) { + log_d("Cannot dereference PyInterpreterState head from runtime state"); FAIL; } @@ -556,8 +583,8 @@ _py_proc__find_interpreter_state(py_proc_t * self) { if (fail(_py_proc__infer_python_version(self))) FAIL; - if (self->sym_loaded) { - // Try to resolve the symbols if we have them + if (self->sym_loaded || isvalid(self->map.runtime.base)) { + // Try to resolve the symbols or the runtime section, if we have them self->is_raddr = NULL; diff --git a/src/py_proc.h b/src/py_proc.h index 28a82c35..8540d58d 100644 --- a/src/py_proc.h +++ b/src/py_proc.h @@ -51,6 +51,7 @@ typedef struct { 
proc_vm_map_block_t exe; proc_vm_map_block_t dynsym; proc_vm_map_block_t rodata; + proc_vm_map_block_t runtime; // Added in Python 3.11 } proc_vm_map_t; typedef struct _proc_extra_info proc_extra_info; // Forward declaration. diff --git a/src/win/py_proc.h b/src/win/py_proc.h index c99fc4d6..70cf5596 100644 --- a/src/win/py_proc.h +++ b/src/win/py_proc.h @@ -98,7 +98,10 @@ _py_proc__analyze_pe(py_proc_t * self, char * path, void * base) { if (strcmp(".data", (const char *) s_hdr[i].Name) == 0) { self->map.bss.base = base + s_hdr[i].VirtualAddress; self->map.bss.size = s_hdr[i].Misc.VirtualSize; - break; + } + else if (strcmp("PyRuntime", (const char *) s_hdr[i].Name) == 0) { + self->map.runtime.base = base + s_hdr[i].VirtualAddress; + self->map.runtime.size = s_hdr[i].Misc.VirtualSize; } } diff --git a/test/targets/target34.py b/test/targets/target34.py index c12493fd..078d8aa6 100755 --- a/test/targets/target34.py +++ b/test/targets/target34.py @@ -27,7 +27,7 @@ def keep_cpu_busy(): a = [] - for i in range(6000000): + for i in range(10_000_000): a.append(i) if i % 1000000 == 0: print("Unwanted output " + str(i)) diff --git a/test/utils.py b/test/utils.py index 757623ec..2ed89dba 100644 --- a/test/utils.py +++ b/test/utils.py @@ -140,9 +140,13 @@ def collect_logs(variant: str, pid: int) -> List[str]: with Path("/var/log/syslog").open() as logfile: needles = (f"{variant}[{pid}]", f"systemd-coredump[{pid}]") return [ - line.strip().replace("#012", "\n") - for line in logfile.readlines() - if any(needle in line for needle in needles) + f" logs for {variant}[{pid}] ".center(80, "="), + *( + line.strip().replace("#012", "\n") + for line in logfile.readlines() + if any(needle in line for needle in needles) + ), + f" end of logs for {variant}[{pid}] ".center(80, "="), ] case _: return [] @@ -183,15 +187,25 @@ def run( raise retcode = process.poll() if check and retcode: - raise CalledProcessError( + exc = CalledProcessError( retcode, process.args, output=stdout, 
stderr=stderr ) + exc.pid = process.pid + raise exc result = CompletedProcess(process.args, retcode, stdout, stderr) result.pid = process.pid return result +def print_logs(logs: List[str]) -> None: + if logs: + for log in logs: + print(log) + else: + print("<< no logs available >>") + + class Variant(str): ALL: list["Variant"] = [] @@ -220,11 +234,16 @@ def __call__( mojo_args = ["-b"] if mojo else [] - result = run( - [str(self.path)] + mojo_args + list(args), - capture_output=True, - timeout=timeout, - ) + try: + result = run( + [str(self.path)] + mojo_args + list(args), + capture_output=True, + timeout=timeout, + ) + except Exception as exc: + if (pid := getattr(exc, "pid", None)) is not None: + print_logs(collect_logs(self.name, pid)) + raise if result.returncode in (-11, 139): # SIGSEGV print(bt(self.path)) @@ -242,13 +261,7 @@ def __call__( result.logs = logs if result.returncode != int(expect_fail): - if logs: - print(f" logs for {self.name}[{result.pid}] ".center(80, "=")) - for log in logs: - print(log) - print(f" end of logs for {self.name}[{result.pid}] ".center(80, "=")) - else: - print(f"<< no logs for {self.name}[{result.pid}] >>") + print_logs(logs) return result