From 0c0a1825b8017c1861b6b6eef331b75d4d676f5f Mon Sep 17 00:00:00 2001 From: amatter Date: Tue, 11 Jul 2017 00:00:54 +0300 Subject: [PATCH 01/18] update: Owned addresses types conversion --- cle/address_translator.py | 95 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 cle/address_translator.py diff --git a/cle/address_translator.py b/cle/address_translator.py new file mode 100644 index 00000000..d6f5a967 --- /dev/null +++ b/cle/address_translator.py @@ -0,0 +1,95 @@ +class AddressTranslator(object): + """ + Mediates address translations between typed addresses such as RAW, RVA, LVA, MVA and VA + including address owner and its state (linked or mapped) + + Semantics: + owner - object associated with the address + (any object class based on `cle.Backend`) + owner mapping state - sparse object can be either mapped or not + (actual object's image base VA to be considered valid) + RAW - offset (index) inside a file stream + VA - address inside process flat virtual memory space + RVA - address relative to the object's segment base + (segment base normalized virtual address) + LVA - linked VA (linker) + MVA - mapped VA (loader) + """ + + def __init__(self, rva, owner): + """ + :param rva: virtual address relative to owner's object image base + :type rva: int + :param owner: The object owner address relates to + :type owner: cle.Backend + """ + self._rva, self._owner = rva, owner + + @classmethod + def from_lva(cls, lva, owner): + """ + Loads address translator with LVA + """ + return cls(lva - owner.linked_base, owner) + + @classmethod + def from_mva(cls, mva, owner): + """ + Loads address translator with MVA + """ + return cls(mva - owner.mapped_base, owner) + + @classmethod + def from_rva(cls, rva, owner): + """ + Loads address translator with RVA + """ + return cls(rva, owner) + + @classmethod + def from_raw(cls, raw, owner): + """ + Loads address translator with RAW address + """ + return cls(owner.offset_to_addr(raw) - (owner.mapped_base if owner._is_mapped else owner.linked_base), owner) + + from_linked_va = from_lva + from_va = from_mapped_va = from_mva + from_relative_va = from_rva + + def to_lva(self): + """ + VA -> LVA + :rtype: int + """ + return self._rva + self._owner.linked_base + + def to_mva(self): + """ + RVA -> MVA + :rtype: int + """ + return self._rva + self._owner.mapped_base + + def to_rva(self): + """ + RVA -> RVA + :rtype: int + """ + return self._rva + + def to_raw(self): + """ + RVA -> RAW + :rtype: int + """ + return self._owner.addr_to_offset( + self._rva + (self._owner.mapped_base if self._owner._is_mapped else self._owner.linked_base) + ) + + to_linked_va = to_lva + to_va = to_mapped_va = to_mva + to_relative_va = to_rva + + +AT = AddressTranslator From 4d189437bdbe71b13e1b0142f169d3378a26a731 Mon Sep 17 00:00:00 2001 From: amatter Date: Tue, 11 Jul 2017 00:02:09 +0300 Subject: [PATCH 02/18] fix: minor Mach-O binding issue (logging parameters order confusion) --- cle/backends/macho/binding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cle/backends/macho/binding.py b/cle/backends/macho/binding.py index 92898180..48b7dfd0 100644 --- a/cle/backends/macho/binding.py +++ b/cle/backends/macho/binding.py @@ -366,7 +366,7 @@ def default_binding_handler(state, binary): value = symbol.addr + state.addend if state.binding_type == 1: # POINTER - l.info("Updating address %#x with symobl %r @ %#x", state.sym_name, location, value) + l.info("Updating address %#x with symobl %r @ %#x", location, state.sym_name, value) binary.memory.write_bytes( location, struct.pack(binary.struct_byteorder + ("Q" if binary.arch.bits == 64 else "I"), value)) From 676a33959673f282df8a633edfeede2bc2a3bf16 Mon Sep 17 00:00:00 2001 From: amatter Date: Tue, 11 Jul 2017 00:25:39 +0300 Subject: [PATCH 03/18] fix: rebasing and valid virtual image bases for Region/Regions --- cle/backends/__init__.py | 113 +++++++++++++++++++++++---------------- 1 file changed, 66 insertions(+), 47 deletions(-) diff --git a/cle/backends/__init__.py b/cle/backends/__init__.py index 8a2ebb9b..2fb09eae 100644 --- a/cle/backends/__init__.py +++ b/cle/backends/__init__.py @@ -1,7 +1,9 @@ import os import subprocess + import archinfo +from ..address_translator import AT from ..memory import Clemory from ..errors import CLECompatibilityError, CLEError @@ -13,6 +15,7 @@ import logging l = logging.getLogger('cle.backends') + class Region(object): """ A region of memory that is mapped in the object's file. @@ -33,17 +36,27 @@ def __init__(self, offset, vaddr, filesize, memsize): self.filesize = filesize self.offset = offset + def _rebase(self, delta): + """ + Does region rebasing to other base address. + Intended for usage by loader's add_object to reflect the rebasing. + + :param delta: Delta offset between an old and a new image bases + :type delta: int + """ + self.vaddr += delta + def contains_addr(self, addr): """ Does this region contain this virtual address? """ - return (addr >= self.vaddr) and (addr < self.vaddr + self.memsize) + return self.vaddr <= addr < self.vaddr + self.memsize def contains_offset(self, offset): """ Does this region contain this offset into the file? """ - return (offset >= self.offset) and (offset < self.offset + self.filesize) + return self.offset <= offset < self.offset + self.filesize def addr_to_offset(self, addr): """ @@ -100,6 +113,7 @@ class Segment(Region): """ pass + class Section(Region): """ Simple representation of a loaded section. @@ -145,14 +159,8 @@ def __repr__(self): self.memsize ) -class Symbol(object): - # enum for symbol types - TYPE_OTHER = 0 - TYPE_NONE = 1 - TYPE_FUNCTION = 2 - TYPE_OBJECT = 3 - TYPE_SECTION = 4 +class Symbol(object): """ Representation of a symbol from a binary file. Smart enough to rebase itself. @@ -170,6 +178,14 @@ class Symbol(object): :vartype resolvedby: None or cle.backends.Symbol :ivar str resolvewith: The name of the library we must use to resolve this symbol, or None if none is required. """ + + # enum for symbol types + TYPE_OTHER = 0 + TYPE_NONE = 1 + TYPE_FUNCTION = 2 + TYPE_OBJECT = 3 + TYPE_SECTION = 4 + def __init__(self, owner, name, addr, size, sym_type): """ Not documenting this since if you try calling it, you're wrong. @@ -255,12 +271,12 @@ class Regions(object): """ def __init__(self, lst=None): - self._list = lst if lst is not None else [ ] + self._list = lst if lst is not None else [] if self._list: self._sorted_list = self._make_sorted(self._list) else: - self._sorted_list = [ ] + self._sorted_list = [] @property def raw_list(self): @@ -302,6 +318,17 @@ def __len__(self): def __repr__(self): return "" % repr(self._list) + def _rebase(self, delta): + """ + Does regions rebasing to other base address. + Modifies state of each internal object, so the list reference doesn't need to be updated, + the same is also valid for sorted list as operation preserves the ordering. + + :param delta: Delta offset between an old and a new image bases + :type delta: int + """ + map(lambda x: x._rebase(delta), self._list) + def append(self, region): """ Append a new Region instance into the list. @@ -317,9 +344,10 @@ def find_region_containing(self, addr): """ Find the region that contains a specific address. Returns None if none of the regions covers the address. - :param int addr: The address. - :return: The region that covers the specific address, or None if no such region is found. - :rtype: Region or None + :param addr: The address. + :type addr: int + :return: The region that covers the specific address, or None if no such region is found. + :rtype: Region or None """ pos = self.bisect_find(self._sorted_list, addr, @@ -528,6 +556,15 @@ def sections(self, v): else: raise ValueError('Unsupported type %s set as sections.' % type(v)) + def rebase(self): + """ + Rebase backend's regions to the new base where they were mapped by the loader + """ + if self.sections: + self.sections._rebase(self.image_base_delta) + if self.segments: + self.segments._rebase(self.image_base_delta) + def contains_addr(self, addr): """ Is `addr` in one of the binary's segments/sections we have loaded? (i.e. is it mapped into memory ?) @@ -542,21 +579,19 @@ def find_segment_containing(self, addr): """ Returns the segment that contains `addr`, or ``None``. """ - - return self.segments.find_region_containing(addr - self.rebase_addr) + return self.segments.find_region_containing(addr) def find_section_containing(self, addr): """ Returns the section that contains `addr` or ``None``. """ - - return self.sections.find_region_containing(addr - self.rebase_addr) + return self.sections.find_region_containing(addr) def addr_to_offset(self, addr): loadable = self.find_loadable_containing(addr) if loadable is not None: - return loadable.addr_to_offset(addr - self.rebase_addr) + return loadable.addr_to_offset(addr) else: return None @@ -564,11 +599,11 @@ def offset_to_addr(self, offset): if self.segments: for s in self.segments: if s.contains_offset(offset): - return s.offset_to_addr(offset) + self.rebase_addr + return s.offset_to_addr(offset) else: for s in self.sections: if s.contains_offset(offset): - return s.offset_to_addr(offset) + self.rebase_addr + return s.offset_to_addr(offset) return None @@ -577,35 +612,20 @@ def get_min_addr(self): This returns the lowest virtual address contained in any loaded segment of the binary. """ - out = None - for segment in self.segments: - if out is None or segment.min_addr < out: - out = segment.min_addr - - if out is None: - for section in self.sections: - if out is None or section.min_addr < out: - out = section.min_addr - - if out is None: - return self.rebase_addr - else: - return out + self.rebase_addr + out = self.mapped_base + if self.segments or self.sections: + out = min(map(lambda x: x.min_addr, self.segments or self.sections)) + return out def get_max_addr(self): """ This returns the highest virtual address contained in any loaded segment of the binary. """ - out = self.segments.max_addr - - if out is None: - out = self.sections.max_addr - - if out is None: - return self.rebase_addr - else: - return out + self.rebase_addr + out = self.mapped_base + if self.segments or self.sections: + out = max(map(lambda x: x.max_addr, self.segments or self.sections)) + return out def set_got_entry(self, symbol_name, newaddr): """ @@ -614,11 +634,10 @@ def set_got_entry(self, symbol_name, newaddr): """ if symbol_name not in self.imports: - l.warning("Could not override the address of symbol %s: symbol entry not " - "found in GOT", symbol_name) + l.warning("Could not override the address of symbol %s: symbol entry not found in GOT", symbol_name) return - self.memory.write_addr_at(self.imports[symbol_name].addr, newaddr) + self.memory.write_addr_at(AT.from_lva(self.imports[symbol_name].addr, self).to_rva(), newaddr) def get_initializers(self): # pylint: disable=no-self-use """ From 9e7b91f669fb8d99596df743d914dd659e6c16da Mon Sep 17 00:00:00 2001 From: amatter Date: Tue, 11 Jul 2017 00:32:08 +0300 Subject: [PATCH 04/18] fix: switch from rebase_addr/requested_base to mapped_base/linked_base --- cle/backends/__init__.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/cle/backends/__init__.py b/cle/backends/__init__.py index 2fb09eae..d0ec0491 100644 --- a/cle/backends/__init__.py +++ b/cle/backends/__init__.py @@ -424,10 +424,10 @@ class Backend(object): :vartype arch: archinfo.arch.Arch :ivar str os: The operating system this binary is meant to run under :ivar compatible_with: Another Backend object this object must be compatibile with, or None - :ivar int rebase_addr: The base address of this object in virtual memory + :ivar int mapped_base: The base address of this object in virtual memory :ivar deps: A list of names of shared libraries this binary depends on :ivar linking: 'dynamic' or 'static' - :ivar requested_base: The base address this object requests to be loaded at, or None + :ivar linked_base: The base address this object requests to be loaded at :ivar bool pic: Whether this object is position-independent :ivar bool execstack: Whether this executable has an executable stack :ivar str provides: The name of the shared library dependancy that this object resolves @@ -478,13 +478,12 @@ def __init__(self, self.compatible_with = compatible_with self._symbol_cache = {} - self.rebase_addr_symbolic = 0 + self.mapped_base_symbolic = 0 # These are set by cle, and should not be overriden manually - self.rebase_addr = 0 # not to be set manually - used by CLE + self.mapped_base = self.linked_base = 0 # not to be set manually - used by CLE self.deps = [] # Needed shared objects (libraries dependencies) self.linking = None # Dynamic or static linking - self.requested_base = None self.pic = False self.execstack = False @@ -506,7 +505,6 @@ def __init__(self, else: raise CLEError("Bad parameter: custom_arch=%s" % custom_arch) - def close(self): if self.binary_stream is not None: self.binary_stream.close() @@ -514,9 +512,11 @@ def close(self): def __repr__(self): if self.binary is not None: - return '<%s Object %s, maps [%#x:%#x]>' % (self.__class__.__name__, os.path.basename(self.binary), self.get_min_addr(), self.get_max_addr()) + return '<%s Object %s, maps [%#x:%#x]>' % \ + (self.__class__.__name__, os.path.basename(self.binary), self.get_min_addr(), self.get_max_addr()) else: - return '<%s Object from stream, maps [%#x:%#x]>' % (self.__class__.__name__, self.get_min_addr(), self.get_max_addr()) + return '<%s Object from stream, maps [%#x:%#x]>' % \ + (self.__class__.__name__, self.get_min_addr(), self.get_max_addr()) def set_arch(self, arch): if self.compatible_with is not None and self.compatible_with.arch != arch: @@ -524,6 +524,10 @@ def set_arch(self, arch): self.arch = arch self.memory = Clemory(arch) # Private virtual address space, without relocations + @property + def image_base_delta(self): + return - self.linked_base + self.mapped_base + @property def entry(self): if self._custom_entry_point is not None: From 40abf96b96428e5cc1d6be6867b04a3c69c5725f Mon Sep 17 00:00:00 2001 From: amatter Date: Tue, 11 Jul 2017 00:39:34 +0300 Subject: [PATCH 05/18] fix: add_object is responsible for rebasing --- cle/backends/__init__.py | 3 +++ cle/loader.py | 18 +++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/cle/backends/__init__.py b/cle/backends/__init__.py index d0ec0491..d6103c67 100644 --- a/cle/backends/__init__.py +++ b/cle/backends/__init__.py @@ -494,6 +494,9 @@ def __init__(self, self.memory = None + # should be set inside `cle.Loader.add_object` + self._is_mapped = False + if custom_arch is None: self.arch = None elif isinstance(custom_arch, str): diff --git a/cle/loader.py b/cle/loader.py index 3bf9448e..2fa7dda7 100644 --- a/cle/loader.py +++ b/cle/loader.py @@ -310,16 +310,14 @@ def add_object(self, obj, base_addr=None): if self._ignore_import_version_numbers: self._satisfied_deps.add(obj.provides.strip('.0123456789')) - obj.rebase_addr = 0 - obj_offset = obj.get_min_addr() - obj_size = obj.get_max_addr() - obj_offset + obj_size = obj.get_max_addr() - obj.get_min_addr() - if base_addr is not None and self._is_range_free(base_addr + obj_offset, obj_size): + if base_addr is not None and self._is_range_free(base_addr, obj_size): pass - elif obj._custom_base_addr is not None and self._is_range_free(obj._custom_base_addr + obj_offset, obj_size): + elif obj._custom_base_addr is not None and self._is_range_free(obj._custom_base_addr, obj_size): base_addr = obj._custom_base_addr - elif obj.requested_base is not None and self._is_range_free(obj.requested_base + obj_offset, obj_size): - base_addr = obj.requested_base + elif obj.linked_base and self._is_range_free(obj.linked_base, obj_size): + base_addr = obj.linked_base elif not obj.is_main_bin: base_addr = self._get_safe_rebase_addr() elif self.main_bin.pic: @@ -335,9 +333,11 @@ def add_object(self, obj, base_addr=None): l.info("Rebasing %s at %#x", obj.binary, base_addr) self.memory.add_backer(base_addr, obj.memory) - obj.rebase_addr = base_addr - def _is_range_free(self, addr, size): + obj.mapped_base = base_addr + obj.rebase() + obj._is_mapped = True + for o in self.all_objects: if (addr >= o.get_min_addr() and addr < o.get_max_addr()) or \ (o.get_min_addr() >= addr and o.get_min_addr() < addr + size): From c7ebf31da9bd9323177a760b9db44a9f17f6715e Mon Sep 17 00:00:00 2001 From: amatter Date: Tue, 11 Jul 2017 00:45:16 +0300 Subject: [PATCH 06/18] fix: mapped_base initial state for ELF/PE/Mach-O --- cle/backends/elf.py | 7 +++++++ cle/backends/macho/__init__.py | 3 +-- cle/backends/pe.py | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cle/backends/elf.py b/cle/backends/elf.py index 0eeb7e96..ee562803 100644 --- a/cle/backends/elf.py +++ b/cle/backends/elf.py @@ -211,6 +211,13 @@ def __init__(self, binary, **kwargs): self.__parsed_reloc_tables = set() + # The linked image base should be evaluated before registering any segment or section due to + # the fact that elffile, used by those methods, is working only with un-based virtual addresses, but Clemories + # themselves are organized as a tree where each node backer internally uses relative addressing + self.mapped_base = self.linked_base = min( + map(lambda x: x['p_vaddr'], + filter(lambda x: x.header.p_type == 'PT_LOAD', self.reader.iter_segments())) or [0] + ) self.__register_segments() self.__register_sections() diff --git a/cle/backends/macho/__init__.py b/cle/backends/macho/__init__.py index eba23c31..393dc694 100644 --- a/cle/backends/macho/__init__.py +++ b/cle/backends/macho/__init__.py @@ -31,7 +31,7 @@ class MachO(Backend): * Sections are always part of a segment, self.sections will thus be empty * Symbols cannot be categorized like in ELF * Symbol resolution must be handled by the binary - * Rebasing cannot be done statically (i.e. self.rebase_addr is ignored for now) + * Rebasing cannot be done statically (i.e. self.mapped_base is ignored for now) * ... """ MH_MAGIC_64 = 0xfeedfacf @@ -44,7 +44,6 @@ def __init__(self, binary, **kwargs): l.warning('The Mach-O backend is not well-supported. Good luck!') super(MachO, self).__init__(binary, **kwargs) - self.rebase_addr = 0 # required for some angr stuffs even though not supported self.struct_byteorder = None # holds byteorder for struct.unpack(...) self.cputype = None diff --git a/cle/backends/pe.py b/cle/backends/pe.py index f0d6635f..8df638e9 100644 --- a/cle/backends/pe.py +++ b/cle/backends/pe.py @@ -121,7 +121,7 @@ def __init__(self, *args, **kwargs): if self.arch is None: self.set_arch(archinfo.arch_from_id(pefile.MACHINE_TYPE[self._pe.FILE_HEADER.Machine])) - self.requested_base = self._pe.OPTIONAL_HEADER.ImageBase + self.mapped_base = self.linked_base = self._pe.OPTIONAL_HEADER.ImageBase self._entry = self._pe.OPTIONAL_HEADER.AddressOfEntryPoint if hasattr(self._pe, 'DIRECTORY_ENTRY_IMPORT'): From 8b2c7752b87419eb697f9176932b36d9ccad8bf4 Mon Sep 17 00:00:00 2001 From: amatter Date: Tue, 11 Jul 2017 00:55:07 +0300 Subject: [PATCH 07/18] fix: Address manipulations fixes --- cle/backends/__init__.py | 6 +-- cle/backends/cgc.py | 3 +- cle/backends/elf.py | 55 +++++++++++++------------ cle/backends/idabin.py | 4 +- cle/backends/macho/binding.py | 13 ++++-- cle/backends/metaelf.py | 23 +++++++---- cle/backends/pe.py | 32 +++++++------- cle/backends/relocations/__init__.py | 7 ++-- cle/backends/relocations/generic.py | 17 ++++---- cle/backends/relocations/generic_elf.py | 20 ++++++--- cle/backends/relocations/ppc64.py | 19 +++++---- cle/loader.py | 18 ++++---- cle/tls/elf_tls.py | 10 ++--- cle/tls/pe_tls.py | 19 +++++---- 14 files changed, 141 insertions(+), 105 deletions(-) diff --git a/cle/backends/__init__.py b/cle/backends/__init__.py index d6103c67..9d3b1547 100644 --- a/cle/backends/__init__.py +++ b/cle/backends/__init__.py @@ -217,7 +217,7 @@ def rebased_addr(self): """ The address of this symbol in the global memory space """ - return self.addr + self.owner_obj.rebase_addr + return AT.from_lva(self.addr, self.owner_obj).to_mva() @property def is_function(self): @@ -534,8 +534,8 @@ def image_base_delta(self): @property def entry(self): if self._custom_entry_point is not None: - return self._custom_entry_point + self.rebase_addr - return self._entry + self.rebase_addr + return AT.from_lva(self._custom_entry_point, self).to_mva() + return AT.from_lva(self._entry, self).to_mva() @property def segments(self): diff --git a/cle/backends/cgc.py b/cle/backends/cgc.py index a23646f9..ce03315f 100644 --- a/cle/backends/cgc.py +++ b/cle/backends/cgc.py @@ -1,6 +1,7 @@ from .elf import ELF from ..patched_stream import PatchedStream from . import register_backend +from ..address_translator import AT ELF_HEADER = "7f45 4c46 0101 0100 0000 0000 0000 0000".replace(" ","").decode('hex') CGC_HEADER = "7f43 4743 0101 0143 014d 6572 696e 6f00".replace(" ","").decode('hex') @@ -21,7 +22,7 @@ def __init__(self, binary, *args, **kwargs): stream = PatchedStream(open(binary, 'rb'), [(0, ELF_HEADER)]) kwargs['filename'] = filename super(CGC, self).__init__(stream, *args, **kwargs) - self.memory.write_bytes(self.get_min_addr(), CGC_HEADER) # repair CGC header + self.memory.write_bytes(AT.from_lva(self.get_min_addr(), self).to_rva(), CGC_HEADER) # repair CGC header self.os = 'cgc' self.execstack = True # the stack is always executable in CGC diff --git a/cle/backends/elf.py b/cle/backends/elf.py index ee562803..10dfe456 100644 --- a/cle/backends/elf.py +++ b/cle/backends/elf.py @@ -12,6 +12,7 @@ from ..patched_stream import PatchedStream from ..errors import CLEError, CLEInvalidBinaryError, CLECompatibilityError from ..utils import ALIGN_DOWN, ALIGN_UP, get_mmaped_data +from ..address_translator import AT import logging l = logging.getLogger('cle.elf') @@ -228,7 +229,7 @@ def __init__(self, binary, **kwargs): self._populate_demangled_names() if patch_undo is not None: - self.memory.write_bytes(self.get_min_addr() + patch_undo[0], patch_undo[1]) + self.memory.write_bytes(AT.from_lva(self.get_min_addr() + patch_undo[0], self).to_rva(), patch_undo[1]) def __getstate__(self): if self.binary is None: @@ -263,20 +264,22 @@ def __setstate__(self, data): self.reader = elffile.ELFFile(self.binary_stream) if self._dynamic and 'DT_STRTAB' in self._dynamic: fakestrtabheader = { - 'sh_offset': self._dynamic['DT_STRTAB'] + 'sh_offset': AT.from_lva(self._dynamic['DT_STRTAB'], self).to_rva() } self.strtab = elffile.StringTableSection(fakestrtabheader, 'strtab_cle', self.memory) if 'DT_SYMTAB' in self._dynamic and 'DT_SYMENT' in self._dynamic: fakesymtabheader = { - 'sh_offset': self._dynamic['DT_SYMTAB'], + 'sh_offset': AT.from_lva(self._dynamic['DT_SYMTAB'], self).to_rva(), 'sh_entsize': self._dynamic['DT_SYMENT'], 'sh_size': 0 } # bogus size: no iteration allowed self.dynsym = elffile.SymbolTableSection(fakesymtabheader, 'symtab_cle', self.memory, self.reader, self.strtab) if 'DT_GNU_HASH' in self._dynamic: - self.hashtable = GNUHashTable(self.dynsym, self.memory, self._dynamic['DT_GNU_HASH'], self.arch) + self.hashtable = GNUHashTable(self.dynsym, self.memory, + AT.from_lva(self._dynamic['DT_GNU_HASH'], self).to_rva(), self.arch) elif 'DT_HASH' in self._dynamic: - self.hashtable = ELFHashTable(self.dynsym, self.memory, self._dynamic['DT_HASH'], self.arch) + self.hashtable = ELFHashTable(self.dynsym, self.memory, + AT.from_lva(self._dynamic['DT_HASH'], self).to_rva(), self.arch) def _cache_symbol_name(self, symbol): name = symbol.name @@ -350,21 +353,21 @@ def get_symbol(self, symid, symbol_table=None): # pylint: disable=arguments-diff def _extract_init_fini(self): # Extract the initializers and finalizers if 'DT_PREINIT_ARRAY' in self._dynamic and 'DT_PREINIT_ARRAYSZ' in self._dynamic: - arr_start = self._dynamic['DT_PREINIT_ARRAY'] + arr_start = AT.from_lva(self._dynamic['DT_PREINIT_ARRAY'], self).to_rva() arr_end = arr_start + self._dynamic['DT_PREINIT_ARRAYSZ'] arr_entsize = self.arch.bytes self._preinit_arr = map(self.memory.read_addr_at, range(arr_start, arr_end, arr_entsize)) if 'DT_INIT' in self._dynamic: - self._init_func = self._dynamic['DT_INIT'] + self._init_func = AT.from_lva(self._dynamic['DT_INIT'], self).to_rva() if 'DT_INIT_ARRAY' in self._dynamic and 'DT_INIT_ARRAYSZ' in self._dynamic: - arr_start = self._dynamic['DT_INIT_ARRAY'] + arr_start = AT.from_lva(self._dynamic['DT_INIT_ARRAY'], self).to_rva() arr_end = arr_start + self._dynamic['DT_INIT_ARRAYSZ'] arr_entsize = self.arch.bytes self._init_arr = map(self.memory.read_addr_at, range(arr_start, arr_end, arr_entsize)) if 'DT_FINI' in self._dynamic: - self._fini_func = self._dynamic['DT_FINI'] + self._fini_func = AT.from_lva(self._dynamic['DT_FINI'], self).to_rva() if 'DT_FINI_ARRAY' in self._dynamic and 'DT_FINI_ARRAYSZ' in self._dynamic: - arr_start = self._dynamic['DT_FINI_ARRAY'] + arr_start = AT.from_lva(self._dynamic['DT_FINI_ARRAY'], self).to_rva() arr_end = arr_start + self._dynamic['DT_FINI_ARRAYSZ'] arr_entsize = self.arch.bytes self._fini_arr = map(self.memory.read_addr_at, range(arr_start, arr_end, arr_entsize)) @@ -381,7 +384,7 @@ def get_initializers(self): # The init func and the init array in the dynamic section are only run by the dynamic loader in shared objects. # In the main binary they are run by libc_csu_init. if self._init_func is not None: - out.append(self._init_func + self.rebase_addr) + out.append(AT.from_lva(self._init_func, self).to_mva()) out.extend(self._init_arr) return out @@ -389,8 +392,8 @@ def get_finalizers(self): if not self._inits_extracted: self._extract_init_fini() out = [] if self._fini_func is not None: - out.append(self._fini_func + self.rebase_addr) - out.extend(map(self._rebase_addr, self._fini_arr)) + out.append(AT.from_lva(self._fini_func, self).to_mva()) + out.extend(map(lambda x: AT.from_lva(x, self).to_mva(), self._fini_arr)) return out def __register_segments(self): @@ -410,9 +413,6 @@ def __register_segments(self): elif seg_readelf.header.p_type == 'PT_GNU_STACK': self.execstack = bool(seg_readelf.header.p_flags & 1) - def _rebase_addr(self, addr): - return addr + self.rebase_addr - def _load_segment(self, seg): self._load_segment_metadata(seg) self._load_segment_memory(seg) @@ -459,7 +459,7 @@ def _load_segment_memory(self, seg): if zeroend > zeropage: data = data.ljust(zeroend - mapstart, '\0') - self.memory.add_backer(mapstart, data) + self.memory.add_backer(AT.from_lva(mapstart, self).to_rva(), data) def __register_dyn(self, seg_readelf): """ @@ -478,7 +478,7 @@ def __register_dyn(self, seg_readelf): # To handle binaries without section headers, we need to hack around pyreadelf's assumptions # make our own string table fakestrtabheader = { - 'sh_offset': self._dynamic['DT_STRTAB'] + 'sh_offset': AT.from_lva(self._dynamic['DT_STRTAB'], self).to_rva() } self.strtab = elffile.StringTableSection(fakestrtabheader, 'strtab_cle', self.memory) @@ -493,7 +493,7 @@ def __register_dyn(self, seg_readelf): if 'DT_SYMTAB' in self._dynamic and 'DT_SYMENT' in self._dynamic: # Construct our own symbol table to hack around pyreadelf assuming section headers are around fakesymtabheader = { - 'sh_offset': self._dynamic['DT_SYMTAB'], + 'sh_offset': AT.from_lva(self._dynamic['DT_SYMTAB'], self).to_rva(), 'sh_entsize': self._dynamic['DT_SYMENT'], 'sh_size': 0 } # bogus size: no iteration allowed @@ -502,9 +502,11 @@ def __register_dyn(self, seg_readelf): # set up the hash table, prefering the gnu hash section to the old hash section # the hash table lets you get any symbol given its name if 'DT_GNU_HASH' in self._dynamic: - self.hashtable = GNUHashTable(self.dynsym, self.memory, self._dynamic['DT_GNU_HASH'], self.arch) + self.hashtable = GNUHashTable( + self.dynsym, self.memory, AT.from_lva(self._dynamic['DT_GNU_HASH'], self).to_rva(), self.arch) elif 'DT_HASH' in self._dynamic: - self.hashtable = ELFHashTable(self.dynsym, self.memory, self._dynamic['DT_HASH'], self.arch) + self.hashtable = ELFHashTable( + self.dynsym, self.memory, AT.from_lva(self._dynamic['DT_HASH'], self).to_rva(), self.arch) else: l.warning("No hash table available in %s", self.binary) @@ -534,7 +536,8 @@ def __register_dyn(self, seg_readelf): # try to parse relocations out of a table of type DT_REL{,A} if 'DT_' + self.rela_type in self._dynamic: - reloffset = self._dynamic['DT_' + self.rela_type] + reloffset = self._dynamic['DT_' + self.rela_type] and \ + AT.from_lva(self._dynamic['DT_' + self.rela_type], self).to_rva() if 'DT_' + self.rela_type + 'SZ' not in self._dynamic: raise CLEInvalidBinaryError('Dynamic section contains DT_' + self.rela_type + ', but DT_' + self.rela_type + 'SZ is not present') @@ -550,7 +553,7 @@ def __register_dyn(self, seg_readelf): # try to parse relocations out of a table of type DT_JMPREL if 'DT_JMPREL' in self._dynamic: - jmpreloffset = self._dynamic['DT_JMPREL'] + jmpreloffset = self._dynamic['DT_JMPREL'] and AT.from_lva(self._dynamic['DT_JMPREL'], self).to_rva() if 'DT_PLTRELSZ' not in self._dynamic: raise CLEInvalidBinaryError('Dynamic section contains DT_JMPREL, but DT_PLTRELSZ is not present') jmprelsz = self._dynamic['DT_PLTRELSZ'] @@ -678,11 +681,11 @@ def __register_sections(self): self.__register_relocs(sec_readelf) if section.occupies_memory: # alloc flag - stick in memory maybe! - if section.vaddr not in self.memory: # only allocate if not already allocated (i.e. by program header) + if AT.from_lva(section.vaddr, self).to_rva() not in self.memory: # only allocate if not already allocated (i.e. by program header) if section.type == 'SHT_NOBITS': - self.memory.add_backer(section.vaddr, '\0'*sec_readelf.header['sh_size']) + self.memory.add_backer(AT.from_lva(section.vaddr, self).to_rva(), '\0'*sec_readelf.header['sh_size']) else: #elif section.type == 'SHT_PROGBITS': - self.memory.add_backer(section.vaddr, sec_readelf.data()) + self.memory.add_backer(AT.from_lva(section.vaddr, self).to_rva(), sec_readelf.data()) def __register_section_symbols(self, sec_re): for sym_re in sec_re.iter_symbols(): diff --git a/cle/backends/idabin.py b/cle/backends/idabin.py index f7132552..40a50dd6 100644 --- a/cle/backends/idabin.py +++ b/cle/backends/idabin.py @@ -268,8 +268,8 @@ def get_max_addr(self): @property def entry(self): if self._custom_entry_point is not None: - return self._custom_entry_point + self.rebase_addr - return self.ida.idc.BeginEA() + self.rebase_addr + return self._custom_entry_point + self.mapped_base + return self.ida.idc.BeginEA() + self.mapped_base def resolve_import_dirty(self, sym, new_val): """ diff --git a/cle/backends/macho/binding.py b/cle/backends/macho/binding.py index 48b7dfd0..82c9f4bc 100644 --- a/cle/backends/macho/binding.py +++ b/cle/backends/macho/binding.py @@ -5,6 +5,7 @@ import struct from ...errors import CLEInvalidBinaryError +from ...address_translator import AT import logging l = logging.getLogger('cle.backends.macho.binding') @@ -368,20 +369,24 @@ def default_binding_handler(state, binary): if state.binding_type == 1: # POINTER l.info("Updating address %#x with symobl %r @ %#x", location, state.sym_name, value) binary.memory.write_bytes( - location, - struct.pack(binary.struct_byteorder + ("Q" if binary.arch.bits == 64 else "I"), value)) + AT.from_lva(location, binary).to_rva(), + struct.pack(binary.struct_byteorder + ("Q" if binary.arch.bits == 64 else "I"), value)) symbol.bind_xrefs.append(location) elif state.binding_type == 2: # ABSOLUTE32 location_32 = location % (2 ** 32) value_32 = value % (2 ** 32) l.info("Updating address %#x with symobl %r @ %#x", state.sym_name, location_32, value_32) - binary.memory.write_bytes(location_32, struct.pack(binary.struct_byteorder + "I", value_32)) + binary.memory.write_bytes( + AT.from_lva(location_32, binary).to_rva(), + struct.pack(binary.struct_byteorder + "I", value_32)) symbol.bind_xrefs.append(location_32) elif state.binding_type == 3: # PCREL32 location_32 = location % (2 ** 32) value_32 = (value - (location + 4)) % (2 ** 32) l.info("Updating address %#x with symobl %r @ %#x", state.sym_name, location_32, value_32) - binary.memory.write_bytes(location_32, struct.pack(binary.struct_byteorder + "I", value_32)) + binary.memory.write_bytes( + AT.from_lva(location_32, binary).to_rva(), + struct.pack(binary.struct_byteorder + "I", value_32)) symbol.bind_xrefs.append(location_32) else: l.error("Unknown BIND_TYPE: %d", state.binding_type) diff --git a/cle/backends/metaelf.py b/cle/backends/metaelf.py index 56f3d9c1..c1bc3c15 100644 --- a/cle/backends/metaelf.py +++ b/cle/backends/metaelf.py @@ -2,6 +2,7 @@ import elftools from . import Backend +from ..address_translator import AT from ..errors import CLEOperationError from ..utils import stream_or_path @@ -25,7 +26,7 @@ def _block(self, addr): thumb = self.arch.name.startswith("ARM") and addr % 2 == 1 realaddr = addr if thumb: realaddr -= 1 - dat = ''.join(self.memory.read_bytes(realaddr, 40)) + dat = ''.join(self.memory.read_bytes(AT.from_lva(realaddr, self).to_rva(), 40)) return pyvex.IRSB(dat, addr, self.arch, bytes_offset=1 if thumb else 0, opt_level=1) def _add_plt_stub(self, name, addr, sanity_check=True): @@ -63,7 +64,10 @@ def _load_plt(self): if self.arch.name in ('X86', 'AMD64'): for name, reloc in self.jmprel.iteritems(): try: - self._add_plt_stub(name, self.memory.read_addr_at(reloc.addr) - 6, sanity_check=not self.pic) + self._add_plt_stub( + name, + self.memory.read_addr_at(AT.from_lva(reloc.addr, reloc.owner_obj).to_rva()) - 6, + sanity_check=not self.pic) except KeyError: pass @@ -74,7 +78,10 @@ def _load_plt(self): # ATTEMPT 3: one ppc scheme I've seen is that there are 16-byte stubs packed together # right before the resolution stubs. if self.arch.name in ('PPC32',): - resolver_stubs = sorted((self.memory.read_addr_at(reloc.addr), name) for name, reloc in self.jmprel.iteritems()) + resolver_stubs = sorted( + (self.memory.read_addr_at(AT.from_lva(reloc.addr, reloc.owner_obj).to_rva()), name) + for name, reloc in self.jmprel.iteritems() + ) if resolver_stubs: stubs_table = resolver_stubs[0][0] - 16 * len(resolver_stubs) for i, (_, name) in enumerate(resolver_stubs): @@ -213,14 +220,14 @@ def plt(self): """ Maps names to addresses. """ - return {k: v + self.rebase_addr for (k, v) in self._plt.iteritems()} + return {k: AT.from_lva(v, self).to_mva() for (k, v) in self._plt.iteritems()} @property def reverse_plt(self): """ Maps addresses to names. """ - return {v + self.rebase_addr: k for (k, v) in self._plt.iteritems()} + return {AT.from_lva(v, self).to_mva(): k for (k, v) in self._plt.iteritems()} def get_call_stub_addr(self, name): """ @@ -230,7 +237,7 @@ def get_call_stub_addr(self, name): raise CLEOperationError("FIXME: this doesn't work on PPC64") if name in self._plt: - return self._plt[name] + self.rebase_addr + return AT.from_lva(self._plt[name], self).to_mva() return None @property @@ -253,8 +260,8 @@ def _ppc64_abiv1_entry_fix(self): if self.is_ppc64_abiv1: ep_offset = self._entry - self._entry = self.memory.read_addr_at(ep_offset) - self.ppc64_initial_rtoc = self.memory.read_addr_at(ep_offset+8) + self._entry = self.memory.read_addr_at(AT.from_lva(ep_offset, self).to_rva()) + self.ppc64_initial_rtoc = self.memory.read_addr_at(AT.from_lva(ep_offset+8, self).to_rva()) @staticmethod def extract_soname(path): diff --git a/cle/backends/pe.py b/cle/backends/pe.py index 8df638e9..f2786d3c 100644 --- a/cle/backends/pe.py +++ b/cle/backends/pe.py @@ -5,6 +5,7 @@ from . import Backend, Symbol, Section, register_backend from .relocations import Relocation from ..errors import CLEError +from ..address_translator import AT try: import pefile @@ -55,22 +56,26 @@ def relocate(self, solist, bypass_compatibility=False): # no work required pass elif self.reloc_type == pefile.RELOCATION_TYPE['IMAGE_REL_BASED_HIGHLOW']: - org_bytes = ''.join(self.owner_obj.memory.read_bytes(self.addr, 4)) + org_bytes = ''.join(self.owner_obj.memory.read_bytes( + AT.from_lva(self.addr, self.owner_obj).to_rva(), 4)) org_value = struct.unpack(' Date: Tue, 11 Jul 2017 00:57:22 +0300 Subject: [PATCH 08/18] update: Relocated so-files mapping test --- tests/test_relocated.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tests/test_relocated.py diff --git a/tests/test_relocated.py b/tests/test_relocated.py new file mode 100644 index 00000000..d979dc5a --- /dev/null +++ b/tests/test_relocated.py @@ -0,0 +1,18 @@ +import nose +import cle +import os + + +def test_relocated(): + filename = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../binaries/tests/i386/fauxware') + shared = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../binaries/tests/i386/prelinked') + ld = cle.Loader(filename, custom_ld_path=[shared]) + nose.tools.assert_equal(ld.main_bin.mapped_base, 0x8048000) + nose.tools.assert_sequence_equal( + map(lambda x: x.mapped_base, ld.all_elf_objects), + [0x8048000, 0x9000000, 0xA000000] + ) + + +if __name__ == '__main__': + test_relocated() From c5b841dd7b036540bc7800fe4f1f827277a8fa4c Mon Sep 17 00:00:00 2001 From: amatter Date: Tue, 11 Jul 2017 01:01:31 +0300 Subject: [PATCH 09/18] fix: Tests fixes. Mapped address cannot be equal to 0x0 --- tests/test_overlap.py | 14 ++++++++------ tests/test_regions.py | 3 ++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/test_overlap.py b/tests/test_overlap.py index bf605981..7e86f61e 100644 --- a/tests/test_overlap.py +++ b/tests/test_overlap.py @@ -2,19 +2,21 @@ import cle import os + class MockBackend(cle.backends.Backend): - def __init__(self, requested_base, size, **kwargs): + def __init__(self, linked_base, size, **kwargs): super(MockBackend, self).__init__('/dev/zero', **kwargs) - self.requested_base = requested_base + self.mapped_base = self.linked_base = linked_base self.size = size def get_max_addr(self): - return self.rebase_addr + self.size + return self.mapped_base + self.size + def test_overlap(): filename = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../binaries/tests/i386/manysum') ld = cle.Loader(filename, auto_load_libs=False) - nose.tools.assert_equal(ld.main_bin.rebase_addr, 0) + nose.tools.assert_equal(ld.main_bin.linked_base, 0x8048000) nose.tools.assert_equal(ld.main_bin.get_min_addr(), 0x8048000) obj1 = MockBackend(0x8047000, 0x2000, custom_arch=ld.main_bin.arch) @@ -23,8 +25,8 @@ def test_overlap(): ld.add_object(obj1) ld.add_object(obj2) - nose.tools.assert_equal(obj2.rebase_addr, 0x8047000) - nose.tools.assert_greater(obj1.rebase_addr, 0x8048000) + nose.tools.assert_equal(obj2.mapped_base, 0x8047000) + nose.tools.assert_greater(obj1.mapped_base, 0x8048000) if __name__ == '__main__': test_overlap() diff --git a/tests/test_regions.py b/tests/test_regions.py index cd87d52d..7a911a3d 100644 --- a/tests/test_regions.py +++ b/tests/test_regions.py @@ -5,6 +5,7 @@ import cle from cle.backends import Section, Segment +from cle.address_translator import AT TESTS_BASE = os.path.join(os.path.dirname(os.path.realpath(__file__)), @@ -63,7 +64,7 @@ def run_sections(arch, filename, sections): for i, section in enumerate(ld.main_bin.sections): nose.tools.assert_equal(section.name, sections[i].name) nose.tools.assert_equal(section.offset, sections[i].offset) - nose.tools.assert_equal(section.vaddr, sections[i].vaddr) + nose.tools.assert_equal(AT.from_mva(section.vaddr, ld.main_bin).to_lva(), sections[i].vaddr) nose.tools.assert_equal(section.memsize, sections[i].memsize) # address lookups From fd864f071df2ff76c82ed3ebec0996e388c267e1 Mon Sep 17 00:00:00 2001 From: amatter Date: Tue, 11 Jul 2017 01:02:55 +0300 Subject: [PATCH 10/18] fix: PESection interface consistency fix --- cle/backends/pe.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cle/backends/pe.py b/cle/backends/pe.py index f2786d3c..225e375f 100644 --- a/cle/backends/pe.py +++ b/cle/backends/pe.py @@ -29,6 +29,7 @@ def __init__(self, owner, name, addr, is_import, is_export): self.is_import = is_import self.is_export = is_export + class WinReloc(Relocation): """ Represents a relocation for the PE format. @@ -80,15 +81,9 @@ class PESection(Section): """ Represents a section for the PE format. """ - def __init__(self, pe_section): - super(PESection, self).__init__( - pe_section.Name, - pe_section.Misc_PhysicalAddress, - pe_section.VirtualAddress, - pe_section.Misc_VirtualSize, - ) - - self.characteristics = pe_section.Characteristics + def __init__(self, name, offset, vaddr, size, chars): + super(PESection, self).__init__(name, offset, vaddr, size) + self.characteristics = chars # # Public properties @@ -106,6 +101,7 @@ def is_writable(self): def is_executable(self): return self.characteristics & 0x20000000 != 0 + class PE(Backend): """ Representation of a PE (i.e. Windows) binary. @@ -252,7 +248,11 @@ def _register_sections(self): """ for pe_section in self._pe.sections: - section = PESection(pe_section) + section = PESection( + pe_section.Name, pe_section.Misc_PhysicalAddress, + AT.from_rva(pe_section.VirtualAddress, self).to_mva(), + pe_section.Misc_VirtualSize, pe_section.Characteristics + ) self.sections.append(section) self.sections_map[section.name] = section From ae1649b8d8ab3f3e1ee11eca56c5848ade63fb6a Mon Sep 17 00:00:00 2001 From: amatter Date: Tue, 11 Jul 2017 01:04:52 +0300 Subject: [PATCH 11/18] fix: pylint --- cle/backends/__init__.py | 4 ++-- cle/backends/elf.py | 7 +++---- cle/backends/metaelf.py | 4 ++-- cle/backends/pe.py | 2 +- cle/backends/relocations/generic.py | 3 +-- cle/loader.py | 18 ++++++++---------- cle/memory.py | 2 +- 7 files changed, 18 insertions(+), 22 deletions(-) diff --git a/cle/backends/__init__.py b/cle/backends/__init__.py index 9d3b1547..52f3219b 100644 --- a/cle/backends/__init__.py +++ b/cle/backends/__init__.py @@ -251,7 +251,7 @@ def demangled_name(self): stdout, _ = pipe.communicate() demangled = stdout.split("\n") - if len(demangled) > 0: + if demangled: return demangled[0] return None @@ -459,7 +459,7 @@ def __init__(self, self.binary_stream = None if kwargs != {}: - l.warning("Unused kwargs for loading binary %s: %s", self.binary, ', '.join(str(x) for x in kwargs.keys())) + l.warning("Unused kwargs for loading binary %s: %s", self.binary, ', '.join(kwargs.iterkeys())) self.is_main_bin = is_main_bin self.loader = loader diff --git a/cle/backends/elf.py b/cle/backends/elf.py index 10dfe456..43bc6c0d 100644 --- a/cle/backends/elf.py +++ b/cle/backends/elf.py @@ -283,7 +283,7 @@ def __setstate__(self, data): def _cache_symbol_name(self, symbol): name = symbol.name - if len(name) > 0: + if name: if name in self._symbols_by_name: old_symbol = self._symbols_by_name[name] if not old_symbol.is_weak and symbol.is_weak: @@ -718,11 +718,10 @@ def _populate_demangled_names(self): been implemented, then update self.demangled_names in Symbol """ - if not len(self.symbols_by_addr): + if not self.symbols_by_addr: return - names = [self.symbols_by_addr[s].name for s in self.symbols_by_addr] - names = filter(lambda n: n.startswith("_Z"), names) + names = filter(lambda n: n.startswith("_Z"), (s.name for s in self.symbols_by_addr.itervalues())) lookup_names = map(lambda n: n.split("@@")[0], names) # this monstrosity taken from stackoverflow # http://stackoverflow.com/questions/6526500/c-name-mangling-library-for-python diff --git a/cle/backends/metaelf.py b/cle/backends/metaelf.py index c1bc3c15..59426afb 100644 --- a/cle/backends/metaelf.py +++ b/cle/backends/metaelf.py @@ -147,7 +147,7 @@ def scan_forward(addr, name, push=False): except (AssertionError, KeyError, pyvex.PyVEXError): return False - if len(self._plt) == 0 and '__libc_start_main' in self.jmprel and self.entry != 0: + if not self._plt and '__libc_start_main' in self.jmprel and self.entry != 0: # try to scan forward through control flow to find __libc_start_main! try: last_jk = None @@ -177,7 +177,7 @@ def scan_forward(addr, name, push=False): tick.bailout_timer = 5 scan_forward(self.sections_map['.plt'].vaddr, self.jmprel.keys()[0], push=True) - if len(self._plt) == 0: + if not self._plt: # \(_^^)/ return diff --git a/cle/backends/pe.py b/cle/backends/pe.py index 225e375f..e9ba72a8 100644 --- a/cle/backends/pe.py +++ b/cle/backends/pe.py @@ -148,7 +148,7 @@ def __init__(self, *args, **kwargs): self._handle_relocs() self._register_tls() self._register_sections() - self.linking = 'dynamic' if len(self.deps) > 0 else 'static' + self.linking = 'dynamic' if self.deps else 'static' self.jmprel = self._get_jmprel() diff --git a/cle/backends/relocations/generic.py b/cle/backends/relocations/generic.py index e39d1eb1..f9ca2d1d 100644 --- a/cle/backends/relocations/generic.py +++ b/cle/backends/relocations/generic.py @@ -26,8 +26,7 @@ class GenericJumpslotReloc(Relocation): def value(self): if self.is_rela: return self.resolvedby.rebased_addr + self.addend - else: - return self.resolvedby.rebased_addr + return self.resolvedby.rebased_addr class GenericRelativeReloc(Relocation): @property diff --git a/cle/loader.py b/cle/loader.py index 922457ab..e6407ee0 100644 --- a/cle/loader.py +++ b/cle/loader.py @@ -121,8 +121,7 @@ def close(self): def __repr__(self): if self._main_binary_stream is None: return '' % (os.path.basename(self._main_binary_path), self.min_addr(), self.max_addr()) - else: - return '' % (self.min_addr(), self.max_addr()) + return '' % (self.min_addr(), self.max_addr()) def get_initializers(self): """ @@ -183,7 +182,7 @@ def _load_main_binary(self): self.add_object(self.main_bin) def _load_dependencies(self): - while len(self._unsatisfied_deps) > 0: + while self._unsatisfied_deps: dep = self._unsatisfied_deps.pop(0) if isinstance(dep, (str, unicode)): if os.path.basename(dep) in self._satisfied_deps: @@ -340,13 +339,12 @@ def add_object(self, obj, base_addr=None): obj.rebase() obj._is_mapped = True + def _is_range_free(self, va, size): for o in self.all_objects: - if (addr >= o.get_min_addr() and addr < o.get_max_addr()) or \ - (o.get_min_addr() >= addr and o.get_min_addr() < addr + size): + if o.get_min_addr() <= va < o.get_max_addr() or va <= o.get_min_addr() < va + size: return False return True - def _possible_paths(self, path): if os.path.exists(path): yield path dirs = [] # if we say dirs = blah, we modify the original @@ -438,9 +436,9 @@ def _load_tls(self): # TODO: This assert ensures that we have either ELF or PE modules, but not both. # Do we need to handle the case where we have both ELF and PE modules? assert num_elf_modules != num_pe_modules or num_elf_modules == 0 or num_pe_modules == 0 - if len(elf_modules) > 0: + if elf_modules: self.tls_object = ELFTLSObj(elf_modules) - elif len(pe_modules) > 0: + elif pe_modules: self.tls_object = PETLSObj(pe_modules) if self.tls_object: @@ -455,7 +453,7 @@ def _finalize_tls(self): def addr_belongs_to_object(self, addr): for obj in self.all_objects: - if not (addr >= obj.get_min_addr() and addr < obj.get_max_addr()): + if not obj.get_min_addr() <= addr < obj.get_max_addr(): continue if isinstance(obj.memory, str): @@ -486,7 +484,7 @@ def whats_at(self, addr): if addr in o.plt.values(): for k,v in o.plt.iteritems(): if v == addr: - return "PLT stub of %s in %s (offset %#x)" % (k, nameof, off) + return "PLT stub of %s in %s (offset %#x)" % (k, nameof, off) if off in o.symbols_by_addr: name = o.symbols_by_addr[off].name diff --git a/cle/memory.py b/cle/memory.py index 0ed7303c..8e5c21fc 100644 --- a/cle/memory.py +++ b/cle/memory.py @@ -139,7 +139,7 @@ def write_bytes_to_backer(self, addr, data): to_insert = [ ] i = 0 - while i < len(self._backers) and len(data): + while i < len(self._backers) and data: start, backer_data = self._backers[i] # self._backers is always sorted size = len(backer_data) stop = start + size From 1071dfad7520b79e55e08d6fe9b3fc97fad1f6e6 Mon Sep 17 00:00:00 2001 From: amatter Date: Thu, 13 Jul 2017 19:04:21 +0300 Subject: [PATCH 12/18] fix: sorted key function misuse --- cle/backends/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cle/backends/__init__.py b/cle/backends/__init__.py index 52f3219b..26d982cb 100644 --- a/cle/backends/__init__.py +++ b/cle/backends/__init__.py @@ -398,7 +398,7 @@ def _make_sorted(lst): :rtype: list """ - return sorted(lst, lambda x: x.vaddr) + return sorted(lst, key=lambda x: x.vaddr) class Backend(object): From 81c972a6e2794fa143fbc454e965233e3ff026db Mon Sep 17 00:00:00 2001 From: amatter Date: Thu, 13 Jul 2017 19:05:39 +0300 Subject: [PATCH 13/18] update: Regression test suite for AddressTranslator --- tests/test_address_translator.py | 50 ++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 tests/test_address_translator.py diff --git a/tests/test_address_translator.py b/tests/test_address_translator.py new file mode 100644 index 00000000..fca54868 --- /dev/null +++ b/tests/test_address_translator.py @@ -0,0 +1,50 @@ +import nose +import cle + +from cle.address_translator import AT + + +class MockBackend(cle.Backend): + def __init__(self, linked_base, mapped_base, *nargs, **kwargs): + super(MockBackend, self).__init__("/dev/zero", *nargs, **kwargs) + regions = [ + cle.Region(0x000000, 0x8048000, 0x1b2d30, 0x1b2d30), + cle.Region(0x1b3260, 0x81fc260, 0x002c74, 0x0057bc) + ] + self.linked_base = linked_base + self.mapped_base = mapped_base + self.segments = cle.Regions(lst=regions) + self.sections = self.segments + self.segments._rebase(self.image_base_delta) + self._is_mapped = True + + +owner = MockBackend(0x8048000, 0xa000000) + + +def test_lva_mva_translation(): + nose.tools.assert_equal(AT.from_lva(0x8048000, owner).to_mva(), 0xa000000) + nose.tools.assert_equal(AT.from_mva(0xa1b9a1b, owner).to_lva(), 0x8201a1b) + + +def test_va_rva_translation(): + nose.tools.assert_equal(AT.from_rva(0, owner).to_va(), 0xa000000) + nose.tools.assert_equal(AT.from_va(0xa1b9a1b, owner).to_rva(), 0x1b9a1b) + + +def test_valid_va_raw_translations(): + nose.tools.assert_equal(AT.from_raw(0x1b3260, owner).to_va(), 0xa1b4260) + nose.tools.assert_equal(AT.from_va(0xa1b6ed3, owner).to_raw(), 0x1b5ed3) + + +@nose.tools.raises(TypeError) +def test_invalid_intersegment_raw_va(): + AT.from_raw(0x1b3000, owner).to_va() + + +def test_invalid_va_raw(): + nose.tools.assert_equal(AT.from_va(0xa1b6ed4, owner).to_raw(), None) + + +if __name__ == '__main__': + map(lambda x: x(), filter(lambda o: callable(o) and o.__name__.startswith("test"), globals().itervalues())) From cd406d9a6df1a363de5054697873d826c595fe3c Mon Sep 17 00:00:00 2001 From: amatter Date: Fri, 14 Jul 2017 14:23:53 +0300 Subject: [PATCH 14/18] fix: no-else-return fixes rollback --- cle/backends/relocations/generic.py | 3 ++- cle/loader.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cle/backends/relocations/generic.py b/cle/backends/relocations/generic.py index f9ca2d1d..e39d1eb1 100644 --- a/cle/backends/relocations/generic.py +++ b/cle/backends/relocations/generic.py @@ -26,7 +26,8 @@ class GenericJumpslotReloc(Relocation): def value(self): if self.is_rela: return self.resolvedby.rebased_addr + self.addend - return self.resolvedby.rebased_addr + else: + return self.resolvedby.rebased_addr class GenericRelativeReloc(Relocation): @property diff --git a/cle/loader.py b/cle/loader.py index e6407ee0..3419b36d 100644 --- a/cle/loader.py +++ b/cle/loader.py @@ -121,7 +121,8 @@ def close(self): def __repr__(self): if self._main_binary_stream is None: return '' % (os.path.basename(self._main_binary_path), self.min_addr(), self.max_addr()) - return '' % (self.min_addr(), self.max_addr()) + else: + return '' % (self.min_addr(), self.max_addr()) def get_initializers(self): """ From c233e2392ff80929b309c3e6802e51d1c11e601c Mon Sep 17 00:00:00 2001 From: amatter Date: Fri, 14 Jul 2017 14:28:03 +0300 Subject: [PATCH 15/18] fix: ** operator surrounding fix --- cle/backends/pe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cle/backends/pe.py b/cle/backends/pe.py index e9ba72a8..5689c83f 100644 --- a/cle/backends/pe.py +++ b/cle/backends/pe.py @@ -61,7 +61,7 @@ def relocate(self, solist, bypass_compatibility=False): AT.from_lva(self.addr, self.owner_obj).to_rva(), 4)) org_value = struct.unpack(' Date: Fri, 14 Jul 2017 14:28:35 +0300 Subject: [PATCH 16/18] fix: image_base_delta style fix --- cle/backends/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cle/backends/__init__.py b/cle/backends/__init__.py index 26d982cb..890e708f 100644 --- a/cle/backends/__init__.py +++ b/cle/backends/__init__.py @@ -529,7 +529,7 @@ def set_arch(self, arch): @property def image_base_delta(self): - return - self.linked_base + self.mapped_base + return self.mapped_base - self.linked_base @property def entry(self): From d45e91002f808e38d0f15ece1ca781a96ab1e7cc Mon Sep 17 00:00:00 2001 From: amatter Date: Fri, 14 Jul 2017 15:05:29 +0300 Subject: [PATCH 17/18] fix: Switch functional to readable. Linked image base alignment --- cle/backends/elf.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cle/backends/elf.py b/cle/backends/elf.py index 43bc6c0d..21d72226 100644 --- a/cle/backends/elf.py +++ b/cle/backends/elf.py @@ -215,10 +215,9 @@ def __init__(self, binary, **kwargs): # The linked image base should be evaluated before registering any segment or section due to # the fact that elffile, used by those methods, is working only with un-based virtual addresses, but Clemories # themselves are organized as a tree where each node backer internally uses relative addressing - self.mapped_base = self.linked_base = min( - map(lambda x: x['p_vaddr'], - filter(lambda x: x.header.p_type == 'PT_LOAD', self.reader.iter_segments())) or [0] - ) + seg_addrs = (ALIGN_DOWN(x['p_vaddr'], self.loader.page_size) + for x in self.reader.iter_segments() if x.header.p_type == 'PT_LOAD') + self.mapped_base = self.linked_base = min(seg_addrs) if seg_addrs else 0 self.__register_segments() self.__register_sections() From 6b4b94df15d90457026a86d6be94a54fb2490259 Mon Sep 17 00:00:00 2001 From: amatter Date: Fri, 14 Jul 2017 15:21:18 +0300 Subject: [PATCH 18/18] update: CLEOperationError on rebasing already mapped image --- cle/backends/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cle/backends/__init__.py b/cle/backends/__init__.py index 890e708f..52663d14 100644 --- a/cle/backends/__init__.py +++ b/cle/backends/__init__.py @@ -5,7 +5,7 @@ import archinfo from ..address_translator import AT from ..memory import Clemory -from ..errors import CLECompatibilityError, CLEError +from ..errors import CLECompatibilityError, CLEOperationError, CLEError try: import claripy @@ -567,6 +567,8 @@ def rebase(self): """ Rebase backend's regions to the new base where they were mapped by the loader """ + if self._is_mapped: + raise CLEOperationError("Image already rebased from %#x to %#x" % (self.linked_base, self.mapped_base)) if self.sections: self.sections._rebase(self.image_base_delta) if self.segments: