From 987654544f1027275df46c2eae57ca600818e470 Mon Sep 17 00:00:00 2001 From: M4xw Date: Fri, 2 Feb 2024 15:33:53 +0100 Subject: [PATCH 01/15] [Sleigh] Add base implementation for MIPS (WIP) --- CMakeLists.txt | 12 + include/remill/Arch/Arch.h | 5 + include/remill/Arch/MIPS/Runtime/State.h | 278 +++++++++++++++++ include/remill/Arch/Name.h | 1 + include/remill/Arch/Runtime/HyperCall.h | 3 + include/remill/Arch/Runtime/Intrinsics.h | 4 + lib/Arch/Arch.cpp | 10 + lib/Arch/CMakeLists.txt | 1 + lib/Arch/Instruction.cpp | 1 + lib/Arch/MIPS/CMakeLists.txt | 1 + lib/Arch/MIPS/Runtime/CMakeLists.txt | 44 +++ lib/Arch/MIPS/Runtime/Instructions.cpp | 43 +++ lib/Arch/Name.cpp | 6 +- lib/Arch/Runtime/HyperCall.cpp | 11 +- lib/Arch/Sleigh/CMakeLists.txt | 3 + lib/Arch/Sleigh/MIPS.h | 42 +++ lib/Arch/Sleigh/MIPSArch.cpp | 379 +++++++++++++++++++++++ 17 files changed, 841 insertions(+), 3 deletions(-) create mode 100644 include/remill/Arch/MIPS/Runtime/State.h create mode 100644 lib/Arch/MIPS/CMakeLists.txt create mode 100644 lib/Arch/MIPS/Runtime/CMakeLists.txt create mode 100644 lib/Arch/MIPS/Runtime/Instructions.cpp create mode 100644 lib/Arch/Sleigh/MIPS.h create mode 100644 lib/Arch/Sleigh/MIPSArch.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index da0f4f957..e4a5e7395 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,6 +80,7 @@ set(REMILL_BUILD_SEMANTICS_DIR_AARCH64 "${CMAKE_CURRENT_BINARY_DIR}/lib/Arch/AAr set(REMILL_BUILD_SEMANTICS_DIR_SPARC32 "${CMAKE_CURRENT_BINARY_DIR}/lib/Arch/SPARC32/Runtime") set(REMILL_BUILD_SEMANTICS_DIR_SPARC64 "${CMAKE_CURRENT_BINARY_DIR}/lib/Arch/SPARC64/Runtime") set(REMILL_BUILD_SEMANTICS_DIR_PPC64_32ADDR "${CMAKE_CURRENT_BINARY_DIR}/lib/Arch/PPC/Runtime") +set(REMILL_BUILD_SEMANTICS_DIR_MIPS64_32ADDR "${CMAKE_CURRENT_BINARY_DIR}/lib/Arch/MIPS/Runtime") set(REMILL_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") set(REMILL_LIB_DIR "${CMAKE_CURRENT_SOURCE_DIR}/lib") @@ -220,6 +221,7 @@ target_compile_definitions(remill_settings 
INTERFACE "REMILL_BUILD_SEMANTICS_DIR_SPARC32=\"${REMILL_BUILD_SEMANTICS_DIR_SPARC32}\"" "REMILL_BUILD_SEMANTICS_DIR_SPARC64=\"${REMILL_BUILD_SEMANTICS_DIR_SPARC64}\"" "REMILL_BUILD_SEMANTICS_DIR_PPC64_32ADDR=\"${REMILL_BUILD_SEMANTICS_DIR_PPC64_32ADDR}\"" + "REMILL_BUILD_SEMANTICS_DIR_MIPS64_32ADDR=\"${REMILL_BUILD_SEMANTICS_DIR_MIPS64_32ADDR}\"" ) set(ghidra_patch_user "github-actions[bot]") @@ -254,8 +256,17 @@ sleigh_compile( OUT_FILE "${sleigh_BINARY_DIR}/specfiles/Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.sla" ) +sleigh_compile( + TARGET mips_be_vr4300_spec + COMPILER "${sleigh_compiler}" + SLASPEC "${ghidra-fork_SOURCE_DIR}/Ghidra/Processors/MIPS/data/languages/mips64be.slaspec" + LOG_FILE "${sleigh_BINARY_DIR}/sleighspecs/spec_build_logs/mips64be.sla.log" + OUT_FILE "${sleigh_BINARY_DIR}/specfiles/Ghidra/Processors/MIPS/data/languages/mips64be.sla" +) + add_custom_target(sleigh_custom_specs) add_dependencies(sleigh_custom_specs ppc_e200_spec) +add_dependencies(sleigh_custom_specs mips_be_vr4300_spec) target_link_libraries(remill_settings INTERFACE ${llvm_libs} @@ -317,6 +328,7 @@ if(REMILL_ENABLE_INSTALL_TARGET) ) install(FILES "${sleigh_BINARY_DIR}/specfiles/Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.sla" DESTINATION "${CMAKE_INSTALL_DATADIR}/sleigh/specfiles/Ghidra/Processors/PowerPC/data/languages/") + install(FILES "${sleigh_BINARY_DIR}/specfiles/Ghidra/Processors/MIPS/data/languages/mips64be.sla" DESTINATION "${CMAKE_INSTALL_DATADIR}/sleigh/specfiles/Ghidra/Processors/MIPS/data/languages/") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/remillConfig.cmake.in" diff --git a/include/remill/Arch/Arch.h b/include/remill/Arch/Arch.h index 4fc5ff8ef..9504a356e 100644 --- a/include/remill/Arch/Arch.h +++ b/include/remill/Arch/Arch.h @@ -357,6 +357,7 @@ class Arch { bool IsSPARC32(void) const; bool IsSPARC64(void) const; bool IsPPC(void) const; + bool IsMIPS(void) const; bool IsWindows(void) const; bool IsLinux(void) const; @@ 
-437,6 +438,10 @@ class Arch { static ArchPtr GetSleighPPC(llvm::LLVMContext *context, OSName os, ArchName arch_name); + // Defined in `lib/Arch/Sleigh/MIPSArch.cpp` + static ArchPtr GetSleighMIPS(llvm::LLVMContext *context, OSName os, + ArchName arch_name); + // Defined in `lib/Arch/SPARC32/Arch.cpp`. static ArchPtr GetSPARC32(llvm::LLVMContext *context, OSName os, ArchName arch_name); diff --git a/include/remill/Arch/MIPS/Runtime/State.h b/include/remill/Arch/MIPS/Runtime/State.h new file mode 100644 index 000000000..c773bf45c --- /dev/null +++ b/include/remill/Arch/MIPS/Runtime/State.h @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2022-present Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#pragma clang diagnostic push +#pragma clang diagnostic fatal "-Wpadded" + +#include "remill/Arch/Runtime/State.h" + +#if !defined(INCLUDED_FROM_REMILL) +# include "remill/Arch/Runtime/Types.h" +#endif + +struct Reg final { + union { + alignas(4) uint32_t dword; + alignas(8) uint64_t qword; + } __attribute__((packed)); +} __attribute__((packed)); + +static_assert(sizeof(uint64_t) == sizeof(Reg), "Invalid packing of `Reg`."); +static_assert(0 == __builtin_offsetof(Reg, dword), + "Invalid packing of `Reg::dword`."); + +static_assert(0 == __builtin_offsetof(Reg, qword), + "Invalid packing of `Reg::qword`."); + +// General Purpose Registers +struct alignas(8) GPR final { + volatile uint64_t _0; + Reg zero; + volatile uint64_t _1; + Reg at; + volatile uint64_t _2; + Reg v0; + volatile uint64_t _3; + Reg v1; + volatile uint64_t _4; + Reg a0; + volatile uint64_t _5; + Reg a1; + volatile uint64_t _6; + Reg a2; + volatile uint64_t _7; + Reg a3; + volatile uint64_t _8; + Reg t0; + volatile uint64_t _9; + Reg t1; + volatile uint64_t _10; + Reg t2; + volatile uint64_t _11; + Reg t3; + volatile uint64_t _12; + Reg t4; + volatile uint64_t _13; + Reg t5; + volatile uint64_t _14; + Reg t6; + volatile uint64_t _15; + Reg t7; + volatile uint64_t _16; + Reg s0; + volatile uint64_t _17; + Reg s1; + volatile uint64_t _18; + Reg s2; + volatile uint64_t _19; + Reg s3; + volatile uint64_t _20; + Reg s4; + volatile uint64_t _21; + Reg s5; + volatile uint64_t _22; + Reg s6; + volatile uint64_t _23; + Reg s7; + volatile uint64_t _24; + Reg t8; + volatile uint64_t _25; + Reg t9; + volatile uint64_t _26; + Reg k0; + volatile uint64_t _27; + Reg k1; + volatile uint64_t _28; + Reg gp; + volatile uint64_t _29; + Reg sp; + volatile uint64_t _30; + Reg s8; + volatile uint64_t _31; + Reg ra; + volatile uint64_t _32; + Reg pc; + +} __attribute__((packed)); + +static_assert(528 == sizeof(GPR), "Invalid structure packing of `GPR`."); + +// Floating Point Registers +struct 
alignas(8) FPR final { + volatile uint64_t _0; + Reg f0; + volatile uint64_t _1; + Reg f1; + volatile uint64_t _2; + Reg f2; + volatile uint64_t _3; + Reg f3; + volatile uint64_t _4; + Reg f4; + volatile uint64_t _5; + Reg f5; + volatile uint64_t _6; + Reg f6; + volatile uint64_t _7; + Reg f7; + volatile uint64_t _8; + Reg f8; + volatile uint64_t _9; + Reg f9; + volatile uint64_t _10; + Reg f10; + volatile uint64_t _11; + Reg f11; + volatile uint64_t _12; + Reg f12; + volatile uint64_t _13; + Reg f13; + volatile uint64_t _14; + Reg f14; + volatile uint64_t _15; + Reg f15; + volatile uint64_t _16; + Reg f16; + volatile uint64_t _17; + Reg f17; + volatile uint64_t _18; + Reg f18; + volatile uint64_t _19; + Reg f19; + volatile uint64_t _20; + Reg f20; + volatile uint64_t _21; + Reg f21; + volatile uint64_t _22; + Reg f22; + volatile uint64_t _23; + Reg f23; + volatile uint64_t _24; + Reg f24; + volatile uint64_t _25; + Reg f25; + volatile uint64_t _26; + Reg f26; + volatile uint64_t _27; + Reg f27; + volatile uint64_t _28; + Reg f28; + volatile uint64_t _29; + Reg f29; + volatile uint64_t _30; + Reg f30; + volatile uint64_t _31; + Reg f31; + +} __attribute__((packed)); + +static_assert(512 == sizeof(FPR), "Invalid structure packing of `FPR`."); + +struct alignas(8) FlagRegisters final { + volatile uint64_t _0; + Reg ISAModeSwitch; +} __attribute__((packed)); + +struct alignas(8) COP0Registers final { + volatile uint64_t _0; + Reg Index; + volatile uint64_t _1; + Reg Random; + volatile uint64_t _2; + Reg EntryLo0; + volatile uint64_t _3; + Reg EntryLo1; + volatile uint64_t _4; + Reg Context; + volatile uint64_t _5; + Reg PageMask; + volatile uint64_t _6; + Reg Wired; + volatile uint64_t _7; + Reg HWREna; + volatile uint64_t _8; + Reg BadVAddr; + volatile uint64_t _9; + Reg Count; + volatile uint64_t _10; + Reg EntryHi; + volatile uint64_t _11; + Reg Compare; + volatile uint64_t _12; + Reg Status; + volatile uint64_t _13; + Reg Cause; + volatile uint64_t _14; + Reg EPC; 
+ volatile uint64_t _15; + Reg PRId; + volatile uint64_t _16; + Reg Config; + volatile uint64_t _17; + Reg LLAddr; + volatile uint64_t _18; + Reg WatchLo; + volatile uint64_t _19; + Reg WatchHi; + volatile uint64_t _20; + Reg XContext; + volatile uint64_t _21; + Reg cop0_reg21; + volatile uint64_t _22; + Reg cop0_reg22; + volatile uint64_t _23; + Reg Debug; + volatile uint64_t _24; + Reg DEPC; + volatile uint64_t _25; + Reg PerfCnt; + volatile uint64_t _26; + Reg ErrCtl; + volatile uint64_t _27; + Reg CacheErr; + volatile uint64_t _28; + Reg TagLo; + volatile uint64_t _29; + Reg TagHi; + volatile uint64_t _30; + Reg ErrorEPC; + volatile uint64_t _31; + Reg DESAVE; +} __attribute__((packed)); + +struct alignas(8) MIPSState : public ArchState { + GPR gpr; // 528 bytes. + + uint64_t _0; + + FPR fpr; + + uint64_t _1; + + FlagRegisters flags; + + uint64_t _2; + + COP0Registers cop0; + + uint64_t _3; +} __attribute__((packed)); + +struct State : public MIPSState {}; + +#pragma clang diagnostic pop diff --git a/include/remill/Arch/Name.h b/include/remill/Arch/Name.h index 4c030d3db..01477a251 100644 --- a/include/remill/Arch/Name.h +++ b/include/remill/Arch/Name.h @@ -121,6 +121,7 @@ enum ArchName : uint32_t { kArchThumb2LittleEndian, kArchPPC, + kArchMIPS, }; ArchName GetArchName(const llvm::Triple &triple); diff --git a/include/remill/Arch/Runtime/HyperCall.h b/include/remill/Arch/Runtime/HyperCall.h index 1a32eeafd..a50163e0a 100644 --- a/include/remill/Arch/Runtime/HyperCall.h +++ b/include/remill/Arch/Runtime/HyperCall.h @@ -96,6 +96,9 @@ class SyncHyperCall { kPPCEmulateInstruction, kPPCSysCall, + + kMIPSEmulateInstruction, + kMIPSSysCall, }; } __attribute__((packed)); diff --git a/include/remill/Arch/Runtime/Intrinsics.h b/include/remill/Arch/Runtime/Intrinsics.h index c06343149..92fed0cc9 100644 --- a/include/remill/Arch/Runtime/Intrinsics.h +++ b/include/remill/Arch/Runtime/Intrinsics.h @@ -425,4 +425,8 @@ __remill_ppc_emulate_instruction(Memory *); [[gnu::used, 
gnu::const]] extern Memory *__remill_ppc_syscall(Memory *); +[[gnu::used, gnu::const]] extern Memory *__remill_mips_emulate_instruction(Memory *); + +[[gnu::used, gnu::const]] extern Memory *__remill_mips_syscall(Memory *); + } // extern C diff --git a/lib/Arch/Arch.cpp b/lib/Arch/Arch.cpp index e9edc59f0..72d67220b 100644 --- a/lib/Arch/Arch.cpp +++ b/lib/Arch/Arch.cpp @@ -58,6 +58,7 @@ static unsigned AddressSize(ArchName arch_name) { case kArchThumb2LittleEndian: case kArchSparc32: case kArchSparc32_SLEIGH: + case kArchMIPS: return 32; // Actually MIPS64 but on 32bit Address bus for vr4300 case kArchPPC: return 32; case kArchAMD64: case kArchAMD64_AVX: @@ -117,6 +118,7 @@ ArchLocker Arch::Lock(ArchName arch_name_) { case ArchName::kArchX86_SLEIGH: case ArchName::kArchSparc32_SLEIGH: case ArchName::kArchPPC: return &gSleighArchLock; + case ArchName::kArchMIPS: return &gSleighArchLock; default: return ArchLocker(); } } @@ -247,6 +249,10 @@ auto Arch::GetArchByName(llvm::LLVMContext *context_, OSName os_name_, return GetSleighPPC(context_, os_name_, arch_name_); } + case kArchMIPS: { + DLOG(INFO) << "Using architecture: MIPS"; + return GetSleighMIPS(context_, os_name_, arch_name_); + } default: { return nullptr; } @@ -428,6 +434,10 @@ bool Arch::IsPPC(void) const { return remill::kArchPPC == arch_name; } +bool Arch::IsMIPS(void) const { + return remill::kArchMIPS == arch_name; +} + bool Arch::IsWindows(void) const { return remill::kOSWindows == os_name; } diff --git a/lib/Arch/CMakeLists.txt b/lib/Arch/CMakeLists.txt index 3f6310875..edbbdb7a3 100644 --- a/lib/Arch/CMakeLists.txt +++ b/lib/Arch/CMakeLists.txt @@ -29,6 +29,7 @@ add_library(remill_arch STATIC add_subdirectory(AArch32) add_subdirectory(AArch64) add_subdirectory(PPC) +add_subdirectory(MIPS) add_subdirectory(SPARC32) add_subdirectory(SPARC64) add_subdirectory(Sleigh) diff --git a/lib/Arch/Instruction.cpp b/lib/Arch/Instruction.cpp index ba6a12fcb..72b43134b 100644 --- a/lib/Arch/Instruction.cpp +++ 
b/lib/Arch/Instruction.cpp @@ -673,6 +673,7 @@ std::string Instruction::Serialize(void) const { case kArchSparc32: ss << "SPARC32"; break; case kArchSparc64: ss << "SPARC64"; break; case kArchPPC: ss << "PowerPC"; break; + case kArchMIPS: ss << "MIPS"; break; } }; diff --git a/lib/Arch/MIPS/CMakeLists.txt b/lib/Arch/MIPS/CMakeLists.txt new file mode 100644 index 000000000..588d9c739 --- /dev/null +++ b/lib/Arch/MIPS/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(Runtime) diff --git a/lib/Arch/MIPS/Runtime/CMakeLists.txt b/lib/Arch/MIPS/Runtime/CMakeLists.txt new file mode 100644 index 000000000..694cfd4ff --- /dev/null +++ b/lib/Arch/MIPS/Runtime/CMakeLists.txt @@ -0,0 +1,44 @@ +# Copyright (c) 2022-present Trail of Bits, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +cmake_minimum_required(VERSION 3.6) +project(mips_runtime) + +set(MIPSRUNTIME_SOURCEFILES + Instructions.cpp + + "${REMILL_LIB_DIR}/Arch/Runtime/Intrinsics.cpp" +) + +set_source_files_properties(BasicBlock.cpp PROPERTIES COMPILE_FLAGS "-O3 -g0") + +# Visual C++ requires C++14 +if(WIN32) + set(required_cpp_standard "c++14") +else() + set(required_cpp_standard "c++17") +endif() + +add_runtime(mips + SOURCES ${MIPSRUNTIME_SOURCEFILES} + ADDRESS_SIZE 32 + DEFINITIONS "LITTLE_ENDIAN=${little_endian}" "REMILL_DISABLE_INT128=1" + BCFLAGS "-std=${required_cpp_standard}" + INCLUDEDIRECTORIES "${REMILL_INCLUDE_DIR}" "${REMILL_SOURCE_DIR}" + INSTALLDESTINATION "${REMILL_INSTALL_SEMANTICS_DIR}" + ARCH mips64 + + DEPENDENCIES + "${REMILL_INCLUDE_DIR}/remill/Arch/MIPS/Runtime/State.h" +) diff --git a/lib/Arch/MIPS/Runtime/Instructions.cpp b/lib/Arch/MIPS/Runtime/Instructions.cpp new file mode 100644 index 000000000..36ab6d3f6 --- /dev/null +++ b/lib/Arch/MIPS/Runtime/Instructions.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022-present Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "remill/Arch/MIPS/Runtime/State.h" +#include "remill/Arch/Runtime/Float.h" +#include "remill/Arch/Runtime/Intrinsics.h" +#include "remill/Arch/Runtime/Operators.h" + +// A definition is required to ensure that LLVM doesn't optimize the `State` type out of the bytecode +// See https://github.com/lifting-bits/remill/pull/631#issuecomment-1279989004f +State __remill_state; + +#define HYPER_CALL state.hyper_call + +namespace { + +DEF_SEM(HandleUnsupported) { + return __remill_sync_hyper_call(state, memory, + SyncHyperCall::kMIPSEmulateInstruction); +} + +DEF_SEM(HandleInvalidInstruction) { + HYPER_CALL = AsyncHyperCall::kInvalidInstruction; + return memory; +} + +} // namespace + +DEF_ISEL(UNSUPPORTED_INSTRUCTION) = HandleUnsupported; +DEF_ISEL(INVALID_INSTRUCTION) = HandleInvalidInstruction; diff --git a/lib/Arch/Name.cpp b/lib/Arch/Name.cpp index d8180f956..fc1554d41 100644 --- a/lib/Arch/Name.cpp +++ b/lib/Arch/Name.cpp @@ -30,6 +30,7 @@ ArchName GetArchName(const llvm::Triple &triple) { case llvm::Triple::sparc: return kArchSparc32; case llvm::Triple::sparcv9: return kArchSparc64; case llvm::Triple::ppc: return kArchPPC; + case llvm::Triple::mips64: return kArchMIPS; default: return kArchInvalid; } } @@ -75,10 +76,10 @@ ArchName GetArchName(std::string_view arch_name) { } else if (arch_name == "sparc32_sleigh") { return kArchSparc32_SLEIGH; - + } else if (arch_name == "mips") { + return kArchMIPS; } else if (arch_name == "ppc") { return kArchPPC; - } else if (arch_name == "aarch64_sleigh") { return kArchAArch64LittleEndian_SLEIGH; } else { @@ -106,6 +107,7 @@ static const std::string_view kArchNames[] = { [kArchSparc32_SLEIGH] = "sparc32_sleigh", [kArchThumb2LittleEndian] = "thumb2", [kArchPPC] = "ppc", + [kArchMIPS] = "mips", }; } // namespace diff --git a/lib/Arch/Runtime/HyperCall.cpp b/lib/Arch/Runtime/HyperCall.cpp index 4633d858e..6ea5d98ff 100644 --- a/lib/Arch/Runtime/HyperCall.cpp +++ b/lib/Arch/Runtime/HyperCall.cpp @@ -26,6 +26,9 @@ 
#elif defined(__aarch64__) # include "remill/Arch/AArch64/Runtime/State.h" # define REMILL_HYPERCALL_AARCH64 1 +#elif defined(__mips__) +# include "remill/Arch/MIPS/Runtime/State.h" +# define REMILL_HYPERCALL_MIPS 1 #elif defined(__sparc__) # if ADDRESS_SIZE_BITS == 32 # include "remill/Arch/SPARC32/Runtime/State.h" @@ -379,9 +382,15 @@ Memory *__remill_sync_hyper_call(State &state, Memory *mem, break; # endif +#elif defined(REMILL_HYPERCALL_MIPS) + case SyncHyperCall::kMIPSEmulateInstruction: + mem = __remill_mips_emulate_instruction(mem); + break; + case SyncHyperCall::kMIPSSysCall: + mem = __remill_mips_syscall(mem); + break; #elif defined(REMILL_HYPERCALL_PPC) - case SyncHyperCall::kPPCEmulateInstruction: mem = __remill_ppc_emulate_instruction(mem); break; diff --git a/lib/Arch/Sleigh/CMakeLists.txt b/lib/Arch/Sleigh/CMakeLists.txt index 8e480ede2..361c99d27 100644 --- a/lib/Arch/Sleigh/CMakeLists.txt +++ b/lib/Arch/Sleigh/CMakeLists.txt @@ -39,14 +39,17 @@ add_library(remill_arch_sleigh STATIC "${REMILL_INCLUDE_DIR}/remill/Arch/SPARC32/SPARC32Base.h" "${REMILL_INCLUDE_DIR}/remill/Arch/PPC/Runtime/State.h" + "${REMILL_INCLUDE_DIR}/remill/Arch/MIPS/Runtime/State.h" Arch.h Thumb.h PPC.h + MIPS.h Arch.cpp X86Arch.cpp Thumb2Arch.cpp PPCArch.cpp + MIPSArch.cpp ControlFlowStructuring.cpp ControlFlowStructuring.h diff --git a/lib/Arch/Sleigh/MIPS.h b/lib/Arch/Sleigh/MIPS.h new file mode 100644 index 000000000..95863d235 --- /dev/null +++ b/lib/Arch/Sleigh/MIPS.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2022-present Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "Arch.h" + +namespace remill::sleighmips { + +class SleighMIPSDecoder final : public remill::sleigh::SleighDecoder { + public: + SleighMIPSDecoder(const remill::Arch &); + + llvm::Value *LiftPcFromCurrPc(llvm::IRBuilder<> &, llvm::Value *, size_t, + const DecodingContext &) const override; + + void InitializeSleighContext(uint64_t addr, + remill::sleigh::SingleInstructionSleighContext &, + const ContextValues &) const override; +}; + +} // namespace remill::sleighmips diff --git a/lib/Arch/Sleigh/MIPSArch.cpp b/lib/Arch/Sleigh/MIPSArch.cpp new file mode 100644 index 000000000..e9761d1fb --- /dev/null +++ b/lib/Arch/Sleigh/MIPSArch.cpp @@ -0,0 +1,379 @@ +/* + * Copyright (c) 2022-present Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Arch.h" +#include "MIPS.h" + +#define INCLUDED_FROM_REMILL +#include + +namespace remill { + +namespace sleighmips { +SleighMIPSDecoder::SleighMIPSDecoder(const remill::Arch &arch) + : SleighDecoder( + arch, "mips64be.sla", "mips64.pspec", + sleigh::ContextRegMappings( + {}, {}), {}) {} + +llvm::Value *SleighMIPSDecoder::LiftPcFromCurrPc(llvm::IRBuilder<> &bldr, + llvm::Value *curr_pc, + size_t curr_insn_size, + const DecodingContext &) const { + return bldr.CreateAdd(curr_pc, llvm::ConstantInt::get(curr_pc->getType(), 4)); +} + +void SleighMIPSDecoder::InitializeSleighContext( + uint64_t addr, remill::sleigh::SingleInstructionSleighContext &ctxt, + const ContextValues &values) const { + //sleigh::SetContextRegisterValueInSleigh( + // addr, std::string("ZERO").c_str(), "zero", 0, ctxt, values); +} + +class SleighMIPSArch : public ArchBase { + public: + SleighMIPSArch(llvm::LLVMContext *context_, OSName os_name_, + ArchName arch_name_) + : ArchBase(context_, os_name_, arch_name_), + decoder(*this) {} + virtual ~SleighMIPSArch() = default; + + DecodingContext CreateInitialContext(void) const override { + return DecodingContext(); + } + + std::string_view StackPointerRegisterName(void) const override { + return "SP"; + } + + std::string_view ProgramCounterRegisterName(void) const override { + return "PC"; + } + + OperandLifter::OpLifterPtr + DefaultLifter(const remill::IntrinsicTable &intrinsics) const override { + return decoder.GetOpLifter(); + } + + bool DecodeInstruction(uint64_t address, std::string_view instr_bytes, + Instruction &inst, + DecodingContext context) const override { + inst.pc = address; + inst.next_pc = address + instr_bytes.size(); // Default fall-through. 
+ inst.branch_taken_pc = 0; + inst.branch_not_taken_pc = 0; + inst.has_branch_taken_delay_slot = false; + inst.has_branch_not_taken_delay_slot = false; + inst.arch_name = arch_name; + inst.sub_arch_name = arch_name; + inst.branch_taken_arch_name = arch_name; + inst.arch = this; + inst.category = Instruction::kCategoryInvalid; + inst.operands.clear(); + inst.flows = Instruction::InvalidInsn(); + + context.UpdateContextReg(std::string("ZERO"), 0); // What to do here? + + return this->decoder.DecodeInstruction(address, instr_bytes, inst, context); + } + + uint64_t MinInstructionAlign(const DecodingContext &) const override { + return 4; + } + + uint64_t MinInstructionSize(const DecodingContext &) const override { + return 4; + } + + uint64_t MaxInstructionSize(const DecodingContext &, + bool permit_fuse_idioms) const { + return permit_fuse_idioms ? 8 : 4; // To handle delay slots, apparently + } + + // Returns `true` if we should lift the semantics of `next_inst` as a delay + // slot of `inst`. The `branch_taken_path` tells us whether we are in the + // context of the taken path of a branch or the not-taken path of a branch. + bool NextInstructionIsDelayed(const Instruction &inst, + const Instruction &next_inst, + bool branch_taken_path) const { + if (inst.delayed_pc != next_inst.pc) { + return false; + } + + if (branch_taken_path) { + return inst.has_branch_taken_delay_slot; + } else { + return inst.has_branch_not_taken_delay_slot; + } + } + + // Returns `true` if a given instruction might have a delay slot. + bool MayHaveDelaySlot(const Instruction &inst) const { + return inst.has_branch_taken_delay_slot || + inst.has_branch_not_taken_delay_slot; + } + + // Returns `true` if memory accesses are little-endian byte ordered. 
+ bool MemoryAccessIsLittleEndian(void) const { + return false; + } + + llvm::CallingConv::ID DefaultCallingConv(void) const override { + return llvm::CallingConv::C; + } + + llvm::Triple Triple(void) const override { + auto triple = BasicTriple(); + triple.setArch(llvm::Triple::mips64); + return triple; + } + + llvm::DataLayout DataLayout(void) const override { + return llvm::DataLayout("E-m:e-p:32:32-i64:64-f128:64-n32-S64"); + } + + void PopulateRegisterTable(void) const override { + CHECK_NOTNULL(context); + + reg_by_offset.resize(sizeof(MIPSState)); + + auto u8 = llvm::Type::getInt8Ty(*context); + auto u32 = llvm::Type::getInt32Ty(*context); + auto u64 = llvm::Type::getInt64Ty(*context); + + auto f32 = llvm::Type::getFloatTy(*context); + auto f64 = llvm::Type::getDoubleTy(*context); + +#define OFFSET_OF(type, access) \ + (reinterpret_cast(&reinterpret_cast( \ + static_cast(nullptr)->access))) + +#define REG(name, access, type) \ + AddRegister(#name, type, OFFSET_OF(MIPSState, access), nullptr) + +#define SUB_REG(name, access, type, parent_reg_name) \ + AddRegister(#name, type, OFFSET_OF(MIPSState, access), #parent_reg_name) + + REG(ZERO, gpr.zero.qword, u64); + SUB_REG(ZERO_LO, gpr.zero.dword, u32, ZERO); + REG(AT, gpr.at.qword, u64); + SUB_REG(AT_LO, gpr.at.dword, u32, AT); + REG(V0, gpr.v0.qword, u64); + SUB_REG(V0_LO, gpr.v0.dword, u32, V0); + REG(V1, gpr.v1.qword, u64); + SUB_REG(V1_LO, gpr.v1.dword, u32, V1); + REG(A0, gpr.a0.qword, u64); + SUB_REG(A0_LO, gpr.a0.dword, u32, A0); + REG(A1, gpr.a1.qword, u64); + SUB_REG(A1_LO, gpr.a1.dword, u32, A1); + REG(A2, gpr.a2.qword, u64); + SUB_REG(A2_LO, gpr.a2.dword, u32, A2); + REG(A3, gpr.a3.qword, u64); + SUB_REG(A3_LO, gpr.a3.dword, u32, A3); + REG(T0, gpr.t0.qword, u64); + SUB_REG(T0_LO, gpr.t0.dword, u32, T0); + REG(T1, gpr.t1.qword, u64); + SUB_REG(T1_LO, gpr.t1.dword, u32, T1); + REG(T2, gpr.t2.qword, u64); + SUB_REG(T2_LO, gpr.t2.dword, u32, T2); + REG(T3, gpr.t3.qword, u64); + SUB_REG(T3_LO, 
gpr.t3.dword, u32, T3); + REG(T4, gpr.t4.qword, u64); + SUB_REG(T4_LO, gpr.t4.dword, u32, T4); + REG(T5, gpr.t5.qword, u64); + SUB_REG(T5_LO, gpr.t5.dword, u32, T5); + REG(T6, gpr.t6.qword, u64); + SUB_REG(T6_LO, gpr.t6.dword, u32, T6); + REG(T7, gpr.t7.qword, u64); + SUB_REG(T7_LO, gpr.t7.dword, u32, T7); + REG(S0, gpr.s0.qword, u64); + SUB_REG(S0_LO, gpr.s0.dword, u32, S0); + REG(S1, gpr.s1.qword, u64); + SUB_REG(S1_LO, gpr.s1.dword, u32, S1); + REG(S2, gpr.s2.qword, u64); + SUB_REG(S2_LO, gpr.s2.dword, u32, S2); + REG(S3, gpr.s3.qword, u64); + SUB_REG(S3_LO, gpr.s3.dword, u32, S3); + REG(S4, gpr.s4.qword, u64); + SUB_REG(S4_LO, gpr.s4.dword, u32, S4); + REG(S5, gpr.s5.qword, u64); + SUB_REG(S5_LO, gpr.s5.dword, u32, S5); + REG(S6, gpr.s6.qword, u64); + SUB_REG(S6_LO, gpr.s6.dword, u32, S6); + REG(S7, gpr.s7.qword, u64); + SUB_REG(S7_LO, gpr.s7.dword, u32, S7); + REG(T8, gpr.t8.qword, u64); + SUB_REG(T8_LO, gpr.t8.dword, u32, T8); + REG(T9, gpr.t9.qword, u64); + SUB_REG(T9_LO, gpr.t9.dword, u32, T9); + REG(K0, gpr.k0.qword, u64); + SUB_REG(K0_LO, gpr.k0.dword, u32, K0); + REG(K1, gpr.k1.qword, u64); + SUB_REG(K1_LO, gpr.k1.dword, u32, K1); + REG(GP, gpr.gp.qword, u64); + SUB_REG(GP_LO, gpr.gp.dword, u32, GP); + REG(SP, gpr.sp.qword, u64); + SUB_REG(SP_LO, gpr.sp.dword, u32, SP); + REG(S8, gpr.s8.qword, u64); + SUB_REG(S8_LO, gpr.s8.dword, u32, S8); + REG(RA, gpr.ra.qword, u64); + SUB_REG(RA_LO, gpr.ra.dword, u32, RA); + REG(PC, gpr.pc.qword, u64); + SUB_REG(PC_LO, gpr.pc.dword, u32, PC); + + // Flags + REG(ISAMODESWITCH, flags.ISAModeSwitch.qword, u8); + + // FPR + REG(F0, fpr.f0.qword, f64); + SUB_REG(F0_LO, fpr.f0.dword, f32, F0); + REG(F1, fpr.f1.qword, f64); + SUB_REG(F1_LO, fpr.f1.dword, f32, F1); + REG(F2, fpr.f2.qword, f64); + SUB_REG(F2_LO, fpr.f2.dword, f32, F2); + REG(F3, fpr.f3.qword, f64); + SUB_REG(F3_LO, fpr.f3.dword, f32, F3); + REG(F4, fpr.f4.qword, f64); + SUB_REG(F4_LO, fpr.f4.dword, f32, F4); + REG(F5, fpr.f5.qword, f64); + SUB_REG(F5_LO, 
fpr.f5.dword, f32, F5); + REG(F6, fpr.f6.qword, f64); + SUB_REG(F6_LO, fpr.f6.dword, f32, F6); + REG(F7, fpr.f7.qword, f64); + SUB_REG(F7_LO, fpr.f7.dword, f32, F7); + REG(F8, fpr.f8.qword, f64); + SUB_REG(F8_LO, fpr.f8.dword, f32, F8); + REG(F9, fpr.f9.qword, f64); + SUB_REG(F9_LO, fpr.f9.dword, f32, F9); + REG(F10, fpr.f10.qword, f64); + SUB_REG(F10_LO, fpr.f10.dword, f32, F10); + REG(F11, fpr.f11.qword, f64); + SUB_REG(F11_LO, fpr.f11.dword, f32, F11); + REG(F12, fpr.f12.qword, f64); + SUB_REG(F12_LO, fpr.f12.dword, f32, F12); + REG(F13, fpr.f13.qword, f64); + SUB_REG(F13_LO, fpr.f13.dword, f32, F13); + REG(F14, fpr.f14.qword, f64); + SUB_REG(F14_LO, fpr.f14.dword, f32, F14); + REG(F15, fpr.f15.qword, f64); + SUB_REG(F15_LO, fpr.f15.dword, f32, F15); + REG(F16, fpr.f16.qword, f64); + SUB_REG(F16_LO, fpr.f16.dword, f32, F16); + REG(F17, fpr.f17.qword, f64); + SUB_REG(F17_LO, fpr.f17.dword, f32, F17); + REG(F18, fpr.f18.qword, f64); + SUB_REG(F18_LO, fpr.f18.dword, f32, F18); + REG(F19, fpr.f19.qword, f64); + SUB_REG(F19_LO, fpr.f19.dword, f32, F19); + REG(F20, fpr.f20.qword, f64); + SUB_REG(F20_LO, fpr.f20.dword, f32, F20); + REG(F21, fpr.f21.qword, f64); + SUB_REG(F21_LO, fpr.f21.dword, f32, F21); + REG(F22, fpr.f22.qword, f64); + SUB_REG(F22_LO, fpr.f22.dword, f32, F22); + REG(F23, fpr.f23.qword, f64); + SUB_REG(F23_LO, fpr.f23.dword, f32, F23); + REG(F24, fpr.f24.qword, f64); + SUB_REG(F24_LO, fpr.f24.dword, f32, F24); + REG(F25, fpr.f25.qword, f64); + SUB_REG(F25_LO, fpr.f25.dword, f32, F25); + REG(F26, fpr.f26.qword, f64); + SUB_REG(F26_LO, fpr.f26.dword, f32, F26); + REG(F27, fpr.f27.qword, f64); + SUB_REG(F27_LO, fpr.f27.dword, f32, F27); + REG(F28, fpr.f28.qword, f64); + SUB_REG(F28_LO, fpr.f28.dword, f32, F28); + REG(F29, fpr.f29.qword, f64); + SUB_REG(F29_LO, fpr.f29.dword, f32, F29); + REG(F30, fpr.f30.qword, f64); + SUB_REG(F30_LO, fpr.f30.dword, f32, F30); + REG(F31, fpr.f31.qword, f64); + SUB_REG(F31_LO, fpr.f31.dword, f32, F31); + + // COP0 + 
REG(INDEX, cop0.Index.qword, u64); + REG(RANDOM, cop0.Random.qword, u64); + REG(ENTRYLO0, cop0.EntryLo0.qword, u64); + REG(ENTRYLO1, cop0.EntryLo1.qword, u64); + REG(CONTEXT, cop0.Context.qword, u64); + REG(PAGEMASK, cop0.PageMask.qword, u64); + REG(WIRED, cop0.Wired.qword, u64); + REG(HWRENA, cop0.HWREna.qword, u64); + REG(BADVADDR, cop0.BadVAddr.qword, u64); + REG(COUNT, cop0.Count.qword, u64); + REG(ENTRYHI, cop0.EntryHi.qword, u64); + REG(COMPARE, cop0.Compare.qword, u64); + REG(STATUS, cop0.Status.qword, u64); + REG(CAUSE, cop0.Cause.qword, u64); + REG(EPC, cop0.EPC.qword, u64); + REG(PRID, cop0.PRId.qword, u64); + REG(CONFIG, cop0.Config.qword, u64); + REG(LLADDR, cop0.LLAddr.qword, u64); + REG(WATCHLO, cop0.WatchLo.qword, u64); + REG(WATCHHI, cop0.WatchHi.qword, u64); + REG(XCONTEXT, cop0.XContext.qword, u64); + REG(COP0_REG21, cop0.cop0_reg21.qword, u64); + REG(COP0_REG22, cop0.cop0_reg22.qword, u64); + REG(DEBUG, cop0.Debug.qword, u64); + REG(DEPC, cop0.DEPC.qword, u64); + REG(PERFCNT, cop0.PerfCnt.qword, u64); + REG(ERRCTL, cop0.ErrCtl.qword, u64); + REG(CACHEERR, cop0.CacheErr.qword, u64); + REG(TAGLO, cop0.TagLo.qword, u64); + REG(TAGHI, cop0.TagHi.qword, u64); + REG(ERRORPC, cop0.ErrorEPC.qword, u64); + REG(DESAVE, cop0.DESAVE.qword, u64); + } + + void + FinishLiftedFunctionInitialization(llvm::Module *module, + llvm::Function *bb_func) const override { + auto &context = module->getContext(); + const auto addr = llvm::Type::getInt64Ty(context); + + auto &entry_block = bb_func->getEntryBlock(); + llvm::IRBuilder<> ir(&entry_block); + + const auto pc_arg = NthArgument(bb_func, kPCArgNum); + const auto state_ptr_arg = NthArgument(bb_func, kStatePointerArgNum); + + auto mk_alloca = [&](auto &from) { + return ir.CreateAlloca(addr, nullptr, from.data()); + }; + ir.CreateStore(pc_arg, mk_alloca(kNextPCVariableName)); + ir.CreateStore(pc_arg, mk_alloca(kIgnoreNextPCVariableName)); + + /*auto u8 = llvm::Type::getInt8Ty(context); + auto zero_c = 
ir.CreateAlloca(u8, nullptr, "ZERO"); + ir.CreateStore(llvm::Constant::getNullValue(u8), zero_c);*/ + + std::ignore = RegisterByName(kPCVariableName)->AddressOf(state_ptr_arg, ir); + } + + private: + SleighMIPSDecoder decoder; +}; + +} // namespace sleighmips + +Arch::ArchPtr Arch::GetSleighMIPS(llvm::LLVMContext *context_, + remill::OSName os_name_, + remill::ArchName arch_name_) { + return std::make_unique<sleighmips::SleighMIPSArch>(context_, os_name_, + arch_name_); +} + +} // namespace remill From 67911a504f27fa3214ff817cfbd01a0e5eb25172 Mon Sep 17 00:00:00 2001 From: M4xw Date: Wed, 7 Feb 2024 15:45:15 +0100 Subject: [PATCH 02/15] Fix for existing function references in DeclareLiftedFunction --- lib/Arch/Arch.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/Arch/Arch.cpp b/lib/Arch/Arch.cpp index 72d67220b..519c49b4c 100644 --- a/lib/Arch/Arch.cpp +++ b/lib/Arch/Arch.cpp @@ -730,8 +730,14 @@ llvm::Function *Arch::DeclareLiftedFunction(std::string_view name_, auto func_type = llvm::dyn_cast<llvm::FunctionType>( RecontextualizeType(LiftedFunctionType(), context)); llvm::StringRef name(name_.data(), name_.size()); - auto func = llvm::Function::Create( + auto func = module->getFunction(name.str()); + + if (!func || func->getFunctionType() != func_type) { + func = llvm::Function::Create( func_type, llvm::GlobalValue::ExternalLinkage, 0u, name, module); + } else if (func->isDeclaration()) { + func->setLinkage(llvm::GlobalValue::ExternalLinkage); + } auto memory = remill::NthArgument(func, kMemoryPointerArgNum); auto state = remill::NthArgument(func, kStatePointerArgNum); From 32e2ff16411d08bf1a758ace919a605658826593 Mon Sep 17 00:00:00 2001 From: M4xw Date: Wed, 7 Feb 2024 16:06:39 +0100 Subject: [PATCH 03/15] Remove custom spec sleigh_compile --- CMakeLists.txt | 9 --------- 1 file changed, 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e4a5e7395..355c4649b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -256,17 +256,8 @@ sleigh_compile( OUT_FILE 
"${sleigh_BINARY_DIR}/specfiles/Ghidra/Processors/PowerPC/data/languages/ppc_32_e200_be.sla" ) -sleigh_compile( - TARGET mips_be_vr4300_spec - COMPILER "${sleigh_compiler}" - SLASPEC "${ghidra-fork_SOURCE_DIR}/Ghidra/Processors/MIPS/data/languages/mips64be.slaspec" - LOG_FILE "${sleigh_BINARY_DIR}/sleighspecs/spec_build_logs/mips64be.sla.log" - OUT_FILE "${sleigh_BINARY_DIR}/specfiles/Ghidra/Processors/MIPS/data/languages/mips64be.sla" -) - add_custom_target(sleigh_custom_specs) add_dependencies(sleigh_custom_specs ppc_e200_spec) -add_dependencies(sleigh_custom_specs mips_be_vr4300_spec) target_link_libraries(remill_settings INTERFACE ${llvm_libs} From b7a1d4290184c6df4844f11aaa5ea14b76ac0221 Mon Sep 17 00:00:00 2001 From: M4xw Date: Wed, 14 Feb 2024 12:13:46 +0100 Subject: [PATCH 04/15] Use WeakAnyLinkage for func declartions This allows for link-time overriding of used functions in other seperately processed blocks --- lib/Arch/Arch.cpp | 2 +- lib/BC/SleighLifter.cpp | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/Arch/Arch.cpp b/lib/Arch/Arch.cpp index 519c49b4c..62161583d 100644 --- a/lib/Arch/Arch.cpp +++ b/lib/Arch/Arch.cpp @@ -736,7 +736,7 @@ llvm::Function *Arch::DeclareLiftedFunction(std::string_view name_, func = llvm::Function::Create( func_type, llvm::GlobalValue::ExternalLinkage, 0u, name, module); } else if (func->isDeclaration()) { - func->setLinkage(llvm::GlobalValue::ExternalLinkage); + func->setLinkage(llvm::GlobalValue::WeakAnyLinkage); } auto memory = remill::NthArgument(func, kMemoryPointerArgNum); diff --git a/lib/BC/SleighLifter.cpp b/lib/BC/SleighLifter.cpp index 443512dee..68aa75fdb 100644 --- a/lib/BC/SleighLifter.cpp +++ b/lib/BC/SleighLifter.cpp @@ -1633,8 +1633,14 @@ SleighLifter::DefineInstructionFunction(Instruction &inst, ptr_ty}; auto ty = llvm::FunctionType::get(inst.arch->MemoryPointerType(), params, false); - auto func = llvm::Function::Create(ty, llvm::GlobalValue::ExternalLinkage, 0, + auto func = 
target_mod->getFunction(nm.str()); + + if (!func || func->getFunctionType() != ty) { + func = llvm::Function::Create(ty, llvm::GlobalValue::ExternalLinkage, 0, nm.str(), target_mod); + } else if (func->isDeclaration()) { + func->setLinkage(llvm::GlobalValue::WeakAnyLinkage); + } auto memory = remill::NthArgument(func, 1); auto state = remill::NthArgument(func, 0); From 6eaee55d554e1bb9818bbc7580e9a79e809e5c99 Mon Sep 17 00:00:00 2001 From: M4xw Date: Wed, 14 Feb 2024 12:14:51 +0100 Subject: [PATCH 05/15] [TraceLifter] StoreProgramCounter in kCategoryDirectFunctionCall case Otherwise the info about the target function address gets lost for the missing_block trampoline --- lib/BC/TraceLifter.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/BC/TraceLifter.cpp b/lib/BC/TraceLifter.cpp index c53c5a2f6..9a662294c 100644 --- a/lib/BC/TraceLifter.cpp +++ b/lib/BC/TraceLifter.cpp @@ -477,6 +477,9 @@ bool TraceLifter::Impl::Lift( case Instruction::kCategoryDirectFunctionCall: { direct_func_call: try_add_delay_slot(true, block); + // M4xw: How to handle this appropriately? 
+ StoreProgramCounter(block, inst.branch_taken_pc, *intrinsics); + if (inst.branch_not_taken_pc != inst.branch_taken_pc) { trace_work_list.insert(inst.branch_taken_pc); auto target_trace = get_trace_decl(inst.branch_taken_pc); From 5e1cb33b5f39a9203d4c9f405050af9b33961f46 Mon Sep 17 00:00:00 2001 From: M4xw Date: Wed, 14 Feb 2024 12:31:47 +0100 Subject: [PATCH 06/15] [MIPSArch] Format and clean unused things NextInstructionIsDelayed and MayHaveDelaySlot would technically be used by TraceLifter but dont seem to impact sleigh --- lib/Arch/Sleigh/MIPSArch.cpp | 65 ++++++++++-------------------------- 1 file changed, 18 insertions(+), 47 deletions(-) diff --git a/lib/Arch/Sleigh/MIPSArch.cpp b/lib/Arch/Sleigh/MIPSArch.cpp index e9761d1fb..5f6d73648 100644 --- a/lib/Arch/Sleigh/MIPSArch.cpp +++ b/lib/Arch/Sleigh/MIPSArch.cpp @@ -24,29 +24,27 @@ namespace remill { namespace sleighmips { SleighMIPSDecoder::SleighMIPSDecoder(const remill::Arch &arch) - : SleighDecoder( - arch, "mips64be.sla", "mips64.pspec", - sleigh::ContextRegMappings( - {}, {}), {}) {} - -llvm::Value *SleighMIPSDecoder::LiftPcFromCurrPc(llvm::IRBuilder<> &bldr, - llvm::Value *curr_pc, - size_t curr_insn_size, - const DecodingContext &) const { + : SleighDecoder(arch, "mips64be.sla", "mips64.pspec", + sleigh::ContextRegMappings({}, {}), {}) {} + +llvm::Value * +SleighMIPSDecoder::LiftPcFromCurrPc(llvm::IRBuilder<> &bldr, + llvm::Value *curr_pc, size_t curr_insn_size, + const DecodingContext &) const { return bldr.CreateAdd(curr_pc, llvm::ConstantInt::get(curr_pc->getType(), 4)); } void SleighMIPSDecoder::InitializeSleighContext( uint64_t addr, remill::sleigh::SingleInstructionSleighContext &ctxt, const ContextValues &values) const { - //sleigh::SetContextRegisterValueInSleigh( - // addr, std::string("ZERO").c_str(), "zero", 0, ctxt, values); + //sleigh::SetContextRegisterValueInSleigh( + // addr, std::string("ZERO").c_str(), "zero", 0, ctxt, values); } class SleighMIPSArch : public ArchBase { public: 
SleighMIPSArch(llvm::LLVMContext *context_, OSName os_name_, - ArchName arch_name_) + ArchName arch_name_) : ArchBase(context_, os_name_, arch_name_), decoder(*this) {} virtual ~SleighMIPSArch() = default; @@ -85,7 +83,7 @@ class SleighMIPSArch : public ArchBase { inst.operands.clear(); inst.flows = Instruction::InvalidInsn(); - context.UpdateContextReg(std::string("ZERO"), 0); // What to do here? + context.UpdateContextReg(std::string("ZERO"), 0); // What to do here? return this->decoder.DecodeInstruction(address, instr_bytes, inst, context); } @@ -100,35 +98,7 @@ class SleighMIPSArch : public ArchBase { uint64_t MaxInstructionSize(const DecodingContext &, bool permit_fuse_idioms) const { - return permit_fuse_idioms ? 8 : 4; // To handle delayslots, apparently - } - - // Returns `true` if we should lift the semantics of `next_inst` as a delay - // slot of `inst`. The `branch_taken_path` tells us whether we are in the - // context of the taken path of a branch or the not-taken path of a branch. - bool NextInstructionIsDelayed(const Instruction &inst, - const Instruction &next_inst, - bool branch_taken_path) const { - if (inst.delayed_pc != next_inst.pc) { - return false; - } - - if (branch_taken_path) { - return inst.has_branch_taken_delay_slot; - } else { - return inst.has_branch_not_taken_delay_slot; - } - } - - // Returns `true` if a given instruction might have a delay slot. - bool MayHaveDelaySlot(const Instruction &inst) const { - return inst.has_branch_taken_delay_slot || - inst.has_branch_not_taken_delay_slot; - } - - // Returns `true` if memory access are little endian byte ordered. 
- bool MemoryAccessIsLittleEndian(void) const { - return false; + return 8; // Note: Technically 4 but due to delay slots we need pass 8 bytes to sleigh } llvm::CallingConv::ID DefaultCallingConv(void) const override { @@ -142,6 +112,7 @@ class SleighMIPSArch : public ArchBase { } llvm::DataLayout DataLayout(void) const override { + // M4xw: TODO: Confirm this is correct return llvm::DataLayout("E-m:e-p:32:32-i64:64-f128:64-n32-S64"); } @@ -233,7 +204,7 @@ class SleighMIPSArch : public ArchBase { SUB_REG(RA_LO, gpr.ra.dword, u32, RA); REG(PC, gpr.pc.qword, u64); SUB_REG(PC_LO, gpr.pc.dword, u32, PC); - + // Flags REG(ISAMODESWITCH, flags.ISAModeSwitch.qword, u8); @@ -359,7 +330,7 @@ class SleighMIPSArch : public ArchBase { /*auto u8 = llvm::Type::getInt8Ty(context); auto zero_c = ir.CreateAlloca(u8, nullptr, "ZERO"); ir.CreateStore(llvm::Constant::getNullValue(u8), zero_c);*/ - + std::ignore = RegisterByName(kPCVariableName)->AddressOf(state_ptr_arg, ir); } @@ -370,10 +341,10 @@ class SleighMIPSArch : public ArchBase { } // namespace sleighmips Arch::ArchPtr Arch::GetSleighMIPS(llvm::LLVMContext *context_, - remill::OSName os_name_, - remill::ArchName arch_name_) { + remill::OSName os_name_, + remill::ArchName arch_name_) { return std::make_unique<sleighmips::SleighMIPSArch>(context_, os_name_, - arch_name_); + arch_name_); } } // namespace remill From 97d7429f4f1d7fbe013de385f5f99ae7aff8ef87 Mon Sep 17 00:00:00 2001 From: M4xw Date: Fri, 16 Feb 2024 18:32:34 +0100 Subject: [PATCH 07/15] Placeholder impl for HI/LO regs --- include/remill/Arch/MIPS/Runtime/State.h | 5 +++++ lib/Arch/Sleigh/MIPSArch.cpp | 2 ++ 2 files changed, 7 insertions(+) diff --git a/include/remill/Arch/MIPS/Runtime/State.h b/include/remill/Arch/MIPS/Runtime/State.h index c773bf45c..b990c2880 100644 --- a/include/remill/Arch/MIPS/Runtime/State.h +++ b/include/remill/Arch/MIPS/Runtime/State.h @@ -186,6 +186,11 @@ static_assert(512 == sizeof(FPR), "Invalid structure packing of `FPR`."); struct alignas(8) FlagRegisters final 
{ volatile uint64_t _0; Reg ISAModeSwitch; + // TODO: Move them elsewhere + volatile uint64_t _1; + Reg HI; + volatile uint64_t _2; + Reg LO; } __attribute__((packed)); struct alignas(8) COP0Registers final { diff --git a/lib/Arch/Sleigh/MIPSArch.cpp b/lib/Arch/Sleigh/MIPSArch.cpp index 5f6d73648..f645b4a68 100644 --- a/lib/Arch/Sleigh/MIPSArch.cpp +++ b/lib/Arch/Sleigh/MIPSArch.cpp @@ -207,6 +207,8 @@ class SleighMIPSArch : public ArchBase { // Flags REG(ISAMODESWITCH, flags.ISAModeSwitch.qword, u8); + REG(HI, flags.HI.qword, u64); + REG(LO, flags.LO.qword, u64); // FPR REG(F0, fpr.f0.qword, f64); From ce1b4a131b5e417a000cc5947833ea5b5b237bd6 Mon Sep 17 00:00:00 2001 From: M4xw Date: Wed, 21 Feb 2024 12:02:04 +0100 Subject: [PATCH 08/15] [Lift] Pass IntrinsicTable to SimpleTraceManager and use function_call wrapper for function links --- bin/lift/Lift.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/bin/lift/Lift.cpp b/bin/lift/Lift.cpp index 3cc0afbca..a25031e71 100644 --- a/bin/lift/Lift.cpp +++ b/bin/lift/Lift.cpp @@ -125,7 +125,10 @@ class SimpleTraceManager : public remill::TraceManager { public: virtual ~SimpleTraceManager(void) = default; - explicit SimpleTraceManager(Memory &memory_) : memory(memory_) {} + explicit SimpleTraceManager(Memory &memory_, + const remill::IntrinsicTable &intrinsics_) + : memory(memory_), + intrinsics(intrinsics_) {} protected: // Called when we have lifted, i.e. defined the contents, of a new trace. 
@@ -147,7 +150,13 @@ class SimpleTraceManager : public remill::TraceManager { if (trace_it != traces.end()) { return trace_it->second; } else { - return nullptr; + // Use function_call if we can't have the target addr in scope + auto byte_it = memory.find(addr); + if (byte_it == memory.end()) { + return intrinsics.function_call; + } else { + return nullptr; + } } } @@ -174,6 +183,7 @@ class SimpleTraceManager : public remill::TraceManager { public: Memory &memory; std::unordered_map traces; + const remill::IntrinsicTable &intrinsics; }; // Looks for calls to a function like `__remill_function_return`, and @@ -266,8 +276,8 @@ int main(int argc, char *argv[]) { const auto mem_ptr_type = arch->MemoryPointerType(); Memory memory = UnhexlifyInputBytes(addr_mask); - SimpleTraceManager manager(memory); remill::IntrinsicTable intrinsics(module.get()); + SimpleTraceManager manager(memory, intrinsics); auto inst_lifter = arch->DefaultLifter(intrinsics); From f97f65a8155888eb5315bc8d1d91efd58be4d1cc Mon Sep 17 00:00:00 2001 From: M4xw Date: Wed, 21 Feb 2024 12:32:41 +0100 Subject: [PATCH 09/15] [MIPS] Pass Arch to PCodeCFG [MIPS] Implement temporary workaround for Sleigh Branch Likely Issue --- include/remill/BC/PCodeCFG.h | 7 ++-- include/remill/BC/SleighLifter.h | 1 + lib/BC/PcodeCFG.cpp | 23 ++++++++++--- lib/BC/SleighLifter.cpp | 56 +++++++++++++++++++++++++++----- 4 files changed, 72 insertions(+), 15 deletions(-) diff --git a/include/remill/BC/PCodeCFG.h b/include/remill/BC/PCodeCFG.h index 84e7d47eb..fb545a276 100644 --- a/include/remill/BC/PCodeCFG.h +++ b/include/remill/BC/PCodeCFG.h @@ -67,11 +67,13 @@ class PcodeCFG { PcodeCFG(std::map blocks); }; -PcodeCFG CreateCFG(const std::vector &linear_ops); +PcodeCFG CreateCFG(const std::vector &linear_ops, + const remill::Arch &arch); class PcodeCFGBuilder { public: - explicit PcodeCFGBuilder(const std::vector &linear_ops); + explicit PcodeCFGBuilder(const std::vector &linear_ops, + const remill::Arch &arch); PcodeCFG 
Build() const; private: @@ -82,6 +84,7 @@ class PcodeCFGBuilder { std::vector GetBlockStarts() const; const std::vector &linear_ops; + const remill::Arch &arch; }; diff --git a/include/remill/BC/SleighLifter.h b/include/remill/BC/SleighLifter.h index 27e22cef9..bc1a27151 100644 --- a/include/remill/BC/SleighLifter.h +++ b/include/remill/BC/SleighLifter.h @@ -82,6 +82,7 @@ class SleighLifter : public InstructionLifter { const ContextValues &context_values); ::Sleigh &GetEngine(void) const; + const remill::Arch &arch; }; diff --git a/lib/BC/PcodeCFG.cpp b/lib/BC/PcodeCFG.cpp index 5265d3374..486498921 100644 --- a/lib/BC/PcodeCFG.cpp +++ b/lib/BC/PcodeCFG.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include @@ -15,8 +17,9 @@ namespace remill { namespace sleigh { -PcodeCFG CreateCFG(const std::vector &linear_ops) { - return PcodeCFGBuilder(linear_ops).Build(); +PcodeCFG CreateCFG(const std::vector &linear_ops, + const remill::Arch &arch) { + return PcodeCFGBuilder(linear_ops, arch).Build(); } @@ -116,6 +119,16 @@ PcodeCFGBuilder::GetControlFlowExitsForIndex(size_t index) const { auto taken_exit = build_direct_target_exit(curr_op.vars[0], index); + // BUG(M4xw): Branch likelies seem to trigger a bug that causes wrong BranchTaken conditions + // This is always a CBRANCH preceded by a BOOL_NEGATE, so this is a workaround for now + // The logic operation is technically the same, the BOOL_NEGATE flip happens in SleighLifter + if (this->arch.arch_name == remill::ArchName::kArchMIPS) { + if (linear_ops[index - 1].op == CPUI_BOOL_NEGATE) { + DLOG(INFO) << "Flipping CBRANCH Targets"; + std::swap(taken_exit, fallthrough_exit); + } + } + return ConditionalExit{taken_exit, fallthrough_exit}; } case CPUI_CALLIND: @@ -171,8 +184,10 @@ PcodeCFG PcodeCFGBuilder::Build() const { } -PcodeCFGBuilder::PcodeCFGBuilder(const std::vector &linear_ops) - : linear_ops(linear_ops) {} +PcodeCFGBuilder::PcodeCFGBuilder(const std::vector &linear_ops, + const remill::Arch &arch) + : 
linear_ops(linear_ops), + arch(arch) {} PcodeCFG::PcodeCFG(std::map blocks) : blocks(std::move(blocks)) {} diff --git a/lib/BC/SleighLifter.cpp b/lib/BC/SleighLifter.cpp index 68aa75fdb..7a0ff92e1 100644 --- a/lib/BC/SleighLifter.cpp +++ b/lib/BC/SleighLifter.cpp @@ -124,6 +124,8 @@ llvm::Value *CreatePcodeBitShift(llvm::Value *lhs, llvm::Value *rhs, class SleighLifter::PcodeToLLVMEmitIntoBlock { private: + bool flip_bool_negate; + class Parameter { public: virtual ~Parameter(void) = default; @@ -453,7 +455,8 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock { std::vector user_op_names_, llvm::BasicBlock *exit_block_, const sleigh::MaybeBranchTakenVar &to_lift_btaken_, PcodeToLLVMEmitIntoBlock::DecodingContextConstants context_reg_lifter) - : target_block(target_block), + : flip_bool_negate(false), + target_block(target_block), state_pointer(state_pointer), context(target_block->getContext()), insn(insn), @@ -523,7 +526,7 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock { target_vnode.offset, target_vnode.size, entry_bldr); print_vardata(this->insn_lifter_parent.GetEngine(), ss, target_vnode); DLOG(ERROR) << "Creating unique for unknown register: " << ss.str() << " " - << reg_ptr->getName().str(); + << reg_ptr->getName().str(); return RegisterValue::CreateRegister(reg_ptr); } @@ -547,7 +550,7 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock { vnode.space, vnode.offset, vnode.size); DLOG(INFO) << "Looking for reg name " << reg_name << " from offset " - << vnode.offset; + << vnode.offset; return this->LiftNormalRegisterOrCreateUnique(bldr, reg_name, vnode); } else if (space_name == "const") { @@ -748,8 +751,10 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock { return this->LiftStoreIntoOutParam( bldr, bldr.CreateZExt( - bldr.CreateICmpEQ(*bneg_inval, - llvm::ConstantInt::get(byte_type, 0)), + bldr.CreateICmpEQ( + *bneg_inval, + llvm::ConstantInt::get(byte_type, + this->flip_bool_negate ? 
1 : 0)), byte_type), outvar); } @@ -1447,6 +1452,18 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock { // either exit (means real control flow), to block (fake control flow) size_t index = 0; for (auto pc : blk.ops) { + // BUG(M4xw): Branch likelies seem to trigger a bug that causes wrong BranchTaken conditions + // This is always a CBRANCH preceded by a BOOL_NEGATE, so this is a workaround for now + // The logic operation is technically the same, the CBranch Target flip occurs in PcodeCFG + if (this->insn_lifter_parent.arch.arch_name == + remill::ArchName::kArchMIPS) { + if (pc.op == OpCode::CPUI_BOOL_NEGATE && + blk.ops[index + 1].op == OpCode::CPUI_CBRANCH) { + this->flip_bool_negate = true; + } else { + this->flip_bool_negate = false; + } + } this->LiftBtakenIfReached(bldr, pc.op, index); this->LiftPcodeOp(bldr, pc.op, pc.outvar, pc.vars.data(), pc.vars.size()); index += 1; @@ -1606,7 +1623,8 @@ SleighLifter::SleighLifter(const remill::Arch &arch_, : InstructionLifter(&arch_, intrinsics_), sleigh_context(new sleigh::SingleInstructionSleighContext( dec_.GetSLAName(), dec_.GetPSpec())), - decoder(dec_) {} + decoder(dec_), + arch(arch_) {} const std::string_view SleighLifter::kInstructionFunctionPrefix = @@ -1634,10 +1652,10 @@ SleighLifter::DefineInstructionFunction(Instruction &inst, auto ty = llvm::FunctionType::get(inst.arch->MemoryPointerType(), params, false); auto func = target_mod->getFunction(nm.str()); - + if (!func || func->getFunctionType() != ty) { func = llvm::Function::Create(ty, llvm::GlobalValue::ExternalLinkage, 0, - nm.str(), target_mod); + nm.str(), target_mod); } else if (func->isDeclaration()) { func->setLinkage(llvm::GlobalValue::WeakAnyLinkage); } @@ -1694,7 +1712,7 @@ SleighLifter::LiftIntoInternalBlockWithSleighState( //TODO(Ian): make a safe to use sleighinstruction context that wraps a context with an arch to preform reset reinits - auto cfg = sleigh::CreateCFG(pcode_record.ops); + auto cfg = sleigh::CreateCFG(pcode_record.ops, 
this->arch); SleighLifter::PcodeToLLVMEmitIntoBlock::DecodingContextConstants @@ -1773,6 +1791,26 @@ LiftStatus SleighLifter::LiftIntoBlockWithSleighState( llvm::ConstantInt::get(this->GetWordType(), inst.bytes.size())), next_pc_ref); + /////////////////////////////////////////////////////////////////////////////////////////// + // Handle COUNT Reg approximation + // May be prefered here over patches to sleigh definitions for now + // TODO(M4xw): Implement exact cycle count per opcode according to the optimization manual + if (inst.arch->IsMIPS()) { + const auto [count_ref, count_ref_type] = + LoadRegAddress(block, state_ptr, "COUNT"); + + const auto count = + intoblock_builer.CreateLoad(this->GetWordType(), count_ref); + + intoblock_builer.CreateStore( + intoblock_builer.CreateAdd( + count, llvm::ConstantInt::get( + this->GetWordType(), + 4)), // Historically approximated Count per Opcode + count_ref); + } + ////////////////////////////////////////////////////////////////////////////////////////// + // TODO(Ian): THIS IS AN UNSOUND ASSUMPTION THAT RETURNS ALWAYS RETURN TO THE FALLTHROUGH, this is just to make things work intoblock_builer.CreateStore( intoblock_builer.CreateLoad(this->GetWordType(), next_pc_ref), From ebaed422c1c20409b968ade22bd78a289bf3a918 Mon Sep 17 00:00:00 2001 From: M4xw Date: Wed, 21 Feb 2024 12:38:05 +0100 Subject: [PATCH 10/15] Use Arch::isMIPS instead of arch_name check --- lib/BC/PcodeCFG.cpp | 2 +- lib/BC/SleighLifter.cpp | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/BC/PcodeCFG.cpp b/lib/BC/PcodeCFG.cpp index 486498921..c96cb67d8 100644 --- a/lib/BC/PcodeCFG.cpp +++ b/lib/BC/PcodeCFG.cpp @@ -122,7 +122,7 @@ PcodeCFGBuilder::GetControlFlowExitsForIndex(size_t index) const { // BUG(M4xw): Branch likelies seem to trigger a bug that causes wrong BranchTaken conditions // This is always a CBRANCH preceded by a BOOL_NEGATE, so this is a workaround for now // The logic operation is technically the same, the 
BOOL_NEGATE flip happens in SleighLifter - if (this->arch.arch_name == remill::ArchName::kArchMIPS) { + if (this->arch.IsMIPS()) { if (linear_ops[index - 1].op == CPUI_BOOL_NEGATE) { DLOG(INFO) << "Flipping CBRANCH Targets"; std::swap(taken_exit, fallthrough_exit); diff --git a/lib/BC/SleighLifter.cpp b/lib/BC/SleighLifter.cpp index 7a0ff92e1..97231a7b5 100644 --- a/lib/BC/SleighLifter.cpp +++ b/lib/BC/SleighLifter.cpp @@ -1455,8 +1455,7 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock { // BUG(M4xw): Branch likelies seem to trigger a bug that causes wrong BranchTaken conditions // This is always a CBRANCH preceded by a BOOL_NEGATE, so this is a workaround for now // The logic operation is technically the same, the CBranch Target flip occurs in PcodeCFG - if (this->insn_lifter_parent.arch.arch_name == - remill::ArchName::kArchMIPS) { + if (this->insn_lifter_parent.arch.IsMIPS()) { if (pc.op == OpCode::CPUI_BOOL_NEGATE && blk.ops[index + 1].op == OpCode::CPUI_CBRANCH) { this->flip_bool_negate = true; From 91e0d38c264edfcf5ef394d6028d9f8b0a043a26 Mon Sep 17 00:00:00 2001 From: M4xw Date: Wed, 21 Feb 2024 16:13:41 +0100 Subject: [PATCH 11/15] [MIPS] Implement __remill_set_coprocessor_reg Intrinsic --- include/remill/Arch/Runtime/Intrinsics.h | 4 +++ include/remill/BC/IntrinsicTable.h | 1 + lib/Arch/Runtime/Intrinsics.cpp | 1 + lib/BC/IntrinsicTable.cpp | 1 + lib/BC/SleighLifter.cpp | 36 ++++++++++++++++++++++-- 5 files changed, 41 insertions(+), 2 deletions(-) diff --git a/include/remill/Arch/Runtime/Intrinsics.h b/include/remill/Arch/Runtime/Intrinsics.h index 92fed0cc9..ddfa987f7 100644 --- a/include/remill/Arch/Runtime/Intrinsics.h +++ b/include/remill/Arch/Runtime/Intrinsics.h @@ -137,6 +137,10 @@ __remill_flag_computation_carry(bool result, ...); [[gnu::used]] extern Memory *__remill_async_hyper_call(State &, addr_t ret_addr, Memory *); +[[gnu::used]] extern void +__remill_set_coprocessor_reg(State &, uint8_t cop_num, uint64_t reg_num, + uint64_t value, uint8_t 
sel); + // This intrinsic must be tagged with the `always_inline` function attribute // since it has an implementation we want to use in Anvill's lifted IR. // diff --git a/include/remill/BC/IntrinsicTable.h b/include/remill/BC/IntrinsicTable.h index 227b60f6f..1998deebf 100644 --- a/include/remill/BC/IntrinsicTable.h +++ b/include/remill/BC/IntrinsicTable.h @@ -44,6 +44,7 @@ class IntrinsicTable { // OS interaction. llvm::Function *const sync_hyper_call; llvm::Function *const async_hyper_call; + llvm::Function *const set_coprocessor_reg; // Memory read intrinsics. llvm::Function *const read_memory_8; diff --git a/lib/Arch/Runtime/Intrinsics.cpp b/lib/Arch/Runtime/Intrinsics.cpp index b83e973ca..368daf9e9 100644 --- a/lib/Arch/Runtime/Intrinsics.cpp +++ b/lib/Arch/Runtime/Intrinsics.cpp @@ -120,6 +120,7 @@ extern "C" [[gnu::used]] void __remill_intrinsics(void) { USED(__remill_async_hyper_call); USED(__remill_sync_hyper_call); + USED(__remill_set_coprocessor_reg); USED(__remill_undefined_8); USED(__remill_undefined_16); diff --git a/lib/BC/IntrinsicTable.cpp b/lib/BC/IntrinsicTable.cpp index 215ed7a02..0b383003e 100644 --- a/lib/BC/IntrinsicTable.cpp +++ b/lib/BC/IntrinsicTable.cpp @@ -87,6 +87,7 @@ IntrinsicTable::IntrinsicTable(llvm::Module *module) // OS interaction. sync_hyper_call(FindIntrinsic(module, "__remill_sync_hyper_call")), async_hyper_call(FindIntrinsic(module, "__remill_async_hyper_call")), + set_coprocessor_reg(FindIntrinsic(module, "__remill_set_coprocessor_reg")), // Memory access. 
read_memory_8(SetMemoryReadNone(FindPureIntrinsic(module, "__remill_read_memory_8"))), diff --git a/lib/BC/SleighLifter.cpp b/lib/BC/SleighLifter.cpp index 97231a7b5..cf40b48c6 100644 --- a/lib/BC/SleighLifter.cpp +++ b/lib/BC/SleighLifter.cpp @@ -84,6 +84,7 @@ static size_t kNextPcArgNum = 3; static const std::string kEqualityClaimName = "claim_eq"; static const std::string kSysCallName = "syscall"; +static const std::string kSetCopRegName = "setCopReg"; static bool isVarnodeInConstantSpace(VarnodeData vnode) { auto spc = vnode.getAddr().getSpace(); @@ -526,7 +527,7 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock { target_vnode.offset, target_vnode.size, entry_bldr); print_vardata(this->insn_lifter_parent.GetEngine(), ss, target_vnode); DLOG(ERROR) << "Creating unique for unknown register: " << ss.str() << " " - << reg_ptr->getName().str(); + << reg_ptr->getName().str(); return RegisterValue::CreateRegister(reg_ptr); } @@ -550,7 +551,7 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock { vnode.space, vnode.offset, vnode.size); DLOG(INFO) << "Looking for reg name " << reg_name << " from offset " - << vnode.offset; + << vnode.offset; return this->LiftNormalRegisterOrCreateUnique(bldr, reg_name, vnode); } else if (space_name == "const") { @@ -1357,6 +1358,37 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock { args); return kLiftedInstruction; + } else if (other_func_name == kSetCopRegName && + insn.arch_name == ArchName::kArchMIPS) { + DLOG(INFO) << "Invoking setCopReg"; + + if (isize == 5) { + VarnodeData &cop_num = vars[1]; + VarnodeData &reg_num = vars[2]; + VarnodeData &value = vars[3]; + VarnodeData &sel = vars[4]; + + auto inval_cop_num = this->LiftIntegerInParam(bldr, cop_num); + auto inval_reg_num = ConstantValue::CreatConstant( + this->replacement_cont.LiftOffsetOrReplace( + bldr, reg_num, + llvm::IntegerType::get(this->context, reg_num.size * 8))); + auto inval_value = LiftIntegerInParam(bldr, value); + auto inval_sel = this->LiftIntegerInParam(bldr, sel); + + 
std::array args = { + state_pointer, inval_cop_num.value(), + inval_reg_num.get() + ->LiftAsInParam(bldr, llvm::IntegerType::get( + this->context, reg_num.size * 8)) + .value(), + inval_value.value(), inval_sel.value()}; + + bldr.CreateCall( + insn_lifter_parent.GetIntrinsicTable()->set_coprocessor_reg, + args); + } + return kLiftedInstruction; } DLOG(ERROR) << "Unsupported pcode intrinsic: " << *other_func_name; } From bbb3fa58017f4aae628619b4153d2b7ed612d94d Mon Sep 17 00:00:00 2001 From: M4xw Date: Fri, 23 Feb 2024 16:30:53 +0100 Subject: [PATCH 12/15] [MIPS] Expose FCSR --- include/remill/Arch/MIPS/Runtime/State.h | 9 +++++++++ lib/Arch/Sleigh/MIPSArch.cpp | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/include/remill/Arch/MIPS/Runtime/State.h b/include/remill/Arch/MIPS/Runtime/State.h index b990c2880..567d47e4c 100644 --- a/include/remill/Arch/MIPS/Runtime/State.h +++ b/include/remill/Arch/MIPS/Runtime/State.h @@ -260,6 +260,11 @@ struct alignas(8) COP0Registers final { Reg DESAVE; } __attribute__((packed)); +struct alignas(8) COP1Registers final { + volatile uint64_t _0; + Reg FCSR; +} __attribute__((packed)); + struct alignas(8) MIPSState : public ArchState { GPR gpr; // 528 bytes. @@ -276,6 +281,10 @@ struct alignas(8) MIPSState : public ArchState { COP0Registers cop0; uint64_t _3; + + COP1Registers cop1; + + uint64_t _4; } __attribute__((packed)); struct State : public MIPSState {}; diff --git a/lib/Arch/Sleigh/MIPSArch.cpp b/lib/Arch/Sleigh/MIPSArch.cpp index f645b4a68..06c2ed971 100644 --- a/lib/Arch/Sleigh/MIPSArch.cpp +++ b/lib/Arch/Sleigh/MIPSArch.cpp @@ -309,6 +309,10 @@ class SleighMIPSArch : public ArchBase { REG(TAGHI, cop0.TagHi.qword, u64); REG(ERRORPC, cop0.ErrorEPC.qword, u64); REG(DESAVE, cop0.DESAVE.qword, u64); + + // COP1 + // TODO: Maybe move fpr here? 
+ REG(FCSR, cop1.FCSR.dword, u32); } void From 5af2876cb2c8ea5fdaadbd80ecdb5e07af5469a0 Mon Sep 17 00:00:00 2001 From: M4xw Date: Tue, 26 Mar 2024 15:51:13 +0100 Subject: [PATCH 13/15] Attempt at solving the negated CBranch Issue Unsure if this catches every case --- lib/BC/PcodeCFG.cpp | 10 ---------- lib/BC/SleighLifter.cpp | 30 +++++++++++------------------- 2 files changed, 11 insertions(+), 29 deletions(-) diff --git a/lib/BC/PcodeCFG.cpp b/lib/BC/PcodeCFG.cpp index c96cb67d8..64c38a27b 100644 --- a/lib/BC/PcodeCFG.cpp +++ b/lib/BC/PcodeCFG.cpp @@ -119,16 +119,6 @@ PcodeCFGBuilder::GetControlFlowExitsForIndex(size_t index) const { auto taken_exit = build_direct_target_exit(curr_op.vars[0], index); - // BUG(M4xw): Branch likelies seem to trigger a bug that causes wrong BranchTaken conditions - // This is always a CBRANCH preceded by a BOOL_NEGATE, so this is a workaround for now - // The logic operation is technically the same, the BOOL_NEGATE flip happens in SleighLifter - if (this->arch.IsMIPS()) { - if (linear_ops[index - 1].op == CPUI_BOOL_NEGATE) { - DLOG(INFO) << "Flipping CBRANCH Targets"; - std::swap(taken_exit, fallthrough_exit); - } - } - return ConditionalExit{taken_exit, fallthrough_exit}; } case CPUI_CALLIND: diff --git a/lib/BC/SleighLifter.cpp b/lib/BC/SleighLifter.cpp index cf40b48c6..fad0968e5 100644 --- a/lib/BC/SleighLifter.cpp +++ b/lib/BC/SleighLifter.cpp @@ -125,8 +125,6 @@ llvm::Value *CreatePcodeBitShift(llvm::Value *lhs, llvm::Value *rhs, class SleighLifter::PcodeToLLVMEmitIntoBlock { private: - bool flip_bool_negate; - class Parameter { public: virtual ~Parameter(void) = default; @@ -456,8 +454,7 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock { std::vector user_op_names_, llvm::BasicBlock *exit_block_, const sleigh::MaybeBranchTakenVar &to_lift_btaken_, PcodeToLLVMEmitIntoBlock::DecodingContextConstants context_reg_lifter) - : flip_bool_negate(false), - target_block(target_block), + : target_block(target_block), 
state_pointer(state_pointer), context(target_block->getContext()), insn(insn), @@ -752,10 +749,8 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock { return this->LiftStoreIntoOutParam( bldr, bldr.CreateZExt( - bldr.CreateICmpEQ( - *bneg_inval, - llvm::ConstantInt::get(byte_type, - this->flip_bool_negate ? 1 : 0)), + bldr.CreateICmpEQ(*bneg_inval, + llvm::ConstantInt::get(byte_type, 0)), byte_type), outvar); } @@ -1419,6 +1414,13 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock { return LiftStatus::kLiftedLifterError; } + if (btaken_var.invert) { + // Branch taken evaluation is inverted + *maybe_should_branch = bldr.CreateICmpEQ( + *maybe_should_branch, + llvm::ConstantInt::get(llvm::IntegerType::get(this->context, 8), 0)); + } + auto should_branch = bldr.CreateZExtOrTrunc( *maybe_should_branch, llvm::IntegerType::get(this->context, 8)); auto branch_taken_ref = this->GetBranchTakenRef(); @@ -1484,17 +1486,6 @@ class SleighLifter::PcodeToLLVMEmitIntoBlock { // either exit (means real control flow), to block (fake control flow) size_t index = 0; for (auto pc : blk.ops) { - // BUG(M4xw): Branch likelies seem to trigger a bug that causes wrong BranchTaken conditions - // This is always a CBRANCH preceded by a BOOL_NEGATE, so this is a workaround for now - // The logic operation is technically the same, the CBranch Target flip occurs in PcodeCFG - if (this->insn_lifter_parent.arch.IsMIPS()) { - if (pc.op == OpCode::CPUI_BOOL_NEGATE && - blk.ops[index + 1].op == OpCode::CPUI_CBRANCH) { - this->flip_bool_negate = true; - } else { - this->flip_bool_negate = false; - } - } this->LiftBtakenIfReached(bldr, pc.op, index); this->LiftPcodeOp(bldr, pc.op, pc.outvar, pc.vars.data(), pc.vars.size()); index += 1; @@ -1840,6 +1831,7 @@ LiftStatus SleighLifter::LiftIntoBlockWithSleighState( 4)), // Historically approximated Count per Opcode count_ref); } + LOG(INFO) << inst.Serialize(); ////////////////////////////////////////////////////////////////////////////////////////// // 
TODO(Ian): THIS IS AN UNSOUND ASSUMPTION THAT RETURNS ALWAYS RETURN TO THE FALLTHROUGH, this is just to make things work From cb315437d8bf50ac8e47d047f2baef7ffa0b7db5 Mon Sep 17 00:00:00 2001 From: M4xw Date: Wed, 3 Apr 2024 12:55:08 +0200 Subject: [PATCH 14/15] [Lift] Support for passing a file via --bytes using @file.ext syntax --- bin/lift/Lift.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/bin/lift/Lift.cpp b/bin/lift/Lift.cpp index a25031e71..d8ffed574 100644 --- a/bin/lift/Lift.cpp +++ b/bin/lift/Lift.cpp @@ -241,6 +241,18 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } + if (FLAGS_bytes[0] == '@') { + std::ifstream file(FLAGS_bytes.substr(1)); + if (!file.is_open()) { + std::cerr << "Could not open file " << FLAGS_bytes.substr(1) + << " specified to --bytes." << std::endl; + return EXIT_FAILURE; + } + std::stringstream buffer; + buffer << file.rdbuf(); + FLAGS_bytes = buffer.str(); + } + if (FLAGS_bytes.size() % 2) { std::cerr << "Please specify an even number of nibbles to --bytes." << std::endl; From f4123782a5b96d46e4285ddbc6f86c32262ef7aa Mon Sep 17 00:00:00 2001 From: M4xw Date: Mon, 8 Apr 2024 14:11:55 +0200 Subject: [PATCH 15/15] [MIPS] Fix State Reg --- include/remill/Arch/MIPS/Runtime/State.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/remill/Arch/MIPS/Runtime/State.h b/include/remill/Arch/MIPS/Runtime/State.h index 567d47e4c..e2270f5a1 100644 --- a/include/remill/Arch/MIPS/Runtime/State.h +++ b/include/remill/Arch/MIPS/Runtime/State.h @@ -27,8 +27,8 @@ struct Reg final { union { - alignas(4) uint32_t dword; alignas(8) uint64_t qword; + alignas(4) uint32_t dword; } __attribute__((packed)); } __attribute__((packed));