From b9aeed67b4b8130b6730b61d3e3e0603502e6d42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20K=C3=A4ll=C3=A9n?=
Date: Tue, 15 Oct 2024 19:38:38 +0200
Subject: [PATCH] Feature: long constant fusion for PowerPC.

---
 src/Arch/PowerPC/LongConstantFuser.cs | 228 ++++++++++++++++++
 src/Arch/PowerPC/MemoryOperand.cs | 33 ++-
 src/Arch/PowerPC/PowerPcArchitecture.cs | 5 +-
 src/Arch/PowerPC/PowerPcDisassembler.cs | 3 +-
 src/Arch/PowerPC/PowerPcRewriter.cs | 11 +-
 src/Arch/PowerPC/SliceOperand.cs | 76 ++++++
 src/Arch/RiscV/LongConstantFuser.cs | 4 +-
 .../Arch/PowerPC/InstructionBuilder.cs | 8 +-
 .../PE/PPC/hello_ppc.reko/hello_ppc_text.asm | 16 +-
 subjects/regression.log | 6 +-
 10 files changed, 357 insertions(+), 33 deletions(-)
 create mode 100644 src/Arch/PowerPC/LongConstantFuser.cs
 create mode 100644 src/Arch/PowerPC/SliceOperand.cs

diff --git a/src/Arch/PowerPC/LongConstantFuser.cs b/src/Arch/PowerPC/LongConstantFuser.cs
new file mode 100644
index 0000000000..b5608bb930
--- /dev/null
+++ b/src/Arch/PowerPC/LongConstantFuser.cs
@@ -0,0 +1,260 @@
+#region License
+/*
+ * Copyright (C) 1999-2024 John Källén.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#endregion
+
+using Reko.Core;
+using Reko.Core.Collections;
+using Reko.Core.Expressions;
+using Reko.Core.Machine;
+using System;
+using System.Collections;
+using System.Collections.Generic;
+
+namespace Reko.Arch.PowerPC
+{
+    /// <summary>
+    /// Wraps a <see cref="PowerPcDisassembler"/> and annotates instruction pairs
+    /// that build a constant from two 16-bit halves: an <c>addis</c> or
+    /// <c>oris</c> whose source register is number 0, followed by an
+    /// <c>ori</c>, an <c>addi</c>, or a load/store based on the same register.
+    /// The immediates of both instructions are replaced by
+    /// <see cref="SliceOperand"/>s carrying the fused constant; every
+    /// instruction of the wrapped stream is still yielded, in order.
+    /// </summary>
+    public class LongConstantFuser : IEnumerable<PowerPcInstruction>
+    {
+        private readonly PowerPcDisassembler dasm;
+
+        public LongConstantFuser(PowerPcDisassembler dasm)
+        {
+            this.dasm = dasm;
+        }
+
+        /// <summary>
+        /// Enumerates the wrapped disassembler's instructions, rewriting the
+        /// immediate operands of recognized high/low pairs in place before
+        /// the high instruction is yielded.
+        /// </summary>
+        public IEnumerator<PowerPcInstruction> GetEnumerator()
+        {
+            var e = new LookaheadEnumerator<PowerPcInstruction>(dasm);
+            while (e.MoveNext())
+            {
+                var instrHi = e.Current;
+                switch (instrHi.Mnemonic)
+                {
+                case Mnemonic.addis:
+                case Mnemonic.oris:
+                    // A pair can only start when the source register is
+                    // number 0 and a following instruction can be peeked.
+                    if (((RegisterStorage) instrHi.Operands[1]).Number != 0 ||
+                        !e.TryPeek(1, out var instrLo))
+                    {
+                        break;
+                    }
+                    if (instrLo.Mnemonic == Mnemonic.ori)
+                    {
+                        if (instrHi.Operands[0] == instrLo.Operands[0] &&
+                            instrLo.Operands[0] == instrLo.Operands[1])
+                        {
+                            var immHi = (ImmediateOperand) instrHi.Operands[2];
+                            var immLo = (ImmediateOperand) instrLo.Operands[2];
+                            // ori zero-extends its 16-bit immediate: mask the
+                            // low half so a sign-extended value cannot smear
+                            // into the high half (7FFF:FFFF fuses to 7FFFFFFF).
+                            var (hiOp, loOp) = MakeSlices(
+                                instrHi, immHi, immLo,
+                                (immHi.Value.ToInt32() << 16) |
+                                (immLo.Value.ToInt32() & 0xFFFF));
+                            // The immediate is operand 2; operand 1 is the
+                            // source register and must be left alone.
+                            instrHi.Operands[2] = hiOp;
+                            instrLo.Operands[2] = loOp;
+                        }
+                    }
+                    else if (instrLo.Mnemonic == Mnemonic.addi)
+                    {
+                        if (instrHi.Operands[0] == instrLo.Operands[0] &&
+                            instrLo.Operands[0] == instrLo.Operands[1])
+                        {
+                            var immHi = (ImmediateOperand) instrHi.Operands[2];
+                            var immLo = (ImmediateOperand) instrLo.Operands[2];
+                            // The low half is added rather than OR-ed in.
+                            var (hiOp, loOp) = MakeSlices(
+                                instrHi, immHi, immLo,
+                                (immHi.Value.ToInt32() << 16) +
+                                immLo.Value.ToInt32());
+                            instrHi.Operands[2] = hiOp;
+                            instrLo.Operands[2] = loOp;
+                        }
+                    }
+                    else if (IsMemoryInstruction(instrLo.Mnemonic) &&
+                        instrLo.Operands[1] is MemoryOperand memOp)
+                    {
+                        if (instrHi.Operands[0] == memOp.BaseRegister &&
+                            memOp.Offset is ImmediateOperand immLo)
+                        {
+                            var immHi = (ImmediateOperand) instrHi.Operands[2];
+                            // The displacement is added to the high half.
+                            var (hiOp, loOp) = MakeSlices(
+                                instrHi, immHi, immLo,
+                                (immHi.Value.ToInt32() << 16) +
+                                immLo.Value.ToInt32());
+                            instrHi.Operands[2] = hiOp;
+                            memOp.Offset = loOp;
+                        }
+                    }
+                    break;
+                default:
+                    break;
+                }
+                yield return e.Current;
+            }
+        }
+
+        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+
+        /// <summary>
+        /// Builds the <c>%hi</c>/<c>%lo</c> operand pair for a fused constant.
+        /// Both slices share one <see cref="ImmediateOperand"/> holding
+        /// <paramref name="value"/>, given the width of the first operand of
+        /// <paramref name="instrHi"/>; each slice keeps its original immediate.
+        /// </summary>
+        private static (SliceOperand hi, SliceOperand lo) MakeSlices(
+            PowerPcInstruction instrHi,
+            ImmediateOperand immHi,
+            ImmediateOperand immLo,
+            int value)
+        {
+            var longConst = new ImmediateOperand(
+                Constant.Create(instrHi.Operands[0].Width, value));
+            return (
+                new SliceOperand(SliceType.Hi, immHi, longConst),
+                new SliceOperand(SliceType.Lo, immLo, longConst));
+        }
+
+        /// <summary>
+        /// Tells whether <paramref name="mnemonic"/> is one of the load or
+        /// store mnemonics whose memory operand is considered for fusion.
+        /// </summary>
+        private static bool IsMemoryInstruction(Mnemonic mnemonic)
+        {
+            return mnemonic switch
+            {
+                Mnemonic.lbz or
+                Mnemonic.lbzcix or
+                Mnemonic.lbzu or
+                Mnemonic.lbzux or
+                Mnemonic.lbzx or
+                Mnemonic.ld or
+                Mnemonic.ldat or
+                Mnemonic.ldarx or
+                Mnemonic.ldcix or
+                Mnemonic.ldmx or
+                Mnemonic.ldu or
+                Mnemonic.ldux or
+                Mnemonic.ldx or
+                Mnemonic.lfd or
+                Mnemonic.lfdp or
+                Mnemonic.lfdu or
+                Mnemonic.lfdux or
+                Mnemonic.lfdx or
+                Mnemonic.lfs or
+                Mnemonic.lfsu or
+                Mnemonic.lfsux or
+                Mnemonic.lfsx or
+                Mnemonic.lha or
+                Mnemonic.lhau or
+                Mnemonic.lhaux or
+                Mnemonic.lhax or
+                Mnemonic.lhbrx or
+                Mnemonic.lhz or
+                Mnemonic.lhzu or
+                Mnemonic.lhzux or
+                Mnemonic.lhzx or
+                Mnemonic.lq or
+                Mnemonic.lwa or
+                Mnemonic.lwarx or
+                Mnemonic.lwax or
+                Mnemonic.lwbrx or
+                Mnemonic.lwz or
+                Mnemonic.lwzu or
+                Mnemonic.lwzux or
+                Mnemonic.lwzx or
+
+                Mnemonic.stb or
+                Mnemonic.stbcix or
+                Mnemonic.stbcx or
+                Mnemonic.stbu or
+                Mnemonic.stbux or
+                Mnemonic.stbx or
+                Mnemonic.std or
+                Mnemonic.stdat or
+                Mnemonic.stdbrx or
+                Mnemonic.stdcx or
+                Mnemonic.stdcix or
+                Mnemonic.stdu or
+                Mnemonic.stdx or
+                Mnemonic.stfd or
+                Mnemonic.stfdp or
+                Mnemonic.stfdu or
+                Mnemonic.stfdux or
+                Mnemonic.stfdx or
+                Mnemonic.stfiwx or
+                Mnemonic.stfs or
+                Mnemonic.stfsu or
+                Mnemonic.stfsux or
+                Mnemonic.stfsx or
+                Mnemonic.sth or
+                Mnemonic.sthbrx or
+                Mnemonic.sthcix or
+                Mnemonic.sthcx or
+                Mnemonic.sthu or
+                Mnemonic.sthx or
+                Mnemonic.stmw or
+                Mnemonic.stop or
+                Mnemonic.stq or
+                Mnemonic.stqcx or
+                Mnemonic.stqdx or
+                Mnemonic.stswi or
+                Mnemonic.stswx or
+                Mnemonic.stvebx or
+                Mnemonic.stvehx or
+                Mnemonic.stvewx or
+                Mnemonic.stvx or
+                Mnemonic.stvxl or
+                Mnemonic.stw or
+                Mnemonic.stwat or
+                Mnemonic.stwbrx or
+                Mnemonic.stwcix or
+                Mnemonic.stwcx or
+                Mnemonic.stwu or
+                Mnemonic.stwux or
+                Mnemonic.stwx or
+                Mnemonic.stxsd or
+                Mnemonic.stxsdx or
+                Mnemonic.stxsihx or
+                Mnemonic.stxsiwx or
+                Mnemonic.stxsix or
+                Mnemonic.stxssp => true,
+                _ => false
+            };
+        }
+    }
+}
diff --git a/src/Arch/PowerPC/MemoryOperand.cs b/src/Arch/PowerPC/MemoryOperand.cs index 8afb545178..fc926ee662 100644 --- a/src/Arch/PowerPC/MemoryOperand.cs +++ b/src/Arch/PowerPC/MemoryOperand.cs @@ -19,30 +19,47 @@ #endregion using Reko.Core; -using Reko.Core.Expressions; using Reko.Core.Machine; using Reko.Core.Types; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; namespace Reko.Arch.PowerPC { public class MemoryOperand : AbstractMachineOperand { - public MemoryOperand(PrimitiveType size, RegisterStorage reg, int offset) : base(size) + public MemoryOperand(PrimitiveType size, RegisterStorage reg, MachineOperand offset) : base(size) { this.BaseRegister = reg; this.Offset = offset; } public RegisterStorage BaseRegister { get; } - public int Offset { get; } + public MachineOperand Offset { get; set; } protected override void DoRender(MachineInstructionRenderer renderer, MachineInstructionRendererOptions options) { - renderer.WriteString($"{Offset}({BaseRegister})"); + if (Offset is SliceOperand slice) + { + renderer.WriteString($"{slice}({BaseRegister})"); + } + else + { + var offset = (ImmediateOperand) this.Offset; + 
renderer.WriteString($"{offset.Value.ToInt32()}({BaseRegister.Name})"); + } + } + + public int IntOffset() + { + ImmediateOperand offset; + if (this.Offset is SliceOperand slice) + { + offset = slice.Value; + } + else + { + offset = (ImmediateOperand) this.Offset; + } + return offset.Value.ToInt32(); } } } diff --git a/src/Arch/PowerPC/PowerPcArchitecture.cs b/src/Arch/PowerPC/PowerPcArchitecture.cs index 31fd3a69f9..c3cf842c34 100644 --- a/src/Arch/PowerPC/PowerPcArchitecture.cs +++ b/src/Arch/PowerPC/PowerPcArchitecture.cs @@ -171,7 +171,8 @@ public PowerPcDisassembler CreateDisassemblerImpl(EndianImageReader rdr) public override IEnumerable CreateDisassembler(EndianImageReader rdr) { - return new PowerPcDisassembler(this, EnsureDecoders(), rdr, WordWidth); + var dasm = new PowerPcDisassembler(this, EnsureDecoders(), rdr, WordWidth); + return new LongConstantFuser(dasm); } public override IProcessorEmulator CreateEmulator(SegmentMap segmentMap, IPlatformEmulator envEmulator) @@ -230,7 +231,7 @@ public override abstract IEnumerable
CreatePointerScanner( return null; if (mem.BaseRegister != reg) return null; - uAddr = (uint)((int)uAddr + mem.Offset); + uAddr = (uint)((int)uAddr + mem.IntOffset()); reg = (RegisterStorage)e.Current.Operands[0]; if (!e.MoveNext() || e.Current.Mnemonic != Mnemonic.mtctr) diff --git a/src/Arch/PowerPC/PowerPcDisassembler.cs b/src/Arch/PowerPC/PowerPcDisassembler.cs index fbeaab0017..3433a8dd13 100644 --- a/src/Arch/PowerPC/PowerPcDisassembler.cs +++ b/src/Arch/PowerPC/PowerPcDisassembler.cs @@ -656,7 +656,8 @@ internal static Bitfield[] BeFields(params (int bitPos, int bitLength)[] fieldDe private MachineOperand MemOff(uint reg, uint wInstr) { - return new MemoryOperand(PrimitiveType.Word32, arch.Registers[(int)reg & 0x1F], (short) wInstr); + return new MemoryOperand(PrimitiveType.Word32, arch.Registers[(int)reg & 0x1F], + ImmediateOperand.Int32((short) wInstr)); } private RegisterStorage CRegFromBits(uint r) diff --git a/src/Arch/PowerPC/PowerPcRewriter.cs b/src/Arch/PowerPC/PowerPcRewriter.cs index e1a5ed7997..eff10f5271 100644 --- a/src/Arch/PowerPC/PowerPcRewriter.cs +++ b/src/Arch/PowerPC/PowerPcRewriter.cs @@ -731,21 +731,22 @@ private Expression EffectiveAddress(MachineOperand operand, RtlEmitter emitter) { var mop = (MemoryOperand) operand; var reg = binder.EnsureRegister(mop.BaseRegister); - var offset = mop.Offset; + var offset = mop.IntOffset(); return emitter.IAddS(reg, offset); } private Expression EffectiveAddress_r0(int iOp, int extraOffset = 0) { var mop = (MemoryOperand) instr.Operands[iOp]; + var offset = mop.IntOffset(); if (mop.BaseRegister.Number == 0) { - return Constant.Word32(mop.Offset + extraOffset); + return Constant.Word32(offset + extraOffset); } else { var reg = binder.EnsureRegister(mop.BaseRegister); - var offset = mop.Offset + extraOffset; + offset = offset + extraOffset; if (offset != 0) return m.IAddS(reg, offset); else @@ -756,14 +757,14 @@ private Expression EffectiveAddress_r0(int iOp, int extraOffset = 0) private Expression 
EffectiveAddress_r0(MachineOperand operand) { var mop = (MemoryOperand) operand; + var offset = mop.IntOffset(); if (mop.BaseRegister.Number == 0) { - return Constant.Word32(mop.Offset); + return Constant.Word32(offset); } else { var reg = binder.EnsureRegister(mop.BaseRegister); - var offset = mop.Offset; if (offset != 0) return m.IAddS(reg, offset); else
diff --git a/src/Arch/PowerPC/SliceOperand.cs b/src/Arch/PowerPC/SliceOperand.cs
new file mode 100644
index 0000000000..5962a5c9b2
--- /dev/null
+++ b/src/Arch/PowerPC/SliceOperand.cs
@@ -0,0 +1,89 @@
+#region License
+/*
+ * Copyright (C) 1999-2024 John Källén.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#endregion
+
+using Reko.Core.Machine;
+using System.Collections.Generic;
+
+namespace Reko.Arch.PowerPC;
+
+/// <summary>
+/// Represents one half of a constant that the PowerPC
+/// <see cref="LongConstantFuser"/> has fused from an <c>addis</c>/<c>oris</c>
+/// and the instruction that follows it. The operand keeps the immediate
+/// originally found in the instruction and renders as
+/// <c>%hi(value)</c> or <c>%lo(value)</c> of the fused constant.
+/// </summary>
+public class SliceOperand : AbstractMachineOperand
+{
+    /// <param name="slice">Which half of the constant this operand stands for.</param>
+    /// <param name="value">The immediate the instruction carried before fusion.</param>
+    /// <param name="inferredValue">The full constant the instruction pair builds.</param>
+    public SliceOperand(
+        SliceType slice,
+        ImmediateOperand value,
+        MachineOperand inferredValue)
+        : base(value.Width)
+    {
+        this.Slice = slice;
+        this.Value = value;
+        this.InferredValue = inferredValue;
+    }
+
+    public SliceType Slice { get; }
+    public ImmediateOperand Value { get; }
+    public MachineOperand InferredValue { get; }
+
+    // Renders the slice prefix followed by the inferred value in parentheses.
+    protected override void DoRender(MachineInstructionRenderer renderer, MachineInstructionRendererOptions options)
+    {
+        renderer.WriteString(Slice.Format());
+        renderer.WriteChar('(');
+        InferredValue.Render(renderer, options);
+        renderer.WriteChar(')');
+    }
+}
+
+/// <summary>Identifies which half of a fused constant a <see cref="SliceOperand"/> holds.</summary>
+public enum SliceType
+{
+    None,
+
+    Hi,
+    Lo,
+}
+
+public static class SliceTypeExtensions
+{
+    private static readonly Dictionary<SliceType, string> strFormats = new()
+    {
+        { SliceType.Hi, "%hi" },
+        { SliceType.Lo, "%lo" },
+    };
+
+    /// <summary>
+    /// Returns the assembler prefix for a slice type.
+    /// <see cref="SliceType.None"/> has no entry and throws
+    /// <see cref="KeyNotFoundException"/>.
+    /// </summary>
+    public static string Format(this SliceType type)
+    {
+        return strFormats[type];
+    }
+}
diff --git a/src/Arch/RiscV/LongConstantFuser.cs b/src/Arch/RiscV/LongConstantFuser.cs index d6fecb5bf0..70d3fa79ac 100644 --- a/src/Arch/RiscV/LongConstantFuser.cs +++ b/src/Arch/RiscV/LongConstantFuser.cs @@ -70,7 +70,7 @@ public IEnumerator GetEnumerator() else if (IsMemoryInstruction(instrLo.Mnemonic)) { var memOp = (MemoryOperand) instrLo.Operands[1]; - if (instrLo.Operands[0] == memOp.Base && + if (instrHi.Operands[0] == memOp.Base && memOp.Offset is ImmediateOperand imm) { var immHi = (ImmediateOperand) instrHi.Operands[1]; @@ -113,7 +113,7 @@ public IEnumerator GetEnumerator() else if (IsMemoryInstruction(instrLo.Mnemonic)) { var memOp = (MemoryOperand) instrLo.Operands[1]; - if (instrLo.Operands[0] == memOp.Base && + if (instrHi.Operands[0] == memOp.Base && memOp.Offset is ImmediateOperand imm) { immLo = imm; diff --git a/src/UnitTests/Arch/PowerPC/InstructionBuilder.cs b/src/UnitTests/Arch/PowerPC/InstructionBuilder.cs index 5f94c20d9a..93ed939db1 100644 --- 
a/src/UnitTests/Arch/PowerPC/InstructionBuilder.cs +++ b/src/UnitTests/Arch/PowerPC/InstructionBuilder.cs @@ -139,7 +139,7 @@ public void Add_(RegisterStorage rT, RegisterStorage rA, RegisterStorage rB) internal void Lbzu(RegisterStorage rD, short offset, RegisterStorage rA) { - Add(new PowerPcInstruction(Mnemonic.lbzu, rD, new MemoryOperand(rD.DataType, rA, offset), null, false)); + Add(new PowerPcInstruction(Mnemonic.lbzu, rD, new MemoryOperand(rD.DataType, rA, ImmediateOperand.Int32(offset)), null, false)); } public void Lis(RegisterStorage r, ushort uimm) @@ -149,12 +149,12 @@ public void Lis(RegisterStorage r, ushort uimm) public void Lwzu(RegisterStorage rD, short offset, RegisterStorage rA) { - Add(new PowerPcInstruction(Mnemonic.lwzu, rD, new MemoryOperand(rD.DataType, rA, offset), null, false)); + Add(new PowerPcInstruction(Mnemonic.lwzu, rD, new MemoryOperand(rD.DataType, rA, ImmediateOperand.Int32(offset)), null, false)); } public void Lwz(RegisterStorage rD, short offset, RegisterStorage rA) { - Add(new PowerPcInstruction(Mnemonic.lwz, rD, new MemoryOperand(rD.DataType, rA, offset), null, false)); + Add(new PowerPcInstruction(Mnemonic.lwz, rD, new MemoryOperand(rD.DataType, rA, ImmediateOperand.Int32(offset)), null, false)); } public void Mtctr(RegisterStorage r) @@ -174,7 +174,7 @@ public void Stbu(RegisterStorage rS, short offset, RegisterStorage rA) private MemoryOperand Mem(RegisterStorage baseReg, short offset) { - return new MemoryOperand(baseReg.DataType, baseReg, offset); + return new MemoryOperand(baseReg.DataType, baseReg, ImmediateOperand.Int32(offset)); } } } diff --git a/subjects/PE/PPC/hello_ppc.reko/hello_ppc_text.asm b/subjects/PE/PPC/hello_ppc.reko/hello_ppc_text.asm index e9919afeb5..c294c0d714 100644 --- a/subjects/PE/PPC/hello_ppc.reko/hello_ppc_text.asm +++ b/subjects/PE/PPC/hello_ppc.reko/hello_ppc_text.asm @@ -2386,8 +2386,8 @@ fn00402404 proc l00402458: addi r4,r1,+0038 - addis r3,r0,+7379 - ori r3,r3,7376 + addis 
r3,%hi(73797376),+7379 + ori r3,r3,%lo(73797376) bl fn00403F9C lwz r2,20(r1) cmpwi cr1,r3,+0000 @@ -2866,8 +2866,8 @@ l00402A7C: l00402AB0: addi r31,r1,+0038 ori r4,r31,0000 - addis r3,r0,+6164 - ori r3,r3,6472 + addis r3,%hi(61646472),+6164 + ori r3,r3,%lo(61646472) bl fn00403F9C lwz r2,20(r1) cmpwi cr1,r3,+0000 @@ -3554,8 +3554,8 @@ fn004032FC proc addi r5,r5,+0004 addi r6,r0,+0042 stw r6,12(r31) - addis r7,r0,+7FFF - ori r7,r7,FFFF + addis r7,%hi(FFFFFFFF),+7FFF + ori r7,r7,%lo(FFFFFFFF) stw r7,4(r31) bl fn004004B4 nop @@ -3651,8 +3651,8 @@ l0040347C: beq cr1,$004034A4 l00403484: - addis r4,r0,+7FFF - ori r4,r4,FFFF + addis r4,%hi(FFFFFFFF),+7FFF + ori r4,r4,%lo(FFFFFFFF) cmplw cr1,r28,r4 bgt cr1,$004034A4 diff --git a/subjects/regression.log b/subjects/regression.log index e72f4a9d5f..44e99fbe08 100644 --- a/subjects/regression.log +++ b/subjects/regression.log @@ -2464,7 +2464,7 @@ fn00008EC2: error: An error occurred while rewriting procedure to high-level lan The given key 'l00009735' was not present in the dictionary. 
at System.Collections.Generic.Dictionary`2.get_Item(TKey key) at Reko.Core.Graphs.DominatorGraph`1.BuildDominanceFrontiers(DirectedGraph`1 graph, Dictionary`2 idoms) in c:\dev\uxmal\reko\master\src\Core\Graphs\DominatorGraph.cs:line 193 - at Reko.Core.Graphs.DominatorGraph`1..ctor(DirectedGraph`1 graph, T entryNode) in c:\dev\uxmal\reko\master\src\Core\Graphs\DominatorGraph.cs:line 47 + at Reko.Core.Graphs.DominatorGraph`1..ctor(DirectedGraph`1 graph, T entryNode) in c:\dev\uxmal\reko\master\src\Core\Graphs\DominatorGraph.cs:line 46 at Reko.Structure.StructureAnalysis.Execute() in c:\dev\uxmal\reko\master\src\Decompiler\Structure\StructureAnalysis.cs:line 147 at Reko.Structure.StructureAnalysis.Structure() in c:\dev\uxmal\reko\master\src\Decompiler\Structure\StructureAnalysis.cs:line 80 at Reko.Decompiler.StructureProgram() in c:\dev\uxmal\reko\master\src\Decompiler\Decompiler.cs:line 593 @@ -2472,7 +2472,7 @@ fn00009746: error: An error occurred while rewriting procedure to high-level lan The given key 'l0000985B' was not present in the dictionary. 
at System.Collections.Generic.Dictionary`2.get_Item(TKey key) at Reko.Core.Graphs.DominatorGraph`1.BuildDominanceFrontiers(DirectedGraph`1 graph, Dictionary`2 idoms) in c:\dev\uxmal\reko\master\src\Core\Graphs\DominatorGraph.cs:line 193 - at Reko.Core.Graphs.DominatorGraph`1..ctor(DirectedGraph`1 graph, T entryNode) in c:\dev\uxmal\reko\master\src\Core\Graphs\DominatorGraph.cs:line 47 + at Reko.Core.Graphs.DominatorGraph`1..ctor(DirectedGraph`1 graph, T entryNode) in c:\dev\uxmal\reko\master\src\Core\Graphs\DominatorGraph.cs:line 46 at Reko.Structure.StructureAnalysis.Execute() in c:\dev\uxmal\reko\master\src\Decompiler\Structure\StructureAnalysis.cs:line 147 at Reko.Structure.StructureAnalysis.Structure() in c:\dev\uxmal\reko\master\src\Decompiler\Structure\StructureAnalysis.cs:line 80 at Reko.Decompiler.StructureProgram() in c:\dev\uxmal\reko\master\src\Decompiler\Decompiler.cs:line 593 @@ -2574,4 +2574,4 @@ PE Debug type 14 not supported yet. === PE\x86\VCExeSample\VCExeSample Signature of 'Microsoft Visual C++ 8' detected. -Decompiled 93 binaries in 97.96 seconds. +Decompiled 93 binaries in 52.35 seconds.