You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I am trying to configure a wider-issue CPU with 4 load/store units. However, I observe that loads are not issued in parallel even though there are multiple functional units. Why is this?
Below is my script:
# Command-line interface: a single optional flag naming the binary to run.
parser = argparse.ArgumentParser(
    description='A simple script to configure RISC-V core.'
)
parser.add_argument(
    "--binary",
    type=str,
    nargs="?",
    default="",
    help="Path to the binary to execute.",
)
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
-
I am trying to configure a wider-issue CPU with 4 load/store units. However, I observe that loads are not issued in parallel even though there are multiple functional units. Why is this?
Below is my script:
import m5
from m5.objects import *
import argparse
class L1Cache(Cache):
    """Base L1 cache holding the parameters shared by the I- and D-cache."""

    # Associativity and access latencies.
    assoc = 2
    tag_latency = 1
    data_latency = 1
    response_latency = 1
    # Miss-status holding registers and targets tracked per MSHR.
    mshrs = 4
    tgts_per_mshr = 20

    def connectBus(self, bus):
        """Attach this cache's memory-side port to *bus*.

        The configuration script calls this on both L1 caches with the L2
        crossbar as *bus*.
        """
        self.mem_side = bus.cpu_side_ports
class L1ICache(L1Cache):
    """L1 instruction cache."""

    size = '32kB'

    def connectCPU(self, cpu):
        """Attach this cache's CPU-side port to the instruction port of *cpu*."""
        self.cpu_side = cpu.icache_port
class L1DCache(L1Cache):
    """L1 data cache."""

    # NOTE(review): this is the same capacity as L2Cache in this script --
    # confirm that a 1024kB L1 data cache is intended.
    size = '1024kB'

    def connectCPU(self, cpu):
        """Attach this cache's CPU-side port to the data port of *cpu*."""
        self.cpu_side = cpu.dcache_port
class L2Cache(Cache):
    """Unified L2 cache placed between the L2 crossbar and the memory bus."""

    size = '1024kB'
    assoc = 8
    # Access latencies.
    tag_latency = 1
    data_latency = 1
    response_latency = 1
    # Miss-status holding registers and targets tracked per MSHR.
    mshrs = 20
    tgts_per_mshr = 12

    def connectCPUSideBus(self, bus):
        """Attach this cache's CPU-side port to *bus* (the L2 crossbar)."""
        self.cpu_side = bus.mem_side_ports

    def connectMemSideBus(self, bus):
        """Attach this cache's memory-side port to *bus* (the memory bus)."""
        self.mem_side = bus.cpu_side_ports
class TestFUPool(MinorFUPool):
    """Functional-unit pool for the Minor CPU with four memory units.

    Lists two integer units, one each of the multiply, divide, float/SIMD,
    predicate and misc units, and four memory units.
    """

    funcUnits = [
        # Integer units.
        MinorDefaultIntFU(),
        MinorDefaultIntFU(),
        MinorDefaultIntMulFU(),
        MinorDefaultIntDivFU(),
        # Floating-point / SIMD and predicate units.
        MinorDefaultFloatSimdFU(),
        MinorDefaultPredFU(),
        # Four memory (load/store) units.
        MinorDefaultMemFU(),
        MinorDefaultMemFU(),
        MinorDefaultMemFU(),
        MinorDefaultMemFU(),
        MinorDefaultMiscFU(),
    ]
# Command-line interface: a single optional flag naming the binary to run.
parser = argparse.ArgumentParser(
    description='A simple script to configure RISC-V core.'
)
parser.add_argument(
    "--binary",
    type=str,
    nargs="?",
    default="",
    help="Path to the binary to execute.",
)
# Parse the command line; args.binary names the program to simulate.
args = parser.parse_args()
# Top-level system with its clock and voltage domains.
system = System()
system.clk_domain = SrcClockDomain()
# NOTE(review): "1000GHz" corresponds to a 1 ps clock period -- confirm
# that this is intended.
system.clk_domain.clock = "1000GHz"
system.clk_domain.voltage_domain = VoltageDomain()
# Timing-mode memory accesses over a single 1024MB address range.
system.mem_mode = "timing"
system.mem_ranges = [AddrRange("1024MB")]
# RISC-V Minor CPU with vlen=1024 passed to the ISA object.
system.cpu = RiscvMinorCPU()
system.cpu.isa = RiscvISA(vlen=1024)
# Private L1 instruction and data caches, attached to the CPU.
system.cpu.icache = L1ICache()
system.cpu.dcache = L1DCache()
system.cpu.icache.connectCPU(system.cpu)
system.cpu.dcache.connectCPU(system.cpu)
# Crossbar joining both L1 caches to the L2 cache.
system.l2bus = L2XBar()
system.cpu.icache.connectBus(system.l2bus)
system.cpu.dcache.connectBus(system.l2bus)
system.l2cache = L2Cache()
system.l2cache.connectCPUSideBus(system.l2bus)
# System memory bus between the L2 cache and the memory controller.
system.membus = SystemXBar()
system.l2cache.connectMemSideBus(system.membus)
system.cpu.createInterruptController()
# Memory controller with a DDR3_1600_8x8 DRAM covering the whole range.
system.mem_ctrl = MemCtrl()
system.mem_ctrl.dram = DDR3_1600_8x8()
system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.mem_side_ports
system.system_port = system.membus.cpu_side_ports
# NOTE(review): 1024 is an unusually large cache line size -- confirm
# that this is intended.
system.cache_line_size = 1024
# Replace the CPU's functional-unit pool with the custom TestFUPool.
system.cpu.executeFuncUnits = TestFUPool()
# Fetch and decode stage limits and buffer sizes, all set to 5.
system.cpu.fetch1FetchLimit = 5
system.cpu.fetch2InputBufferSize = 5
system.cpu.decodeInputWidth = 5
system.cpu.decodeInputBufferSize = 5
# Execute stage input width and issue/commit limits (5), with a 7-entry
# input buffer.
system.cpu.executeInputWidth = 5
system.cpu.executeIssueLimit = 5
system.cpu.executeMemoryIssueLimit = 5
system.cpu.executeMemoryCommitLimit = 5
system.cpu.executeCommitLimit = 5
system.cpu.executeInputBufferSize = 7
# Left disabled by the author; executeMemoryWidth keeps its default value.
##system.cpu.executeMemoryWidth = 256
# Load/store queue, store buffer and in-flight memory access limits, all 16.
system.cpu.executeLSQRequestsQueueSize =16
system.cpu.executeLSQStoreBufferSize =16
system.cpu.executeMaxAccessesInMemory = 16
system.cpu.executeLSQMaxStoreBufferStoresPerCycle = 16
# Set up the workload: run the binary given on the command line as a single
# process on the CPU (full_system=False is passed to Root below).
system.workload = SEWorkload.init_compatible(args.binary)
process = Process()
process.cmd = [args.binary]
system.cpu.workload = process
system.cpu.createThreads()

# Build the simulation root and instantiate the configured objects.
root = Root(full_system=False, system=system)
m5.instantiate()

# Run to completion and report the exit tick and cause.
print("Beginning simulation!")
exit_event = m5.simulate()
print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}")
Below is a sample execution trace:
11129780: system.cpu: T0 : 0x10390 @TestFunc+28 : vle16_v v24, (a3)
11129780: system.cpu: T0 : 0x10390 @TestFunc+28. 0 : vle16_v_micro v24, 0(a3), v24
11129781: system.cpu: T0 : 0x10390 @TestFunc+28. 1 : vle16_v_micro v25, 128(a3), v25
11129782: system.cpu: T0 : 0x10390 @TestFunc+28. 2 : vle16_v_micro v26, 256(a3), v26
11129783: system.cpu: T0 : 0x10390 @TestFunc+28. 3 : vle16_v_micro v27, 384(a3), v27
11129784: system.cpu: T0 : 0x10390 @TestFunc+28. 4 : vle16_v_micro v28, 512(a3), v28
11129785: system.cpu: T0 : 0x10390 @TestFunc+28. 5 : vle16_v_micro v29, 640(a3), v29
11129786: system.cpu: T0 : 0x10390 @TestFunc+28. 6 : vle16_v_micro v30, 768(a3), v30
11129787: system.cpu: T0 : 0x10390 @TestFunc+28. 7 : vle16_v_micro v31, 896(a3), v31
Thanks,
Sharath
Beta Was this translation helpful? Give feedback.
All reactions