Skip to content

Commit

Permalink
perf: use perfUtils in Utility (#179)
Browse files Browse the repository at this point in the history
  • Loading branch information
Tang-Haojin authored Jul 11, 2024
1 parent 6e15b5b commit f9dffb2
Show file tree
Hide file tree
Showing 12 changed files with 119 additions and 182 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
# This workflow contains a single job called "build"
tl-test_L2:
# The type of runner that the job will run on
runs-on: ubuntu-latest
runs-on: ubuntu-24.04

# Steps represent a sequence of tasks that will be executed as part of the job
steps:
Expand All @@ -35,7 +35,7 @@ jobs:
uses: coursier/cache-action@v5

- name: Verilator
run: sudo apt install verilator
run: sudo apt install verilator libsqlite3-dev

- name: Setup Mill
uses: jodersky/setup-mill@v0.2.3
Expand All @@ -60,7 +60,7 @@ jobs:
tl-test_L2L3:
# The type of runner that the job will run on
runs-on: ubuntu-latest
runs-on: ubuntu-24.04

# Steps represent a sequence of tasks that will be executed as part of the job
steps:
Expand All @@ -77,7 +77,7 @@ jobs:
uses: coursier/cache-action@v5

- name: Verilator
run: sudo apt install verilator
run: sudo apt install verilator libsqlite3-dev

- name: Setup Mill
uses: jodersky/setup-mill@v0.2.3
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/huancun/DataStorage.scala
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ package huancun
import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import huancun.utils.{SRAMWrapper, XSPerfAccumulate}
import huancun.utils.SRAMWrapper
import utility._

class DataStorage(implicit p: Parameters) extends HuanCunModule {
Expand Down Expand Up @@ -264,7 +264,7 @@ class DataStorage(implicit p: Parameters) extends HuanCunModule {
val debug_stack_used = PopCount(bank_en.grouped(stackSize).toList.map(seq => Cat(seq).orR))

for (i <- 1 to nrStacks) {
XSPerfAccumulate(cacheParams, s"DS_${i}_stacks_used", debug_stack_used === i.U)
XSPerfAccumulate(s"DS_${i}_stacks_used", debug_stack_used === i.U)
}

}
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/HuanCun.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import freechips.rocketchip.tilelink._
import freechips.rocketchip.tilelink.TLMessages._
import freechips.rocketchip.util.{BundleField, BundleFieldBase, UIntToOH1}
import huancun.prefetch._
import utils.{ResetGen, XSPerfAccumulate}
import utils.ResetGen
import utility.{Pipeline, FastArbiter}
import huancun.noninclusive.MSHR

Expand Down
22 changes: 11 additions & 11 deletions src/main/scala/huancun/MSHRAlloc.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import huancun.utils._
import utility.{ParallelOR, ParallelPriorityMux}
import utility._
import freechips.rocketchip.tilelink._

class MSHRSelector(implicit p: Parameters) extends HuanCunModule {
Expand Down Expand Up @@ -192,9 +192,9 @@ class MSHRAlloc(implicit p: Parameters) extends HuanCunModule {
}
val cntEnable =
!io.status(i).valid && cnt =/= 0.U && cntStart && cnt < 5000.U // Ignore huge cnt during L3 dir reset
XSPerfHistogram(cacheParams, "mshr_latency_" + Integer.toString(i, 10), cnt, cntEnable, 0, 300, 10, rStrict = true)
XSPerfHistogram(cacheParams, "mshr_latency_" + Integer.toString(i, 10), cnt, cntEnable, 300, 1000, 50, lStrict = true)
XSPerfMax(cacheParams, "mshr_latency", cnt, cntEnable)
XSPerfHistogram("mshr_latency_" + Integer.toString(i, 10), cnt, cntEnable, 0, 300, 10, right_strict = true)
XSPerfHistogram("mshr_latency_" + Integer.toString(i, 10), cnt, cntEnable, 300, 1000, 50, right_strict = true)
XSPerfMax("mshr_latency", cnt, cntEnable)
}
}

Expand All @@ -203,13 +203,13 @@ class MSHRAlloc(implicit p: Parameters) extends HuanCunModule {
(s.bits.set(block_granularity - 1, 0) === io.a_req.bits.set(block_granularity - 1, 0))
))

XSPerfAccumulate(cacheParams, "nrWorkingABCmshr", PopCount(io.status.init.init.map(_.valid)))
XSPerfAccumulate(cacheParams, "nrWorkingBmshr", io.status.take(mshrs+1).last.valid)
XSPerfAccumulate(cacheParams, "nrWorkingCmshr", io.status.last.valid)
XSPerfAccumulate(cacheParams, "conflictA", io.a_req.valid && conflict_a)
XSPerfAccumulate(cacheParams, "conflictByPrefetch", io.a_req.valid && Cat(pretch_block_vec).orR)
XSPerfAccumulate(cacheParams, "conflictB", io.b_req.valid && conflict_b)
XSPerfAccumulate(cacheParams, "conflictC", io.c_req.valid && conflict_c)
XSPerfAccumulate("nrWorkingABCmshr", PopCount(io.status.init.init.map(_.valid)))
XSPerfAccumulate("nrWorkingBmshr", io.status.take(mshrs+1).last.valid)
XSPerfAccumulate("nrWorkingCmshr", io.status.last.valid)
XSPerfAccumulate("conflictA", io.a_req.valid && conflict_a)
XSPerfAccumulate("conflictByPrefetch", io.a_req.valid && Cat(pretch_block_vec).orR)
XSPerfAccumulate("conflictB", io.b_req.valid && conflict_b)
XSPerfAccumulate("conflictC", io.c_req.valid && conflict_c)
//val perfinfo = IO(new Bundle(){
// val perfEvents = Output(new PerfEventsBundle(numPCntHcMSHR))
//})
Expand Down
17 changes: 8 additions & 9 deletions src/main/scala/huancun/RequestBuffer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ package huancun
import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import huancun.utils.XSPerfAccumulate
import utility.FastArbiter
import utility.{FastArbiter, XSPerfAccumulate}

class RequestBuffer(flow: Boolean = true, entries: Int = 16)(implicit p: Parameters) extends HuanCunModule {

Expand Down Expand Up @@ -92,18 +91,18 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 16)(implicit p: Paramet
}
}

XSPerfAccumulate(cacheParams, "req_buffer_merge", dup && !full)
XSPerfAccumulate("req_buffer_merge", dup && !full)
if(flow){
XSPerfAccumulate(cacheParams, "req_buffer_flow", no_ready_entry && io.in.fire)
XSPerfAccumulate("req_buffer_flow", no_ready_entry && io.in.fire)
}
XSPerfAccumulate(cacheParams, "req_buffer_alloc", alloc)
XSPerfAccumulate(cacheParams, "req_buffer_full", full)
XSPerfAccumulate("req_buffer_alloc", alloc)
XSPerfAccumulate("req_buffer_full", full)
for(i <- 0 until entries){
val update = PopCount(valids) === i.U
XSPerfAccumulate(cacheParams, s"req_buffer_util_$i", update)
XSPerfAccumulate(s"req_buffer_util_$i", update)
}
XSPerfAccumulate(cacheParams, "recv_prefetch", io.in.fire && io.in.bits.isPrefetch.getOrElse(false.B))
XSPerfAccumulate(cacheParams, "recv_normal", io.in.fire && !io.in.bits.isPrefetch.getOrElse(false.B))
XSPerfAccumulate("recv_prefetch", io.in.fire && io.in.bits.isPrefetch.getOrElse(false.B))
XSPerfAccumulate("recv_normal", io.in.fire && !io.in.bits.isPrefetch.getOrElse(false.B))
val perfinfo = IO(Output(Vec(numPCntHcReqb, (UInt(6.W)))))
val perfEvents = Seq(
("req_buffer_merge ", dup && !full ),
Expand Down
17 changes: 8 additions & 9 deletions src/main/scala/huancun/TopDownMonitor.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import huancun.noninclusive.DirResult
import huancun.utils.{XSPerfAccumulate, XSPerfHistogram}
import utility.MemReqSource
import utility.{MemReqSource, XSPerfAccumulate, XSPerfHistogram}

class TopDownMonitor()(implicit p: Parameters) extends HuanCunModule {
val banks = 1 << bankBits
Expand Down Expand Up @@ -36,7 +35,7 @@ class TopDownMonitor()(implicit p: Parameters) extends HuanCunModule {
}

addrMatch := Cat(addrMatchVec.flatten).orR
XSPerfAccumulate(cacheParams, s"${cacheParams.name}MissMatch_${hartId}", addrMatch)
XSPerfAccumulate(s"${cacheParams.name}MissMatch_${hartId}", addrMatch)
}

/* ====== PART TWO ======
Expand All @@ -56,16 +55,16 @@ class TopDownMonitor()(implicit p: Parameters) extends HuanCunModule {
// val missVecAll = allMSHRMatchVec(s => s.fromA && s.is_miss)

val totalMSHRs = banks * mshrsAll
XSPerfHistogram(cacheParams, "parallel_misses_CPU" , PopCount(missVecCPU), true.B, 0, totalMSHRs, 1)
XSPerfHistogram(cacheParams, "parallel_misses_Pref", PopCount(missVecPref), true.B, 0, totalMSHRs, 1)
XSPerfHistogram(cacheParams, "parallel_misses_All" , PopCount(missVecCPU)+PopCount(missVecPref), true.B, 0, 32, 1)
XSPerfHistogram("parallel_misses_CPU" , PopCount(missVecCPU), true.B, 0, totalMSHRs, 1)
XSPerfHistogram("parallel_misses_Pref", PopCount(missVecPref), true.B, 0, totalMSHRs, 1)
XSPerfHistogram("parallel_misses_All" , PopCount(missVecCPU)+PopCount(missVecPref), true.B, 0, 32, 1)

/* ====== PART THREE ======
* Distinguish req sources and count num & miss
*/
// count releases
val releaseCnt = allMSHRMatchVec(s => s.will_free && s.fromC)
XSPerfAccumulate(cacheParams, s"${cacheParams.name}C_ReleaseCnt_Total", PopCount(releaseCnt))
XSPerfAccumulate(s"${cacheParams.name}C_ReleaseCnt_Total", PopCount(releaseCnt))

// we can follow the counting logic of Directory to count
// add reqSource in replacerInfo, set in MSHRAlloc, passes in Directory and get the result in DirResult
Expand All @@ -81,7 +80,7 @@ class TopDownMonitor()(implicit p: Parameters) extends HuanCunModule {
val sourceMatchVecMiss = dirResultMatchVec(r => r.replacerInfo.reqSource === i.U && !r.self.hit)

val sourceName = MemReqSource.apply(i).toString
XSPerfAccumulate(cacheParams, s"E2_${cacheParams.name}AReqSource_${sourceName}_Total", PopCount(sourceMatchVec))
XSPerfAccumulate(cacheParams, s"E2_${cacheParams.name}AReqSource_${sourceName}_Miss", PopCount(sourceMatchVecMiss))
XSPerfAccumulate(s"E2_${cacheParams.name}AReqSource_${sourceName}_Total", PopCount(sourceMatchVec))
XSPerfAccumulate(s"E2_${cacheParams.name}AReqSource_${sourceName}_Miss", PopCount(sourceMatchVecMiss))
}
}
24 changes: 12 additions & 12 deletions src/main/scala/huancun/noninclusive/Directory.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import huancun.MetaData._
import huancun._
import huancun.debug.{DirectoryLogger, TypeId}
import huancun.utils._
import utility.{ParallelMax, ParallelPriorityMux}
import utility.{GTimer, ParallelMax, ParallelPriorityMux, XSPerfAccumulate}

trait HasClientInfo { this: HasHuanCunParameters =>
// assume all clients have same params
Expand Down Expand Up @@ -316,18 +316,18 @@ class Directory(implicit p: Parameters)

assert(dirReadPorts == 1)
val req_r = RegEnable(req.bits, req.fire)
XSPerfAccumulate(cacheParams, "selfdir_A_req", req_r.replacerInfo.channel(0) && resp.valid)
XSPerfAccumulate(cacheParams, "selfdir_A_hit", RegNext(req_r.replacerInfo.channel(0) && resp.valid) && resp.bits.self.hit)
XSPerfAccumulate(cacheParams, "selfdir_B_req", req_r.replacerInfo.channel(1) && resp.valid)
XSPerfAccumulate(cacheParams, "selfdir_B_hit", RegNext(req_r.replacerInfo.channel(1) && resp.valid) && resp.bits.self.hit)
XSPerfAccumulate(cacheParams, "selfdir_C_req", req_r.replacerInfo.channel(2) && resp.valid)
XSPerfAccumulate(cacheParams, "selfdir_C_hit", RegNext(req_r.replacerInfo.channel(2) && resp.valid) && resp.bits.self.hit)
XSPerfAccumulate("selfdir_A_req", req_r.replacerInfo.channel(0) && resp.valid)
XSPerfAccumulate("selfdir_A_hit", RegNext(req_r.replacerInfo.channel(0) && resp.valid) && resp.bits.self.hit)
XSPerfAccumulate("selfdir_B_req", req_r.replacerInfo.channel(1) && resp.valid)
XSPerfAccumulate("selfdir_B_hit", RegNext(req_r.replacerInfo.channel(1) && resp.valid) && resp.bits.self.hit)
XSPerfAccumulate("selfdir_C_req", req_r.replacerInfo.channel(2) && resp.valid)
XSPerfAccumulate("selfdir_C_hit", RegNext(req_r.replacerInfo.channel(2) && resp.valid) && resp.bits.self.hit)

XSPerfAccumulate(cacheParams, "selfdir_dirty", RegNext(resp.valid) && resp.bits.self.dirty)
XSPerfAccumulate(cacheParams, "selfdir_TIP", RegNext(resp.valid) && resp.bits.self.state === TIP)
XSPerfAccumulate(cacheParams, "selfdir_BRANCH", RegNext(resp.valid) && resp.bits.self.state === BRANCH)
XSPerfAccumulate(cacheParams, "selfdir_TRUNK", RegNext(resp.valid) && resp.bits.self.state === TRUNK)
XSPerfAccumulate(cacheParams, "selfdir_INVALID", RegNext(resp.valid) && resp.bits.self.state === INVALID)
XSPerfAccumulate("selfdir_dirty", RegNext(resp.valid) && resp.bits.self.dirty)
XSPerfAccumulate("selfdir_TIP", RegNext(resp.valid) && resp.bits.self.state === TIP)
XSPerfAccumulate("selfdir_BRANCH", RegNext(resp.valid) && resp.bits.self.state === BRANCH)
XSPerfAccumulate("selfdir_TRUNK", RegNext(resp.valid) && resp.bits.self.state === TRUNK)
XSPerfAccumulate("selfdir_INVALID", RegNext(resp.valid) && resp.bits.self.state === INVALID)
//val perfinfo = IO(new Bundle(){
// val perfEvents = Output(new PerfEventsBundle(numPCntHcDir))
//})
Expand Down
5 changes: 2 additions & 3 deletions src/main/scala/huancun/noninclusive/ProbeHelper.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.{TLMessages, TLPermissions}
import huancun.{HuanCunModule, MSHRRequest, MetaData}
import huancun.utils.XSPerfAccumulate
import utility.MemReqSource
import utility.{MemReqSource, XSPerfAccumulate}

class ProbeHelper(entries: Int = 5, enqDelay: Int = 1)(implicit p: Parameters)
extends HuanCunModule with HasClientInfo
Expand Down Expand Up @@ -65,7 +64,7 @@ class ProbeHelper(entries: Int = 5, enqDelay: Int = 1)(implicit p: Parameters)

io.probe <> queue.io.deq

XSPerfAccumulate(cacheParams, "client_dir_conflict", queue.io.enq.fire)
XSPerfAccumulate("client_dir_conflict", queue.io.enq.fire)
//val perfinfo = IO(new Bundle(){
// val perfEvents = Output(new PerfEventsBundle(numPCntHcReqb))
//})
Expand Down
122 changes: 0 additions & 122 deletions src/main/scala/huancun/utils/XSPerfAccumulate.scala

This file was deleted.

Loading

0 comments on commit f9dffb2

Please sign in to comment.