diff --git a/.github/workflows/run-profiler-regression.yaml b/.github/workflows/run-profiler-regression.yaml index fbedb2b0d1f..b383616bfea 100644 --- a/.github/workflows/run-profiler-regression.yaml +++ b/.github/workflows/run-profiler-regression.yaml @@ -13,11 +13,12 @@ jobs: fail-fast: false matrix: runner-info: [ - # No GS as tests now require synced starts. GS profiler tests will run on dedicated BMs + # E150 + {arch: grayskull, runs-on: ["grayskull"], name: E150}, # N150 - {arch: wormhole_b0, runs-on: ["wormhole_b0", "multi-chip-num-pcie-1", "multi-chip-num-chips-1"]}, + {arch: wormhole_b0, runs-on: ["wormhole_b0", "multi-chip-num-pcie-1", "multi-chip-num-chips-1"], name: N150}, # N300 - {arch: wormhole_b0, runs-on: ["wormhole_b0", "multi-chip-num-pcie-1", "multi-chip-num-chips-2"]}, + {arch: wormhole_b0, runs-on: ["wormhole_b0", "multi-chip-num-pcie-1", "multi-chip-num-chips-2"], name: N300}, ] env: TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} diff --git a/tests/scripts/run_performance.sh b/tests/scripts/run_performance.sh index 0b0a0692c96..fb696df04b8 100755 --- a/tests/scripts/run_performance.sh +++ b/tests/scripts/run_performance.sh @@ -65,7 +65,7 @@ run_device_perf_models() { if [ "$tt_arch" == "grayskull" ]; then - #TODO(MO): Until #6560 is fixed, GS device profiler test are grouped with - #Model Device perf regression tests to make sure thy run on no-soft-reset BMs - tests/scripts/run_profiler_regressions.sh PROFILER + #TODO(MO): Until #6560 is fixed, GS device profiler tests are grouped with + #Model Device perf regression tests to make sure they run on no-soft-reset BMs + tests/scripts/run_profiler_regressions.sh PROFILER_NO_RESET env pytest models/demos/metal_BERT_large_11/tests -m $test_marker diff --git a/tests/scripts/run_profiler_regressions.sh b/tests/scripts/run_profiler_regressions.sh index 5f94633aed5..fe7fedcea79 100755 --- a/tests/scripts/run_profiler_regressions.sh +++ b/tests/scripts/run_profiler_regressions.sh @@ -36,7 +36,12 @@ run_profiling_test(){ run_additional_T3000_test - TT_METAL_DEVICE_PROFILER=1 pytest $PROFILER_TEST_SCRIPTS_ROOT/test_device_profiler.py -vvv + TT_METAL_DEVICE_PROFILER=1 
pytest $PROFILER_TEST_SCRIPTS_ROOT/test_device_profiler.py::test_custom_cycle_count -vvv + TT_METAL_DEVICE_PROFILER=1 pytest $PROFILER_TEST_SCRIPTS_ROOT/test_device_profiler.py::test_full_buffer -vvv + #TODO(MO): Needed until #6560 is fixed. + if [ "$ARCH_NAME" != "grayskull" ]; then + TT_METAL_DEVICE_PROFILER=1 pytest $PROFILER_TEST_SCRIPTS_ROOT/test_device_profiler.py::test_multi_op -vvv + fi remove_default_log_locations @@ -51,6 +56,22 @@ run_profiling_test(){ remove_default_log_locations } +run_profiling_no_reset_test(){ + if [[ -z "$ARCH_NAME" ]]; then + echo "Must provide ARCH_NAME in environment" 1>&2 + exit 1 + fi + + echo "Make sure this test runs in a build with ENABLE_PROFILER=1 ENABLE_TRACY=1" + + source build/python_env/bin/activate + export PYTHONPATH=$TT_METAL_HOME + + TT_METAL_DEVICE_PROFILER=1 pytest $PROFILER_TEST_SCRIPTS_ROOT/test_device_profiler.py::test_multi_op -vvv + + remove_default_log_locations +} + run_post_proc_test(){ source build/python_env/bin/activate export PYTHONPATH=$TT_METAL_HOME @@ -62,6 +83,8 @@ cd $TT_METAL_HOME if [[ $1 == "PROFILER" ]]; then run_profiling_test +elif [[ $1 == "PROFILER_NO_RESET" ]]; then + run_profiling_no_reset_test elif [[ $1 == "POST_PROC" ]]; then run_post_proc_test else diff --git a/tt_metal/jit_build/genfiles.cpp b/tt_metal/jit_build/genfiles.cpp index 262c4b9344c..9c244ddd913 100644 --- a/tt_metal/jit_build/genfiles.cpp +++ b/tt_metal/jit_build/genfiles.cpp @@ -411,10 +411,12 @@ std::string generate_bank_to_noc_coord_descriptor_string( ss << endl; ss << "extern uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS];" << endl; ss << "extern int32_t bank_to_dram_offset[NUM_DRAM_BANKS];" << endl; - ss << "extern int32_t noc_xy_to_profiler_flat_id[noc_size_x][noc_size_y];" << endl; ss << "extern uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS];" << endl; ss << "extern int32_t bank_to_l1_offset[NUM_L1_BANKS];" << endl; + ss << "#if defined(COMPILE_FOR_BRISC) || defined(COMPILE_FOR_NCRISC) || 
defined(COMPILE_FOR_ERISC)" << endl; + ss << "extern uint8_t noc_xy_to_profiler_flat_id[noc_size_x][noc_size_y];" << endl; ss << "extern uint16_t profiler_core_count_per_dram;" << endl; + ss << "#endif" << endl; ss << endl; ss << "#else // !KERNEL_BUILD (FW_BUILD)" << endl; @@ -451,17 +453,18 @@ std::string generate_bank_to_noc_coord_descriptor_string( * For DRAM banks in particular, integer division of flat_id/core_count_per_dram gives the dram bank id and the modulo * is the offset. * */ + ss << "#if defined(COMPILE_FOR_BRISC) || defined(COMPILE_FOR_NCRISC) || defined(COMPILE_FOR_ERISC)" << endl; ss << "uint16_t profiler_core_count_per_dram __attribute__((used)) = "; ss << core_count_per_dram << ";" << endl; ss << endl; - ss << "int32_t noc_xy_to_profiler_flat_id[noc_size_x][noc_size_y] __attribute__((used)) = {" << endl; + ss << "uint8_t noc_xy_to_profiler_flat_id[noc_size_x][noc_size_y] __attribute__((used)) = {" << endl; for (unsigned int x = 0; x < grid_size.x; x++) { ss << " {" << endl; for (unsigned int y = 0; y < grid_size.y; y++) { CoreCoord core = {x,y}; if (profiler_flat_id_map.find(core) == profiler_flat_id_map.end()){ - ss << " " << -1 << "," << endl; + ss << " " << 255 << "," << endl; } else{ ss << " " << profiler_flat_id_map.at(core) << "," << endl; @@ -471,6 +474,7 @@ std::string generate_bank_to_noc_coord_descriptor_string( } ss << "};" << endl; ss << endl; + ss << "#endif" << endl; #endif