Skip to content

Commit

Permalink
Merge commit 'cc0cf2d04c39c7571fe0194a8172af37fcd69a7e'
Browse files Browse the repository at this point in the history
  • Loading branch information
whitneywhtsang committed Oct 10, 2024
2 parents cf98f3a + cc0cf2d commit 5623ad7
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 27 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/llvm-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ jobs:
-DLLVM_INSTALL_UTILS=ON
-DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU"
-DLLVM_ENABLE_TERMINFO=OFF
-DLLVM_ABI_BREAKING_CHECKS=FORCE_OFF
llvm-project/llvm
ninja -C llvm-project/build check-mlir install
Expand All @@ -131,7 +130,6 @@ jobs:
-DLLVM_INSTALL_UTILS=ON
-DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU"
-DLLVM_ENABLE_TERMINFO=OFF
-DLLVM_ABI_BREAKING_CHECKS=FORCE_OFF
llvm-project/llvm
ninja -C llvm-project/build check-mlir install
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/llvm-build/almalinux.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ RUN cmake -GNinja -Bbuild \
-DLLVM_ENABLE_PROJECTS=mlir \
-DLLVM_ENABLE_TERMINFO=OFF \
-DLLVM_INSTALL_UTILS=ON \
-DLLVM_ABI_BREAKING_CHECKS=FORCE_OFF \
-DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU" \
/source/llvm-project/llvm

Expand Down
6 changes: 5 additions & 1 deletion python/triton/compiler/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def hash(self):
"ptx": ptx_prototype_pattern,
}

mlir_arg_type_pattern = r'%\w+: ((?:[^,\s<)]+|<[^>]+>)+),?'
mlir_arg_type_pattern = r'%\w+: ((?:[^,\s<)]+|<[^>]+>)+(?: {[^}]+})?),?'
ptx_arg_type_pattern = r"\.param\s+\.(\w+)"
arg_type_pattern = {
"ttir": mlir_arg_type_pattern,
Expand All @@ -71,6 +71,10 @@ def convert_type_repr(x):
# Currently we only capture the pointer type and assume the pointer is on global memory.
# TODO: Capture and support shared memory space
match = re.search(r'!tt\.ptr<([^,]+)', x)
tma = re.search(r'tt.nv_tma_desc = 1', x)
if tma is not None:
return 'nvTmaDesc'
x = re.sub(r' {[^}]+}', '', x)
if match is not None:
return '*' + convert_type_repr(match.group(1))
return x
Expand Down
2 changes: 1 addition & 1 deletion third_party/nvidia/backend/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ def make_cubin(src, metadata, opt, capability):

raise RuntimeError(f'{error}\n'
f'`ptxas` stderr:\n{log}\n'
f'Repro command: {ptxas_cmd}\n')
f'Repro command: {" ".join(ptxas_cmd)}\n')

with open(fbin, 'rb') as f:
cubin = f.read()
Expand Down
1 change: 0 additions & 1 deletion third_party/proton/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,6 @@ The following example demonstrates how to use instruction sampling:
```python
import triton.profiler as proton


proton.start(name="profile_name", context="shadow", backend="cupti_pcsampling")
```

Expand Down
41 changes: 20 additions & 21 deletions third_party/proton/proton/viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,6 @@ def get_min_time_bytes(df, device_info):

def derive_metrics(gf, metrics, raw_metrics, device_info):
derived_metrics = []
original_metrics = []
exclusive_metrics = ["util"] + list(derivable_metrics.keys()) + list(avg_time_factor_dict.factor.keys())
internal_frame_indices = gf.dataframe["device_id"].isna()

def get_time_seconds(df):
Expand All @@ -121,10 +119,10 @@ def get_time_seconds(df):
gf.dataframe["util (inc)"] = min_time_flops["min_time"].combine(min_time_bytes["min_time"], max) / time_sec
gf.dataframe.loc[internal_frame_indices, "util (inc)"] = np.nan
derived_metrics.append("util (inc)")
elif metric in derivable_metrics:
deriveable_metric = derivable_metrics[metric]
metric_name = deriveable_metric.name
metric_factor_dict = deriveable_metric.factor
elif metric in derivable_metrics: # flop<width>/s, <t/g>byte/s
derivable_metric = derivable_metrics[metric]
metric_name = derivable_metric.name
metric_factor_dict = derivable_metric.factor
matched_metric_name = match_available_metrics([metric_name], raw_metrics)[0]
gf.dataframe[f"{metric} (inc)"] = (gf.dataframe[matched_metric_name] / (get_time_seconds(gf.dataframe)) /
metric_factor_dict[metric])
Expand All @@ -134,24 +132,28 @@ def get_time_seconds(df):
gf.dataframe[f"{metric} (inc)"] = (get_time_seconds(gf.dataframe) /
time_factor_dict.factor[metric_time_unit])
derived_metrics.append(f"{metric} (inc)")
metric_name = match_available_metrics([time_factor_dict.name], raw_metrics)[0]
elif metric in avg_time_factor_dict.factor:
metric_time_unit = avg_time_factor_dict.name + "/" + metric.split("/")[1]
gf.dataframe[f"{metric} (inc)"] = (get_time_seconds(gf.dataframe) / gf.dataframe['count'] /
avg_time_factor_dict.factor[metric_time_unit])
gf.dataframe.loc[internal_frame_indices, f"{metric} (inc)"] = np.nan
derived_metrics.append(f"{metric} (inc)")
else:
original_metrics.append(metric)
if metric not in exclusive_metrics:
single_frame = gf.dataframe[metric_name]
total = gf.dataframe[metric_name].iloc[0]
metric = metric.split("/")[0]
gf.dataframe[f"{metric}/% (inc)"] = (single_frame / total) * 100.0
derived_metrics.append(f"{metric}/% (inc)")
if original_metrics:
original_metrics = match_available_metrics(original_metrics, raw_metrics)
return derived_metrics + original_metrics
metric_name_and_unit = metric.split("/")
metric_name = metric_name_and_unit[0]
if len(metric_name_and_unit) > 1:
metric_unit = metric_name_and_unit[1]
if metric_unit != "%":
raise ValueError(f"Unsupported unit {metric_unit}")
matched_metric_name = match_available_metrics([metric_name], raw_metrics)[0]
single_frame = gf.dataframe[matched_metric_name]
total = gf.dataframe[matched_metric_name].iloc[0]
gf.dataframe[f"{metric_name}/% (inc)"] = (single_frame / total) * 100.0
derived_metrics.append(f"{metric_name}/% (inc)")
else:
matched_metric_name = match_available_metrics([metric_name], raw_metrics)[0]
derived_metrics.append(matched_metric_name)
return derived_metrics


def format_frames(gf, format):
Expand Down Expand Up @@ -234,10 +236,7 @@ def main():
- flop[<8/16/32/64>]/s, gflop[<8/16/32/64>]/s, tflop[<8/16/32/64>]/s: flops / time
- byte/s, gbyte/s, tbyte/s: bytes / time
- util: max(sum(flops<width>) / peak_flops<width>_time, sum(bytes) / peak_bandwidth_time)
For inclusive metrics (e.g. time) an additional column is printed showing the percentage
each frame is of the full model.
- <metric>/%%: frame(metric) / sum(metric). Only available for inclusive metrics (e.g. time)
""",
)
argparser.add_argument(
Expand Down

0 comments on commit 5623ad7

Please sign in to comment.