Commit
Merge pull request #98 from bytedance/gyj/add_version
Add Version and Execution Date info to reports
YJessicaGao authored Aug 26, 2024
2 parents b5ac619 + babd3aa commit 7c49815
Showing 6 changed files with 57 additions and 22 deletions.
3 changes: 3 additions & 0 deletions VERSION
@@ -0,0 +1,3 @@
major=1
minor=0
patch=0
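
A minimal sketch (not part of the commit) of how these three key=value lines become a dotted version string; it mirrors the get_version helpers added in the diffs below:

import pathlib

# Sketch only: same parsing as the new get_version helpers.
lines = pathlib.Path("VERSION").read_text().splitlines()  # ["major=1", "minor=0", "patch=0"]
version = ".".join(line.split("=")[1] for line in lines)
print(version)  # -> 1.0.0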
17 changes: 17 additions & 0 deletions byte_infer_perf/general_perf/core/perf_engine.py
@@ -18,6 +18,7 @@
import json
import subprocess
import time
import traceback

from typing import Any, Dict, Tuple
import virtualenv
@@ -70,6 +71,19 @@ def __init__(self) -> None:
self.prev_sys_path = list(sys.path)
self.real_prefix = sys.prefix
self.compile_only_mode = False
self.version = self.get_version()

def get_version(self):
version = ""
try:
version_file = os.path.join(str(BYTE_MLPERF_ROOT), "../VERSION")
with open(version_file) as f:
_version = f.read().splitlines()
version = '.'.join(v.split('=')[1] for v in _version)
except Exception as e:
traceback.print_exc()
log.warning(f"get bytemlperf version failed, error msg: {e}")
return version

def start_engine(self) -> None:
'''
@@ -168,6 +182,9 @@ def single_workload_perf(
base_report.pop("Backend")
return compile_info["compile_status"], base_report

base_report["Version"] = self.version
base_report["Execution Date"] = time.strftime("%Y-%m-%d %H:%M:%S")

# load runtime backend
"""
Start Here
17 changes: 17 additions & 0 deletions byte_infer_perf/llm_perf/launch.py
@@ -21,6 +21,7 @@
import multiprocessing as mp
import signal
from typing import Any, Dict, Iterable, List
import traceback

# ${prj_root}/
BYTE_MLPERF_ROOT = pathlib.Path(__file__).parents[1]
@@ -45,12 +46,26 @@ def __init__(self, hardware, task, host, port) -> None:
self.result_queue = mp.Queue()
self.jobs: List[mp.Process] = []
self.server_process = None
self.version = self.get_version()


def __del__(self):
self.stop_server()


def get_version(self):
version = ""
try:
version_file = os.path.join(str(BYTE_MLPERF_ROOT), "../VERSION")
with open(version_file) as f:
_version = f.read().splitlines()
version = '.'.join(v.split('=')[1] for v in _version)
except Exception as e:
traceback.print_exc()
logger.warning(f"get bytemlperf version failed, error msg: {e}")
return version


def start_engine(self) -> None:
# load workload
workload = load_workload(self.task)
@@ -85,6 +100,8 @@ def start_engine(self) -> None:

test_perf=test_perf,
test_accuracy=test_accuracy,

version=self.version,
)
self.reporter.start()

4 changes: 4 additions & 0 deletions byte_infer_perf/llm_perf/utils/reporter.py
@@ -68,6 +68,7 @@ def __init__(
max_new_tokens: int,
test_perf: bool,
test_accuracy: bool,
version: str="",
) -> None:
self._running: bool = False
self.cond: threading.Condition = threading.Condition()
@@ -87,12 +88,15 @@ def __init__(
self.tp_size = tp_size
self.batch_size = batch_size
self.input_tokens = input_tokens
self.version = version

# result template
self.result: Dict[str, Any] = {
"Model": self.task,
"Backend": self.backend,
"Host Info": get_cpu_name(),
"Version": self.version,
"Execution Date": time.strftime("%Y-%m-%d %H:%M:%S"),
"Min New Tokens": min_new_tokens,
"Max New Tokens": max_new_tokens,
"Accuracy": {"PPL": [], "Token Diff": {}, "Logits Diff": {}},
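
Note that version is declared with a default (version: str = ""), so Reporter call sites that predate this commit keep working unchanged; only launch.py passes the new keyword. A stripped-down stand-in (not the real Reporter, task name illustrative) showing the pattern:

class MiniReporter:
    # Hypothetical stand-in for the keyword-with-default pattern.
    def __init__(self, task: str, version: str = "") -> None:
        self.task = task
        self.version = version   # "" when the caller does not opt in

MiniReporter("chatglm2-torch-fp16-6b")                   # old call site: version == ""
MiniReporter("chatglm2-torch-fp16-6b", version="1.0.0")  # new call site, as in launch.py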
1 change: 0 additions & 1 deletion byte_micro_perf/backends/GPU/backend_gpu.py
@@ -35,7 +35,6 @@


class BackendGPU(Backend):

def get_device_count(self):
return torch.cuda.device_count()

37 changes: 16 additions & 21 deletions byte_micro_perf/core/perf_engine.py
@@ -212,16 +212,26 @@ def __init__(self) -> None:
self.old_os_path = os.environ["PATH"]
self.prev_sys_path = list(sys.path)
self.real_prefix = sys.prefix
self.version = self.get_version()

def get_version(self):
version = ""
try:
version_file = os.path.join(str(BYTE_MLPERF_ROOT), "../VERSION")
with open(version_file) as f:
_version = f.read().splitlines()
version = '.'.join(v.split('=')[1] for v in _version)
except Exception as e:
traceback.print_exc()
log.warning(f"get bytemlperf version failed, error msg: {e}")
return version

def get_cpu_name(self):
command = "lscpu | grep 'Model name' | awk -F: '{print $2}'"
cpu_name = subprocess.check_output(command, shell=True)
return cpu_name.decode().strip()
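
get_cpu_name above shells out to an lscpu | grep | awk pipeline; a rough shell-free equivalent is sketched here (an assumption-laden alternative, not code from the commit, requiring a Linux host with lscpu on PATH):

import subprocess

def get_cpu_name_no_shell() -> str:
    # Run lscpu directly and parse the "Model name" line in Python.
    out = subprocess.check_output(["lscpu"]).decode()
    for line in out.splitlines():
        if line.startswith("Model name"):
            return line.split(":", 1)[1].strip()
    return ""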



def start_engine(self) -> None:

if self.args.activate_venv:
self.activate_venv(self.backend_type)

@@ -270,7 +280,6 @@ def start_engine(self) -> None:
for shape in shape_list:
test_list.append(ConfigInstance(dtype, shape, case_index))
case_index = case_index + 1


try:
mp.set_start_method("spawn", force=True)
@@ -287,10 +296,6 @@
if self.workload["operator"] in ["device2host", "host2device"]:
instance_num = 1





input_queues = mp.Queue()
output_queues = mp.Queue(maxsize=1)

@@ -308,16 +313,13 @@
assert "ready" == output_queues.get()
log.info("all ranks are ready and listening, init done")



if group == 1:
for test_instance in test_list:
input_queues.put(test_instance, True)

for _ in range(instance_num):
input_queues.put("end", True)


for process in _subprocesses.processes:
process.join()

@@ -330,9 +332,6 @@
if self.args.activate_venv:
self.deactivate_venv()




def perf_func(self, rank: int, *args):
backend_instance = self.backend_class(self.workload, self.args.vendor_path)
op_name = self.workload["operator"]
@@ -342,7 +341,6 @@ def perf_func(self, rank: int, *args):
# set device according to local_rank
set_device_func = getattr(backend_instance, "set_device")
set_device_func(rank)


if world_size > 1:
init_ccl_func = getattr(backend_instance, "initialize_ccl")
@@ -354,7 +352,6 @@
else:
raise ValueError(f"Unknown operation: {op_name.lower()}")


output_queues.put("ready")

result_list = []
@@ -396,7 +393,6 @@

result_list = sorted(output_result_list, key=lambda x: x.config.index)


elif group_size > 1:
for i, test_instance in enumerate(test_list):
if rank == 0:
@@ -421,7 +417,6 @@

result_list.append(ResultItem(test_instance, reports))


if rank == 0:
print(f"{len(result_list)} tasks finished.")

@@ -439,9 +434,11 @@
"Backend": self.backend_type,
"Host Info": self.get_cpu_name(),
"Device Info": getattr(self.backend, "get_device_name")(),
"Version": self.version,
"Execution Date": time.strftime("%Y-%m-%d %H:%M:%S"),
"Performance": [result.report for result in dtype_results_mapping[dtype]]
}

filename = (
f"result-{str(dtype)}"
+ (
@@ -460,8 +457,6 @@
destroy_group_func()

return True



def activate_venv(self, hardware_type: str) -> bool:
if os.path.exists("backends/" + hardware_type + "/requirements.txt"):
