-
Notifications
You must be signed in to change notification settings - Fork 17
/
benchmark_schema.py
122 lines (95 loc) · 4.03 KB
/
benchmark_schema.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from sqlalchemy import Boolean, Column, DateTime, Float, Integer, String
from sqlalchemy.orm import declarative_base
# Declarative base that the ORM model classes in this file inherit from.
Base = declarative_base()
class TestRun(Base):
    """ORM model mapped to the ``test_run`` table.

    Rows are keyed by an integer primary key. Only ``session_id``, ``name``
    and ``originalname`` are required; every other column is nullable.
    """

    __tablename__ = "test_run"

    # --- Identity: unique ID of the run ---
    id = Column(Integer, primary_key=True)

    # --- Data reported by pytest ---
    session_id = Column(String, nullable=False)
    name = Column(String, nullable=False)
    originalname = Column(String, nullable=False)
    path = Column(String, nullable=True)
    setup_outcome = Column(String, nullable=True)
    call_outcome = Column(String, nullable=True)
    teardown_outcome = Column(String, nullable=True)

    # --- Runtime environment (package versions, interpreter, platform) ---
    coiled_runtime_version = Column(String, nullable=True)
    coiled_software_name = Column(String, nullable=True)
    dask_version = Column(String, nullable=True)
    dask_expr_version = Column(String, nullable=True)
    distributed_version = Column(String, nullable=True)
    python_version = Column(String, nullable=True)
    platform = Column(String, nullable=True)

    # --- CI runner ---
    ci_run_url = Column(String, nullable=True)

    # --- Wall clock measurements ---
    start = Column(DateTime, nullable=True)
    end = Column(DateTime, nullable=True)
    duration = Column(Float, nullable=True)

    # --- Memory measurements ---
    average_memory = Column(Float, nullable=True)
    peak_memory = Column(Float, nullable=True)

    # --- Duration breakdown ---
    compute_time = Column(Float, nullable=True)
    disk_spill_time = Column(Float, nullable=True)
    serializing_time = Column(Float, nullable=True)
    transfer_time = Column(Float, nullable=True)

    # --- Scheduler measurements ---
    scheduler_cpu_avg = Column(Float, nullable=True)
    scheduler_memory_max = Column(Float, nullable=True)

    # --- Event loop measurements ---
    worker_max_tick = Column(Float, nullable=True)
    scheduler_max_tick = Column(Float, nullable=True)

    # --- Cluster identification: name, ID and details URL ---
    cluster_name = Column(String, nullable=True)
    cluster_id = Column(Integer, nullable=True)
    cluster_details_url = Column(String, nullable=True)

    # --- Links to artifacts ---
    performance_report_url = Column(String, nullable=True)  # Not yet collected
    cluster_dump_url = Column(String, nullable=True)
    memray_profiles_url = Column(String, nullable=True)
    py_spy_profiles_url = Column(String, nullable=True)
class TPCHRun(Base):
    """ORM model mapped to the ``tpch_run`` table.

    Rows are keyed by an integer primary key. ``session_id``, ``name``,
    ``originalname``, ``scale``, ``query`` and ``local`` are required; every
    other column is nullable.
    """

    __tablename__ = "tpch_run"

    # unique run ID
    id = Column(Integer, primary_key=True)

    # pytest data
    session_id = Column(String, nullable=False)
    name = Column(String, nullable=False)
    originalname = Column(String, nullable=False)
    path = Column(String, nullable=True)
    setup_outcome = Column(String, nullable=True)
    call_outcome = Column(String, nullable=True)
    teardown_outcome = Column(String, nullable=True)

    # Runtime data
    dask_version = Column(String, nullable=True)
    dask_expr_version = Column(String, nullable=True)
    distributed_version = Column(String, nullable=True)
    duckdb_version = Column(String, nullable=True)
    pyspark_version = Column(String, nullable=True)
    polars_version = Column(String, nullable=True)
    python_version = Column(String, nullable=True)
    platform = Column(String, nullable=True)

    # CI runner data
    ci_run_url = Column(String, nullable=True)

    # Wall clock data
    start = Column(DateTime, nullable=True)
    end = Column(DateTime, nullable=True)
    duration = Column(Float, nullable=True)

    # Memory data
    average_memory = Column(Float, nullable=True)
    peak_memory = Column(Float, nullable=True)

    # Cluster name/id/details_url
    cluster_name = Column(String, nullable=True)
    cluster_id = Column(Integer, nullable=True)
    cluster_details_url = Column(String, nullable=True)

    # Required run parameters
    # NOTE(review): presumably the TPC-H scale and query number, and whether
    # the run was local rather than on a cluster; confirm against the writer.
    scale = Column(Integer, nullable=False)
    query = Column(Integer, nullable=False)
    local = Column(Boolean, nullable=False)

    # Optional dataset settings
    compression = Column(String, nullable=True)
    partition_size = Column(String, nullable=True)

    # Optional cluster settings
    n_workers = Column(Integer, nullable=True)
    worker_vm_type = Column(String, nullable=True)
    cluster_disk_size = Column(Integer, nullable=True)