Skip to content

Commit

Permalink
perf(io): speed up merlin summary generation by excluding duplicates
Browse files Browse the repository at this point in the history
  • Loading branch information
kmnhan committed Apr 15, 2024
1 parent aa3140c commit d6b4253
Showing 1 changed file with 19 additions and 8 deletions.
27 changes: 19 additions & 8 deletions src/erlab/io/plugins/merlin.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class BL403Loader(LoaderBase):
"hv": "BL Energy",
"polarization": "EPU POL",
"temp_sample": "Temperature Sensor B",
"photon_flux": "Mesh Current",
"mesh_current": "Mesh Current",
}
coordinate_attrs: tuple[str, ...] = (
"beta",
Expand All @@ -41,7 +41,7 @@ class BL403Loader(LoaderBase):
"y",
"z",
"polarization",
"photon_flux",
"mesh_current",
)
additional_attrs: dict[str, str | int | float] = {
"configuration": 1,
Expand All @@ -50,6 +50,9 @@ class BL403Loader(LoaderBase):
always_single: bool = False

def load_single(self, file_path: str | os.PathLike) -> xr.DataArray:
if os.path.splitext(file_path)[1] == ".ibw":
return self.load_live(file_path)

data = load_experiment(file_path)
# One file always corresponds to a single region, so assume there is only one data variable
data: xr.DataArray = data.data_vars[next(iter(data.data_vars.keys()))]
Expand Down Expand Up @@ -138,12 +141,12 @@ def generate_summary(
) -> pd.DataFrame:
files: dict[str, str] = {}

for pth in erlab.io.utilities.get_files(data_dir, extensions=(".pxt",)):
data_name = os.path.splitext(os.path.basename(pth))[0]
for path in erlab.io.utilities.get_files(data_dir, extensions=(".pxt",)):
data_name = os.path.splitext(os.path.basename(path))[0]
name_match = re.match(r"(.*?_\d{3})_(?:_S\d{3})?", data_name)
if name_match is not None:
data_name = name_match.group(1)
files[data_name] = pth
files[data_name] = path

if not exclude_live:
for file in os.listdir(data_dir):
Expand All @@ -170,11 +173,12 @@ def generate_summary(
"azi": "delta",
}

cols = ["Time", "File Name", "Type", *summary_attrs.keys()]
cols = ["File Name", "Path", "Time", "Type", *summary_attrs.keys()]

data: list[dict] = []
data_info = []

processed_indices: list[int] = []

for name, path in files.items():
if os.path.splitext(path)[1] == ".ibw":
data = self.load_live(path)
Expand All @@ -183,6 +187,12 @@ def generate_summary(
else:
data_type = "LXY"
else:
idx = self.infer_index(os.path.splitext(os.path.basename(path))[0])
if idx in processed_indices:
continue

if idx is not None:
processed_indices.append(idx)
data = self.load(path)
data_type = "core"
if "alpha" in data.dims:
Expand All @@ -194,11 +204,12 @@ def generate_summary(

data_info.append(
[
name,
path,
datetime.datetime.strptime(
f"{data.attrs['Date']} {data.attrs['Time']}",
"%d/%m/%Y %I:%M:%S %p",
),
name,
data_type,
]
)
Expand Down

0 comments on commit d6b4253

Please sign in to comment.