Skip to content

Commit

Permalink
feat: Add support for multi-version metadata
Browse files Browse the repository at this point in the history
As an example, I've added the 5 most recent versions, for comparing against the most recent one
  • Loading branch information
dangotbanned committed Oct 7, 2024
1 parent a618ffc commit 1792340
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 0 deletions.
Binary file not shown.
11 changes: 11 additions & 0 deletions tools/vendor_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,17 @@ def request_trees_to_df(tag: str, /) -> pl.DataFrame:
return df.select(*sorted(df.columns))


def request_trees_to_df_batched(*tags: str, delay: int = 5) -> pl.DataFrame:
    """
    Request the trees for several tags and combine them into one frame.

    Tags are processed one at a time, in the order given. Every request
    (including the first) is preceded by a pause of ``delay`` seconds plus
    a random extra drawn from ``random.triangular()``.

    Parameters
    ----------
    *tags
        Tags forwarded, one by one, to ``request_trees_to_df``.
    delay
        Base number of seconds to wait before each request.

    Returns
    -------
    pl.DataFrame
        The result of ``pl.concat`` over the per-tag frames.
    """
    from random import triangular
    from time import sleep

    def _fetch_after_pause(tag: str):
        # Wait first, then request: the jitter keeps successive calls from
        # being evenly spaced.
        sleep(delay + triangular())
        return request_trees_to_df(tag)

    return pl.concat([_fetch_after_pause(tag) for tag in tags])


def collect_metadata(tag: str, /, fp: Path, *, write_schema: bool = True) -> None:
metadata = request_trees_to_df(tag)
if not fp.exists():
Expand Down

0 comments on commit 1792340

Please sign in to comment.