Skip to content

Commit

Permalink
archive: bdiviz newest version
Browse files Browse the repository at this point in the history
  • Loading branch information
EdenWuyifan committed Sep 25, 2024
1 parent d407f60 commit 4ac01e3
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 8 deletions.
4 changes: 2 additions & 2 deletions bdikit/models/contrastive_learning/cl_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def _load_table_tokens(self, table: pd.DataFrame) -> List[np.ndarray]:
print(f"Table features loaded for {len(table.columns)} columns")
return embeddings

print(f"Extracting features from {len(table.columns)} columns...")
# print(f"Extracting features from {len(table.columns)} columns...")
tables = []
for _, column in enumerate(table.columns):
curr_table = pd.DataFrame(table[column])
Expand All @@ -133,7 +133,7 @@ def _inference_on_tables(self, tables: List[pd.DataFrame]) -> List[List]:
batch = []
results = []

for tid, table in tqdm(enumerate(tables), total=total):
for tid, table in enumerate(tables):
x, _ = self.unlabeled._tokenize(table)
batch.append((x, x, []))

Expand Down
28 changes: 22 additions & 6 deletions bdikit/visualization/schema_matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
# Load Panel's optional JS extensions used by this module's widgets:
# mathjax (formula rendering), vega (Altair heatmap panes), floatpanel
# (floating AI-assistant/JSON panels), jsoneditor (JSONEditor widget).
pn.extension("mathjax") # type: ignore
pn.extension("vega") # type: ignore
pn.extension("floatpanel") # type: ignore
pn.extension("jsoneditor") # type: ignore


def truncate_text(text: str, max_chars: int):
Expand Down Expand Up @@ -99,7 +100,6 @@ def __init__(

self.top_k = max(1, min(top_k, 40))

self.rec_table_df: Optional[pd.DataFrame] = None
self.rec_list_df: Optional[pd.DataFrame] = None
self.rec_cols: Optional[List[str]] = None
self.subschemas = None
Expand Down Expand Up @@ -378,12 +378,10 @@ def _get_heatmap(self) -> None:
rec_cols = list(rec_cols)
rec_cols.sort()

rec_table_df = pd.DataFrame(rec_table)
rec_list_df = pd.DataFrame(rec_list)
rec_list_df["Value"] = pd.to_numeric(rec_list_df["Value"])
rec_list_df["DataFrame"] = rec_list_df["DataFrame"].astype(str)

self.rec_table_df = rec_table_df
self.rec_list_df = rec_list_df
self.rec_cols = rec_cols

Expand All @@ -392,7 +390,9 @@ def _get_heatmap(self) -> None:
self.get_cols_subschema()

def _gen_clusters(self) -> Dict[str, List[Tuple[str, str]]]:
knn = NearestNeighbors(n_neighbors=min(10, len(self.source.columns)), metric="cosine")
knn = NearestNeighbors(
n_neighbors=min(10, len(self.source.columns)), metric="cosine"
)
l_features_flat = []
for _, l_features in self.l_features.items():
l_features_flat.extend(l_features)
Expand Down Expand Up @@ -558,11 +558,14 @@ def _discard_column(self, select_column: Optional[str]) -> None:
logger.critical(f"Invalid column: {select_column}")
return

selected = select_column
if self.selected_row is not None:
selected = self.selected_row["Column"].values[0]
logger.critical(f"Discarding column: {select_column}")
recommendations = self.heatmap_recommendations
for idx, d in enumerate(recommendations):
candidate_name = d["source_column"]
if candidate_name == select_column:
if candidate_name == selected:
recommendations.pop(idx)
self._write_json(recommendations)
self._record_user_action("discard", d)
Expand Down Expand Up @@ -634,7 +637,7 @@ def _plot_heatmap_base(self, heatmap_rec_list: pd.DataFrame) -> pn.pane.Vega:
)
.add_params(single, search_input)
)
background = base.mark_rect(size=100)
background = base.mark_rect()

box_sources = []
if self.additional_sources:
Expand Down Expand Up @@ -1283,9 +1286,22 @@ def plot_ai_assistant(clicks: int) -> Optional[pn.FloatPanel]:
)
return None

def plot_json_file(clicks: int) -> pn.Row:
    """Show the current heatmap recommendations in a floating JSON editor.

    ``clicks`` is supplied by ``pn.bind`` on a button's click count and is
    only used to trigger re-rendering; its value is never read here.
    """
    editor = pn.widgets.JSONEditor(
        value=self.heatmap_recommendations, width=500
    )
    float_panel = pn.FloatPanel(
        editor,
        name="JSON Viewer",
        width=540,
        align="end",
    )
    return pn.Row(float_panel)

return pn.Column(
column_top,
pn.bind(plot_ai_assistant, ai_assistant_button.param.clicks),
# pn.bind(plot_json_file, acc_button.param.clicks),
pn.Spacer(height=5),
pn.Column(heatmap_bind),
scroll=True,
Expand Down

0 comments on commit 4ac01e3

Please sign in to comment.