Skip to content

Commit

Permalink
Fix feature type detector
Browse files: browse the repository at this point in the history
  • Loading branch information
ThomasMeissnerDS committed Aug 3, 2024
1 parent 5d29d05 commit 68a1e1f
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 19 deletions.
16 changes: 0 additions & 16 deletions bluecast/general_utils/general_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,6 @@ def save_out_of_fold_data(

y_true = y_true.astype(int)

print("DEBUUUUUUUUUUUUUUUUG")
print(y_true.shape)
print(y_true)

print("DEBUUUUUUUUUUUUUUUUG AGAIN")
print(y_hat.shape)
print(y_hat)

oof_data_copy["predicted_class"] = y_classes
oof_data_copy["target_class_predicted_probas"] = [
1 - preds if cls == 0 else preds for preds, cls in zip(y_hat, y_classes)
Expand All @@ -211,14 +203,6 @@ def save_out_of_fold_data(

y_true = y_true.astype(int)

print("DEBUUUUUUUUUUUUUUUUG MULTICLASS")
print(y_true.shape)
print(y_true)

print("DEBUUUUUUUUUUUUUUUUG MULTICLASS AGAIN")
print(y_hat.shape)
print(y_hat)

oof_data_copy["predicted_class"] = y_classes
oof_data_copy["target_class_predicted_probas"] = np.asarray(
[pred[target_cls] for pred, target_cls in zip(y_hat, y_true)]
Expand Down
2 changes: 1 addition & 1 deletion bluecast/ml_modelling/xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,7 @@ def objective(trial):
shuffle=self.conf_training.shuffle_during_training,
)
folds = []
for train_index, test_index in skf.split(x_train, y_train):
for train_index, test_index in skf.split(x_train, y_train.astype(int)):
folds.append((train_index.tolist(), test_index.tolist()))

result = xgb.cv(
Expand Down
7 changes: 6 additions & 1 deletion bluecast/preprocessing/feature_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,11 @@ def fit_transform_feature_types(self, df: pd.DataFrame) -> pd.DataFrame:
bool_cols, no_bool_cols = self.identify_bool_columns(df_clean)
self.identify_date_time_columns(df_clean, no_bool_cols)
df_clean = self.cast_rest_columns_to_object(df_clean, bool_cols)
for key in self.detected_col_types:
if self.detected_col_types[key] == "datetime[ns]":
df_clean[key] = pd.to_datetime(df[key], yearfirst=True)
else:
df_clean[key] = df_clean[key].astype(self.detected_col_types[key])
return df_clean

def transform_feature_types(
Expand All @@ -246,7 +251,7 @@ def transform_feature_types(
for key in self.detected_col_types:
if ignore_cols and key not in ignore_cols and key in df_clean.columns:
if self.detected_col_types[key] == "datetime[ns]":
df_clean[key] = pd.to_datetime(df[key], yearfirst=True)
df_clean[key] = pd.to_datetime(df_clean[key], yearfirst=True)
else:
df_clean[key] = df_clean[key].astype(self.detected_col_types[key])
return df_clean
2 changes: 1 addition & 1 deletion bluecast/tests/test_feature_type_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,4 @@ def test_feature_type_detector():
assert transformed_df["categorical_col2"].dtype == np.object_

# Ensure the transformed dataframe is not the same as the original dataframe (transformed datetime columns)
assert transformed_df.equals(df)
assert not transformed_df.equals(df)

0 comments on commit 68a1e1f

Please sign in to comment.