diff --git a/bluecast/blueprints/welcome.py b/bluecast/blueprints/welcome.py index 9ebbaf19..13becced 100644 --- a/bluecast/blueprints/welcome.py +++ b/bluecast/blueprints/welcome.py @@ -7,6 +7,7 @@ from bluecast.blueprints.cast_cv import BlueCastCV from bluecast.blueprints.cast_cv_regression import BlueCastCVRegression from bluecast.blueprints.cast_regression import BlueCastRegression +from bluecast.config.training_config import TrainingConfig class WelcomeToBlueCast: @@ -15,6 +16,21 @@ def __init__(self): self.output = widgets.Output() def automl_configurator(self) -> None: + message = """ + Welcome to BlueCast!\n + This configurator will help find the right configuration for each user in a non-programmatic way. + Make sure to store the instantiated WelcomeToBlueCast instance into a variable + to be able to retrieve the pre-configured automl instance after pressing submit. + + # Sample usage: + welcome = WelcomeToBlueCast() + welcome.automl_configurator() + automl = welcome.automl_instance + automl.fit(df_train, target_col="target") + y_hat = automl.predict(df_val)\n + """ + print(message) + # Create widgets with tooltips task = self._create_task_widget() debug_mode = self._create_debug_mode_widget() @@ -22,12 +38,34 @@ def automl_configurator(self) -> None: shap_values = self._create_shap_values_widget() n_folds = self._create_n_folds_widget() oof_storage_path = self._create_oof_storage_path_widget() + + # New widgets for hyperparameter tuning options + hyperparameter_tuning_rounds = ( + self._create_hyperparameter_tuning_rounds_widget() + ) + hyperparameter_tuning_max_runtime_secs = ( + self._create_hyperparameter_tuning_max_runtime_secs_widget() + ) + plot_hyperparameter_tuning_overview = ( + self._create_plot_hyperparameter_tuning_overview_widget() + ) + show_detailed_tuning_logs = self._create_show_detailed_tuning_logs_widget() + submit_button = self._create_submit_button() # Link the submit button to the on_submit_clicked function submit_button.on_click( 
lambda b: self.on_submit_clicked( - task, debug_mode, n_models, shap_values, n_folds, oof_storage_path + task, + debug_mode, + n_models, + shap_values, + n_folds, + oof_storage_path, + hyperparameter_tuning_rounds, + hyperparameter_tuning_max_runtime_secs, + plot_hyperparameter_tuning_overview, + show_detailed_tuning_logs, ) ) @@ -39,15 +77,20 @@ def automl_configurator(self) -> None: shap_values, n_folds, oof_storage_path, + hyperparameter_tuning_rounds, + hyperparameter_tuning_max_runtime_secs, + plot_hyperparameter_tuning_overview, + show_detailed_tuning_logs, submit_button, self.output, ) + # Widget creation functions def _create_task_widget(self): return widgets.Dropdown( options=["binary", "multiclass", "regression"], description="Task Type:", - tooltip="Select the type of problem you're solving: binary classification (i.e. True/False), multiclass classification (i.e. 'safe', 'risky', 'dangerous'), or regression (continous targets).", + tooltip="Select the type of problem you're solving: binary classification, multiclass classification, or regression.", style={"description_width": "initial"}, layout=widgets.Layout(width="500px"), ) @@ -101,6 +144,46 @@ def _create_oof_storage_path_widget(self): layout=widgets.Layout(width="500px"), ) + def _create_hyperparameter_tuning_rounds_widget(self): + return widgets.IntSlider( + value=200, + min=1, + max=500, + description="Tuning Rounds:", + tooltip="Specify the number of hyperparameter tuning rounds. The more rounds the longer the tuning process. The tuning wll stop earlier when 'Max Tuning Runtime (secs)' has been reached.", + style={"description_width": "initial"}, + layout=widgets.Layout(width="500px"), + ) + + def _create_hyperparameter_tuning_max_runtime_secs_widget(self): + return widgets.IntSlider( + value=3600, + min=60, + max=14400, + description="Max Tuning Runtime (secs):", + tooltip="Specify the maximum runtime in seconds for hyperparameter tuning. (i.e. 
3600 is one hour)", + style={"description_width": "initial"}, + layout=widgets.Layout(width="500px"), + ) + + def _create_plot_hyperparameter_tuning_overview_widget(self): + return widgets.ToggleButtons( + options=[("Yes", True), ("No", False)], + description="Plot Tuning Overview:", + tooltip="Specify whether to plot the hyperparameter tuning overview. This will create charts showing which hyperparameters were most important and the evolution of losses during the tuning..", + style={"description_width": "initial"}, + layout=widgets.Layout(width="500px"), + ) + + def _create_show_detailed_tuning_logs_widget(self): + return widgets.ToggleButtons( + options=[("Yes", True), ("No", False)], + description="Show Detailed Tuning Logs:", + tooltip="Specify whether to show detailed tuning logs during hyperparameter tuning. This will print every single tested hyperparameter set and the evaluation result.", + style={"description_width": "initial"}, + layout=widgets.Layout(width="500px"), + ) + def _create_submit_button(self): return widgets.Button( description="Submit", @@ -108,8 +191,19 @@ def _create_submit_button(self): layout=widgets.Layout(width="200px"), ) + # Function handling form submission def on_submit_clicked( - self, task, debug_mode, n_models, shap_values, n_folds, oof_storage_path + self, + task, + debug_mode, + n_models, + shap_values, + n_folds, + oof_storage_path, + hyperparameter_tuning_rounds, + hyperparameter_tuning_max_runtime_secs, + plot_hyperparameter_tuning_overview, + show_detailed_tuning_logs, ): with self.output: clear_output(wait=True) @@ -122,10 +216,24 @@ def on_submit_clicked( shap_values_value = shap_values.value n_folds_value = n_folds.value oof_storage_path_value = oof_storage_path.value + hyperparameter_tuning_rounds_value = hyperparameter_tuning_rounds.value + hyperparameter_tuning_max_runtime_secs_value = ( + hyperparameter_tuning_max_runtime_secs.value + ) + plot_tuning_overview_value = plot_hyperparameter_tuning_overview.value + 
show_detailed_logs_value = show_detailed_tuning_logs.value # Create the BlueCast instance self.automl_instance = self.instantiate_bluecast_instance( - task_type, n_models_value + task_type, + n_models_value, + n_folds_value, + oof_storage_path_value, + debug_mode_value, + hyperparameter_tuning_rounds_value, + hyperparameter_tuning_max_runtime_secs_value, + plot_tuning_overview_value, + show_detailed_logs_value, ) # Update instance properties @@ -134,24 +242,15 @@ def on_submit_clicked( self.update_debug_flag(debug_mode_value) self.update_hyperparam_folds(n_folds_value) self.update_oof_storage_path(oof_storage_path_value) - print("Finished configuration of BlueCast instance") - - def instantiate_bluecast_instance( - self, task, n_models: int - ) -> Union[BlueCast, BlueCastCV, BlueCastRegression, BlueCastCVRegression]: - if task in ["binary", "multiclass"] and n_models == 1: - return BlueCast(class_problem=task) - elif task in ["binary", "multiclass"] and n_models > 1: - return BlueCastCV(class_problem=task) - elif task == "regression" and n_models == 1: - return BlueCastRegression(class_problem=task) - elif task == "regression" and n_models > 1: - return BlueCastCVRegression(class_problem=task) - else: - raise ValueError( - "No suitable configuration found. Please raise a GitHub issue." 
+ self.update_hyperparameter_tuning_rounds(hyperparameter_tuning_rounds_value) + self.update_hyperparameter_tuning_max_runtime_secs( + hyperparameter_tuning_max_runtime_secs_value ) + self.update_plot_hyperparameter_tuning_overview(plot_tuning_overview_value) + self.update_show_detailed_tuning_logs(show_detailed_logs_value) + print("Finished configuration of BlueCast instance") + # New update methods for hyperparameter tuning parameters def update_calc_shap_flag(self, calc_shap): self.automl_instance.conf_training.calculate_shap_values = calc_shap @@ -165,3 +264,82 @@ def update_oof_storage_path(self, oof_storage_path): self.automl_instance.conf_training.out_of_fold_dataset_store_path = ( oof_storage_path ) + + def update_hyperparameter_tuning_rounds(self, rounds): + self.automl_instance.conf_training.hyperparameter_tuning_rounds = rounds + + def update_hyperparameter_tuning_max_runtime_secs(self, runtime_secs): + self.automl_instance.conf_training.hyperparameter_tuning_max_runtime_secs = ( + runtime_secs + ) + + def update_plot_hyperparameter_tuning_overview(self, plot_tuning_overview): + self.automl_instance.conf_training.plot_hyperparameter_tuning_overview = ( + plot_tuning_overview + ) + + def update_show_detailed_tuning_logs(self, show_logs): + self.automl_instance.conf_training.show_detailed_tuning_logs = show_logs + + # Function to instantiate BlueCast based on the task type + def instantiate_bluecast_instance( + self, + task, + n_models: int, + n_folds: int, + oof_storage_path: str, + autotune_model: bool, + hyperparameter_tuning_rounds: int, + hyperparameter_tuning_max_runtime_secs: int, + plot_hyperparameter_tuning_overview: bool, + show_detailed_tuning_logs: bool, + ) -> Union[BlueCast, BlueCastCV, BlueCastRegression, BlueCastCVRegression]: + # Prepare the configuration for BlueCast + conf_training = TrainingConfig( + autotune_model=autotune_model, + hypertuning_cv_folds=n_folds, + out_of_fold_dataset_store_path=oof_storage_path, + 
hyperparameter_tuning_rounds=hyperparameter_tuning_rounds, + hyperparameter_tuning_max_runtime_secs=hyperparameter_tuning_max_runtime_secs, + plot_hyperparameter_tuning_overview=plot_hyperparameter_tuning_overview, + show_detailed_tuning_logs=show_detailed_tuning_logs, + ) + + if task in ["binary", "multiclass"]: + if n_models == 1: + automl_instance_bc = BlueCast( + class_problem=task, conf_training=conf_training + ) + print( + f"Instantiated BlueCast instance with:\n automl_instance = BlueCast(class_problem={task})\n" + ) + return automl_instance_bc + else: + automl_instance_bcc = BlueCastCV( + class_problem=task, conf_training=conf_training + ) + print( + f"Instantiated BlueCast instance with:\n automl_instance = BlueCastCV(class_problem={task})\n" + ) + return automl_instance_bcc + elif task == "regression": + if n_models == 1: + automl_instance_bcr = BlueCastRegression( + class_problem=task, conf_training=conf_training + ) + print( + f"Instantiated BlueCast instance with:\n automl_instance = BlueCastRegression(class_problem={task})\n" + ) + return automl_instance_bcr + else: + automl_instance_bcrc = BlueCastCVRegression( + class_problem=task, conf_training=conf_training + ) + print( + f"Instantiated BlueCast instance with:\n automl_instance = BlueCastCVRegression(class_problem={task})\n" + ) + return automl_instance_bcrc + else: + raise ValueError( + "No suitable configuration found. Please raise a GitHub issue." 
+ ) diff --git a/bluecast/tests/test_welcome.py b/bluecast/tests/test_welcome.py new file mode 100644 index 00000000..8b89b18b --- /dev/null +++ b/bluecast/tests/test_welcome.py @@ -0,0 +1,116 @@ +from unittest.mock import MagicMock, patch + +import ipywidgets as widgets +import pytest + +from bluecast.blueprints.welcome import WelcomeToBlueCast + + +@pytest.fixture +def welcome(): + """Fixture to initialize WelcomeToBlueCast class.""" + return WelcomeToBlueCast() + + +def test_widget_creation(welcome): + """Test if widgets are created properly.""" + assert isinstance(welcome._create_task_widget(), widgets.Dropdown) + assert isinstance(welcome._create_debug_mode_widget(), widgets.ToggleButtons) + assert isinstance(welcome._create_n_models_widget(), widgets.IntSlider) + assert isinstance(welcome._create_shap_values_widget(), widgets.ToggleButtons) + assert isinstance(welcome._create_n_folds_widget(), widgets.IntSlider) + assert isinstance(welcome._create_oof_storage_path_widget(), widgets.Text) + assert isinstance( + welcome._create_hyperparameter_tuning_rounds_widget(), widgets.IntSlider + ) + assert isinstance( + welcome._create_hyperparameter_tuning_max_runtime_secs_widget(), + widgets.IntSlider, + ) + assert isinstance( + welcome._create_plot_hyperparameter_tuning_overview_widget(), + widgets.ToggleButtons, + ) + assert isinstance( + welcome._create_show_detailed_tuning_logs_widget(), widgets.ToggleButtons + ) + assert isinstance(welcome._create_submit_button(), widgets.Button) + + +@patch("bluecast.blueprints.welcome.clear_output") +def test_on_submit_clicked(mock_clear_output, welcome): + """Test the submission handling logic.""" + task = MagicMock(value="binary") + debug_mode = MagicMock(value=True) + n_models = MagicMock(value=2) + shap_values = MagicMock(value=True) + n_folds = MagicMock(value=5) + oof_storage_path = MagicMock(value="/path/to/oof") + hyperparameter_tuning_rounds = MagicMock(value=100) + hyperparameter_tuning_max_runtime_secs = 
MagicMock(value=3600) + plot_hyperparameter_tuning_overview = MagicMock(value=True) + show_detailed_tuning_logs = MagicMock(value=True) + + # Mock the instantiation of BlueCast to avoid creating real objects + with patch.object( + welcome, "instantiate_bluecast_instance", return_value=MagicMock() + ) as mock_instantiate: + welcome.on_submit_clicked( + task, + debug_mode, + n_models, + shap_values, + n_folds, + oof_storage_path, + hyperparameter_tuning_rounds, + hyperparameter_tuning_max_runtime_secs, + plot_hyperparameter_tuning_overview, + show_detailed_tuning_logs, + ) + + # Ensure clear_output is called + mock_clear_output.assert_called_once() + + # Ensure instantiate_bluecast_instance is called with correct parameters + mock_instantiate.assert_called_once_with( + "binary", 2, 5, "/path/to/oof", True, 100, 3600, True, True + ) + + +def test_bluecast_instance_instantiation(welcome): + """Test if BlueCast instances are correctly instantiated based on task type and number of models.""" + with patch("bluecast.blueprints.welcome.BlueCast") as MockBlueCast, patch( + "bluecast.blueprints.welcome.BlueCastCV" + ) as MockBlueCastCV, patch( + "bluecast.blueprints.welcome.BlueCastRegression" + ) as MockBlueCastRegression, patch( + "bluecast.blueprints.welcome.BlueCastCVRegression" + ) as MockBlueCastCVRegression: + + # Binary classification, single model + automl_instance = welcome.instantiate_bluecast_instance( + "binary", 1, 5, "/path/to/oof", True, 100, 3600, True, True + ) + MockBlueCast.assert_called_once() + assert automl_instance == MockBlueCast.return_value + + # Binary classification, multiple models + automl_instance = welcome.instantiate_bluecast_instance( + "binary", 2, 5, "/path/to/oof", True, 100, 3600, True, True + ) + MockBlueCastCV.assert_called_once() + assert automl_instance == MockBlueCastCV.return_value + + # Regression, single model + automl_instance = welcome.instantiate_bluecast_instance( + "regression", 1, 5, "/path/to/oof", True, 100, 3600, True, 
True + ) + MockBlueCastRegression.assert_called_once() + assert automl_instance == MockBlueCastRegression.return_value + + # Regression, multiple models + automl_instance = welcome.instantiate_bluecast_instance( + "regression", 2, 5, "/path/to/oof", True, 100, 3600, True, True + ) + MockBlueCastCVRegression.assert_called_once() + assert automl_instance == MockBlueCastCVRegression.return_value diff --git a/dist/bluecast-1.6.2-py3-none-any.whl b/dist/bluecast-1.6.2-py3-none-any.whl index 2c5bb7f2..f13a8f3b 100644 Binary files a/dist/bluecast-1.6.2-py3-none-any.whl and b/dist/bluecast-1.6.2-py3-none-any.whl differ diff --git a/dist/bluecast-1.6.2.tar.gz b/dist/bluecast-1.6.2.tar.gz index 55dbb39d..f3e58e26 100644 Binary files a/dist/bluecast-1.6.2.tar.gz and b/dist/bluecast-1.6.2.tar.gz differ diff --git a/docs/source/Basic usage.md b/docs/source/Basic usage.md index 530a0c2b..10075ec3 100644 --- a/docs/source/Basic usage.md +++ b/docs/source/Basic usage.md @@ -1,5 +1,34 @@ # Basic usage +As part of our persona-driven development BlueCast offers two ways of +configuration: + +* a non-programmatic way which uses widgets for the most important options +* a programmatic way with full control and flexibility + +## Non-programmatic way for starters + +The non-programmatic way should be helpful for new BlueCast users +or ML practitioners with little ML experience. The interface can be +used like this: + +```python +from bluecast.blueprints.welcome import WelcomeToBlueCast + +welcome = WelcomeToBlueCast() +welcome.automl_configurator() + +# here users can choose from the given options and click 'submit' to create the instance +# after submit the automl instance can be retrieved and used like: +automl = welcome.automl_instance +automl.fit(df_train, target_col="target") +y_hat = automl.predict(df_val) +``` + +## Programmatic way + +The programmatic way is suitable for experienced users. + The module blueprints contains the main functionality of the library. 
The main entry point is the `Blueprint` class. It already includes needed preprocessing (including some convenience functionality like feature type detection)