diff --git a/.gitignore b/.gitignore index 679812827..080a3cd9c 100644 --- a/.gitignore +++ b/.gitignore @@ -147,4 +147,6 @@ src # Pycharm .idea -.vscode \ No newline at end of file +.vscode +tmp +wandb \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 72bdf2922..6c268f0af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,9 @@ - Handle configspace as dictionary in mlp and parego example. - Adapt sgd loss to newest scikit-learn version. +## Features +- Log to WandB (#1037) + # 2.0.1 ## Improvements diff --git a/examples/6_advanced_features/1_wandb_logging.py b/examples/6_advanced_features/1_wandb_logging.py new file mode 100644 index 000000000..994d8f5f0 --- /dev/null +++ b/examples/6_advanced_features/1_wandb_logging.py @@ -0,0 +1,51 @@ +""" +Use Weights and Biases for logging +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This example shows how to use Weights and Biases (WandB) for logging. + +To use WandB, you need to install the package via pip: + +.. code-block:: bash + + pip install wandb + +Then you can use the WandBCallback to log the results of the optimization as well as intermediate information to WandB. +This is done by creating a WandBCallback object and passing it to the facade you use.
# The iris dataset is loaded once at import time and shared by every call to ``train``.
iris = datasets.load_iris()


def train(config: Configuration, seed: int = 0) -> float:
    """Evaluate an SVC with the given configuration on the iris dataset.

    Parameters
    ----------
    config : Configuration
        Configuration providing the regularization parameter under the key ``"C"``.
    seed : int, defaults to 0
        Passed to the classifier as ``random_state``.

    Returns
    -------
    float
        One minus the mean of the 5-fold cross-validation scores (lower is better).
    """
    classifier = SVC(C=config["C"], random_state=seed)
    scores = cross_val_score(classifier, iris.data, iris.target, cv=5)
    return 1 - np.mean(scores)


configspace = ConfigurationSpace({"C": (0.100, 1000.0)})

# Scenario object specifying the optimization environment
scenario = Scenario(configspace, deterministic=True, n_trials=100, seed=3)

# NOTE: Replace ``project`` and ``entity`` with your own WandB project and user/team name.
# The serialized scenario is stored as the config of the WandB run.
wandb_callback = WandBCallback(
    project="smac-dev",
    entity="benjamc",
    config=Scenario.make_serializable(scenario),
)

# Use SMAC to find the best configuration/hyperparameters.
# The facade is deliberately not named ``smac`` so that it does not rebind the imported ``smac`` module.
facade = HyperparameterOptimizationFacade(scenario, train, callbacks=[wandb_callback], overwrite=True)
incumbent = facade.optimize()
class WandBCallback(Callback):
    """Callback to log the results of the optimization as well as intermediate information to WandB.

    A WandB run is started via ``wandb.init`` when the callback is created. In ``on_tell_end``, the fields of the
    ``TrialInfo`` and ``TrialValue`` are logged together with ``smbo.runhistory.finished`` (as ``step``).
    In ``on_end``, the trajectory of the intensifier is logged entry by entry and the WandB run is finished.

    Parameters
    ----------
    project : str
        The project name of the WandB project.
    entity : str
        The entity name of the WandB project.
    id : str | None, defaults to None
        The id of the run.
    outdir : str | None, defaults to None
        The output directory of the WandB run (passed to ``wandb.init`` as ``dir``).
    mode : str | None, defaults to None
        The mode of the WandB run.
    resume : str, defaults to "allow"
        The resume mode of the WandB run.
    job_type : str | None, defaults to None
        The job type of the WandB run.
    group : str | None, defaults to None
        The group of the WandB run.
    config : dict | str | None, defaults to None
        The configuration of the WandB run.
    save_code : bool, defaults to True
        Whether to save the code of the WandB run.
    **kwargs : Any
        Additional keyword arguments passed on to ``wandb.init``.

    Raises
    ------
    ImportError
        If the optional ``wandb`` package is not installed.
    """

    # NOTE: Annotations containing ``X | Y`` or ``smac.main.smbo.SMBO`` are quoted on purpose. This module does not
    # use ``from __future__ import annotations``, so unquoted they would be evaluated when the class is created.

    def __init__(
        self,
        project: str,
        entity: str,
        id: "str | None" = None,
        outdir: "str | None" = None,
        mode: "str | None" = None,
        resume: str = "allow",
        job_type: "str | None" = None,
        group: "str | None" = None,
        config: "dict | str | None" = None,
        save_code: bool = True,
        **kwargs: Any,
    ) -> None:
        # WandB is an optional dependency, therefore it is only imported when the callback is actually used.
        try:
            import wandb
        except ImportError as err:
            raise ImportError(
                "WandBCallback requires the optional package `wandb`. Install it via `pip install wandb`."
            ) from err

        self.run = wandb.init(
            id=id,
            resume=resume,
            mode=mode,
            project=project,
            job_type=job_type,
            entity=entity,
            group=group,
            dir=outdir,
            config=config,
            save_code=save_code,
            **kwargs,
        )
        super().__init__()

    def on_tell_end(
        self, smbo: "smac.main.smbo.SMBO", info: TrialInfo, value: TrialValue
    ) -> "bool | None":  # noqa: D102
        info_dict = asdict(info)
        # Replace the configuration object by its plain dictionary so that it can be logged.
        info_dict["config"] = info.config.get_dictionary()

        # On duplicate keys, the entries of the trial value take precedence over the ones of the trial info.
        log_dict = {**info_dict, **asdict(value)}
        log_dict["step"] = smbo.runhistory.finished
        self.run.log(data=log_dict)

        return super().on_tell_end(smbo, info, value)

    def on_end(self, smbo: "smac.main.smbo.SMBO") -> None:  # noqa: D102
        trajectory = smbo.intensifier.get_data()["trajectory"]

        # Every trajectory entry is logged separately. An empty trajectory simply logs nothing.
        for item in trajectory:
            row = dict(item)
            row["costs"] = row["costs"][0]  # TODO properly log multi costs
            self.run.log(row)

        self.run.finish()
        return super().on_end(smbo)
"rejected_config_ids": self._rejected_config_ids, + "incumbents_changed": self._incumbents_changed, + "trajectory": [dataclasses.asdict(item) for item in self._trajectory], + "state": self.get_state(), + } + return data + + def load(self, filename: str | Path) -> None: """Loads the latest state of the intensifier including the incumbents and trajectory.""" if isinstance(filename, str):