automl · benjamc · Feb 16, 2023 · Feb 16, 2023 · Jun 2, 2023 · Jun 2, 2023
diff --git a/.gitignore b/.gitignore
@@ -147,4 +147,6 @@ src
 
 # Pycharm
 .idea
-.vscode
+.vscode
+tmp
+wandb
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -48,6 +48,9 @@
 - Handle configspace as dictionary in mlp and parego example.
 - Adapt sgd loss to newest scikit-learn version.
 
+## Features
+- Log to WandB (#1037)
+
 # 2.0.1
 
 ## Improvements

diff --git a/examples/6_advanced_features/1_wandb_logging.py b/examples/6_advanced_features/1_wandb_logging.py
@@ -0,0 +1,53 @@
+"""
+Use Weights and Biases for logging
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This example shows how to use Weights and Biases (WandB) for logging.
+
+To use WandB, you need to install the package via pip:
+
+.. code-block:: bash
+
+        pip install wandb
+
+Then you can use the WandBCallback to log the results of the optimization as well as intermediate information to WandB.
+This is done by creating a WandBCallback object and passing it to the used Facade.
+
+"""
+from __future__ import annotations
+
+import numpy as np
+from ConfigSpace import Configuration, ConfigurationSpace
+from sklearn import datasets
+from sklearn.model_selection import cross_val_score
+from sklearn.svm import SVC
+
+import smac
+from smac import HyperparameterOptimizationFacade, Scenario
+from smac.callback import WandBCallback
+
+iris = datasets.load_iris()
+
+
+def train(config: Configuration, seed: int = 0) -> float:
+    """Return the cost to minimize: 1 - mean score of a 5-fold cross-validated SVC on iris."""
+    classifier = SVC(C=config["C"], random_state=seed)
+    scores = cross_val_score(classifier, iris.data, iris.target, cv=5)
+    return 1 - np.mean(scores)
+
+
+configspace = ConfigurationSpace({"C": (0.100, 1000.0)})
+
+# Scenario object specifying the optimization environment
+scenario = Scenario(configspace, deterministic=True, n_trials=100, seed=3)
+
+# NOTE: replace `project` and `entity` with your own WandB project and account/team name.
+wandb_callback = WandBCallback(
+    project="smac-dev",
+    entity="benjamc",
+    config=Scenario.make_serializable(scenario),
+)
+
+# Use SMAC to find the best configuration/hyperparameters
+# (named `facade` so that the imported `smac` module is not shadowed)
+facade = HyperparameterOptimizationFacade(scenario, train, callbacks=[wandb_callback], overwrite=True)
+incumbent = facade.optimize()
diff --git a/examples/6_advanced_features/README.rst b/examples/6_advanced_features/README.rst
@@ -0,0 +1,2 @@
+Advanced Features
+=================
diff --git a/setup.py b/setup.py
@@ -40,6 +40,9 @@ def read_file(filepath: str) -> str:
         "pre-commit",
         "pylint",
     ],
+    "wandb": [
+        "wandb",
+    ]
 }
 
 setuptools.setup(

diff --git a/smac/callback/__init__.py b/smac/callback/__init__.py
@@ -1,7 +1,9 @@
 from smac.callback.callback import Callback
 from smac.callback.metadata_callback import MetadataCallback
+from smac.callback.wandb_logging import WandBCallback
 
 __all__ = [
     "Callback",
     "MetadataCallback",
+    "WandBCallback",
 ]
diff --git a/smac/callback/wandb_logging.py b/smac/callback/wandb_logging.py
@@ -0,0 +1,93 @@
+from __future__ import annotations
+
+from typing import Any
+
+from dataclasses import asdict
+
+import smac
+from smac.callback.callback import Callback
+from smac.runhistory import TrialInfo, TrialValue
+
+
+class WandBCallback(Callback):
+    """Callback to log the results of the optimization as well as intermediate information to WandB.
+
+    Logs TrialInfo, TrialValue and the number of successfully executed trials (as step) to WandB `on_tell_end`.
+    Upon the end of the run, logs the trajectory of the intensifier to WandB.
+
+    Parameters
+    ----------
+    project : str
+        The project name of the WandB project.
+    entity : str
+        The entity name of the WandB project.
+    id : str, optional
+        The id of the run.
+    outdir : str, optional
+        The output directory of the WandB run.
+    mode : str, optional
+        The mode of the WandB run.
+    resume : str, optional
+        The resume mode of the WandB run.
+    job_type : str, optional
+        The job type of the WandB run.
+    group : str, optional
+        The group of the WandB run.
+    config : dict or str, optional
+        The configuration of the WandB run.
+    save_code : bool, optional
+        Whether to save the code of the WandB run.
+    **kwargs : Any
+        Additional arguments to pass to the WandB run.
+    """
+
+    def __init__(
+        self,
+        project: str,
+        entity: str,
+        id: str | None = None,
+        outdir: str | None = None,
+        mode: str | None = None,
+        resume: str = "allow",
+        job_type: str | None = None,
+        group: str | None = None,
+        config: dict | str | None = None,
+        save_code: bool = True,
+        **kwargs: Any,
+    ) -> None:
+        # Imported lazily so that importing this module does not require the optional wandb package.
+        import wandb
+
+        self.run = wandb.init(
+            id=id,
+            resume=resume,
+            mode=mode,
+            project=project,
+            job_type=job_type,
+            entity=entity,
+            group=group,
+            dir=outdir,
+            config=config,
+            save_code=save_code,
+            **kwargs,
+        )
+        super().__init__()
+
+    def on_tell_end(self, smbo: smac.main.smbo.SMBO, info: TrialInfo, value: TrialValue) -> bool | None:
+        """Log the trial info, the trial value and ``smbo.runhistory.finished`` (as ``step``) to WandB."""
+        info_dict = asdict(info)
+        info_dict["config"] = info_dict["config"].get_dictionary()
+        # Dict unpacking instead of the ``|`` operator (Python >= 3.9 only); value entries win on key clashes.
+        log_dict = {**info_dict, **asdict(value)}
+        log_dict["step"] = smbo.runhistory.finished
+        self.run.log(data=log_dict)
+        return super().on_tell_end(smbo, info, value)
+
+    def on_end(self, smbo: smac.main.smbo.SMBO) -> None:
+        """Log every trajectory item of the intensifier to WandB, then finish the WandB run."""
+        trajectory = smbo.intensifier.get_data()["trajectory"]
+        for item in trajectory:
+            # TODO properly log multi costs (only the first cost is logged for now)
+            self.run.log({**item, "costs": item["costs"][0]})
+        self.run.finish()
+        return super().on_end(smbo)
diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py
@@ -669,6 +669,24 @@ def save(self, filename: str | Path) -> None:
         with open(filename, "w") as fp:
             json.dump(data, fp, indent=2, cls=NumpyEncoder)
 
+    def get_data(self) -> dict:
+        """Returns the current data of the intensifier as a dictionary without writing it to disk.
+
+        Returns
+        -------
+        data : dict
+            Dictionary with the keys ``incumbent_ids`` (config ids of the current incumbents),
+            ``rejected_config_ids``, ``incumbents_changed``, ``trajectory`` (one dictionary per
+            trajectory item) and ``state`` (the result of ``get_state``).
+        """
+        return {
+            "incumbent_ids": [self.runhistory.get_config_id(config) for config in self._incumbents],
+            "rejected_config_ids": self._rejected_config_ids,
+            "incumbents_changed": self._incumbents_changed,
+            "trajectory": [dataclasses.asdict(item) for item in self._trajectory],
+            "state": self.get_state(),
+        }
+
     def load(self, filename: str | Path) -> None:
         """Loads the latest state of the intensifier including the incumbents and trajectory."""
         if isinstance(filename, str):