[GSOC] hyperopt suggestion service logic update #2412

Open · shashank-iitbhu wants to merge 27 commits into base: master

Changes from 11 commits

Commits (27)
f615e3f  hyperopt suggestion logic update (shashank-iitbhu, Aug 21, 2024)
a8bc887  Merge upstream master and resolve conflicts in base_service.py and se… (shashank-iitbhu, Aug 25, 2024)
a67f373  fix (shashank-iitbhu, Aug 25, 2024)
365c2f5  DISTRIBUTION_UNKNOWN enum set to 0 in gRPC api (shashank-iitbhu, Aug 26, 2024)
caa2422  convert parameter method fix (shashank-iitbhu, Aug 26, 2024)
0f38a51  convert feasibleSpace func updated (shashank-iitbhu, Sep 3, 2024)
ae9fa34  renamed DISTRIBUTION_UNKNOWN to DISTRIBUTION_UNSPECIFIED (shashank-iitbhu, Sep 3, 2024)
910a46c  fix (shashank-iitbhu, Sep 3, 2024)
08b01ac  added more test cases for hyperopt distributions (shashank-iitbhu, Sep 6, 2024)
16dc030  added support for NORMAL and LOG_NORMAL in hyperopt suggestion service (shashank-iitbhu, Sep 7, 2024)
282f81d  added e2e tests for NORMAL and LOG_NORMAL (shashank-iitbhu, Sep 7, 2024)
b7d09a6  hyperopt-suggestion example update (shashank-iitbhu, Sep 19, 2024)
58ab1ac  updated logic for log distributions (shashank-iitbhu, Sep 19, 2024)
2b1932e  updated logic for log distributions (shashank-iitbhu, Sep 19, 2024)
2f1c355  e2e test fixed (shashank-iitbhu, Sep 22, 2024)
8391c29  added support for parameter distributions for Parameter type INT (shashank-iitbhu, Sep 22, 2024)
23fd30b  unit test fixed (shashank-iitbhu, Sep 22, 2024)
7f6deb5  Update pkg/suggestion/v1beta1/hyperopt/base_service.py (shashank-iitbhu, Sep 22, 2024)
b85b4bf  comment fixed (shashank-iitbhu, Sep 22, 2024)
dc36303  added unit tests for INT parameter type (shashank-iitbhu, Sep 22, 2024)
658daaf  completed param unit test cases (shashank-iitbhu, Sep 22, 2024)
5198ad1  handled default case for normal distributions when min or max are not… (shashank-iitbhu, Sep 23, 2024)
262912d  fixed validation logic for min and max (shashank-iitbhu, Oct 4, 2024)
81f5526  removed unnecessary test params (shashank-iitbhu, Oct 6, 2024)
748e4ba  fixes (shashank-iitbhu, Oct 8, 2024)
14f30a5  added comments (shashank-iitbhu, Oct 10, 2024)
4f35663  fix (shashank-iitbhu, Oct 12, 2024)
2 changes: 2 additions & 0 deletions .github/workflows/e2e-test-pytorch-mnist.yaml
@@ -41,5 +41,7 @@ jobs:
- "long-running-resume,from-volume-resume,median-stop"
# others
- "grid,bayesian-optimization,tpe,multivariate-tpe,cma-es,hyperband"
- "hyperopt-distribution"
- "file-metrics-collector,pytorchjob-mnist"
- "median-stop-with-json-format,file-metrics-collector-with-json-format"

81 changes: 81 additions & 0 deletions examples/v1beta1/hp-tuning/hyperopt-distribution.yaml
@@ -0,0 +1,81 @@
---
apiVersion: kubeflow.org/v1beta1
kind: Experiment
metadata:
  namespace: kubeflow
  name: hyperopt-distribution
spec:
  objective:
    type: minimize
    goal: 0.001
    objectiveMetricName: loss
  algorithm:
    algorithmName: random
  parallelTrialCount: 3
  maxTrialCount: 12
  maxFailedTrialCount: 3
  parameters:
    - name: lr
      parameterType: double
      feasibleSpace:
        min: "0.01"
        max: "0.05"
        step: "0.01"
        distribution: "uniform"
    - name: momentum
      parameterType: double
      feasibleSpace:
        min: "0.5"
        max: "0.9"
        distribution: "logUniform"
    - name: weight_decay
      parameterType: double
      feasibleSpace:
        min: "0.01"
        max: "0.05"
        distribution: "normal"
    - name: dropout_rate
      parameterType: double
      feasibleSpace:
        min: "0.1"
        max: "0.5"
        step: "0.001"
        distribution: "logNormal"
  trialTemplate:
    primaryContainerName: training-container
    trialParameters:
      - name: learningRate
        description: Learning rate for the training model
        reference: lr
      - name: momentum
        description: Momentum for the training model
        reference: momentum
      - name: weightDecay
        description: Weight decay for the training model
        reference: weight_decay
      - name: dropoutRate
        description: Dropout rate for the training model
        reference: dropout_rate
    trialSpec:
      apiVersion: batch/v1
      kind: Job
      spec:
        template:
          spec:
            containers:
              - name: training-container
                image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
                command:
                  - "python3"
                  - "/opt/pytorch-mnist/mnist.py"
                  - "--epochs=1"
                  - "--batch-size=16"
                  - "--lr=${trialParameters.learningRate}"
                  - "--momentum=${trialParameters.momentum}"
                  - "--weight-decay=${trialParameters.weightDecay}"
                  - "--dropout-rate=${trialParameters.dropoutRate}"
                resources:
                  limits:
                    memory: "1Gi"
                    cpu: "0.5"
            restartPolicy: Never
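Note: as a rough sketch (not part of the PR), this is the hyperopt search space the suggestion service would build for the four parameters above, following the base_service.py mapping later in this diff; a `step` selects a quantized (`q*`) variant, and `normal`/`logNormal` derive mu and sigma from `min` and `max`:

```python
from hyperopt import hp
from hyperopt.pyll.stochastic import sample

def mu(lo, hi):
    return (lo + hi) / 2

def sigma(lo, hi):
    return (hi - lo) / 6

space = {
    "lr": hp.quniform("lr", 0.01, 0.05, 0.01),  # uniform with step
    "momentum": hp.loguniform("momentum", 0.5, 0.9),  # logUniform
    "weight_decay": hp.normal("weight_decay", mu(0.01, 0.05), sigma(0.01, 0.05)),
    "dropout_rate": hp.qlognormal("dropout_rate", mu(0.1, 0.5), sigma(0.1, 0.5), 0.001),
}

print(sample(space))  # one random suggestion drawn from the space
```

One caveat: `hp.loguniform` interprets its bounds in log space (samples are `exp(uniform(low, high))`), so passing raw min/max as done at this commit shifts the sampled range; the later "updated logic for log distributions" commits appear to address exactly this.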
14 changes: 14 additions & 0 deletions examples/v1beta1/trial-images/pytorch-mnist/mnist.py
@@ -150,6 +150,20 @@ def main():
         metavar="M",
         help="SGD momentum (default: 0.5)",
     )
+    parser.add_argument(
+        "--weight-decay",
+        type=float,
+        default=0.01,
+        metavar="WD",
+        help="Weight decay for regularization (default: 0.01)",
+    )
+    parser.add_argument(
+        "--dropout-rate",
+        type=float,
+        default=0.5,
+        metavar="DR",
+        help="Dropout rate for the model (default: 0.5)",
+    )
     parser.add_argument(
         "--no-cuda", action="store_true", default=False, help="disables CUDA training"
     )
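The hunk above only registers the new flags; the consuming code is outside this excerpt. A hedged sketch of the typical PyTorch wiring, with all names assumed rather than taken from mnist.py:

```python
import argparse

import torch.nn as nn
import torch.optim as optim

# Assumed wiring, for illustration only: the dropout rate parameterizes a
# Dropout layer in the model, while weight decay goes to the optimizer as an
# L2 penalty.
args = argparse.Namespace(lr=0.01, momentum=0.5, weight_decay=0.01, dropout_rate=0.5)

model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Dropout(p=args.dropout_rate),
    nn.Linear(128, 10),
)

optimizer = optim.SGD(
    model.parameters(),
    lr=args.lr,
    momentum=args.momentum,
    weight_decay=args.weight_decay,
)
```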
180 changes: 90 additions & 90 deletions pkg/apis/manager/v1beta1/api.pb.go

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions pkg/apis/manager/v1beta1/api.proto
@@ -101,11 +101,11 @@ enum ParameterType {
  * Distribution types for HyperParameter.
  */
 enum Distribution {
-  UNIFORM = 0;
-  LOG_UNIFORM = 1;
-  NORMAL = 2;
-  LOG_NORMAL = 3;
-  DISTRIBUTION_UNKNOWN = 4;
+  DISTRIBUTION_UNSPECIFIED = 0;
+  UNIFORM = 1;
+  LOG_UNIFORM = 2;
+  NORMAL = 3;
+  LOG_NORMAL = 4;
 }
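Note: moving DISTRIBUTION_UNSPECIFIED to tag 0 follows proto3, which requires the first enum value to be zero, and the protobuf style guide, which recommends reserving that zero value for an `*_UNSPECIFIED` default. Renumbering the other values changes their wire encoding, which appears acceptable here only because the Distribution enum has not shipped in a Katib release yet.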

24 changes: 12 additions & 12 deletions pkg/apis/manager/v1beta1/python/api_pb2.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pkg/apis/manager/v1beta1/python/api_pb2.pyi
@@ -16,11 +16,11 @@ class ParameterType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):

 class Distribution(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
     __slots__ = ()
+    DISTRIBUTION_UNSPECIFIED: _ClassVar[Distribution]
     UNIFORM: _ClassVar[Distribution]
     LOG_UNIFORM: _ClassVar[Distribution]
     NORMAL: _ClassVar[Distribution]
     LOG_NORMAL: _ClassVar[Distribution]
-    DISTRIBUTION_UNKNOWN: _ClassVar[Distribution]
 
 class ObjectiveType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
     __slots__ = ()
@@ -39,11 +39,11 @@
 DOUBLE: ParameterType
 INT: ParameterType
 DISCRETE: ParameterType
 CATEGORICAL: ParameterType
+DISTRIBUTION_UNSPECIFIED: Distribution
 UNIFORM: Distribution
 LOG_UNIFORM: Distribution
 NORMAL: Distribution
 LOG_NORMAL: Distribution
-DISTRIBUTION_UNKNOWN: Distribution
 UNKNOWN: ObjectiveType
 MINIMIZE: ObjectiveType
 MAXIMIZE: ObjectiveType
@@ -532,22 +532,12 @@ func convertParameterType(typ experimentsv1beta1.ParameterType) suggestionapi.ParameterType {
 }
 
 func convertFeasibleSpace(fs experimentsv1beta1.FeasibleSpace) *suggestionapi.FeasibleSpace {
-	distribution := convertDistribution(fs.Distribution)
-	if distribution == suggestionapi.Distribution_DISTRIBUTION_UNKNOWN {
-		return &suggestionapi.FeasibleSpace{
-			Max:  fs.Max,
-			Min:  fs.Min,
-			List: fs.List,
-			Step: fs.Step,
-		}
-	}
-
 	return &suggestionapi.FeasibleSpace{
 		Max:          fs.Max,
 		Min:          fs.Min,
 		List:         fs.List,
 		Step:         fs.Step,
-		Distribution: distribution,
+		Distribution: convertDistribution(fs.Distribution),
 	}
 }
 
@@ -562,7 +552,7 @@ func convertDistribution(typ experimentsv1beta1.Distribution) suggestionapi.Distribution {
 	case experimentsv1beta1.DistributionLogNormal:
 		return suggestionapi.Distribution_LOG_NORMAL
 	default:
-		return suggestionapi.Distribution_DISTRIBUTION_UNKNOWN
+		return suggestionapi.Distribution_DISTRIBUTION_UNSPECIFIED
 	}
 }
@@ -618,7 +618,7 @@ func TestConvertDistribution(t *testing.T) {
 		},
 		{
 			inDistribution:       experimentsv1beta1.DistributionUnknown,
-			expectedDistribution: suggestionapi.Distribution_DISTRIBUTION_UNKNOWN,
+			expectedDistribution: suggestionapi.Distribution_DISTRIBUTION_UNSPECIFIED,
 			testDescription:      "Convert unknown distribution",
 		},
 	}
64 changes: 59 additions & 5 deletions pkg/suggestion/v1beta1/hyperopt/base_service.py
@@ -17,6 +17,7 @@
 import hyperopt
 import numpy as np
 
+from pkg.apis.manager.v1beta1.python import api_pb2
 from pkg.suggestion.v1beta1.internal.constant import (
     CATEGORICAL,
     DISCRETE,
@@ -63,13 +64,66 @@ def create_hyperopt_domain(self):
         hyperopt_search_space = {}
         for param in self.search_space.params:
             if param.type == INTEGER:
-                hyperopt_search_space[param.name] = hyperopt.hp.quniform(
-                    param.name, float(param.min), float(param.max), float(param.step)
-                )
-            elif param.type == DOUBLE:
-                hyperopt_search_space[param.name] = hyperopt.hp.uniform(
+                hyperopt_search_space[param.name] = hyperopt.hp.uniformint(
[Review thread on the change to hyperopt.hp.uniformint]

Member: If the parameter is int, why can't we support other distributions like lognormal?

shashank-iitbhu (Contributor, Author), Sep 6, 2024: Distributions like uniform, quniform, loguniform, normal, etc. return float values. They are designed to sample from a range of values that can take any real number (float), which might not make sense if we're looking for an integer value. Although we can definitely add support for these distributions when the parameter is int as well. Should we do this?

Member: @tenzen-y @kubeflow/wg-training-leads @shashank-iitbhu Should we round this float value to int if the user wants to use this distribution and the int parameter type?

Member: > Should we round this float value to int if the user wants to use this distribution and the int parameter type?

SGTM. Users can specify the double parameter type if they want to compute more exactly. But documentation of this restriction for the int parameter type would be better.
                     param.name, float(param.min), float(param.max)
                 )
+            elif param.type == DOUBLE:
+                if param.distribution == api_pb2.UNIFORM or param.distribution is None:
+                    if param.step:
+                        hyperopt_search_space[param.name] = hyperopt.hp.quniform(
+                            param.name,
+                            float(param.min),
+                            float(param.max),
+                            float(param.step),
+                        )
+                    else:
+                        hyperopt_search_space[param.name] = hyperopt.hp.uniform(
+                            param.name, float(param.min), float(param.max)
+                        )
+                elif param.distribution == api_pb2.LOG_UNIFORM:
+                    if param.step:
+                        hyperopt_search_space[param.name] = hyperopt.hp.qloguniform(
+                            param.name,
+                            float(param.min),
+                            float(param.max),
+                            float(param.step),
+                        )
+                    else:
+                        hyperopt_search_space[param.name] = hyperopt.hp.loguniform(
+                            param.name, float(param.min), float(param.max)
+                        )
+                elif param.distribution == api_pb2.NORMAL:
+                    mu = (float(param.min) + float(param.max)) / 2
+                    sigma = (float(param.max) - float(param.min)) / 6
[Review thread on the sigma calculation]

shashank-iitbhu (Contributor, Author), Sep 11, 2024: I followed this article to determine the value of sigma from min and max. cc @tenzen-y @andreyvelich

Member: Maybe we should add this article to the comments. WDYT @tenzen-y @johnugeorge?

Member: I do not want to depend on the individual article. Instead of that, it would be better to add an actual mathematical description here as a comment.
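Note: one way to state the mathematical description requested above: setting mu = (min + max) / 2 and sigma = (max - min) / 6 places min and max exactly three standard deviations from the mean, i.e. [min, max] = [mu - 3*sigma, mu + 3*sigma]. By the empirical three-sigma rule, roughly 99.7% of draws from N(mu, sigma^2) then land inside the feasible range; the remaining ~0.3% can still fall outside, since a normal distribution is unbounded. (The diff continues below.)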
+                    if param.step:
+                        hyperopt_search_space[param.name] = hyperopt.hp.qnormal(
+                            param.name,
+                            mu,
+                            sigma,
+                            float(param.step),
+                        )
+                    else:
+                        hyperopt_search_space[param.name] = hyperopt.hp.normal(
+                            param.name,
+                            mu,
+                            sigma,
+                        )
+                elif param.distribution == api_pb2.LOG_NORMAL:
+                    mu = (float(param.min) + float(param.max)) / 2
+                    sigma = (float(param.max) - float(param.min)) / 6
+                    if param.step:
+                        hyperopt_search_space[param.name] = hyperopt.hp.qlognormal(
+                            param.name,
+                            mu,
+                            sigma,
+                            float(param.step),
+                        )
+                    else:
+                        hyperopt_search_space[param.name] = hyperopt.hp.lognormal(
+                            param.name,
+                            mu,
+                            sigma,
+                        )
             elif param.type == CATEGORICAL or param.type == DISCRETE:
                 hyperopt_search_space[param.name] = hyperopt.hp.choice(
                     param.name, param.list
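Note: one practical consequence of the normal mapping, sketched here as a quick check (not part of the diff; the later commit 5198ad1, "handled default case for normal distributions when min or max are not…", touches related edge cases):

```python
import numpy as np
from hyperopt import hp
from hyperopt.pyll.stochastic import sample

# Quick check, not part of the PR: hp.normal is unbounded, so a small share of
# suggestions falls outside [min, max] even with the three-sigma mapping above.
lo, hi = 0.01, 0.05
dist = hp.normal("weight_decay", (lo + hi) / 2, (hi - lo) / 6)
draws = np.array([sample(dist) for _ in range(10_000)])
print(((draws < lo) | (draws > hi)).mean())  # roughly 0.003, per the 3-sigma rule
```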
5 changes: 5 additions & 0 deletions pkg/suggestion/v1beta1/internal/constant.py
@@ -19,3 +19,8 @@
 DOUBLE = "DOUBLE"
 CATEGORICAL = "CATEGORICAL"
 DISCRETE = "DISCRETE"
+
+UNIFORM = "UNIFORM"
+LOG_UNIFORM = "LOG_UNIFORM"
+NORMAL = "NORMAL"
+LOG_NORMAL = "LOG_NORMAL"
39 changes: 26 additions & 13 deletions pkg/suggestion/v1beta1/internal/search_space.py
@@ -82,25 +82,36 @@ def __str__(self):

     @staticmethod
     def convert_parameter(p):
+        distribution = (
+            p.feasible_space.distribution
+            if p.feasible_space.distribution != ""
+            and p.feasible_space.distribution is not None
+            and p.feasible_space.distribution != api.DISTRIBUTION_UNSPECIFIED
+            else None
+        )
+
         if p.parameter_type == api.INT:
             # Default value for INT parameter step is 1
-            step = 1
-            if p.feasible_space.step is not None and p.feasible_space.step != "":
-                step = p.feasible_space.step
+            step = p.feasible_space.step if p.feasible_space.step else 1
             return HyperParameter.int(
-                p.name, p.feasible_space.min, p.feasible_space.max, step
+                p.name, p.feasible_space.min, p.feasible_space.max, step, distribution
             )
 
         elif p.parameter_type == api.DOUBLE:
             return HyperParameter.double(
                 p.name,
                 p.feasible_space.min,
                 p.feasible_space.max,
                 p.feasible_space.step,
+                distribution,
             )
 
         elif p.parameter_type == api.CATEGORICAL:
             return HyperParameter.categorical(p.name, p.feasible_space.list)
 
         elif p.parameter_type == api.DISCRETE:
             return HyperParameter.discrete(p.name, p.feasible_space.list)
 
         else:
             logger.error(
                 "Cannot get the type for the parameter: %s (%s)",
@@ -110,33 +121,35 @@


 class HyperParameter(object):
-    def __init__(self, name, type_, min_, max_, list_, step):
+    def __init__(self, name, type_, min_, max_, list_, step, distribution=None):
         self.name = name
         self.type = type_
         self.min = min_
         self.max = max_
         self.list = list_
         self.step = step
+        self.distribution = distribution
 
     def __str__(self):
-        if self.type == constant.INTEGER or self.type == constant.DOUBLE:
+        if self.type in [constant.INTEGER, constant.DOUBLE]:
             return (
-                "HyperParameter(name: {}, type: {}, min: {}, max: {}, step: {})".format(
-                    self.name, self.type, self.min, self.max, self.step
-                )
+                f"HyperParameter(name: {self.name}, type: {self.type}, min: {self.min}, "
+                f"max: {self.max}, step: {self.step}, distribution: {self.distribution})"
             )
         else:
             return "HyperParameter(name: {}, type: {}, list: {})".format(
                 self.name, self.type, ", ".join(self.list)
             )
 
     @staticmethod
-    def int(name, min_, max_, step):
-        return HyperParameter(name, constant.INTEGER, min_, max_, [], step)
+    def int(name, min_, max_, step, distribution=None):
+        return HyperParameter(
+            name, constant.INTEGER, min_, max_, [], step, distribution
+        )
 
     @staticmethod
-    def double(name, min_, max_, step):
-        return HyperParameter(name, constant.DOUBLE, min_, max_, [], step)
+    def double(name, min_, max_, step, distribution=None):
+        return HyperParameter(name, constant.DOUBLE, min_, max_, [], step, distribution)
 
     @staticmethod
     def categorical(name, lst):
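A short usage sketch of the extended constructors (illustrative values; assumes the updated module is importable):

```python
from pkg.suggestion.v1beta1.internal.search_space import HyperParameter

# The distribution argument receives whatever convert_parameter derived from
# the feasible space; a plain string stands in for it here.
p = HyperParameter.double("lr", "0.01", "0.05", "", "LOG_UNIFORM")
print(p)
# HyperParameter(name: lr, type: DOUBLE, min: 0.01, max: 0.05, step: , distribution: LOG_UNIFORM)
```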