Skip to content

Commit

Permalink
Update k8s provisioner to use server-side apply methodology (#5700)
Browse files Browse the repository at this point in the history
  • Loading branch information
kaspersjo committed Sep 23, 2024
1 parent 306c08e commit 49e2162
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 138 deletions.
60 changes: 31 additions & 29 deletions admin/deployments.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,39 +205,41 @@ type UpdateDeploymentOptions struct {
}

func (s *Service) UpdateDeployment(ctx context.Context, depl *database.Deployment, opts *UpdateDeploymentOptions) error {
// Update the provisioned runtime if the version has changed
if opts.Version != "" && opts.Version != depl.RuntimeVersion {
// Get provisioner from the set
p, ok := s.ProvisionerSet[depl.Provisioner]
if !ok {
return fmt.Errorf("provisioner: %q is not in the provisioner set", depl.Provisioner)
}
// Get provisioner from the set
p, ok := s.ProvisionerSet[depl.Provisioner]
if !ok {
return fmt.Errorf("provisioner: %q is not in the provisioner set", depl.Provisioner)
}

// Update the runtime
err := p.Update(ctx, depl.ProvisionID, opts.Version)
if err != nil {
s.Logger.Error("provisioner: failed to update", zap.String("deployment_id", depl.ID), zap.String("provisioner", depl.Provisioner), zap.String("provision_id", depl.ProvisionID), zap.Error(err), observability.ZapCtx(ctx))
return err
}
// Update the runtime
_, err := p.Provision(ctx, &provisioner.ProvisionOptions{
ProvisionID: depl.ProvisionID,
Slots: depl.Slots,
RuntimeVersion: opts.Version,
Annotations: opts.Annotations.ToMap(),
})
if err != nil {
s.Logger.Error("provisioner: failed to update", zap.String("deployment_id", depl.ID), zap.String("provisioner", depl.Provisioner), zap.String("provision_id", depl.ProvisionID), zap.Error(err), observability.ZapCtx(ctx))
return err
}

// Wait for the runtime to be ready after update
err = p.AwaitReady(ctx, depl.ProvisionID)
if err != nil {
s.Logger.Error("provisioner: failed awaiting runtime to be ready after update", zap.String("deployment_id", depl.ID), zap.String("provisioner", depl.Provisioner), zap.String("provision_id", depl.ProvisionID), zap.Error(err), observability.ZapCtx(ctx))
// Mark deployment error
_, err2 := s.DB.UpdateDeploymentStatus(ctx, depl.ID, database.DeploymentStatusError, err.Error())
return multierr.Combine(err, err2)
}
// Wait for the runtime to be ready after update
err = p.AwaitReady(ctx, depl.ProvisionID)
if err != nil {
s.Logger.Error("provisioner: failed awaiting runtime to be ready after update", zap.String("deployment_id", depl.ID), zap.String("provisioner", depl.Provisioner), zap.String("provision_id", depl.ProvisionID), zap.Error(err), observability.ZapCtx(ctx))
// Mark deployment error
_, err2 := s.DB.UpdateDeploymentStatus(ctx, depl.ID, database.DeploymentStatusError, err.Error())
return multierr.Combine(err, err2)
}

// Update the deployment runtime version
_, err = s.DB.UpdateDeploymentRuntimeVersion(ctx, depl.ID, opts.Version)
if err != nil {
// NOTE: If the update was triggered by a scheduled job like 'upgrade_latest_version_projects',
// then this error will cause the update to be retried on the next job invocation and it should eventually become consistent.
// Update the deployment runtime version
_, err = s.DB.UpdateDeploymentRuntimeVersion(ctx, depl.ID, opts.Version)
if err != nil {
// NOTE: If the update was triggered by a scheduled job like 'validate_deployments',
// then this error will cause the update to be retried on the next job invocation and it should eventually become consistent.

// TODO: Handle inconsistent state when a manually triggered update failed, where we can't rely on job retries.
return err
}
// TODO: Handle inconsistent state when a manually triggered update failed, where we can't rely on job retries.
return err
}

rt, err := s.OpenRuntimeClient(depl)
Expand Down
14 changes: 10 additions & 4 deletions admin/jobs/river/validate_deployments.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,20 @@ func (w *ValidateDeploymentsWorker) reconcileAllDeploymentsForProject(ctx contex
return err
}

// Trigger a redeploy if config is no longer valid
// Trigger re-provision if config is no longer valid
if !v {
w.admin.Logger.Info("validate deployments: config no longer valid, triggering redeploy", zap.String("organization_id", org.ID), zap.String("project_id", proj.ID), zap.String("deployment_id", depl.ID), observability.ZapCtx(ctx))
_, err = w.admin.RedeployProject(ctx, proj, depl)
w.admin.Logger.Info("validate deployments: config no longer valid, triggering re-provision", zap.String("organization_id", org.ID), zap.String("project_id", proj.ID), zap.String("deployment_id", depl.ID), observability.ZapCtx(ctx))
err = w.admin.UpdateDeployment(ctx, depl, &admin.UpdateDeploymentOptions{
Version: depl.RuntimeVersion,
Branch: depl.Branch,
Variables: proj.ProdVariables,
Annotations: w.admin.NewDeploymentAnnotations(org, proj),
EvictCachedRepo: false,
})
if err != nil {
return err
}
w.admin.Logger.Info("validate deployments: redeployed", zap.String("organization_id", org.ID), zap.String("project_id", proj.ID), observability.ZapCtx(ctx))
w.admin.Logger.Info("validate deployments: re-provisioned", zap.String("organization_id", org.ID), zap.String("project_id", proj.ID), observability.ZapCtx(ctx))
continue
}

Expand Down
45 changes: 26 additions & 19 deletions admin/provisioner/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ The provisioner is configured using `RILL_ADMIN_PROVISIONER_SET_JSON` with a nam
"http_ingress": "templates/http_ingress.yaml", // Ingress resource template for HTTP
"grpc_ingress": "templates/grpc_ingress.yaml", // Ingress resource template for gRPC
"service": "templates/service.yaml", // Service resource template
"statefulset": "templates/statefulset.yaml" // Statefulset resource template
"deployment": "templates/deployment.yaml", // Deployment resource template
"pvc": "templates/pvc.yaml" // PVC resource template
}
}
}
Expand All @@ -55,35 +56,28 @@ The Kubernetes resource templates provide a high level of flexibility, but they

Note: For internal Rill users refer to our private infra repos containing environment specific configurations and templates.

### statefulset.yaml
### deployment.yaml
```
apiVersion: apps/v1
kind: StatefulSet
kind: Deployment
spec:
persistentVolumeClaimRetentionPolicy:
whenDeleted: Delete
whenScaled: Retain
volumeClaimTemplates:
- metadata:
name: data
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: {{ .StorageBytes }}
replicas: 1
strategy:
type: Recreate
selector:
matchLabels:
app.kubernetes.io/name: {{ .Names.StatefulSet }}
serviceName: cloud-runtime
app.kubernetes.io/name: {{ .Names.Deployment }}
template:
metadata:
labels:
app.kubernetes.io/name: {{ .Names.StatefulSet }}
app.kubernetes.io/name: {{ .Names.Deployment }}
spec:
securityContext:
fsGroup: 1000
volumes:
- name: data
persistentVolumeClaim:
claimName: {{ .Names.PVC }}
containers:
- args:
- runtime
Expand Down Expand Up @@ -153,7 +147,7 @@ spec:
port: 9090
targetPort: 9090
selector:
app.kubernetes.io/name: {{ .Names.StatefulSet }}
app.kubernetes.io/name: {{ .Names.Deployment }}
```

### grpc_ingress.yaml
Expand Down Expand Up @@ -197,3 +191,16 @@ spec:
path: /v1
pathType: Prefix
```

### pvc.yaml
```
apiVersion: v1
kind: PersistentVolumeClaim
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: {{ .StorageBytes }}
storageClassName: storageclass-example
```
Loading

0 comments on commit 49e2162

Please sign in to comment.