Skip to content

Commit

Permalink
Simplify interpretation of produced metrics
Browse files Browse the repository at this point in the history
* Add pre-computed phase durations

* Introduce JSON specs for metric output
  • Loading branch information
Izzette committed Feb 6, 2024
1 parent 607a3ee commit 5942e0e
Show file tree
Hide file tree
Showing 7 changed files with 275 additions and 40 deletions.
3 changes: 3 additions & 0 deletions cmd/kube-transition-metrics/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
//nolint:gosec
_ "net/http/pprof"
"os"
"time"

"github.com/BackMarket-oss/kube-transition-metrics/internal/options"
"github.com/BackMarket-oss/kube-transition-metrics/internal/prommetrics"
Expand Down Expand Up @@ -65,6 +66,8 @@ func main() {

options := options.Parse()
zerolog.SetGlobalLevel(options.LogLevel)
zerolog.DurationFieldInteger = false
zerolog.DurationFieldUnit = time.Second

config := getKubeconfig(options)
clientset, err := kubernetes.NewForConfig(config)
Expand Down
51 changes: 43 additions & 8 deletions internal/statistics/container_statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ type containerStatistic struct {
pod *podStatistic
imagePull imagePullStatistic

// Previous init container, null if first init container or non-init container
previous *containerStatistic

// The timestamp for when the container first turned Running.
runningTimestamp time.Time

Expand Down Expand Up @@ -46,21 +49,53 @@ func (cs containerStatistic) logger() zerolog.Logger {
Logger()
}

func (cs containerStatistic) appendInitFields(event *zerolog.Event) {
if !cs.runningTimestamp.IsZero() && cs.previous != nil && !cs.previous.readyTimestamp.IsZero() {
event.Dur("previous_to_running_seconds", cs.runningTimestamp.Sub(cs.previous.readyTimestamp))
}
}

func (cs containerStatistic) appendNonInitFields(event *zerolog.Event) {
if !cs.runningTimestamp.IsZero() && !cs.pod.initializingTimestamp.IsZero() {
event.Dur("initialized_to_running_seconds", cs.runningTimestamp.Sub(cs.pod.initializingTimestamp))
}
}

func (cs containerStatistic) event() *zerolog.Event {
event := zerolog.Dict()

event.Bool("init_container", cs.initContainer)
if cs.initContainer {
cs.appendInitFields(event)
} else {
cs.appendNonInitFields(event)
}

if !cs.runningTimestamp.IsZero() {
event.Time("running_timestamp", cs.runningTimestamp)
}
if !cs.startedTimestamp.IsZero() {
event.Float64("started_latency",
cs.startedTimestamp.Sub(cs.pod.creationTimestamp).Seconds())
event.Time("started_timestamp", cs.startedTimestamp)
if !cs.runningTimestamp.IsZero() {
event.Dur("running_to_started_seconds", cs.startedTimestamp.Sub(cs.runningTimestamp))
}
}
if !cs.readyTimestamp.IsZero() {
event.Float64("ready_latency",
cs.readyTimestamp.Sub(cs.pod.creationTimestamp).Seconds())
}
if !cs.runningTimestamp.IsZero() {
event.Float64("running_latency",
cs.runningTimestamp.Sub(cs.pod.creationTimestamp).Seconds())
event.Time("ready_timestamp", cs.readyTimestamp)

// As init containers do not supported startup, liveliness, or readiness probes the Started container status field is
// not set for init containers.
// Instead, readiness represents the time an init container has excited successfully,allowing the following containers
// to proceed.
// Given this, presenting both running_to_ready_seconds and started_to_ready_seconds is useful to cover the differing
// meanings for both container types.
// See: https://github.com/kubernetes/website/blob/b397a8f/content/en/docs/concepts/workloads/pods/init-containers.md
if !cs.runningTimestamp.IsZero() {
event.Dur("running_to_ready_seconds", cs.readyTimestamp.Sub(cs.runningTimestamp))
}
if !cs.startedTimestamp.IsZero() {
event.Dur("started_to_ready_seconds", cs.readyTimestamp.Sub(cs.runningTimestamp))
}
}

return event
Expand Down
10 changes: 5 additions & 5 deletions internal/statistics/image_pull_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,11 @@ func (ev imagePullingEvent) handle(statistic *podStatistic) bool {
}
imagePullStatistic := &containerStatistic.imagePull

if !imagePullStatistic.finishedAt.IsZero() {
if !imagePullStatistic.finishedTimestamp.IsZero() {
logger.Debug().Str("container_name", ev.containerName).
Msg("Skipping event for initialized pod")
} else if imagePullStatistic.startedAt.IsZero() {
imagePullStatistic.startedAt = ev.event.FirstTimestamp.Time
} else if imagePullStatistic.startedTimestamp.IsZero() {
imagePullStatistic.startedTimestamp = ev.event.FirstTimestamp.Time
}

return false
Expand Down Expand Up @@ -194,8 +194,8 @@ func (ev imagePulledEvent) handle(statistic *podStatistic) bool {
}
imagePullStatistic := &containerStatistic.imagePull

if imagePullStatistic.finishedAt.IsZero() {
imagePullStatistic.finishedAt = ev.event.LastTimestamp.Time
if imagePullStatistic.finishedTimestamp.IsZero() {
imagePullStatistic.finishedTimestamp = ev.event.LastTimestamp.Time
}

imagePullStatistic.log(ev.event.Message)
Expand Down
19 changes: 11 additions & 8 deletions internal/statistics/image_pull_statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,21 @@ import (
type imagePullStatistic struct {
container *containerStatistic

startedAt time.Time
finishedAt time.Time
startedTimestamp time.Time
finishedTimestamp time.Time
}

func (s imagePullStatistic) log(message string) {
metrics := zerolog.Dict()
metrics.Bool("init_container", s.container.initContainer)
if !s.finishedAt.IsZero() && !s.startedAt.IsZero() {
metrics.Float64(
"image_pull_duration",
s.finishedAt.Sub(s.startedAt).Seconds(),
)

if !s.startedTimestamp.IsZero() {
metrics.Time("started_timestamp", s.startedTimestamp)
}
if !s.finishedTimestamp.IsZero() {
metrics.Time("finished_timestamp", s.finishedTimestamp)
if !s.startedTimestamp.IsZero() {
metrics.Dur("duration_seconds", s.finishedTimestamp.Sub(s.startedTimestamp))
}
}

logger :=
Expand Down
41 changes: 24 additions & 17 deletions internal/statistics/pod_statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ type podStatistic struct {
creationTimestamp time.Time

// The timestamp for when the pod was scheduled.
scheduledTimestamp time.Time
initializingTimestamp time.Time

// The timestamp for when the pod was initialized.
initializedTimestamp time.Time
runningTimestamp time.Time

// The timestamp for when the pod first turned Ready.
readyTimestamp time.Time
Expand All @@ -46,8 +46,11 @@ func (s *podStatistic) initialize(pod *corev1.Pod) {
s.initContainers = make(map[string]*containerStatistic)
s.containers = make(map[string]*containerStatistic)

var previous *containerStatistic
for _, container := range pod.Spec.InitContainers {
s.initContainers[container.Name] = newContainerStatistic(s, true, container)
s.initContainers[container.Name].previous = previous
previous = s.initContainers[container.Name]
}
for _, container := range pod.Spec.Containers {
s.containers[container.Name] = newContainerStatistic(s, false, container)
Expand All @@ -64,20 +67,24 @@ func (s podStatistic) logger() zerolog.Logger {
func (s podStatistic) event() *zerolog.Event {
event := zerolog.Dict()

if !s.scheduledTimestamp.IsZero() {
event.Float64(
"scheduled_latency",
s.scheduledTimestamp.Sub(s.creationTimestamp).Seconds())
event.Time("creation_timestamp", s.creationTimestamp)
if !s.initializingTimestamp.IsZero() {
event.Time("initializing_timestamp", s.initializingTimestamp)
event.Dur("creation_to_initializing_seconds", s.initializingTimestamp.Sub(s.creationTimestamp))
}
if !s.initializedTimestamp.IsZero() {
event.Float64(
"initialized_latency",
s.initializedTimestamp.Sub(s.creationTimestamp).Seconds())
if !s.runningTimestamp.IsZero() {
event.Time("running_timestamp", s.runningTimestamp)
event.Dur("creation_to_running_seconds", s.runningTimestamp.Sub(s.creationTimestamp))
if !s.initializingTimestamp.IsZero() {
event.Dur("initializing_to_running_seconds", s.runningTimestamp.Sub(s.initializingTimestamp))
}
}
if !s.readyTimestamp.IsZero() {
event.Float64(
"ready_latency",
s.readyTimestamp.Sub(s.creationTimestamp).Seconds())
event.Time("ready_timestamp", s.readyTimestamp)
event.Dur("creation_to_ready_seconds", s.readyTimestamp.Sub(s.creationTimestamp))
if !s.runningTimestamp.IsZero() {
event.Dur("running_to_ready_seconds", s.readyTimestamp.Sub(s.runningTimestamp))
}
}

return event
Expand Down Expand Up @@ -112,16 +119,16 @@ func (s *podStatistic) update(pod *corev1.Pod) {
//nolint:exhaustive
switch condition.Type {
case corev1.PodScheduled:
if s.scheduledTimestamp.IsZero() {
s.scheduledTimestamp = condition.LastTransitionTime.Time
if s.initializingTimestamp.IsZero() {
s.initializingTimestamp = condition.LastTransitionTime.Time
}
case corev1.PodInitialized:
// Pod Initialized occursafter all images pulled, no need to continue to
// track

if s.initializedTimestamp.IsZero() {
if s.runningTimestamp.IsZero() {
go s.imagePullCollector.cancel("pod_initialized")
s.initializedTimestamp = condition.LastTransitionTime.Time
s.runningTimestamp = condition.LastTransitionTime.Time
}
case corev1.PodReady:
if s.readyTimestamp.IsZero() {
Expand Down
4 changes: 2 additions & 2 deletions internal/statistics/statistics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,9 @@ func TestPodStatisticUpdate(t *testing.T) {
// Update the pod statistic for the "new" state
stat.update(pod)

assert.NotZero(t, stat.scheduledTimestamp, "scheduledTimestamp was not set")
assert.NotZero(t, stat.initializingTimestamp, "scheduledTimestamp was not set")
assert.NotZero(
t, stat.initializedTimestamp, "initializedTimestamp was not set")
t, stat.runningTimestamp, "initializedTimestamp was not set")
assert.NotEmpty(t, stat.containers, "containers map was not populated")

// Check that the imagePullCollector would have been canceled for the right
Expand Down
Loading

0 comments on commit 5942e0e

Please sign in to comment.