Skip to content

Commit

Permalink
#101193 Preserve Step Info Across ILM Auto Retries (#113187) (#113786)
Browse files Browse the repository at this point in the history
* Add new Previous Step Info field to LifecycleExecutionState

* Add new field to IndexLifecycleExplainResponse

* Add new field to TransportExplainLifecycleAction

* Add logic to IndexLifecycleTransition to keep previous step info

* Switch tests to use Java standard Clock class

for any time based testing, this is the recommended method

* Fix tests for new field

Also refactor tests to newer style

* Add test to ensure step info is preserved

Across auto retries

* Add docs for new field

* Changelog Entry

* Update docs/changelog/113187.yaml

* Revert "Switch tests to use Java standard Clock class"

This reverts commit 241074c.

* PR Changes

* PR Changes - Improve docs wording



* Integration test for new ILM explain field

* Use ROOT locale instead of default toLowerCase

* PR Changes - Switch to block strings

* Remove forbidden API usage

---------

Co-authored-by: Mary Gouseti <[email protected]>
  • Loading branch information
lukewhiting and gmarouli committed Sep 30, 2024
1 parent bbc3202 commit c6e3397
Show file tree
Hide file tree
Showing 11 changed files with 210 additions and 113 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/113187.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 113187
summary: Preserve Step Info Across ILM Auto Retries
area: ILM+SLM
type: enhancement
issues: []
7 changes: 7 additions & 0 deletions docs/reference/ilm/apis/explain.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,12 @@ the case.
"index_uuid": "H7lF9n36Rzqa-KfKcnGQMg",
"index": "test-000057"
},
"previous_step_info": { <5>
"type": "cluster_block_exception",
"reason": "index [test-000057/H7lF9n36Rzqa-KfKcnGQMg] blocked by: [FORBIDDEN/5/index read-only (api)",
"index_uuid": "H7lF9n36Rzqa-KfKcnGQMg",
"index": "test-000057"
},
"phase_execution": {
"policy": "my_lifecycle3",
"phase_definition": {
Expand All @@ -329,3 +335,4 @@ is true, {ilm-init} will retry the failed step automatically.
<3> Shows the number of attempted automatic retries to execute the failed
step.
<4> What went wrong
<5> Contains a copy of the `step_info` field (when it exists) of the last attempted or executed step for diagnostic purposes, since the `step_info` is overwritten during each new attempt.
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ static TransportVersion def(int id) {
public static final TransportVersion SEMANTIC_TEXT_SEARCH_INFERENCE_ID = def(8_750_00_0);
public static final TransportVersion ML_INFERENCE_CHUNKING_SETTINGS = def(8_751_00_0);
public static final TransportVersion SEMANTIC_QUERY_INNER_HITS = def(8_752_00_0);
public static final TransportVersion RETAIN_ILM_STEP_INFO = def(8_753_00_0);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public record LifecycleExecutionState(
Boolean isAutoRetryableError,
Integer failedStepRetryCount,
String stepInfo,
String previousStepInfo,
String phaseDefinition,
Long lifecycleDate,
Long phaseTime,
Expand All @@ -53,6 +54,7 @@ public record LifecycleExecutionState(
private static final String IS_AUTO_RETRYABLE_ERROR = "is_auto_retryable_error";
private static final String FAILED_STEP_RETRY_COUNT = "failed_step_retry_count";
private static final String STEP_INFO = "step_info";
private static final String PREVIOUS_STEP_INFO = "previous_step_info";
private static final String PHASE_DEFINITION = "phase_definition";
private static final String SNAPSHOT_NAME = "snapshot_name";
private static final String SNAPSHOT_REPOSITORY = "snapshot_repository";
Expand All @@ -74,6 +76,7 @@ public static Builder builder(LifecycleExecutionState state) {
.setIsAutoRetryableError(state.isAutoRetryableError)
.setFailedStepRetryCount(state.failedStepRetryCount)
.setStepInfo(state.stepInfo)
.setPreviousStepInfo(state.previousStepInfo)
.setPhaseDefinition(state.phaseDefinition)
.setIndexCreationDate(state.lifecycleDate)
.setPhaseTime(state.phaseTime)
Expand Down Expand Up @@ -116,6 +119,10 @@ public static LifecycleExecutionState fromCustomMetadata(Map<String, String> cus
if (stepInfo != null) {
builder.setStepInfo(stepInfo);
}
String previousStepInfo = customData.get(PREVIOUS_STEP_INFO);
if (previousStepInfo != null) {
builder.setPreviousStepInfo(previousStepInfo);
}
String phaseDefinition = customData.get(PHASE_DEFINITION);
if (phaseDefinition != null) {
builder.setPhaseDefinition(phaseDefinition);
Expand Down Expand Up @@ -224,6 +231,9 @@ public Map<String, String> asMap() {
if (stepInfo != null) {
result.put(STEP_INFO, stepInfo);
}
if (previousStepInfo != null) {
result.put(PREVIOUS_STEP_INFO, previousStepInfo);
}
if (lifecycleDate != null) {
result.put(INDEX_CREATION_DATE, String.valueOf(lifecycleDate));
}
Expand Down Expand Up @@ -263,6 +273,7 @@ public static class Builder {
private String step;
private String failedStep;
private String stepInfo;
private String previousStepInfo;
private String phaseDefinition;
private Long indexCreationDate;
private Long phaseTime;
Expand Down Expand Up @@ -301,6 +312,11 @@ public Builder setStepInfo(String stepInfo) {
return this;
}

/**
 * Sets the step info carried over from the previous step attempt.
 * The value is stored as given, with no validation or null check, and is
 * handed to the {@code LifecycleExecutionState} created by {@code build()}.
 *
 * @param previousStepInfo the previous step info string; may be {@code null}
 * @return this builder, to allow call chaining
 */
public Builder setPreviousStepInfo(String previousStepInfo) {
this.previousStepInfo = previousStepInfo;
return this;
}

public Builder setPhaseDefinition(String phaseDefinition) {
this.phaseDefinition = phaseDefinition;
return this;
Expand Down Expand Up @@ -370,6 +386,7 @@ public LifecycleExecutionState build() {
isAutoRetryableError,
failedStepRetryCount,
stepInfo,
previousStepInfo,
phaseDefinition,
indexCreationDate,
phaseTime,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl
private static final ParseField STEP_TIME_MILLIS_FIELD = new ParseField("step_time_millis");
private static final ParseField STEP_TIME_FIELD = new ParseField("step_time");
private static final ParseField STEP_INFO_FIELD = new ParseField("step_info");
private static final ParseField PREVIOUS_STEP_INFO_FIELD = new ParseField("previous_step_info");
private static final ParseField PHASE_EXECUTION_INFO = new ParseField("phase_execution");
private static final ParseField AGE_FIELD = new ParseField("age");
private static final ParseField TIME_SINCE_INDEX_CREATION_FIELD = new ParseField("time_since_index_creation");
Expand Down Expand Up @@ -76,6 +77,7 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl
(String) a[17],
(String) a[18],
(BytesReference) a[11],
(BytesReference) a[21],
(PhaseExecutionInfo) a[12]
// a[13] == "age"
// a[20] == "time_since_index_creation"
Expand Down Expand Up @@ -111,6 +113,11 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), SHRINK_INDEX_NAME);
PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), INDEX_CREATION_DATE_MILLIS_FIELD);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), TIME_SINCE_INDEX_CREATION_FIELD);
PARSER.declareObject(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> {
XContentBuilder builder = JsonXContent.contentBuilder();
builder.copyCurrentStructure(p);
return BytesReference.bytes(builder);
}, PREVIOUS_STEP_INFO_FIELD);
}

private final String index;
Expand All @@ -126,6 +133,7 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl
private final Long stepTime;
private final boolean managedByILM;
private final BytesReference stepInfo;
private final BytesReference previousStepInfo;
private final PhaseExecutionInfo phaseExecutionInfo;
private final Boolean isAutoRetryableError;
private final Integer failedStepRetryCount;
Expand Down Expand Up @@ -153,6 +161,7 @@ public static IndexLifecycleExplainResponse newManagedIndexResponse(
String snapshotName,
String shrinkIndexName,
BytesReference stepInfo,
BytesReference previousStepInfo,
PhaseExecutionInfo phaseExecutionInfo
) {
return new IndexLifecycleExplainResponse(
Expand All @@ -174,6 +183,7 @@ public static IndexLifecycleExplainResponse newManagedIndexResponse(
snapshotName,
shrinkIndexName,
stepInfo,
previousStepInfo,
phaseExecutionInfo
);
}
Expand All @@ -198,6 +208,7 @@ public static IndexLifecycleExplainResponse newUnmanagedIndexResponse(String ind
null,
null,
null,
null,
null
);
}
Expand All @@ -221,6 +232,7 @@ private IndexLifecycleExplainResponse(
String snapshotName,
String shrinkIndexName,
BytesReference stepInfo,
BytesReference previousStepInfo,
PhaseExecutionInfo phaseExecutionInfo
) {
if (managedByILM) {
Expand Down Expand Up @@ -262,6 +274,7 @@ private IndexLifecycleExplainResponse(
|| actionTime != null
|| stepTime != null
|| stepInfo != null
|| previousStepInfo != null
|| phaseExecutionInfo != null) {
throw new IllegalArgumentException(
"Unmanaged index response must only contain fields: [" + MANAGED_BY_ILM_FIELD + ", " + INDEX_FIELD + "]"
Expand All @@ -283,6 +296,7 @@ private IndexLifecycleExplainResponse(
this.isAutoRetryableError = isAutoRetryableError;
this.failedStepRetryCount = failedStepRetryCount;
this.stepInfo = stepInfo;
this.previousStepInfo = previousStepInfo;
this.phaseExecutionInfo = phaseExecutionInfo;
this.repositoryName = repositoryName;
this.snapshotName = snapshotName;
Expand Down Expand Up @@ -314,6 +328,11 @@ public IndexLifecycleExplainResponse(StreamInput in) throws IOException {
} else {
indexCreationDate = null;
}
if (in.getTransportVersion().onOrAfter(TransportVersions.RETAIN_ILM_STEP_INFO)) {
previousStepInfo = in.readOptionalBytesReference();
} else {
previousStepInfo = null;
}
} else {
policyName = null;
lifecycleDate = null;
Expand All @@ -327,6 +346,7 @@ public IndexLifecycleExplainResponse(StreamInput in) throws IOException {
actionTime = null;
stepTime = null;
stepInfo = null;
previousStepInfo = null;
phaseExecutionInfo = null;
repositoryName = null;
snapshotName = null;
Expand Down Expand Up @@ -359,6 +379,9 @@ public void writeTo(StreamOutput out) throws IOException {
if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_1_0)) {
out.writeOptionalLong(indexCreationDate);
}
if (out.getTransportVersion().onOrAfter(TransportVersions.RETAIN_ILM_STEP_INFO)) {
out.writeOptionalBytesReference(previousStepInfo);
}
}
}

Expand Down Expand Up @@ -422,6 +445,10 @@ public BytesReference getStepInfo() {
return stepInfo;
}

/**
 * Returns the raw {@code previous_step_info} content held by this response.
 *
 * @return the previous step info bytes, or {@code null} when none is present
 *         (for example when the response was read from a stream older than
 *         {@code TransportVersions.RETAIN_ILM_STEP_INFO})
 */
public BytesReference getPreviousStepInfo() {
return previousStepInfo;
}

public PhaseExecutionInfo getPhaseExecutionInfo() {
return phaseExecutionInfo;
}
Expand Down Expand Up @@ -515,6 +542,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
if (stepInfo != null && stepInfo.length() > 0) {
builder.rawField(STEP_INFO_FIELD.getPreferredName(), stepInfo.streamInput(), XContentType.JSON);
}
if (previousStepInfo != null && previousStepInfo.length() > 0) {
builder.rawField(PREVIOUS_STEP_INFO_FIELD.getPreferredName(), previousStepInfo.streamInput(), XContentType.JSON);
}
if (phaseExecutionInfo != null) {
builder.field(PHASE_EXECUTION_INFO.getPreferredName(), phaseExecutionInfo);
}
Expand Down Expand Up @@ -544,6 +574,7 @@ public int hashCode() {
snapshotName,
shrinkIndexName,
stepInfo,
previousStepInfo,
phaseExecutionInfo
);
}
Expand Down Expand Up @@ -575,6 +606,7 @@ public boolean equals(Object obj) {
&& Objects.equals(snapshotName, other.snapshotName)
&& Objects.equals(shrinkIndexName, other.shrinkIndexName)
&& Objects.equals(stepInfo, other.stepInfo)
&& Objects.equals(previousStepInfo, other.previousStepInfo)
&& Objects.equals(phaseExecutionInfo, other.phaseExecutionInfo);
}

Expand Down
Loading

0 comments on commit c6e3397

Please sign in to comment.