From a9c23104c20e06566a4e5c7d577f36c7076113c2 Mon Sep 17 00:00:00 2001 From: Luke Whiting Date: Mon, 30 Sep 2024 11:44:46 +0100 Subject: [PATCH] #101193 Preserve Step Info Across ILM Auto Retries (#113187) * Add new Previous Step Info field to LifecycleExecutionState * Add new field to IndexLifecycleExplainResponse * Add new field to TransportExplainLifecycleAction * Add logic to IndexLifecycleTransition to keep previous step info * Switch tests to use Java standard Clock class for any time based testing, this is the recommended method * Fix tests for new field Also refactor tests to newer style * Add test to ensure step info is preserved Across auto retries * Add docs for new field * Changelog Entry * Update docs/changelog/113187.yaml * Revert "Switch tests to use Java standard Clock class" This reverts commit 241074c735fc46d6cf9d7a0eb25037e3d0f87785. * PR Changes * PR Changes - Improve docs wording Co-authored-by: Mary Gouseti * Integration test for new ILM explain field * Use ROOT locale instead of default toLowerCase * PR Changes - Switch to block strings * Remove forbidden API usage --------- Co-authored-by: Mary Gouseti --- docs/changelog/113187.yaml | 5 + docs/reference/ilm/apis/explain.asciidoc | 7 ++ .../org/elasticsearch/TransportVersions.java | 1 + .../metadata/LifecycleExecutionState.java | 17 ++++ .../ilm/IndexLifecycleExplainResponse.java | 32 ++++++ .../IndexLifecycleExplainResponseTests.java | 91 +++++++++-------- .../ilm/LifecycleExecutionStateTests.java | 97 ++++++------------- .../xpack/ilm/ExplainLifecycleIT.java | 60 ++++++++++++ .../xpack/ilm/IndexLifecycleTransition.java | 2 + .../TransportExplainLifecycleAction.java | 6 ++ .../ilm/IndexLifecycleTransitionTests.java | 5 +- 11 files changed, 210 insertions(+), 113 deletions(-) create mode 100644 docs/changelog/113187.yaml diff --git a/docs/changelog/113187.yaml b/docs/changelog/113187.yaml new file mode 100644 index 0000000000000..397179c4bc3bb --- /dev/null +++ b/docs/changelog/113187.yaml @@ 
-0,0 +1,5 @@ +pr: 113187 +summary: Preserve Step Info Across ILM Auto Retries +area: ILM+SLM +type: enhancement +issues: [] diff --git a/docs/reference/ilm/apis/explain.asciidoc b/docs/reference/ilm/apis/explain.asciidoc index a1ddde8c9f2d9..31c6ae9e82ec7 100644 --- a/docs/reference/ilm/apis/explain.asciidoc +++ b/docs/reference/ilm/apis/explain.asciidoc @@ -303,6 +303,12 @@ the case. "index_uuid": "H7lF9n36Rzqa-KfKcnGQMg", "index": "test-000057" }, + "previous_step_info": { <5> + "type": "cluster_block_exception", + "reason": "index [test-000057/H7lF9n36Rzqa-KfKcnGQMg] blocked by: [FORBIDDEN/5/index read-only (api)", + "index_uuid": "H7lF9n36Rzqa-KfKcnGQMg", + "index": "test-000057" + }, "phase_execution": { "policy": "my_lifecycle3", "phase_definition": { @@ -329,3 +335,4 @@ is true, {ilm-init} will retry the failed step automatically. <3> Shows the number of attempted automatic retries to execute the failed step. <4> What went wrong +<5> Contains a copy of the `step_info` field (when it exists) of the last attempted or executed step for diagnostic purposes, since the `step_info` is overwritten during each new attempt. diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index e05aedb81e28d..0e55ce1ee17f0 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -226,6 +226,7 @@ static TransportVersion def(int id) { public static final TransportVersion SEMANTIC_TEXT_SEARCH_INFERENCE_ID = def(8_750_00_0); public static final TransportVersion ML_INFERENCE_CHUNKING_SETTINGS = def(8_751_00_0); public static final TransportVersion SEMANTIC_QUERY_INNER_HITS = def(8_752_00_0); + public static final TransportVersion RETAIN_ILM_STEP_INFO = def(8_753_00_0); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/LifecycleExecutionState.java b/server/src/main/java/org/elasticsearch/cluster/metadata/LifecycleExecutionState.java index b88b5086980d1..abc0983ccb2d4 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/LifecycleExecutionState.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/LifecycleExecutionState.java @@ -28,6 +28,7 @@ public record LifecycleExecutionState( Boolean isAutoRetryableError, Integer failedStepRetryCount, String stepInfo, + String previousStepInfo, String phaseDefinition, Long lifecycleDate, Long phaseTime, @@ -53,6 +54,7 @@ public record LifecycleExecutionState( private static final String IS_AUTO_RETRYABLE_ERROR = "is_auto_retryable_error"; private static final String FAILED_STEP_RETRY_COUNT = "failed_step_retry_count"; private static final String STEP_INFO = "step_info"; + private static final String PREVIOUS_STEP_INFO = "previous_step_info"; private static final String PHASE_DEFINITION = "phase_definition"; private static final String SNAPSHOT_NAME = "snapshot_name"; private static final String SNAPSHOT_REPOSITORY = "snapshot_repository"; @@ -74,6 +76,7 @@ public static Builder builder(LifecycleExecutionState state) { .setIsAutoRetryableError(state.isAutoRetryableError) .setFailedStepRetryCount(state.failedStepRetryCount) .setStepInfo(state.stepInfo) + .setPreviousStepInfo(state.previousStepInfo) .setPhaseDefinition(state.phaseDefinition) .setIndexCreationDate(state.lifecycleDate) .setPhaseTime(state.phaseTime) @@ -116,6 +119,10 @@ public static LifecycleExecutionState fromCustomMetadata(Map cus if (stepInfo != null) { builder.setStepInfo(stepInfo); } + String previousStepInfo = customData.get(PREVIOUS_STEP_INFO); + if (previousStepInfo != null) { + builder.setPreviousStepInfo(previousStepInfo); + } String phaseDefinition = customData.get(PHASE_DEFINITION); if (phaseDefinition != null) { builder.setPhaseDefinition(phaseDefinition); @@ 
-224,6 +231,9 @@ public Map asMap() { if (stepInfo != null) { result.put(STEP_INFO, stepInfo); } + if (previousStepInfo != null) { + result.put(PREVIOUS_STEP_INFO, previousStepInfo); + } if (lifecycleDate != null) { result.put(INDEX_CREATION_DATE, String.valueOf(lifecycleDate)); } @@ -263,6 +273,7 @@ public static class Builder { private String step; private String failedStep; private String stepInfo; + private String previousStepInfo; private String phaseDefinition; private Long indexCreationDate; private Long phaseTime; @@ -301,6 +312,11 @@ public Builder setStepInfo(String stepInfo) { return this; } + public Builder setPreviousStepInfo(String previousStepInfo) { + this.previousStepInfo = previousStepInfo; + return this; + } + public Builder setPhaseDefinition(String phaseDefinition) { this.phaseDefinition = phaseDefinition; return this; @@ -370,6 +386,7 @@ public LifecycleExecutionState build() { isAutoRetryableError, failedStepRetryCount, stepInfo, + previousStepInfo, phaseDefinition, indexCreationDate, phaseTime, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponse.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponse.java index c3c9fa88a1a96..9c679cd04c94d 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponse.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponse.java @@ -48,6 +48,7 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl private static final ParseField STEP_TIME_MILLIS_FIELD = new ParseField("step_time_millis"); private static final ParseField STEP_TIME_FIELD = new ParseField("step_time"); private static final ParseField STEP_INFO_FIELD = new ParseField("step_info"); + private static final ParseField PREVIOUS_STEP_INFO_FIELD = new ParseField("previous_step_info"); private static final ParseField 
PHASE_EXECUTION_INFO = new ParseField("phase_execution"); private static final ParseField AGE_FIELD = new ParseField("age"); private static final ParseField TIME_SINCE_INDEX_CREATION_FIELD = new ParseField("time_since_index_creation"); @@ -76,6 +77,7 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl (String) a[17], (String) a[18], (BytesReference) a[11], + (BytesReference) a[21], (PhaseExecutionInfo) a[12] // a[13] == "age" // a[20] == "time_since_index_creation" @@ -111,6 +113,11 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), SHRINK_INDEX_NAME); PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), INDEX_CREATION_DATE_MILLIS_FIELD); PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), TIME_SINCE_INDEX_CREATION_FIELD); + PARSER.declareObject(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> { + XContentBuilder builder = JsonXContent.contentBuilder(); + builder.copyCurrentStructure(p); + return BytesReference.bytes(builder); + }, PREVIOUS_STEP_INFO_FIELD); } private final String index; @@ -126,6 +133,7 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl private final Long stepTime; private final boolean managedByILM; private final BytesReference stepInfo; + private final BytesReference previousStepInfo; private final PhaseExecutionInfo phaseExecutionInfo; private final Boolean isAutoRetryableError; private final Integer failedStepRetryCount; @@ -153,6 +161,7 @@ public static IndexLifecycleExplainResponse newManagedIndexResponse( String snapshotName, String shrinkIndexName, BytesReference stepInfo, + BytesReference previousStepInfo, PhaseExecutionInfo phaseExecutionInfo ) { return new IndexLifecycleExplainResponse( @@ -174,6 +183,7 @@ public static IndexLifecycleExplainResponse newManagedIndexResponse( snapshotName, shrinkIndexName, stepInfo, + 
previousStepInfo, phaseExecutionInfo ); } @@ -198,6 +208,7 @@ public static IndexLifecycleExplainResponse newUnmanagedIndexResponse(String ind null, null, null, + null, null ); } @@ -221,6 +232,7 @@ private IndexLifecycleExplainResponse( String snapshotName, String shrinkIndexName, BytesReference stepInfo, + BytesReference previousStepInfo, PhaseExecutionInfo phaseExecutionInfo ) { if (managedByILM) { @@ -262,6 +274,7 @@ private IndexLifecycleExplainResponse( || actionTime != null || stepTime != null || stepInfo != null + || previousStepInfo != null || phaseExecutionInfo != null) { throw new IllegalArgumentException( "Unmanaged index response must only contain fields: [" + MANAGED_BY_ILM_FIELD + ", " + INDEX_FIELD + "]" @@ -283,6 +296,7 @@ private IndexLifecycleExplainResponse( this.isAutoRetryableError = isAutoRetryableError; this.failedStepRetryCount = failedStepRetryCount; this.stepInfo = stepInfo; + this.previousStepInfo = previousStepInfo; this.phaseExecutionInfo = phaseExecutionInfo; this.repositoryName = repositoryName; this.snapshotName = snapshotName; @@ -314,6 +328,11 @@ public IndexLifecycleExplainResponse(StreamInput in) throws IOException { } else { indexCreationDate = null; } + if (in.getTransportVersion().onOrAfter(TransportVersions.RETAIN_ILM_STEP_INFO)) { + previousStepInfo = in.readOptionalBytesReference(); + } else { + previousStepInfo = null; + } } else { policyName = null; lifecycleDate = null; @@ -327,6 +346,7 @@ public IndexLifecycleExplainResponse(StreamInput in) throws IOException { actionTime = null; stepTime = null; stepInfo = null; + previousStepInfo = null; phaseExecutionInfo = null; repositoryName = null; snapshotName = null; @@ -359,6 +379,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_1_0)) { out.writeOptionalLong(indexCreationDate); } + if (out.getTransportVersion().onOrAfter(TransportVersions.RETAIN_ILM_STEP_INFO)) { + 
out.writeOptionalBytesReference(previousStepInfo); + } } } @@ -422,6 +445,10 @@ public BytesReference getStepInfo() { return stepInfo; } + public BytesReference getPreviousStepInfo() { + return previousStepInfo; + } + public PhaseExecutionInfo getPhaseExecutionInfo() { return phaseExecutionInfo; } @@ -515,6 +542,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (stepInfo != null && stepInfo.length() > 0) { builder.rawField(STEP_INFO_FIELD.getPreferredName(), stepInfo.streamInput(), XContentType.JSON); } + if (previousStepInfo != null && previousStepInfo.length() > 0) { + builder.rawField(PREVIOUS_STEP_INFO_FIELD.getPreferredName(), previousStepInfo.streamInput(), XContentType.JSON); + } if (phaseExecutionInfo != null) { builder.field(PHASE_EXECUTION_INFO.getPreferredName(), phaseExecutionInfo); } @@ -544,6 +574,7 @@ public int hashCode() { snapshotName, shrinkIndexName, stepInfo, + previousStepInfo, phaseExecutionInfo ); } @@ -575,6 +606,7 @@ public boolean equals(Object obj) { && Objects.equals(snapshotName, other.snapshotName) && Objects.equals(shrinkIndexName, other.shrinkIndexName) && Objects.equals(stepInfo, other.stepInfo) + && Objects.equals(previousStepInfo, other.previousStepInfo) && Objects.equals(phaseExecutionInfo, other.phaseExecutionInfo); } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponseTests.java index a12b4ff75ee39..ea3c9cc5926ab 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponseTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponseTests.java @@ -73,6 +73,7 @@ private static IndexLifecycleExplainResponse randomManagedIndexExplainResponse() stepNull ? null : randomAlphaOfLength(10), stepNull ? 
null : randomAlphaOfLength(10), randomBoolean() ? null : new BytesArray(new RandomStepInfo(() -> randomAlphaOfLength(10)).toString()), + randomBoolean() ? null : new BytesArray(new RandomStepInfo(() -> randomAlphaOfLength(10)).toString()), randomBoolean() ? null : PhaseExecutionInfoTests.randomPhaseExecutionInfo("") ); } @@ -99,6 +100,7 @@ public void testInvalidStepDetails() { randomBoolean() ? null : randomAlphaOfLength(10), randomBoolean() ? null : randomAlphaOfLength(10), randomBoolean() ? null : new BytesArray(new RandomStepInfo(() -> randomAlphaOfLength(10)).toString()), + randomBoolean() ? null : new BytesArray(new RandomStepInfo(() -> randomAlphaOfLength(10)).toString()), randomBoolean() ? null : PhaseExecutionInfoTests.randomPhaseExecutionInfo("") ) ); @@ -132,6 +134,7 @@ public void testIndexAges() { null, null, null, + null, null ); assertThat(managedExplainResponse.getLifecycleDate(), is(notNullValue())); @@ -191,42 +194,32 @@ protected IndexLifecycleExplainResponse mutateInstance(IndexLifecycleExplainResp String shrinkIndexName = instance.getShrinkIndexName(); boolean managed = instance.managedByILM(); BytesReference stepInfo = instance.getStepInfo(); + BytesReference previousStepInfo = instance.getPreviousStepInfo(); PhaseExecutionInfo phaseExecutionInfo = instance.getPhaseExecutionInfo(); + if (managed) { - switch (between(0, 14)) { - case 0: - index = index + randomAlphaOfLengthBetween(1, 5); - break; - case 1: - policy = policy + randomAlphaOfLengthBetween(1, 5); - break; - case 2: + switch (between(0, 15)) { + case 0 -> index += randomAlphaOfLengthBetween(1, 5); + case 1 -> policy += randomAlphaOfLengthBetween(1, 5); + case 2 -> { phase = randomAlphaOfLengthBetween(1, 5); action = randomAlphaOfLengthBetween(1, 5); step = randomAlphaOfLengthBetween(1, 5); - break; - case 3: - phaseTime = randomValueOtherThan(phaseTime, () -> randomLongBetween(0, 100000)); - break; - case 4: - actionTime = randomValueOtherThan(actionTime, () -> randomLongBetween(0, 
100000)); - break; - case 5: - stepTime = randomValueOtherThan(stepTime, () -> randomLongBetween(0, 100000)); - break; - case 6: + } + case 3 -> phaseTime = randomValueOtherThan(phaseTime, () -> randomLongBetween(0, 100000)); + case 4 -> actionTime = randomValueOtherThan(actionTime, () -> randomLongBetween(0, 100000)); + case 5 -> stepTime = randomValueOtherThan(stepTime, () -> randomLongBetween(0, 100000)); + case 6 -> { if (Strings.hasLength(failedStep) == false) { failedStep = randomAlphaOfLength(10); } else if (randomBoolean()) { - failedStep = failedStep + randomAlphaOfLengthBetween(1, 5); + failedStep += randomAlphaOfLengthBetween(1, 5); } else { failedStep = null; } - break; - case 7: - policyTime = randomValueOtherThan(policyTime, () -> randomLongBetween(0, 100000)); - break; - case 8: + } + case 7 -> policyTime = randomValueOtherThan(policyTime, () -> randomLongBetween(0, 100000)); + case 8 -> { if (Strings.hasLength(stepInfo) == false) { stepInfo = new BytesArray(randomByteArrayOfLength(100)); } else if (randomBoolean()) { @@ -237,31 +230,36 @@ protected IndexLifecycleExplainResponse mutateInstance(IndexLifecycleExplainResp } else { stepInfo = null; } - break; - case 9: - phaseExecutionInfo = randomValueOtherThan( - phaseExecutionInfo, - () -> PhaseExecutionInfoTests.randomPhaseExecutionInfo("") - ); - break; - case 10: + } + case 9 -> { + if (Strings.hasLength(previousStepInfo) == false) { + previousStepInfo = new BytesArray(randomByteArrayOfLength(100)); + } else if (randomBoolean()) { + previousStepInfo = randomValueOtherThan( + previousStepInfo, + () -> new BytesArray(new RandomStepInfo(() -> randomAlphaOfLength(10)).toString()) + ); + } else { + previousStepInfo = null; + } + } + case 10 -> phaseExecutionInfo = randomValueOtherThan( + phaseExecutionInfo, + () -> PhaseExecutionInfoTests.randomPhaseExecutionInfo("") + ); + case 11 -> { return IndexLifecycleExplainResponse.newUnmanagedIndexResponse(index); - case 11: + } + case 12 -> { 
isAutoRetryableError = true; failedStepRetryCount = randomValueOtherThan(failedStepRetryCount, () -> randomInt(10)); - break; - case 12: - repositoryName = randomValueOtherThan(repositoryName, () -> randomAlphaOfLengthBetween(5, 10)); - break; - case 13: - snapshotName = randomValueOtherThan(snapshotName, () -> randomAlphaOfLengthBetween(5, 10)); - break; - case 14: - shrinkIndexName = randomValueOtherThan(shrinkIndexName, () -> randomAlphaOfLengthBetween(5, 10)); - break; - default: - throw new AssertionError("Illegal randomisation branch"); + } + case 13 -> repositoryName = randomValueOtherThan(repositoryName, () -> randomAlphaOfLengthBetween(5, 10)); + case 14 -> snapshotName = randomValueOtherThan(snapshotName, () -> randomAlphaOfLengthBetween(5, 10)); + case 15 -> shrinkIndexName = randomValueOtherThan(shrinkIndexName, () -> randomAlphaOfLengthBetween(5, 10)); + default -> throw new AssertionError("Illegal randomisation branch"); } + return IndexLifecycleExplainResponse.newManagedIndexResponse( index, indexCreationDate, @@ -280,6 +278,7 @@ protected IndexLifecycleExplainResponse mutateInstance(IndexLifecycleExplainResp snapshotName, shrinkIndexName, stepInfo, + previousStepInfo, phaseExecutionInfo ); } else { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecycleExecutionStateTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecycleExecutionStateTests.java index 1758c3729e373..dd7e88b14ef5e 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecycleExecutionStateTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecycleExecutionStateTests.java @@ -67,11 +67,7 @@ public void testEmptyValuesAreNotSerialized() { public void testEqualsAndHashcode() { LifecycleExecutionState original = LifecycleExecutionState.fromCustomMetadata(createCustomMetadata()); - EqualsHashCodeTestUtils.checkEqualsAndHashCode( - original, - toCopy -> 
LifecycleExecutionState.builder(toCopy).build(), - LifecycleExecutionStateTests::mutate - ); + EqualsHashCodeTestUtils.checkEqualsAndHashCode(original, toCopy -> LifecycleExecutionState.builder(toCopy).build(), this::mutate); } public void testGetCurrentStepKey() { @@ -133,78 +129,46 @@ public void testGetCurrentStepKey() { assertNull(error6.getMessage()); } - private static LifecycleExecutionState mutate(LifecycleExecutionState toMutate) { + private LifecycleExecutionState mutate(LifecycleExecutionState toMutate) { LifecycleExecutionState.Builder newState = LifecycleExecutionState.builder(toMutate); - switch (randomIntBetween(0, 17)) { - case 0: - newState.setPhase(randomValueOtherThan(toMutate.phase(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 1: - newState.setAction(randomValueOtherThan(toMutate.action(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 2: - newState.setStep(randomValueOtherThan(toMutate.step(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 3: - newState.setPhaseDefinition(randomValueOtherThan(toMutate.phaseDefinition(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 4: - newState.setFailedStep(randomValueOtherThan(toMutate.failedStep(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 5: - newState.setStepInfo(randomValueOtherThan(toMutate.stepInfo(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 6: - newState.setPhaseTime(randomValueOtherThan(toMutate.phaseTime(), ESTestCase::randomLong)); - break; - case 7: - newState.setActionTime(randomValueOtherThan(toMutate.actionTime(), ESTestCase::randomLong)); - break; - case 8: - newState.setStepTime(randomValueOtherThan(toMutate.stepTime(), ESTestCase::randomLong)); - break; - case 9: - newState.setIndexCreationDate(randomValueOtherThan(toMutate.lifecycleDate(), ESTestCase::randomLong)); - break; - case 10: - newState.setShrinkIndexName(randomValueOtherThan(toMutate.shrinkIndexName(), () -> randomAlphaOfLengthBetween(5, 20))); 
- break; - case 11: - newState.setSnapshotRepository( - randomValueOtherThan(toMutate.snapshotRepository(), () -> randomAlphaOfLengthBetween(5, 20)) - ); - break; - case 12: - newState.setSnapshotIndexName(randomValueOtherThan(toMutate.snapshotIndexName(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 13: - newState.setSnapshotName(randomValueOtherThan(toMutate.snapshotName(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 14: - newState.setDownsampleIndexName( - randomValueOtherThan(toMutate.downsampleIndexName(), () -> randomAlphaOfLengthBetween(5, 20)) - ); - break; - case 15: - newState.setIsAutoRetryableError(randomValueOtherThan(toMutate.isAutoRetryableError(), ESTestCase::randomBoolean)); - break; - case 16: - newState.setFailedStepRetryCount(randomValueOtherThan(toMutate.failedStepRetryCount(), ESTestCase::randomInt)); - break; - case 17: - return LifecycleExecutionState.builder().build(); - default: - throw new IllegalStateException("unknown randomization branch"); + switch (randomIntBetween(0, 18)) { + case 0 -> newState.setPhase(randomValueOtherThan(toMutate.phase(), this::randomString)); + case 1 -> newState.setAction(randomValueOtherThan(toMutate.action(), this::randomString)); + case 2 -> newState.setStep(randomValueOtherThan(toMutate.step(), this::randomString)); + case 3 -> newState.setPhaseDefinition(randomValueOtherThan(toMutate.phaseDefinition(), this::randomString)); + case 4 -> newState.setFailedStep(randomValueOtherThan(toMutate.failedStep(), this::randomString)); + case 5 -> newState.setStepInfo(randomValueOtherThan(toMutate.stepInfo(), this::randomString)); + case 6 -> newState.setPreviousStepInfo(randomValueOtherThan(toMutate.previousStepInfo(), this::randomString)); + case 7 -> newState.setPhaseTime(randomValueOtherThan(toMutate.phaseTime(), ESTestCase::randomLong)); + case 8 -> newState.setActionTime(randomValueOtherThan(toMutate.actionTime(), ESTestCase::randomLong)); + case 9 -> 
newState.setStepTime(randomValueOtherThan(toMutate.stepTime(), ESTestCase::randomLong)); + case 10 -> newState.setIndexCreationDate(randomValueOtherThan(toMutate.lifecycleDate(), ESTestCase::randomLong)); + case 11 -> newState.setShrinkIndexName(randomValueOtherThan(toMutate.shrinkIndexName(), this::randomString)); + case 12 -> newState.setSnapshotRepository(randomValueOtherThan(toMutate.snapshotRepository(), this::randomString)); + case 13 -> newState.setSnapshotIndexName(randomValueOtherThan(toMutate.snapshotIndexName(), this::randomString)); + case 14 -> newState.setSnapshotName(randomValueOtherThan(toMutate.snapshotName(), this::randomString)); + case 15 -> newState.setDownsampleIndexName(randomValueOtherThan(toMutate.downsampleIndexName(), this::randomString)); + case 16 -> newState.setIsAutoRetryableError(randomValueOtherThan(toMutate.isAutoRetryableError(), ESTestCase::randomBoolean)); + case 17 -> newState.setFailedStepRetryCount(randomValueOtherThan(toMutate.failedStepRetryCount(), ESTestCase::randomInt)); + case 18 -> { + return LifecycleExecutionState.EMPTY_STATE; + } + default -> throw new IllegalStateException("unknown randomization branch"); } return newState.build(); } + private String randomString() { + return randomAlphaOfLengthBetween(5, 20); + } + static Map createCustomMetadata() { String phase = randomAlphaOfLengthBetween(5, 20); String action = randomAlphaOfLengthBetween(5, 20); String step = randomAlphaOfLengthBetween(5, 20); String failedStep = randomAlphaOfLengthBetween(5, 20); String stepInfo = randomAlphaOfLengthBetween(15, 50); + String previousStepInfo = randomAlphaOfLengthBetween(15, 50); String phaseDefinition = randomAlphaOfLengthBetween(15, 50); String repositoryName = randomAlphaOfLengthBetween(10, 20); String snapshotName = randomAlphaOfLengthBetween(10, 20); @@ -220,6 +184,7 @@ static Map createCustomMetadata() { customMetadata.put("step", step); customMetadata.put("failed_step", failedStep); customMetadata.put("step_info", 
stepInfo); + customMetadata.put("previous_step_info", previousStepInfo); customMetadata.put("phase_definition", phaseDefinition); customMetadata.put("creation_date", String.valueOf(indexCreationDate)); customMetadata.put("phase_time", String.valueOf(phaseTime)); diff --git a/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/ExplainLifecycleIT.java b/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/ExplainLifecycleIT.java index dc8c248bbbad6..ec8f7c230b1d3 100644 --- a/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/ExplainLifecycleIT.java +++ b/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/ExplainLifecycleIT.java @@ -30,6 +30,7 @@ import org.elasticsearch.xpack.core.ilm.ShrinkAction; import org.junit.Before; +import java.util.Formatter; import java.util.HashMap; import java.util.Locale; import java.util.Map; @@ -42,6 +43,7 @@ import static org.elasticsearch.xpack.TimeSeriesRestDriver.explainIndex; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasKey; import static org.hamcrest.Matchers.is; @@ -257,6 +259,64 @@ public void testExplainOrder() throws Exception { ); } + public void testStepInfoPreservedOnAutoRetry() throws Exception { + String policyName = "policy-" + randomAlphaOfLength(5).toLowerCase(Locale.ROOT); + + Request createPolice = new Request("PUT", "_ilm/policy/" + policyName); + createPolice.setJsonEntity(""" + { + "policy": { + "phases": { + "hot": { + "actions": { + "rollover": { + "max_docs": 1 + } + } + } + } + } + } + """); + assertOK(client().performRequest(createPolice)); + + String aliasName = "step-info-test"; + String indexName = aliasName + "-" + randomAlphaOfLength(5).toLowerCase(Locale.ROOT); + + Request templateRequest 
= new Request("PUT", "_index_template/template_" + policyName); + + String templateBodyTemplate = """ + { + "index_patterns": ["%s-*"], + "template": { + "settings": { + "index.lifecycle.name": "%s", + "index.lifecycle.rollover_alias": "%s" + } + } + } + """; + Formatter formatter = new Formatter(Locale.ROOT); + templateRequest.setJsonEntity(formatter.format(templateBodyTemplate, aliasName, policyName, aliasName).toString()); + + assertOK(client().performRequest(templateRequest)); + + Request indexRequest = new Request("POST", "/" + indexName + "/_doc/1"); + indexRequest.setJsonEntity("{\"test\":\"value\"}"); + assertOK(client().performRequest(indexRequest)); + + assertBusy(() -> { + Map explainIndex = explainIndex(client(), indexName); + assertThat(explainIndex.get("failed_step_retry_count"), notNullValue()); + assertThat(explainIndex.get("previous_step_info"), notNullValue()); + assertThat((int) explainIndex.get("failed_step_retry_count"), greaterThan(0)); + assertThat( + explainIndex.get("previous_step_info").toString(), + containsString("rollover_alias [" + aliasName + "] does not point to index [" + indexName + "]") + ); + }); + } + private void assertUnmanagedIndex(Map explainIndexMap) { assertThat(explainIndexMap.get("managed"), is(false)); assertThat(explainIndexMap.get("time_since_index_creation"), is(nullValue())); diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java index a87f2d4d2151e..b3f29535020bf 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java @@ -289,6 +289,7 @@ private static LifecycleExecutionState updateExecutionStateToStep( // clear any step info or error-related settings from the current step updatedState.setFailedStep(null); + 
updatedState.setPreviousStepInfo(existingState.stepInfo()); updatedState.setStepInfo(null); updatedState.setIsAutoRetryableError(null); updatedState.setFailedStepRetryCount(null); @@ -389,6 +390,7 @@ public static LifecycleExecutionState moveStateToNextActionAndUpdateCachedPhase( updatedState.setStep(nextStep.name()); updatedState.setStepTime(nowAsMillis); updatedState.setFailedStep(null); + updatedState.setPreviousStepInfo(existingState.stepInfo()); updatedState.setStepInfo(null); updatedState.setIsAutoRetryableError(null); updatedState.setFailedStepRetryCount(null); diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/action/TransportExplainLifecycleAction.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/action/TransportExplainLifecycleAction.java index 383dc6622f280..c50ea682ca9a2 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/action/TransportExplainLifecycleAction.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/action/TransportExplainLifecycleAction.java @@ -127,10 +127,15 @@ static IndexLifecycleExplainResponse getIndexLifecycleExplainResponse( String policyName = indexMetadata.getLifecyclePolicyName(); String currentPhase = lifecycleState.phase(); String stepInfo = lifecycleState.stepInfo(); + String previousStepInfo = lifecycleState.previousStepInfo(); BytesArray stepInfoBytes = null; if (stepInfo != null) { stepInfoBytes = new BytesArray(stepInfo); } + BytesArray previousStepInfoBytes = null; + if (previousStepInfo != null) { + previousStepInfoBytes = new BytesArray(previousStepInfo); + } Long indexCreationDate = indexMetadata.getCreationDate(); // parse existing phase steps from the phase definition in the index settings @@ -182,6 +187,7 @@ static IndexLifecycleExplainResponse getIndexLifecycleExplainResponse( lifecycleState.snapshotName(), lifecycleState.shrinkIndexName(), stepInfoBytes, + previousStepInfoBytes, phaseExecutionInfo ); } else { diff --git 
a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransitionTests.java b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransitionTests.java index 9449e0c0574dc..37d586240eb7a 100644 --- a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransitionTests.java +++ b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransitionTests.java @@ -896,7 +896,7 @@ public void testMoveClusterStateToFailedNotOnError() { ); } - public void testMoveClusterStateToPreviouslyFailedStepAsAutomaticRetry() { + public void testMoveClusterStateToPreviouslyFailedStepAsAutomaticRetryAndSetsPreviousStepInfo() { String indexName = "my_index"; String policyName = "my_policy"; long now = randomNonNegativeLong(); @@ -921,6 +921,8 @@ public void testMoveClusterStateToPreviouslyFailedStepAsAutomaticRetry() { lifecycleState.setStep(errorStepKey.name()); lifecycleState.setStepTime(now); lifecycleState.setFailedStep(failedStepKey.name()); + String initialStepInfo = randomAlphaOfLengthBetween(10, 50); + lifecycleState.setStepInfo(initialStepInfo); ClusterState clusterState = buildClusterState( indexName, indexSettingsBuilder, @@ -938,6 +940,7 @@ public void testMoveClusterStateToPreviouslyFailedStepAsAutomaticRetry() { IndexLifecycleRunnerTests.assertClusterStateOnNextStep(clusterState, index, errorStepKey, failedStepKey, nextClusterState, now); LifecycleExecutionState executionState = nextClusterState.metadata().index(indexName).getLifecycleExecutionState(); assertThat(executionState.failedStepRetryCount(), is(1)); + assertThat(executionState.previousStepInfo(), is(initialStepInfo)); } public void testMoveToFailedStepDoesntRefreshCachedPhaseWhenUnsafe() {