-
Notifications
You must be signed in to change notification settings - Fork 24.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Tag redacted document in ingest pipeline (#113552)
Adds a new option, trace_redact, to the redact processor to indicate that a document has been redacted in the ingest pipeline. If a document is processed by a redact processor with trace_redact enabled AND any field is redacted, the ingest metadata _ingest._redact._is_redacted = true will be set. Closes #94633
- Loading branch information
Showing
6 changed files
with
176 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
pr: 113552 | ||
summary: Tag redacted document in ingest metadata | ||
area: Ingest Node | ||
type: enhancement | ||
issues: [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -259,7 +259,8 @@ public void testLicenseChecks() throws Exception { | |
">", | ||
MatcherWatchdog.noop(), | ||
notAllowed, | ||
false // set skip_if_unlicensed to false, we do not want to skip, we do want to fail | ||
false, // set skip_if_unlicensed to false, we do not want to skip, we do want to fail | ||
false | ||
); | ||
assertThat(processor.getSkipIfUnlicensed(), equalTo(false)); | ||
var ingestDoc = createIngestDoc(Map.of("not_the_field", "fieldValue")); | ||
|
@@ -314,6 +315,118 @@ public void testLicenseChanges() throws Exception { | |
} | ||
} | ||
|
||
@SuppressWarnings("unchecked") | ||
public void testTraceRedact() throws Exception { | ||
var config = new HashMap<String, Object>(); | ||
config.put("field", "to_redact"); | ||
config.put("patterns", List.of("%{EMAILADDRESS:REDACTED}")); | ||
config.put("trace_redact", true); | ||
{ | ||
var processor = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create( | ||
null, | ||
"t", | ||
"d", | ||
new HashMap<>(config) | ||
); | ||
var message = "this should not be redacted"; | ||
var ingestDoc = createIngestDoc(Map.of("to_redact", message)); | ||
var redactedDoc = processor.execute(ingestDoc); | ||
|
||
assertEquals(message, redactedDoc.getFieldValue("to_redact", String.class)); | ||
assertNull(redactedDoc.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class, true)); | ||
} | ||
{ | ||
var processor = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create( | ||
null, | ||
"t", | ||
"d", | ||
new HashMap<>(config) | ||
); | ||
var ingestDoc = createIngestDoc(Map.of("to_redact", "[email protected] will be redacted")); | ||
var redactedDoc = processor.execute(ingestDoc); | ||
|
||
assertEquals("<REDACTED> will be redacted", redactedDoc.getFieldValue("to_redact", String.class)); | ||
// validate ingest metadata path correctly resolved | ||
assertTrue(redactedDoc.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class)); | ||
// validate ingest metadata structure correct | ||
var ingestMeta = redactedDoc.getIngestMetadata(); | ||
assertTrue(ingestMeta.containsKey(RedactProcessor.REDACT_KEY)); | ||
var redactMetadata = (HashMap<String, Object>) ingestMeta.get(RedactProcessor.REDACT_KEY); | ||
assertTrue(redactMetadata.containsKey(RedactProcessor.IS_REDACTED_KEY)); | ||
assertTrue((Boolean) redactMetadata.get(RedactProcessor.IS_REDACTED_KEY)); | ||
} | ||
{ | ||
var configNoTrace = new HashMap<String, Object>(); | ||
configNoTrace.put("field", "to_redact"); | ||
configNoTrace.put("patterns", List.of("%{EMAILADDRESS:REDACTED}")); | ||
|
||
var processor = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create(null, "t", "d", configNoTrace); | ||
var ingestDoc = createIngestDoc(Map.of("to_redact", "[email protected] will be redacted")); | ||
var redactedDoc = processor.execute(ingestDoc); | ||
|
||
assertEquals("<REDACTED> will be redacted", redactedDoc.getFieldValue("to_redact", String.class)); | ||
assertNull(redactedDoc.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class, true)); | ||
} | ||
} | ||
|
||
public void testTraceRedactMultipleProcessors() throws Exception { | ||
var configRedact = new HashMap<String, Object>(); | ||
configRedact.put("field", "to_redact"); | ||
configRedact.put("patterns", List.of("%{EMAILADDRESS:REDACTED}")); | ||
configRedact.put("trace_redact", true); | ||
|
||
var configNoRedact = new HashMap<String, Object>(); | ||
configNoRedact.put("field", "to_redact"); | ||
configNoRedact.put("patterns", List.of("%{IP:REDACTED}")); // not in the doc | ||
configNoRedact.put("trace_redact", true); | ||
|
||
// first processor does not redact doc, second one does | ||
{ | ||
var processorRedact = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create( | ||
null, | ||
"t1", | ||
"d", | ||
new HashMap<>(configRedact) | ||
); | ||
var processorNoRedact = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create( | ||
null, | ||
"t2", | ||
"d", | ||
new HashMap<>(configNoRedact) | ||
); | ||
var ingestDocWithEmail = createIngestDoc(Map.of("to_redact", "[email protected] will be redacted")); | ||
|
||
var docNotRedacted = processorNoRedact.execute(ingestDocWithEmail); | ||
assertNull(docNotRedacted.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class, true)); | ||
|
||
var docRedacted = processorRedact.execute(docNotRedacted); | ||
assertTrue(docRedacted.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class)); | ||
} | ||
// first processor redacts doc, second one does not | ||
{ | ||
var processorRedact = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create( | ||
null, | ||
"t1", | ||
"d", | ||
new HashMap<>(configRedact) | ||
); | ||
var processorNoRedact = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create( | ||
null, | ||
"t2", | ||
"d", | ||
new HashMap<>(configNoRedact) | ||
); | ||
var ingestDocWithEmail = createIngestDoc(Map.of("to_redact", "[email protected] will be redacted")); | ||
|
||
var docRedacted = processorRedact.execute(ingestDocWithEmail); | ||
assertTrue(docRedacted.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class)); | ||
|
||
// validate does not override already redacted doc metadata | ||
var docRedactedAlready = processorNoRedact.execute(docRedacted); | ||
assertTrue(docRedactedAlready.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class)); | ||
} | ||
} | ||
|
||
public void testMergeLongestRegion() { | ||
var r = List.of( | ||
new RedactProcessor.RegionTrackingMatchExtractor.Replacement(10, 20, "first"), | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,7 @@ | |
index: test | ||
id: "1" | ||
pipeline: "pipeline-using-a-redact-processor" | ||
body: {to_redact: "0.0.0.1 is my secret IP to redact"} | ||
body: { to_redact: "0.0.0.1 is my secret IP to redact" } | ||
|
||
- do: | ||
get: | ||
|
@@ -96,3 +96,25 @@ | |
} | ||
- length: { docs: 1 } | ||
- match: { docs.0.doc._source.to_redact: "==*EMAIL*== will be redacted" } | ||
--- | ||
"Test redact with trace_redact": | ||
- do: | ||
ingest.simulate: | ||
body: > | ||
{ | ||
"pipeline": { | ||
"processors": [ | ||
{ | ||
"redact": { | ||
"field": "to_redact", | ||
"patterns": ["%{EMAILADDRESS:EMAIL}", "%{IP:IP_ADDRESS}"], | ||
"trace_redact": true | ||
} | ||
} | ||
] | ||
}, | ||
"docs": [{"_source": {"to_redact": "this-email@address.com will be redacted"}}] | ||
} | ||
- length: { docs: 1 } | ||
- match: { docs.0.doc._source.to_redact: "<EMAIL> will be redacted" } | ||
- match: { docs.0.doc._ingest._redact._is_redacted: true } |