Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tag redacted document in ingest pipeline (#113552) #113750

Draft
wants to merge 1 commit into
base: 8.x
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/113552.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 113552
summary: Tag redacted document in ingest metadata
area: Ingest Node
type: enhancement
issues: []
1 change: 1 addition & 0 deletions docs/reference/ingest/processors/redact.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ patterns. Legacy Grok patterns are not supported.
| `ignore_missing` | no | `true` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
include::common-options.asciidoc[]
| `skip_if_unlicensed` | no | `false` | If `true` and the current license does not support running redact processors, then the processor quietly exits without modifying the document
| `trace_redact` | no | `false` | If `true` then ingest metadata `_ingest._redact._is_redacted` is set to `true` if the document has been redacted
|======

In this example the predefined `IP` Grok pattern is used to match
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ public class RedactProcessor extends AbstractProcessor {
private static final String DEFAULT_REDACTED_START = "<";
private static final String DEFAULT_REDACTED_END = ">";

protected static final String REDACT_KEY = "_redact";
protected static final String IS_REDACTED_KEY = "_is_redacted";
protected static final String METADATA_PATH_REDACT = IngestDocument.INGEST_KEY + "." + REDACT_KEY;
// indicates if document has been redacted, path: _ingest._redact._is_redacted
protected static final String METADATA_PATH_REDACT_IS_REDACTED = METADATA_PATH_REDACT + "." + IS_REDACTED_KEY;

private final String redactField;
private final List<Grok> groks;
private final boolean ignoreMissing;
Expand All @@ -65,6 +71,8 @@ public class RedactProcessor extends AbstractProcessor {
private final XPackLicenseState licenseState;
private final boolean skipIfUnlicensed;

private final boolean traceRedact;

RedactProcessor(
String tag,
String description,
Expand All @@ -76,7 +84,8 @@ public class RedactProcessor extends AbstractProcessor {
String redactedEndToken,
MatcherWatchdog matcherWatchdog,
XPackLicenseState licenseState,
boolean skipIfUnlicensed
boolean skipIfUnlicensed,
boolean traceRedact
) {
super(tag, description);
this.redactField = redactField;
Expand All @@ -94,6 +103,7 @@ public class RedactProcessor extends AbstractProcessor {
}
this.licenseState = licenseState;
this.skipIfUnlicensed = skipIfUnlicensed;
this.traceRedact = traceRedact;
}

@Override
Expand Down Expand Up @@ -128,6 +138,8 @@ public IngestDocument execute(IngestDocument ingestDocument) {
try {
String redacted = matchRedact(fieldValue, groks, redactedStartToken, redactedEndToken);
ingestDocument.setFieldValue(redactField, redacted);
updateMetadataIfNecessary(ingestDocument, fieldValue, redacted);

return ingestDocument;
} catch (RuntimeException e) {
// grok throws a RuntimeException when the watchdog interrupts the match
Expand Down Expand Up @@ -203,6 +215,21 @@ private static void matchRepeat(Grok grok, byte[] utf8Bytes, RegionTrackingMatch
} while (offset != length);
}

private void updateMetadataIfNecessary(IngestDocument ingestDocument, String fieldValue, String redacted) {
if (traceRedact == false || fieldValue == null) {
return;
}

Boolean isRedactedMetadata = ingestDocument.getFieldValue(METADATA_PATH_REDACT_IS_REDACTED, Boolean.class, true);
boolean alreadyRedacted = Boolean.TRUE.equals(isRedactedMetadata);
boolean isRedacted = fieldValue.equals(redacted) == false;

// document newly redacted
if (alreadyRedacted == false && isRedacted) {
ingestDocument.setFieldValue(METADATA_PATH_REDACT_IS_REDACTED, true);
}
}

/**
* A Grok capture extractor which tracks matched regions
* and the Grok pattern name for redaction later.
Expand Down Expand Up @@ -389,6 +416,8 @@ public RedactProcessor create(
String redactStart = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "prefix", DEFAULT_REDACTED_START);
String redactEnd = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "suffix", DEFAULT_REDACTED_END);

boolean traceRedact = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "trace_redact", false);

if (matchPatterns == null || matchPatterns.isEmpty()) {
throw newConfigurationException(TYPE, processorTag, "patterns", "List of patterns must not be empty");
}
Expand All @@ -406,7 +435,8 @@ public RedactProcessor create(
redactEnd,
matcherWatchdog,
licenseState,
skipIfUnlicensed
skipIfUnlicensed,
traceRedact
);
} catch (Exception e) {
throw newConfigurationException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ public void testConfigKeysRemoved() throws Exception {
config.put("patterns", List.of("%{MY_PATTERN:name}!"));
config.put("pattern_definitions", Map.of("MY_PATTERN", "foo"));
config.put("ignore_missing", true);
config.put("trace_redact", true);
config.put("extra", "unused");

factory.create(null, null, null, config);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,8 @@ public void testLicenseChecks() throws Exception {
">",
MatcherWatchdog.noop(),
notAllowed,
false // set skip_if_unlicensed to false, we do not want to skip, we do want to fail
false, // set skip_if_unlicensed to false, we do not want to skip, we do want to fail
false
);
assertThat(processor.getSkipIfUnlicensed(), equalTo(false));
var ingestDoc = createIngestDoc(Map.of("not_the_field", "fieldValue"));
Expand Down Expand Up @@ -314,6 +315,118 @@ public void testLicenseChanges() throws Exception {
}
}

@SuppressWarnings("unchecked")
public void testTraceRedact() throws Exception {
var config = new HashMap<String, Object>();
config.put("field", "to_redact");
config.put("patterns", List.of("%{EMAILADDRESS:REDACTED}"));
config.put("trace_redact", true);
{
var processor = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create(
null,
"t",
"d",
new HashMap<>(config)
);
var message = "this should not be redacted";
var ingestDoc = createIngestDoc(Map.of("to_redact", message));
var redactedDoc = processor.execute(ingestDoc);

assertEquals(message, redactedDoc.getFieldValue("to_redact", String.class));
assertNull(redactedDoc.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class, true));
}
{
var processor = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create(
null,
"t",
"d",
new HashMap<>(config)
);
var ingestDoc = createIngestDoc(Map.of("to_redact", "[email protected] will be redacted"));
var redactedDoc = processor.execute(ingestDoc);

assertEquals("<REDACTED> will be redacted", redactedDoc.getFieldValue("to_redact", String.class));
// validate ingest metadata path correctly resolved
assertTrue(redactedDoc.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class));
// validate ingest metadata structure correct
var ingestMeta = redactedDoc.getIngestMetadata();
assertTrue(ingestMeta.containsKey(RedactProcessor.REDACT_KEY));
var redactMetadata = (HashMap<String, Object>) ingestMeta.get(RedactProcessor.REDACT_KEY);
assertTrue(redactMetadata.containsKey(RedactProcessor.IS_REDACTED_KEY));
assertTrue((Boolean) redactMetadata.get(RedactProcessor.IS_REDACTED_KEY));
}
{
var configNoTrace = new HashMap<String, Object>();
configNoTrace.put("field", "to_redact");
configNoTrace.put("patterns", List.of("%{EMAILADDRESS:REDACTED}"));

var processor = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create(null, "t", "d", configNoTrace);
var ingestDoc = createIngestDoc(Map.of("to_redact", "[email protected] will be redacted"));
var redactedDoc = processor.execute(ingestDoc);

assertEquals("<REDACTED> will be redacted", redactedDoc.getFieldValue("to_redact", String.class));
assertNull(redactedDoc.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class, true));
}
}

public void testTraceRedactMultipleProcessors() throws Exception {
var configRedact = new HashMap<String, Object>();
configRedact.put("field", "to_redact");
configRedact.put("patterns", List.of("%{EMAILADDRESS:REDACTED}"));
configRedact.put("trace_redact", true);

var configNoRedact = new HashMap<String, Object>();
configNoRedact.put("field", "to_redact");
configNoRedact.put("patterns", List.of("%{IP:REDACTED}")); // not in the doc
configNoRedact.put("trace_redact", true);

// first processor does not redact doc, second one does
{
var processorRedact = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create(
null,
"t1",
"d",
new HashMap<>(configRedact)
);
var processorNoRedact = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create(
null,
"t2",
"d",
new HashMap<>(configNoRedact)
);
var ingestDocWithEmail = createIngestDoc(Map.of("to_redact", "[email protected] will be redacted"));

var docNotRedacted = processorNoRedact.execute(ingestDocWithEmail);
assertNull(docNotRedacted.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class, true));

var docRedacted = processorRedact.execute(docNotRedacted);
assertTrue(docRedacted.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class));
}
// first processor redacts doc, second one does not
{
var processorRedact = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create(
null,
"t1",
"d",
new HashMap<>(configRedact)
);
var processorNoRedact = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create(
null,
"t2",
"d",
new HashMap<>(configNoRedact)
);
var ingestDocWithEmail = createIngestDoc(Map.of("to_redact", "[email protected] will be redacted"));

var docRedacted = processorRedact.execute(ingestDocWithEmail);
assertTrue(docRedacted.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class));

// validate does not override already redacted doc metadata
var docRedactedAlready = processorNoRedact.execute(docRedacted);
assertTrue(docRedactedAlready.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class));
}
}

public void testMergeLongestRegion() {
var r = List.of(
new RedactProcessor.RegionTrackingMatchExtractor.Replacement(10, 20, "first"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
index: test
id: "1"
pipeline: "pipeline-using-a-redact-processor"
body: {to_redact: "0.0.0.1 is my secret IP to redact"}
body: { to_redact: "0.0.0.1 is my secret IP to redact" }

- do:
get:
Expand Down Expand Up @@ -96,3 +96,25 @@
}
- length: { docs: 1 }
- match: { docs.0.doc._source.to_redact: "==*EMAIL*== will be redacted" }
---
"Test redact with trace_redact":
- do:
ingest.simulate:
body: >
{
"pipeline": {
"processors": [
{
"redact": {
"field": "to_redact",
"patterns": ["%{EMAILADDRESS:EMAIL}", "%{IP:IP_ADDRESS}"],
"trace_redact": true
}
}
]
},
"docs": [{"_source": {"to_redact": "[email protected] will be redacted"}}]
}
- length: { docs: 1 }
- match: { docs.0.doc._source.to_redact: "<EMAIL> will be redacted" }
- match: { docs.0.doc._ingest._redact._is_redacted: true }
Loading