From 4a92c612fc666822bcddc8d9eb4b841700a1d22a Mon Sep 17 00:00:00 2001 From: Artem Shelkovnikov Date: Tue, 23 Jul 2024 16:31:22 +0200 Subject: [PATCH] Add enabled flag for error monitor --- config.yml.example | 6 +++++- connectors/config.py | 1 + connectors/utils.py | 7 +++++++ tests/test_sink.py | 2 +- tests/test_utils.py | 17 +++++++++++++---- 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/config.yml.example b/config.yml.example index 4ee99fa69..474ce5b28 100644 --- a/config.yml.example +++ b/config.yml.example @@ -205,7 +205,7 @@ # ## ------------------------------- Service Error Monitor ---------------------------------- # -## Configirations related to Error Monitor functionality of the syncs. +## Configurations related to Error Monitor functionality of the syncs. ## Each running sync has an error monitor attached to it. Error monitor is taking care of ## ignoring transient errors while ingesting the data. For example, failing to download or ingest a single ## document should not stop the sync. Failing to ingest some meaningful number of documents, however, @@ -217,6 +217,10 @@ ## - Errors while downloading attachments per attachment ## - Transient errors in connector, depending on connector implementation # +## Switch for enabling/disabling error monitor +## When disabled, errors are only counted - they never cause failures (legacy behavior) +#service.error_monitor.enabled: true +# ## Total number of errors that will be tolerated per sync. ## Once number of errors exceed this number, the sync will terminate. #service.error_monitor.max_total_errors: 1000 diff --git a/connectors/config.py b/connectors/config.py index a4126019a..03dabcaee 100644 --- a/connectors/config.py +++ b/connectors/config.py @@ -88,6 +88,7 @@ def _default_config(): "max_errors": 20, "max_errors_span": 600, "error_monitor": { + "enabled": True, "max_total_errors": 1000, "max_consecutive_errors": 10, "max_error_rate": 0.15, diff --git a/connectors/utils.py b/connectors/utils.py index ee5c3d01d..c24e2bd24 100644 --- a/connectors/utils.py +++ b/connectors/utils.py @@ -1005,12 +1005,16 @@ class TooManyErrors(Exception): class ErrorMonitor: def __init__( self, + enabled=True, max_total_errors=1000, max_consecutive_errors=10, max_error_rate=0.15, error_window_size=100, error_queue_size=10, ): + # When disabled, only track errors + self.enabled = enabled + self.max_error_rate = max_error_rate self.error_window_size = error_window_size self.error_window = [False] * error_window_size @@ -1081,6 +1085,9 @@ def _error_window_error_rate(self): return error_rate def _raise_if_necessary(self): + if not self.enabled: + return + if self.consecutive_error_count > self.max_consecutive_errors: msg = f"Exceeded maximum consecutive errors - saw {self.consecutive_error_count} errors in a row. Last error: {self.last_error}" raise TooManyErrors(msg) from self.last_error diff --git a/tests/test_sink.py b/tests/test_sink.py index b45bf4215..f84ac08f8 100644 --- a/tests/test_sink.py +++ b/tests/test_sink.py @@ -1418,7 +1418,7 @@ async def test_force_canceled_extractor_put_doc(): @mock.patch( "connectors.es.management_client.ESManagementClient.yield_existing_documents_metadata" ) -async def test_extractor_get_docs_when_downloads_fail( +async def test_extractor_get_docs_when_downloads_fail_because_of_error_monitor( yield_existing_documents_metadata, ): queue = await queue_mock() diff --git a/tests/test_utils.py b/tests/test_utils.py index 1eb53748e..8c28cd644 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1369,7 +1369,7 @@ def test_error_monitor_raises_when_errors_were_reported_before(): error_monitor.track_error(InvalidIndexNameError("Can't use this name")) -def test_when_error_monitor_reports_too_many_consecutive_errors(): +def test_error_monitor_when_reports_too_many_consecutive_errors(): error_monitor = ErrorMonitor(max_consecutive_errors=3) error_monitor.track_error(Exception("first")) @@ -1380,7 +1380,7 @@ def test_when_error_monitor_reports_too_many_consecutive_errors(): error_monitor.track_error(Exception("fourth")) -def test_when_error_monitor_reports_too_many_total_errors(): +def test_error_monitor_when_reports_too_many_total_errors(): error_monitor = ErrorMonitor( max_total_errors=100, max_consecutive_errors=999, max_error_rate=1 ) @@ -1398,7 +1398,7 @@ def test_when_error_monitor_reports_too_many_total_errors(): error_monitor.track_error(Exception("third")) -def test_when_error_monitor_reports_too_many_errors_in_window(): +def test_error_monitor_when_reports_too_many_errors_in_window(): error_monitor = ErrorMonitor(error_window_size=100, max_error_rate=0.05) # rate is 0.04 @@ -1420,7 +1420,7 @@ def test_when_error_monitor_reports_too_many_errors_in_window(): error_monitor.track_error(Exception("last")) -def test_when_errors_are_tracked_last_x_errors_are_stored(): +def test_error_monitor_when_errors_are_tracked_last_x_errors_are_stored(): error_monitor = ErrorMonitor(error_queue_size=5) for _ in range(5): @@ -1439,3 +1439,12 @@ def test_when_errors_are_tracked_last_x_errors_are_stored(): assert str(errors[2]) == "second_part" assert str(errors[3]) == "second_part" assert str(errors[4]) == "second_part" + + +def test_error_monitor_when_disabled(): + error_monitor = ErrorMonitor( + enabled=False, max_total_errors=1, max_consecutive_errors=1, max_error_rate=0.01 + ) + + for _ in range(9999): + error_monitor.track_error(Exception("second_part"))