Skip to content

Commit

Permalink
Add enabled flag for error monitor
Browse files Browse the repository at this point in the history
  • Loading branch information
artem-shelkovnikov committed Jul 23, 2024
1 parent 19d9fcc commit 4a92c61
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 6 deletions.
6 changes: 5 additions & 1 deletion config.yml.example
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@
#
## ------------------------------- Service Error Monitor ----------------------------------
#
## Configirations related to Error Monitor functionality of the syncs.
## Configurations related to Error Monitor functionality of the syncs.
## Each running sync has an error monitor attached to it. The error monitor takes care of
## ignoring transient errors while ingesting the data. For example, failing to download or ingest a single
## document should not stop the sync. Failing to ingest some meaningful number of documents, however,
Expand All @@ -217,6 +217,10 @@
## - Errors while downloading attachments per attachment
## - Transient errors in connector, depending on connector implementation
#
## Switch for enabling/disabling error monitor
## When disabled, errors are only counted - they never cause failures (legacy behavior)
#service.error_monitor.enabled: true
#
## Total number of errors that will be tolerated per sync.
## Once the number of errors exceeds this number, the sync will terminate.
#service.error_monitor.max_total_errors: 1000
Expand Down
1 change: 1 addition & 0 deletions connectors/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def _default_config():
"max_errors": 20,
"max_errors_span": 600,
"error_monitor": {
"enabled": True,
"max_total_errors": 1000,
"max_consecutive_errors": 10,
"max_error_rate": 0.15,
Expand Down
7 changes: 7 additions & 0 deletions connectors/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,12 +1005,16 @@ class TooManyErrors(Exception):
class ErrorMonitor:
def __init__(
self,
enabled=True,
max_total_errors=1000,
max_consecutive_errors=10,
max_error_rate=0.15,
error_window_size=100,
error_queue_size=10,
):
# When disabled, only track errors
self.enabled = enabled

self.max_error_rate = max_error_rate
self.error_window_size = error_window_size
self.error_window = [False] * error_window_size
Expand Down Expand Up @@ -1081,6 +1085,9 @@ def _error_window_error_rate(self):
return error_rate

def _raise_if_necessary(self):
if not self.enabled:
return

if self.consecutive_error_count > self.max_consecutive_errors:
msg = f"Exceeded maximum consecutive errors - saw {self.consecutive_error_count} errors in a row. Last error: {self.last_error}"
raise TooManyErrors(msg) from self.last_error
Expand Down
2 changes: 1 addition & 1 deletion tests/test_sink.py
Original file line number Diff line number Diff line change
Expand Up @@ -1418,7 +1418,7 @@ async def test_force_canceled_extractor_put_doc():
@mock.patch(
"connectors.es.management_client.ESManagementClient.yield_existing_documents_metadata"
)
async def test_extractor_get_docs_when_downloads_fail(
async def test_extractor_get_docs_when_downloads_fail_because_of_error_monitor(
yield_existing_documents_metadata,
):
queue = await queue_mock()
Expand Down
17 changes: 13 additions & 4 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1369,7 +1369,7 @@ def test_error_monitor_raises_when_errors_were_reported_before():
error_monitor.track_error(InvalidIndexNameError("Can't use this name"))


def test_when_error_monitor_reports_too_many_consecutive_errors():
def test_error_monitor_when_reports_too_many_consecutive_errors():
error_monitor = ErrorMonitor(max_consecutive_errors=3)

error_monitor.track_error(Exception("first"))
Expand All @@ -1380,7 +1380,7 @@ def test_when_error_monitor_reports_too_many_consecutive_errors():
error_monitor.track_error(Exception("fourth"))


def test_when_error_monitor_reports_too_many_total_errors():
def test_error_monitor_when_reports_too_many_total_errors():
error_monitor = ErrorMonitor(
max_total_errors=100, max_consecutive_errors=999, max_error_rate=1
)
Expand All @@ -1398,7 +1398,7 @@ def test_when_error_monitor_reports_too_many_total_errors():
error_monitor.track_error(Exception("third"))


def test_when_error_monitor_reports_too_many_errors_in_window():
def test_error_monitor_when_reports_too_many_errors_in_window():
error_monitor = ErrorMonitor(error_window_size=100, max_error_rate=0.05)

# rate is 0.04
Expand All @@ -1420,7 +1420,7 @@ def test_when_error_monitor_reports_too_many_errors_in_window():
error_monitor.track_error(Exception("last"))


def test_when_errors_are_tracked_last_x_errors_are_stored():
def test_error_monitor_when_errors_are_tracked_last_x_errors_are_stored():
error_monitor = ErrorMonitor(error_queue_size=5)

for _ in range(5):
Expand All @@ -1439,3 +1439,12 @@ def test_when_errors_are_tracked_last_x_errors_are_stored():
assert str(errors[2]) == "second_part"
assert str(errors[3]) == "second_part"
assert str(errors[4]) == "second_part"


def test_error_monitor_when_disabled():
    """Check that a disabled ErrorMonitor accepts a long run of errors.

    All thresholds are set very low, so this test only passes if none of the
    ``track_error`` calls below raises while ``enabled=False``.
    """
    # Deliberately tiny limits: only the ``enabled`` flag should matter here.
    strict_limits = {
        "max_total_errors": 1,
        "max_consecutive_errors": 1,
        "max_error_rate": 0.01,
    }
    monitor = ErrorMonitor(enabled=False, **strict_limits)

    for _attempt in range(9999):
        monitor.track_error(Exception("second_part"))

0 comments on commit 4a92c61

Please sign in to comment.