Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/critic/libs/ddb.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def get(cls, partition_value: Any, sort_value: Any | None = None) -> BaseModel |
return cls.ddb_to_model(response['Item']) if 'Item' in response else None

@classmethod
def query(cls, partition_value: Any) -> list[BaseModel]:
def query(cls, partition_value: Any, **kwargs) -> list[BaseModel]:
"""Query for all items with the given partition key."""
names, values, clauses = cls.alias({cls.partition_key: partition_value})

Expand All @@ -152,6 +152,7 @@ def query(cls, partition_value: Any) -> list[BaseModel]:
KeyConditionExpression=clauses[0],
ExpressionAttributeNames=names,
ExpressionAttributeValues=values,
**kwargs,
)

items = response.get('Items', [])
Expand Down
15 changes: 14 additions & 1 deletion src/critic/libs/uptime.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def put_log(
status_code: int,
latency: float,
error_messages: list[str],
log_counter: int,
):
"""
Puts a log for the check. This method should only be called once per monitor check.
Expand All @@ -143,6 +144,11 @@ def put_log(
latency_secs=latency,
error_message=error_messages if error_messages else None,
)
if log_counter >= UptimeLogTable.retention_limit:
# The "-1" is to make room for the log we're about to put.
UptimeLogTable.prune(
self.monitor.id, log_counter - (UptimeLogTable.retention_limit - 1)
Comment thread
calebsyring marked this conversation as resolved.
)
UptimeLogTable.put(uptime_log)
self._put_log = True

Expand Down Expand Up @@ -171,7 +177,13 @@ def run(self):
state, consecutive_fails, error_messages = self.check_resp(resp)

# Update the monitor
updated = self.update_monitor({'state': state, 'consecutive_fails': consecutive_fails})
updated = self.update_monitor(
{
'state': state,
'consecutive_fails': consecutive_fails,
'log_counter': min(self.monitor.log_counter + 1, UptimeLogTable.retention_limit),
Comment thread
calebsyring marked this conversation as resolved.
}
)

# Save a log
if updated:
Expand All @@ -187,4 +199,5 @@ def run(self):
resp.status_code if resp else 0,
latency,
error_messages,
self.monitor.log_counter,
)
1 change: 1 addition & 0 deletions src/critic/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class UptimeMonitorModel(BaseModel):
state: MonitorState = Field(default=MonitorState.new)
frequency_mins: int = Field(ge=1, default=1)
consecutive_fails: int = Field(ge=0, default=0)
log_counter: int = Field(ge=0, default=0)
next_due_at: AwareDatetime = Field(
default_factory=lambda: datetime.now(UTC).replace(second=0, microsecond=0)
)
Expand Down
7 changes: 7 additions & 0 deletions src/critic/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,10 @@ class UptimeLogTable(Table):
model = UptimeLogModel
partition_key = 'monitor_id'
sort_key = 'timestamp'
retention_limit = 1440 # 24 hours if logging once a minute

@classmethod
def prune(cls, monitor_id: str, n: int) -> None:
    """Delete the ``n`` oldest logs stored for ``monitor_id``.

    Logs are queried by partition key in ascending sort-key order
    (``ScanIndexForward=True``), so the first ``n`` results are the oldest
    entries according to ``cls.sort_key``.

    Args:
        monitor_id: Partition key value identifying the monitor whose logs
            should be pruned.
        n: Number of logs to delete. Values less than 1 are a no-op.
    """
    # DynamoDB rejects a Query whose Limit is below 1, so there is nothing
    # to ask for (and nothing to delete) when n is not positive.
    if n <= 0:
        return

    # NOTE(review): this relies on cls.query treating Limit as a cap on the
    # total number of items returned. If query follows LastEvaluatedKey to
    # fetch further pages, more than n logs would be deleted -- confirm.
    oldest_logs = cls.query(monitor_id, ScanIndexForward=True, Limit=n)
    for log in oldest_logs:
        cls.delete(monitor_id, getattr(log, cls.sort_key))
30 changes: 28 additions & 2 deletions tests/critic_tests/test_libs/test_uptime.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ def test_duplicate_update(self):
def test_duplicate_log(self):
monitor = UptimeMonitorFactory.put()
check = UptimeCheck(str(monitor.project_id), monitor.slug)
check.put_log(MonitorState.up, 200, 0.1, None)
check.put_log(MonitorState.up, 200, 0.1, None, 0)
with pytest.raises(Exception, match='Log already put'):
check.put_log(MonitorState.up, 200, 0.1, None)
check.put_log(MonitorState.up, 200, 0.1, None, 0)

def test_race_condition(self, httpx_mock):
monitor = UptimeMonitorFactory.put(next_due_at='2026-02-10 11:50:00Z')
Expand Down Expand Up @@ -74,6 +74,7 @@ def test_run_up(self, caplog, httpx_mock):
assert response.state == MonitorState.up
assert response.next_due_at > time_to_check
assert response.consecutive_fails == 0
assert response.log_counter == 1

response: UptimeLogModel = UptimeLogTable.query(monitor.id)[-1]

Expand All @@ -97,6 +98,7 @@ def test_down_with_consec_fails_above_threshold(self, httpx_mock):
# Monitor should be down with 2 consec fails
assert response.state == MonitorState.down
assert response.consecutive_fails == 2
assert response.log_counter == 1

response: UptimeLogModel = UptimeLogTable.query(monitor.id)[-1]
# log should have resp of 0 since there was a timeout
Expand All @@ -117,6 +119,7 @@ def test_down_with_consec_fails_below_threshold(self, httpx_mock):
response: UptimeMonitorModel = UptimeMonitorTable.get(monitor.project_id, monitor.slug)
assert response.state == MonitorState.down
assert response.consecutive_fails == 1
assert response.log_counter == 1

response: UptimeLogModel = UptimeLogTable.query(monitor.id)[-1]
# log should have resp of 0 since there was a timeout
Expand Down Expand Up @@ -193,3 +196,26 @@ def test_assertion_fails_with_multiple_errors(self, httpx_mock):
response: UptimeLogModel = UptimeLogTable.query(monitor_id)[-1]

assert '404' in response.error_message[1]

def test_log_retention_limit(self, monkeypatch, httpx_mock):
    """Run one more check than the retention limit and verify only the newest logs remain."""
    # Shrink the limit so the test needs only a handful of checks to hit it.
    monkeypatch.setattr(UptimeLogTable, 'retention_limit', 3)
    monitor: UptimeMonitorModel = UptimeMonitorFactory.put(
        next_due_at='2026-02-01 12:00:00Z',
        frequency_mins=1,
    )

    # Four checks, one per minute, starting at 12:00 UTC.
    check_times = [datetime(2026, 2, 1, 12, minute, 0, tzinfo=UTC) for minute in range(4)]
    for current in check_times:
        httpx_mock.add_response()
        with freeze_time(current):
            UptimeCheck(str(monitor.project_id), monitor.slug).run()

    logs = UptimeLogTable.query(monitor.id)
    assert len(logs) == 3

    monitor = UptimeMonitorTable.get(monitor.project_id, monitor.slug)
    assert monitor.log_counter == 3

    # The 12:00 log is the oldest and must have been pruned.
    surviving_timestamps = [log.timestamp for log in logs]
    assert surviving_timestamps == [
        datetime(2026, 2, 1, 12, 1, 0, tzinfo=UTC),
        datetime(2026, 2, 1, 12, 2, 0, tzinfo=UTC),
        datetime(2026, 2, 1, 12, 3, 0, tzinfo=UTC),
    ]
12 changes: 6 additions & 6 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading