Skip to content

Commit

Permalink
telegram event noise reduction (#77)
Browse files Browse the repository at this point in the history
* apply the same notification rules on zd events to tg events

* update sample config and version bump

* linting
  • Loading branch information
ayazabbas authored Jun 12, 2024
1 parent ab41a4d commit d57a393
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 31 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,18 @@ Event types are configured via environment variables:

- `TelegramEvent`
- `TELEGRAM_BOT_TOKEN` - API token for the Telegram bot
- `OPEN_ALERTS_FILE` - Path to local file used for persisting open alerts

- `ZendutyEvent`
- `ZENDUTY_INTEGRATION_KEY` - Integration key for Zenduty service API integration
- `OPEN_ALERTS_FILE` - Path to local file used for persisting open alerts

### Zenduty Alert Thresholds
- Zenduty alert will fire if a check fails 5 or more times within 5 minutes.
- The alert will be resolved if the check failed < 4 times within 5 minutes.
### Alert Thresholds
- Alert thresholds apply to both ZendutyEvent and TelegramEvent (resolution applies only to ZendutyEvent)
- Checks run approximately once per minute.
- These thresholds can be overridden per check type in config.yaml
- `zenduty_alert_threshold`: number of failures in 5 minutes >= to this value trigger an alert (default: 5)
- `zenduty_resolution_threshold`: number of failures in 5 minutes <= this value resolve the alert (default: 3)
- `alert_threshold`: an alert is triggered when the number of failures in 5 minutes is >= this value (default: 5)
- `resolution_threshold`: the alert is resolved when the number of failures in 5 minutes is <= this value (default: 3)

## Finding the Telegram Group Chat ID

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ignore_missing_imports = true

[tool.poetry]
name = "pyth-observer"
version = "0.2.12"
version = "0.2.13"
description = "Alerts and stuff"
authors = []
readme = "README.md"
Expand Down
24 changes: 14 additions & 10 deletions pyth_observer/dispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def __init__(self, config, publishers):
self.open_alerts = self.load_alerts()
# below is used to store events to send later if multiple failures occur
# events cannot be stored in open_alerts as they are not JSON serializable.
self.zenduty_events = {}
self.delayed_events = {}

def load_alerts(self):
try:
Expand Down Expand Up @@ -79,7 +79,7 @@ async def run(self, states: List[State]):
for event_type in self.config["events"]:
event: Event = globals()[event_type](check, context)

if event_type == "ZendutyEvent":
if event_type in ["ZendutyEvent", "TelegramEvent"]:
alert_identifier = self.generate_alert_identifier(check)
alert = self.open_alerts.get(alert_identifier)
if alert is None:
Expand All @@ -89,11 +89,12 @@ async def run(self, states: List[State]):
"failures": 1,
"last_window_failures": None,
"sent": False,
"event_type": event_type,
}
else:
alert["failures"] += 1
self.zenduty_events[alert_identifier] = event
continue # Skip sending immediately for ZendutyEvent
self.delayed_events[alert_identifier] = event
continue # Skip sending immediately for ZendutyEvent or TelegramEvent

sent_events.append(event.send())

Expand Down Expand Up @@ -177,8 +178,8 @@ async def process_zenduty_events(self, current_time):
for identifier, info in self.open_alerts.items():
self.check_zd_alert_status(identifier, current_time)
check_config = self.config["checks"]["global"][info["type"]]
alert_threshold = check_config.get("zenduty_alert_threshold", 5)
resolution_threshold = check_config.get("zenduty_resolution_threshold", 3)
alert_threshold = check_config.get("alert_threshold", 5)
resolution_threshold = check_config.get("resolution_threshold", 3)
# Resolve the alert if raised and failed < $threshold times in the last 5m window
resolved = False
if (
Expand All @@ -187,7 +188,10 @@ async def process_zenduty_events(self, current_time):
):
logger.debug(f"Resolving Zenduty alert {identifier}")
resolved = True
if info["sent"]:
if (
info["sent"]
and info.get("event_type", "ZendutyEvent") == "ZendutyEvent"
):
response = await send_zenduty_alert(
identifier, identifier, resolved=True
)
Expand All @@ -208,16 +212,16 @@ async def process_zenduty_events(self, current_time):
logger.debug(f"Raising Zenduty alert {identifier}")
self.open_alerts[identifier]["sent"] = True
self.open_alerts[identifier]["last_alert"] = current_time.isoformat()
event = self.zenduty_events.get(identifier)
event = self.delayed_events.get(identifier)
if event:
to_alert.append(event.send())

await asyncio.gather(*to_alert)
for identifier in to_remove:
if self.open_alerts.get(identifier):
del self.open_alerts[identifier]
if self.zenduty_events.get(identifier):
del self.zenduty_events[identifier]
if self.delayed_events.get(identifier):
del self.delayed_events[identifier]

with open(self.open_alerts_file, "w") as file:
json.dump(self.open_alerts, file)
57 changes: 42 additions & 15 deletions sample.config.yaml
Original file line number Diff line number Diff line change
@@ -1,26 +1,29 @@
network:
name: "pythnet"
http_endpoint: "https://pythnet.rpcpool.com"
ws_endpoint: "wss://pythnet.rpcpool.com"
http_endpoint: "https://api2.pythnet.pyth.network"
ws_endpoint: "wss://api2.pythnet.pyth.network"
first_mapping: "AHtgzX45WTKfkPG53L6WYhGEXwQkN1BVknET3sVsLL8J"
crosschain_endpoint: "https://hermes.pyth.network"
request_rate_limit: 10
request_rate_period: 1
events:
# NOTE: Uncomment to enable Datadog metrics, see README.md for datadog credential docs.
# - DatadogEvent
- LogEvent
# - DatadogEvent
# - TelegramEvent
- ZendutyEvent
# Alert thresholds apply to Zenduty and Telegram events
# - Checks run approximately once per minute
# - `alert_threshold`: an alert is triggered when the number of failures within 5 minutes is >= this value (default: 5)
# - `resolution_threshold`: the alert is resolved when the number of failures within 5 minutes is <= this value (default: 3)
checks:
global:
# Price feed checks
PriceFeedOfflineCheck:
enable: true
max_slot_distance: 25
max_slot_distance: 120
abandoned_slot_distance: 100000
zenduty_alert_threshold: 3
zenduty_resolution_threshold: 0
alert_threshold: 3
resolution_threshold: 0
PriceFeedCoinGeckoCheck:
enable: true
max_deviation: 5
Expand All @@ -46,24 +49,48 @@ checks:
PublisherPriceCheck:
enable: true
max_slot_distance: 25
max_aggregate_distance: 6
zenduty_alert_threshold: 5
zenduty_resolution_threshold: 2
max_aggregate_distance: 5
alert_threshold: 2
resolution_threshold: 1
PublisherStalledCheck:
enable: false
stall_time_limit: 30
abandoned_time_limit: 600
abandoned_time_limit: 300
max_slot_distance: 25
zenduty_alert_threshold: 1
zenduty_resolution_threshold: 0
alert_threshold: 1
resolution_threshold: 0
# Per-symbol config
Crypto.ANC/USD:
PublisherPriceCheck:
enable: true
max_slot_distance: 25
max_aggregate_distance: 50
Crypto.MIR/USD:
PublisherPriceCheck:
enable: true
max_slot_distance: 25
max_aggregate_distance: 25
Crypto.MNGO/USD:
PriceFeedOfflineCheck:
max_slot_distance: 10000
max_slot_distance: 100000
Crypto.SLND/USD:
PriceFeedOfflineCheck:
max_slot_distance: 100000
Crypto.SNY/USD:
PriceFeedOfflineCheck:
max_slot_distance: 100000
Crypto.PORT/USD:
PriceFeedOfflineCheck:
max_slot_distance: 100000
FX.USD/HKD:
PriceFeedOfflineCheck:
max_slot_distance: 10000
Crypto.ZBC/USD:
PublisherPriceCheck:
max_aggregate_distance: 30
Crypto.BTC/USD:
PublisherStalledCheck:
enable: true
stall_time_limit: 60
stall_time_limit: 300 # This will override the global stall_time_limit for Crypto.BTC/USD
abandoned_time_limit: 600 # This will override the global abandoned_time_limit for Crypto.BTC/USD
max_slot_distance: 25

0 comments on commit d57a393

Please sign in to comment.