Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Record downtime in ac-down attribute #348

Merged
merged 10 commits into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/332.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for tracking event downtime for Reachability and Portstate events.
23 changes: 23 additions & 0 deletions src/zino/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def commit(self, event: Event, user: str = "monitor"):
event.id = self.get_next_available_event_id()
else:
old_event = self.events[event.id]
self.record_downtime(event, old_event)
index = EventIndex(event.router, event.subindex, type(event))
self.events[event.id] = event

Expand Down Expand Up @@ -204,6 +205,28 @@ def _call_observers_for(self, new_event: Event, old_event: Optional[Event] = Non
for observer in self._observers:
observer(new_event=new_event, old_event=old_event)

def record_downtime(self, new_event: Event, old_event: Optional[Event] = None):
    """Maintain the ``lasttrans`` and ``ac_down`` attributes of ``new_event``.

    The down state of ``old_event`` (if given) is compared with the down
    state of ``new_event``:

    * not down -> down: ``lasttrans`` is set to the current time.
    * down -> not down: the time elapsed since ``lasttrans`` is added to
      ``ac_down`` and ``lasttrans`` is set to the current time.  Nothing is
      changed if ``lasttrans`` is unset or the elapsed time is not positive.

    When the down state did not change, ``new_event`` is left untouched.
    """
    timestamp = now()
    # A completely new event has no previous version that could have been down
    was_down = old_event.is_down() if old_event else False
    is_down_now = new_event.is_down()

    came_up = was_down and not is_down_now
    went_down = is_down_now and not was_down

    if went_down:
        new_event.lasttrans = timestamp
        return
    if not came_up:
        return

    if not new_event.lasttrans:
        _log.debug(f"Event {new_event.id} transitioned from down to up with no lasttrans value")
        return

    downtime = timestamp - new_event.lasttrans
    # A zero or negative duration is bogus, so it is ignored
    if downtime <= timedelta(0):
        return

    accumulated_so_far = new_event.ac_down or timedelta(0)
    new_event.ac_down = accumulated_so_far + downtime
    new_event.lasttrans = timestamp


class EventExistsError(Exception):
pass
21 changes: 21 additions & 0 deletions src/zino/statemodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,13 @@ def dump_event_to_file(self, dir_name: str):
with open(filename, "w") as statefile:
statefile.write(self.model_dump_json(exclude_none=True, indent=2))

def is_down(self) -> bool:
    """Tell whether this event is in a down state.

    The meaning of "down" depends on the event type (typically that a device
    or a port is down).  This generic implementation always answers False;
    event types that have a down state override it.
    """
    return False


class FlapState(StrEnum):
FLAPPING = "flapping"
Expand All @@ -356,6 +363,13 @@ class PortStateEvent(Event):
def subindex(self) -> SubIndex:
return self.ifindex

def is_down(self) -> bool:
    """Tell whether this port state event is in a down state.

    The event counts as "down" when its port state is either down or
    flapping.
    """
    down_states = [InterfaceState.DOWN, InterfaceState.FLAPPING]
    return self.portstate in down_states


class BGPEvent(Event):
type: Literal["bgp"] = "bgp"
Expand Down Expand Up @@ -388,6 +402,13 @@ class ReachabilityEvent(Event):
type: Literal["reachability"] = "reachability"
reachability: Optional[ReachabilityState] = None

def is_down(self) -> bool:
    """Tell whether this reachability event is in a down state.

    The event counts as "down" when the device it concerns does not respond,
    i.e. its reachability is ``NORESPONSE``.
    """
    no_response = ReachabilityState.NORESPONSE
    return self.reachability == no_response


class AlarmEvent(Event):
type: Literal["alarm"] = "alarm"
Expand Down
4 changes: 3 additions & 1 deletion src/zino/tasks/reachabletask.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,10 @@ async def _run_extra_job(self):
def _update_reachability_event_as_reachable(self):
    """Mark this device's reachability event, if any, as reachable again.

    Nothing happens when the device has no reachability event or when the
    event is already marked as reachable.
    """
    event = self.state.events.get(self.device.name, None, ReachabilityEvent)
    if event and event.reachability != ReachabilityState.REACHABLE:
        # Only the checked-out event is modified.  Setting the state on the
        # stored event first would make it look reachable already when
        # commit() compares the old and new event, so the down -> up
        # transition would go unnoticed.
        # NOTE(review): assumes checkout() returns a separate working copy - confirm
        event = self.state.events.checkout(event.id)
        event.add_log(f"{self.device.name} reachable")
        event.reachability = ReachabilityState.REACHABLE
        self.state.events.commit(event)
lunkwill42 marked this conversation as resolved.
Show resolved Hide resolved

def _schedule_extra_job(self):
name = self._get_extra_job_name()
Expand Down
40 changes: 39 additions & 1 deletion tests/events_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest

from zino.events import EventExistsError, EventIndex, Events
from zino.statemodels import Event, EventState, ReachabilityEvent
from zino.statemodels import Event, EventState, ReachabilityEvent, ReachabilityState
from zino.time import now


Expand Down Expand Up @@ -255,3 +255,41 @@ def test_delete_should_remove_closed_event_from_index_if_still_in_index(self):

index = EventIndex("foobar", None, ReachabilityEvent)
assert not events._events_by_index.get(index)

def test_when_lasttrans_is_not_set_record_downtime_should_not_update_event(self):
    """A down -> up transition without ``lasttrans`` must leave the event unchanged."""
    events = Events()
    down_event = events.get_or_create_event("foobar", None, ReachabilityEvent)
    down_event.reachability = ReachabilityState.NORESPONSE
    events.commit(down_event)

    # Working copy that has become reachable but lacks downtime bookkeeping
    up_event = events.checkout(down_event.id)
    up_event.reachability = ReachabilityState.REACHABLE
    up_event.lasttrans = None
    up_event.ac_down = None

    events.record_downtime(up_event, down_event)

    assert up_event.lasttrans is None
    assert up_event.ac_down is None

def test_when_downtime_is_calculated_to_zero_or_less_record_downtime_should_not_update_event(self, monkeypatch):
    """A computed downtime of zero must leave ``lasttrans`` and ``ac_down`` unchanged."""
    events = Events()
    down_event = events.get_or_create_event("foobar", None, ReachabilityEvent)
    down_event.reachability = ReachabilityState.NORESPONSE
    events.commit(down_event)

    # Freeze now() as seen by zino.events at the very value used for
    # lasttrans, so that record_downtime computes a timedelta of 0
    lasttrans = now()
    monkeypatch.setattr("zino.events.now", Mock(return_value=lasttrans))

    up_event = events.checkout(down_event.id)
    up_event.reachability = ReachabilityState.REACHABLE
    up_event.lasttrans = lasttrans
    up_event.ac_down = None

    events.record_downtime(up_event, down_event)

    assert up_event.lasttrans == lasttrans
    assert up_event.ac_down is None
90 changes: 90 additions & 0 deletions tests/snmp_fixtures/linksadmindown.snmprec
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
1.3.6.1.2.1.1.1.0|4x|50726f4375727665204a34393030422053776974636820323632362c207265766973696f6e20482e30382e39382c20524f4d20482e30382e303220282f73772f636f64652f6275696c642f666973682874735f30385f352929
1.3.6.1.2.1.1.2.0|6|1.3.6.1.4.1.11.2.3.7.11.45
1.3.6.1.2.1.1.3.0|67|3307498428
1.3.6.1.2.1.1.4.0|4x|45696e6172204c696c6c6562727967666a656c64
1.3.6.1.2.1.1.5.0|4|buick
1.3.6.1.2.1.1.6.0|4x|4c43266262656e
1.3.6.1.2.1.1.7.0|2|74
1.3.6.1.2.1.2.2.1.1.1|2|1
1.3.6.1.2.1.2.2.1.1.2|2|2
1.3.6.1.2.1.2.2.1.2.1|4|1
1.3.6.1.2.1.2.2.1.2.2|4|2
1.3.6.1.2.1.2.2.1.3.1|2|6
1.3.6.1.2.1.2.2.1.3.2|2|6
1.3.6.1.2.1.2.2.1.4.1|2|1514
1.3.6.1.2.1.2.2.1.4.2|2|1514
1.3.6.1.2.1.2.2.1.5.1|66|10000000
1.3.6.1.2.1.2.2.1.5.2|66|10000000
1.3.6.1.2.1.2.2.1.6.1|4x|0019bb8f4b7f
1.3.6.1.2.1.2.2.1.6.2|4x|0019bb8f4b7e
1.3.6.1.2.1.2.2.1.7.1|2|2
1.3.6.1.2.1.2.2.1.7.2|2|2
1.3.6.1.2.1.2.2.1.8.1|2|1
1.3.6.1.2.1.2.2.1.8.2|2|1
1.3.6.1.2.1.2.2.1.9.1|67|1737882440
1.3.6.1.2.1.2.2.1.9.2|67|1806268995
1.3.6.1.2.1.2.2.1.10.1|65|0
1.3.6.1.2.1.2.2.1.10.2|65|0
1.3.6.1.2.1.2.2.1.11.1|65|0
1.3.6.1.2.1.2.2.1.11.2|65|0
1.3.6.1.2.1.2.2.1.12.1|65|0
1.3.6.1.2.1.2.2.1.12.2|65|0
1.3.6.1.2.1.2.2.1.13.1|65|0
1.3.6.1.2.1.2.2.1.13.2|65|0
1.3.6.1.2.1.2.2.1.14.1|65|0
1.3.6.1.2.1.2.2.1.14.2|65|0
1.3.6.1.2.1.2.2.1.15.1|65|0
1.3.6.1.2.1.2.2.1.15.2|65|0
1.3.6.1.2.1.2.2.1.16.1|65|0
1.3.6.1.2.1.2.2.1.16.2|65|0
1.3.6.1.2.1.2.2.1.17.1|65|0
1.3.6.1.2.1.2.2.1.17.2|65|0
1.3.6.1.2.1.2.2.1.18.1|65|0
1.3.6.1.2.1.2.2.1.18.2|65|0
1.3.6.1.2.1.2.2.1.19.1|65|0
1.3.6.1.2.1.2.2.1.19.2|65|0
1.3.6.1.2.1.2.2.1.20.1|65|0
1.3.6.1.2.1.2.2.1.20.2|65|0
1.3.6.1.2.1.2.2.1.21.1|66|0
1.3.6.1.2.1.2.2.1.21.2|66|0
1.3.6.1.2.1.2.2.1.22.1|6|1.3.6.1.2.1.10.7
1.3.6.1.2.1.2.2.1.22.2|6|1.3.6.1.2.1.10.7
1.3.6.1.2.1.11.30.0|2|2
1.3.6.1.2.1.31.1.1.1.1.1|4|1
1.3.6.1.2.1.31.1.1.1.1.2|4|2
1.3.6.1.2.1.31.1.1.1.2.1|65|0
1.3.6.1.2.1.31.1.1.1.2.2|65|0
1.3.6.1.2.1.31.1.1.1.3.1|65|0
1.3.6.1.2.1.31.1.1.1.3.2|65|0
1.3.6.1.2.1.31.1.1.1.4.1|65|0
1.3.6.1.2.1.31.1.1.1.4.2|65|0
1.3.6.1.2.1.31.1.1.1.5.1|65|0
1.3.6.1.2.1.31.1.1.1.5.2|65|0
1.3.6.1.2.1.31.1.1.1.6.1|70|0
1.3.6.1.2.1.31.1.1.1.6.2|70|0
1.3.6.1.2.1.31.1.1.1.7.1|70|0
1.3.6.1.2.1.31.1.1.1.7.2|70|0
1.3.6.1.2.1.31.1.1.1.8.1|70|0
1.3.6.1.2.1.31.1.1.1.8.2|70|0
1.3.6.1.2.1.31.1.1.1.9.1|70|0
1.3.6.1.2.1.31.1.1.1.9.2|70|0
1.3.6.1.2.1.31.1.1.1.10.1|70|0
1.3.6.1.2.1.31.1.1.1.10.2|70|0
1.3.6.1.2.1.31.1.1.1.11.1|70|0
1.3.6.1.2.1.31.1.1.1.11.2|70|0
1.3.6.1.2.1.31.1.1.1.12.1|70|0
1.3.6.1.2.1.31.1.1.1.12.2|70|0
1.3.6.1.2.1.31.1.1.1.13.1|70|0
1.3.6.1.2.1.31.1.1.1.13.2|70|0
1.3.6.1.2.1.31.1.1.1.14.1|2|1
1.3.6.1.2.1.31.1.1.1.14.2|2|1
1.3.6.1.2.1.31.1.1.1.15.1|66|10
1.3.6.1.2.1.31.1.1.1.15.2|66|10
1.3.6.1.2.1.31.1.1.1.16.1|2|1
1.3.6.1.2.1.31.1.1.1.16.2|2|1
1.3.6.1.2.1.31.1.1.1.17.1|2|1
1.3.6.1.2.1.31.1.1.1.17.2|2|1
1.3.6.1.2.1.31.1.1.1.18.1|4x|4120706f656d
1.3.6.1.2.1.31.1.1.1.18.2|4x|66726f6d20612066616d6f7573
1.3.6.1.2.1.31.1.1.1.19.1|67|1737882440
1.3.6.1.2.1.31.1.1.1.19.2|67|1806268995
105 changes: 104 additions & 1 deletion tests/tasks/test_linkstatetask.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
from datetime import timedelta
from unittest.mock import patch

import pytest

from zino.config.models import PollDevice
from zino.oid import OID
from zino.state import ZinoState
from zino.statemodels import InterfaceState, Port
from zino.statemodels import InterfaceState, Port, PortStateEvent
from zino.tasks.linkstatetask import (
BaseInterfaceRow,
CollectedInterfaceDataIsNotSaneError,
LinkStateTask,
MissingInterfaceTableData,
)
from zino.time import now


class TestLinkStateTask:
Expand Down Expand Up @@ -77,6 +79,97 @@ def test_when_interface_data_is_empty_update_interfaces_should_keep_processing(s
is None
)

@pytest.mark.asyncio
async def test_when_event_is_new_it_should_set_lasttrans(self, linkstatetask_with_one_link_down):
    """Running the task against a device with a down link yields an event with ``lasttrans`` set."""
    task = linkstatetask_with_one_link_down
    await task.run()
    # The event is looked up by device name and sub-index 2
    port_event = task.state.events.get(task.device.name, 2, PortStateEvent)
    assert port_event.lasttrans

@pytest.mark.asyncio
async def test_when_event_transitions_from_to_up_it_should_update_lasttrans(self, linkstatetask_with_links_up):
    """When a down port event becomes up, ``lasttrans`` must move forward."""
    task = linkstatetask_with_links_up
    initial_lasttrans = now() - timedelta(minutes=5)

    # Pre-register port 1 as down in the device state
    down_port = Port(ifindex=1, ifdescr="1", ifalias="from a famous", state=InterfaceState.DOWN)
    task.device_state.ports.update({1: down_port})

    # Commit a matching down event whose last transition was five minutes ago
    event = task.state.events.create_event(task.device.name, 1, PortStateEvent)
    event.ifindex = 1
    event.portstate = InterfaceState.DOWN
    event.lasttrans = initial_lasttrans
    task.state.events.commit(event)

    run_result = await task.run()
    assert run_result is None

    updated_event = task.state.events[event.id]
    assert updated_event.portstate == InterfaceState.UP
    assert updated_event.lasttrans > initial_lasttrans

@pytest.mark.asyncio
async def test_when_event_transitions_from_down_to_admindown_it_should_update_lasttrans(
    self,
    linkstatetask_with_admin_down,
):
    """When a down port event becomes admin down, ``lasttrans`` must move forward."""
    task = linkstatetask_with_admin_down
    initial_lasttrans = now() - timedelta(minutes=5)

    # Pre-register port 1 as down in the device state
    down_port = Port(ifindex=1, ifdescr="1", ifalias="from a famous", state=InterfaceState.DOWN)
    task.device_state.ports.update({1: down_port})

    # Commit a matching down event whose last transition was five minutes ago
    event = task.state.events.create_event(task.device.name, 1, PortStateEvent)
    event.ifindex = 1
    event.portstate = InterfaceState.DOWN
    event.lasttrans = initial_lasttrans
    task.state.events.commit(event)

    run_result = await task.run()
    assert run_result is None

    updated_event = task.state.events[event.id]
    assert updated_event.portstate == InterfaceState.ADMIN_DOWN
    assert updated_event.lasttrans > initial_lasttrans

@pytest.mark.asyncio
async def test_when_event_transitions_from_down_to_up_it_should_update_ac_down(self, linkstatetask_with_links_up):
    """When a down port event becomes up, ``ac_down`` must grow."""
    task = linkstatetask_with_links_up
    initial_lasttrans = now() - timedelta(minutes=5)
    initial_ac_down = timedelta(0)

    # Pre-register port 1 as down in the device state
    down_port = Port(ifindex=1, ifdescr="1", ifalias="from a famous", state=InterfaceState.DOWN)
    task.device_state.ports.update({1: down_port})

    # Commit a matching down event with no accumulated downtime so far
    event = task.state.events.create_event(task.device.name, 1, PortStateEvent)
    event.ifindex = 1
    event.portstate = InterfaceState.DOWN
    event.ac_down = initial_ac_down
    event.lasttrans = initial_lasttrans
    task.state.events.commit(event)

    run_result = await task.run()
    assert run_result is None

    updated_event = task.state.events[event.id]
    assert updated_event.portstate == InterfaceState.UP
    assert updated_event.ac_down > initial_ac_down

@pytest.mark.asyncio
async def test_when_event_transitions_from_down_to_admindown_it_should_update_ac_down(
    self,
    linkstatetask_with_admin_down,
):
    """When a down port event becomes admin down, ``ac_down`` must grow."""
    task = linkstatetask_with_admin_down
    initial_lasttrans = now() - timedelta(minutes=5)
    initial_ac_down = timedelta(0)

    # Pre-register port 1 as down in the device state
    down_port = Port(ifindex=1, ifdescr="1", ifalias="alias", state=InterfaceState.DOWN)
    task.device_state.ports.update({1: down_port})

    # Commit a matching down event with no accumulated downtime so far
    event = task.state.events.create_event(task.device.name, 1, PortStateEvent)
    event.ifindex = 1
    event.portstate = InterfaceState.DOWN
    event.ac_down = initial_ac_down
    event.lasttrans = initial_lasttrans
    task.state.events.commit(event)

    run_result = await task.run()
    assert run_result is None

    updated_event = task.state.events[event.id]
    assert updated_event.portstate == InterfaceState.ADMIN_DOWN
    assert updated_event.ac_down > initial_ac_down


class TestBaseInterfaceRow:
def test_when_index_is_missing_is_sane_should_return_false(self):
Expand Down Expand Up @@ -131,6 +224,16 @@ def linkstatetask_with_one_link_down(snmpsim, snmp_test_port):
yield task


@pytest.fixture
def linkstatetask_with_admin_down(snmpsim, snmp_test_port):
    """Yield a LinkStateTask for a device polled with the ``linksadmindown`` community."""
    poll_device = PollDevice(
        name="buick.lab.example.org",
        address="127.0.0.1",
        port=snmp_test_port,
        community="linksadmindown",
    )
    yield LinkStateTask(poll_device, ZinoState())


@pytest.fixture
def task_with_dummy_device():
device = PollDevice(name="test", address="127.0.0.1")
Expand Down
Loading
Loading