Skip to content

Commit

Permalink
Add chart status and bug fixes (#5)
Browse files Browse the repository at this point in the history
* test(notification): strict checks for required keys

* test: add edge cases

* fix(service): not creating intervalTask when creating service

* test: a simple go test server to check if notification can be reached (need to replace with pytest mocker)

* feat: add endpoint for UptimeRecord statistics

* test: UptimeRecord based on service ID

* feat: chart data API

* feat: support defining an interval for the charting API

* fix: keep sending notification after a service is recovered. Now limited sending to 3 times.

* fix: return UTC time for chartAPI instead of local time

* chores: ignore pycache

* chores: add source of readme chart
  • Loading branch information
AnsonDev42 authored Apr 10, 2024
1 parent ce70540 commit 830f8a0
Show file tree
Hide file tree
Showing 14 changed files with 3,563 additions and 35 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
/django_debug.log
**/__pycache__/
103 changes: 88 additions & 15 deletions apps/monitoring/statistics.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,115 @@
from datetime import timedelta

from django.db.models import Avg
from django.utils import timezone
from django.utils.timezone import now

from apps.monitoring.models import UptimeRecord


# Supported look-back windows for statistics queries, keyed by number of hours.
# -1 is a sentinel meaning "All time" (no time filter).
QUERY_TIME_RANGE_TYPE = {
    1: "Last 1 hour",
    3: "Last 3 hours",
    6: "Last 6 hours",
    24: "Last 24 hours",
    168: "Last 7 days",
    720: "Last 30 days",
    -1: "All time",
}


def calculate_past_summary(time_range=None):
    """
    Summarise all UptimeRecord rows created within the last ``time_range`` hours.

    :param time_range: look-back window in hours; must be a key of
        QUERY_TIME_RANGE_TYPE (e.g. 1, 24, 720, or -1 for "All time").
    :return: tuple ``(total_records, uptime_percentage, average_response_time)``.
        Returns ``(None, None, None)`` when ``time_range`` is missing or
        invalid, so callers that unpack three values never crash (the old code
        returned a single ``None`` here, breaking tuple unpacking).
    """
    if not time_range or time_range not in QUERY_TIME_RANGE_TYPE:
        # Keep the 3-tuple shape expected by callers even on invalid input.
        return None, None, None

    if time_range == -1:
        # -1 means "All time": no time filter. (Previously this produced
        # created_at >= now() + 1 hour, which matched nothing.)
        results = UptimeRecord.objects.all()
    else:
        results = UptimeRecord.objects.filter(
            created_at__gte=now() - timedelta(hours=time_range)
        )

    total_records = results.count()
    up_records = results.filter(status=True).count()
    # Average latency over successful checks only; failed checks carry no
    # meaningful response time. Falls back to 0 when there are no up records.
    average_response_time = (
        results.filter(status=True).aggregate(Avg("response_time"))[
            "response_time__avg"
        ]
        or 0
    )

    uptime_percentage = (up_records / total_records) * 100 if total_records else 0

    return total_records, uptime_percentage, average_response_time


def calculate_past_chart(time_range, split_interval):
    """
    Build chart data for the last ``time_range`` hours, split into
    ``split_interval`` equal buckets.

    :param time_range: look-back window in hours; must be a key of
        QUERY_TIME_RANGE_TYPE.
    :param split_interval: number of buckets to split the window into;
        values below 1 are clamped to 1.
    :return: dict with a ``summary`` entry (overall totals for the window)
        and a ``data`` list holding one entry per bucket with its uptime
        percentage, average response time and a formatted time label.
    :raises KeyError: if ``time_range`` is not a recognised window.
    """
    if not time_range or time_range not in QUERY_TIME_RANGE_TYPE:
        # Fixed: the exception was previously *returned*, so callers would
        # serialise a KeyError object as chart data instead of failing.
        raise KeyError("Invalid time range")
    # NOTE(review): time_range == -1 ("All time") passes validation but yields
    # a negative bucket width below — confirm whether the chart endpoint is
    # meant to accept it.
    if split_interval < 1:
        split_interval = 1

    delta = timedelta(hours=time_range / split_interval)
    window_start = now() - timedelta(hours=time_range)

    start_time = window_start
    total_records, total_up_records = 0, 0
    bucket_entries = []
    bucket_averages = []

    for _ in range(split_interval):
        end_time = start_time + delta
        results = UptimeRecord.objects.filter(
            created_at__gte=start_time, created_at__lt=end_time
        )
        interval_total_records = results.count()
        interval_up_records = results.filter(status=True).count()
        # Average latency over successful checks only; empty buckets report 0.
        average_response_time = (
            results.filter(status=True).aggregate(Avg("response_time"))[
                "response_time__avg"
            ]
            or 0
        )
        bucket_entries.append(
            {
                "uptime_percentage": (interval_up_records / interval_total_records)
                * 100
                if interval_total_records
                else 0,
                "average_response_time": average_response_time,
                # Key kept as "time_start" for API compatibility, although the
                # label shown is the bucket's *end* boundary.
                "time_start": timezone.localtime(end_time).strftime("%b. %-d, %H:%M"),
            }
        )
        total_records += interval_total_records
        total_up_records += interval_up_records
        bucket_averages.append(average_response_time)
        start_time = end_time

    # NOTE(review): this is an unweighted mean of bucket means (empty buckets
    # count as 0), not a record-weighted average — kept for output
    # compatibility; confirm whether a weighted average is wanted.
    total_avg_response_time = sum(bucket_averages) / len(bucket_averages)
    uptime_percentage = (total_up_records / total_records) * 100 if total_records else 0

    summary = {
        "time_range": time_range,
        "total_records": total_records,
        "uptime_percentage": uptime_percentage,
        "average_response_time": total_avg_response_time,
        # Fixed: time_start previously also used now(), labelling the whole
        # window as zero-length.
        "time_start": timezone.localtime(window_start).strftime("%b. %-d, %H:%M"),
        "time_end": timezone.localtime(now()).strftime("%b. %-d, %H:%M"),
    }
    return {
        "summary": summary,
        "data": bucket_entries,
    }
18 changes: 14 additions & 4 deletions apps/monitoring/tasks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from celery import shared_task
from .models import Service, UptimeRecord
from apps.monitoring.models import Service, UptimeRecord
from apps.notification.models import NotificationChannel, NotificationLog
from .utils import check_service_status
from apps.monitoring.utils import check_service_status


# logger = get_task_logger(__name__)
Expand Down Expand Up @@ -29,20 +29,30 @@ def check_monitor_services_status(service_id=None):
error_message=error_message,
service=service,
)
# breakpoint()
if not Service.objects.filter(id=service_id).exists():
return
if not is_up:
message = f"Service {service.name} is down."
channels = NotificationChannel.objects.all() # Example: Notify all channels
channels = Service.objects.get(id=service_id).notification_channel.all()
for channel in channels:
was_success = channel.send_notification(service, message)
NotificationLog.objects.create(
service=service, message=message, was_success=was_success
)
else:
# check the last three records are up or not, if all up, do not send notification
records = UptimeRecord.objects.filter(service=service).order_by("-check_at")[:3]
if len(records) == 3 and all(record.status for record in records):
return
message = f"Service {service.name} is up."
channels = NotificationChannel.objects.all() # Example: Notify all channels
for channel in channels:
was_success = channel.send_notification(service, message)
NotificationLog.objects.create(
service=service, message=message, was_success=was_success
)


# Manual smoke test: run one status check for service id 1 when this module
# is executed directly (outside Celery).
if __name__ == "__main__":
    check_monitor_services_status(service_id=1)
    print("check_monitor_services_status()")
45 changes: 43 additions & 2 deletions apps/monitoring/views.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
from rest_framework import viewsets
from django_celery_beat.models import IntervalSchedule, PeriodicTask
from rest_framework.decorators import action
from rest_framework.response import Response

from .models import UptimeRecord
from .serializers import (
from apps.monitoring.models import UptimeRecord
from apps.monitoring.serializers import (
IntervalScheduleSerializer,
PeriodicTaskSerializer,
UptimeRecordSerializer,
)
from apps.monitoring.statistics import (
QUERY_TIME_RANGE_TYPE,
calculate_past_summary,
calculate_past_chart,
)


class IntervalScheduleViewSet(viewsets.ModelViewSet):
Expand All @@ -22,3 +29,37 @@ class PeriodicTaskViewSet(viewsets.ModelViewSet):
class UptimeRecordViewSet(viewsets.ModelViewSet):
    """CRUD for UptimeRecord plus read-only statistics/chart endpoints."""

    queryset = UptimeRecord.objects.all()
    serializer_class = UptimeRecordSerializer

    @staticmethod
    def _int_param(request, name, default):
        """Parse an integer query parameter; return None on non-numeric input."""
        try:
            return int(request.query_params.get(name, default))
        except (TypeError, ValueError):
            return None

    @action(detail=False, methods=["get"])
    def stats(self, request):
        """Return total/uptime/latency summary for ?time_range= hours."""
        # Fixed: a non-numeric time_range previously made int() raise
        # ValueError, producing an HTTP 500 instead of a 400.
        time_range = self._int_param(request, "time_range", 1)
        if time_range not in QUERY_TIME_RANGE_TYPE:
            # Also covers non-numeric input (None from _int_param).
            return Response({"error": "Invalid time range"}, status=400)
        (
            total_records,
            uptime_percentage,
            average_response_time,
        ) = calculate_past_summary(time_range=time_range)

        data = {
            "total_records": total_records,
            "uptime_percentage": uptime_percentage,
            "average_response_time": average_response_time,
        }
        return Response(data)

    @action(detail=False, methods=["get"])
    def chart(self, request):
        """Return bucketed chart data for ?time_range= and ?split_interval=."""
        time_range = self._int_param(request, "time_range", 1)
        split_interval = self._int_param(request, "split_interval", 6)
        if time_range not in QUERY_TIME_RANGE_TYPE or split_interval is None:
            return Response({"error": "Invalid time range"}, status=400)

        data = calculate_past_chart(
            time_range=time_range, split_interval=split_interval
        )
        return Response(data)
38 changes: 37 additions & 1 deletion apps/notification/serializers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
from pydantic_core import ValidationError
from rest_framework import serializers

from apps.notification.models import NotificationChannel, NotificationType
from apps.notification.notify_services.bark import Bark
from apps.notification.notify_services.telegram import Telegram


class NotificationChannelSerializer(serializers.HyperlinkedModelSerializer):
type = serializers.ChoiceField(choices=NotificationType.choices)
url = serializers.URLField(required=False)

class Meta:
model = NotificationChannel
fields = (
Expand All @@ -13,4 +19,34 @@ class Meta:
"type",
"url",
) # Explicitly include 'id' and other fields you need
type = serializers.ChoiceField(choices=NotificationType.choices)

def validate(self, attrs):
details = attrs.get("details")
channel_type = attrs.get("type")
match channel_type:
case NotificationType.TELEGRAM:
pydantic_model = Telegram
case NotificationType.BARK:
pydantic_model = Bark
case _:
pydantic_model = None

if pydantic_model:
try:
# Validates the details using the Pydantic model
pydantic_model(**details)
except ValidationError as e:
raise serializers.ValidationError({"details": e.errors()})

return attrs

def create(self, validated_data):
    """Persist and return a new NotificationChannel built from validated data."""
    return NotificationChannel.objects.create(**validated_data)

def update(self, instance, validated_data):
    """Apply any provided name/details/type changes to *instance*, save, and return it."""
    for field in ("name", "details", "type"):
        # Fall back to the current value when the field was not supplied.
        setattr(instance, field, validated_data.get(field, getattr(instance, field)))
    instance.save()
    return instance
33 changes: 21 additions & 12 deletions apps/service/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class ServiceSerializer(serializers.HyperlinkedModelSerializer):
class Meta:
model = Service
fields = (
"id",
"name",
"description",
"monitoring_endpoint",
Expand All @@ -67,23 +68,31 @@ class Meta:
def create(self, validated_data):
    """
    Create a Service together with its IntervalSchedule and PeriodicTask.

    :param validated_data: serializer-validated payload containing nested
        ``periodic_task_data`` (with an ``interval``) and optional
        ``notification_channel`` entries.
    :return: the newly created Service with its PeriodicTask bound.
    :raises ValueError: when ``periodic_task_data`` or its nested interval is
        missing or invalid.
    """
    periodic_task_data = validated_data.pop("periodic_task_data", None)
    notification_channels_data = validated_data.pop("notification_channel", [])
    # Fixed: popping "interval" from None crashed with AttributeError before
    # the explicit missing-data check below could run.
    interval_data = (
        periodic_task_data.pop("interval", None) if periodic_task_data else None
    )

    # Validate the scheduling payload *before* creating the Service so an
    # invalid request does not leave an orphaned Service row behind.
    if not periodic_task_data or not interval_data:
        raise ValueError(
            "Invalid or missing 'periodic_task_data' for Service creation."
        )

    service = Service.objects.create(**validated_data)

    if notification_channels_data:
        service.notification_channel.set(notification_channels_data)

    # Create the interval schedule (raise_exception=True raises on failure).
    interval_serializer = IntervalScheduleSerializer(data=interval_data)
    if not interval_serializer.is_valid(raise_exception=True):
        raise ValueError("Invalid 'interval' data for PeriodicTask creation.")
    # The periodic task must target this service; overwrite any caller kwargs.
    periodic_task_data["kwargs"] = json.dumps({"service_id": service.id})
    periodic_task_data["interval"] = interval_data
    periodic_task_serializer = PeriodicTaskSerializer(data=periodic_task_data)
    if periodic_task_serializer.is_valid(raise_exception=True):
        periodic_task = periodic_task_serializer.save()
        # Bind the PeriodicTask to the Service.
        service.periodic_task = periodic_task
        service.save()

    return service

Expand Down
Loading

0 comments on commit 830f8a0

Please sign in to comment.