Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor of analyzers for local queries #2767

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 55 additions & 1 deletion api_app/analyzers_manager/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,18 @@

import requests
from django.conf import settings
from django.core.files.base import ContentFile

from certego_saas.apps.user.models import User
from tests.mock_utils import MockUpResponse, if_mock_connections, patch

from ..choices import Classification, PythonModuleBasePaths
from ..classes import Plugin
from ..helpers import calculate_sha256
from ..models import PythonConfig
from .constants import HashChoices, TypeChoices
from .exceptions import AnalyzerConfigurationException, AnalyzerRunException
from .models import AnalyzerConfig, AnalyzerReport
from .models import AnalyzerConfig, AnalyzerReport, AnalyzerSourceFile

logger = logging.getLogger(__name__)

Expand All @@ -38,6 +40,58 @@ class BaseAnalyzerMixin(Plugin, metaclass=ABCMeta):
SUSPICIOUS_EVALUATION = 35
FALSE_POSITIVE = -50

@classmethod
def update_support_model(cls, file_name):
    """Hook invoked after the source file named ``file_name`` has changed.

    The base implementation intentionally does nothing and returns ``None``;
    ``update_internal_data`` calls it only when ``update_source_file``
    reported a change.
    """
    return None

@classmethod
def update_source_file(cls, request_data: Dict, file_name) -> bool:
    """Download a source file and persist it when its content changed.

    The response body of ``requests.get(**request_data)`` is hashed with
    ``calculate_sha256`` and compared against the ``AnalyzerSourceFile`` row
    identified by ``file_name`` and ``cls.python_module``.

    Args:
        request_data: keyword arguments forwarded to ``requests.get``.
            A ``timeout`` supplied here takes precedence over the default.
        file_name: name under which the downloaded content is stored.

    Returns:
        ``True`` when a row was created or its file replaced, ``False`` when
        the stored digest already matches the downloaded content.

    Raises:
        requests.HTTPError: when the response status is an error
            (via ``raise_for_status``).
    """
    logger.info(
        f"Source file update started with request data {request_data}, file name {file_name} and python module {cls.python_module}"
    )
    # requests waits forever unless a timeout is given; apply a default
    # without mutating the caller's dict (explicit caller value wins).
    # NOTE(review): 30 seconds is an assumed sensible default - adjust if the
    # upstream lists are served slowly.
    request_kwargs = {"timeout": 30, **request_data}
    response = requests.get(**request_kwargs)
    response.raise_for_status()
    sha_res = calculate_sha256(response.content)

    source_file = AnalyzerSourceFile.objects.filter(
        file_name=file_name, python_module=cls.python_module
    ).first()

    if source_file is None:
        logger.info(
            f"About to create new source file with file name {file_name} and python module {cls.python_module}"
        )
        # NOTE(review): sha256 is declared unique on the model, so creating a
        # row whose content is identical to another row's will fail - confirm
        # that this is intended.
        AnalyzerSourceFile.objects.create(
            file_name=file_name,
            python_module=cls.python_module,
            file=ContentFile(response.content, name=file_name),
            sha256=sha_res,
        )
        return True

    logger.info(f"Found source file {source_file}")
    if source_file.sha256 == sha_res:
        # Same digest: the stored file is already up to date.
        return False

    logger.info("About to update source file")
    # save=False: remove the old file from storage without writing an
    # intermediate row with an empty file; the single save() below persists
    # the new file and digest together.
    source_file.file.delete(save=False)
    source_file.file = ContentFile(response.content, name=file_name)
    source_file.sha256 = sha_res
    source_file.save()
    return True

@classmethod
def update_internal_data(cls, request_data: Dict, file_name) -> bool:
    """Refresh the stored source file and, if it changed, the support model.

    Delegates the download/comparison to ``update_source_file`` and calls
    ``update_support_model(file_name)`` only when that reported a change.

    Returns:
        The value returned by ``update_source_file``.
    """
    changed = cls.update_source_file(request_data, file_name)
    if not changed:
        # Nothing new was stored, so the support model is left untouched.
        return changed

    cls.update_support_model(file_name)
    return changed

def threat_to_evaluation(self, threat_level):
# MAGIC NUMBERS HERE!!!
# I know, it should be 25-50-75-100. We raised it a bit because too many false positives were generated
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Generated by Django 4.2.17 on 2025-02-21 15:25

import django.db.models.deletion
import django.utils.timezone
from django.db import migrations, models


class Migration(migrations.Migration):
    # Schema migration creating four models: TorExitAddress, TrancoRecord,
    # FireHolRecord and AnalyzerSourceFile.

    dependencies = [
        ("api_app", "0071_delete_last_elastic_report"),
        ("analyzers_manager", "0151_analyzer_config_ipquery"),
    ]

    operations = [
        # One row per IP address; the address itself is unique.
        migrations.CreateModel(
            name="TorExitAddress",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("last_update", models.DateTimeField(auto_now=True)),
                ("ip", models.GenericIPAddressField(unique=True)),
            ],
            options={
                "abstract": False,
            },
        ),
        # Rank/domain pairs, unique per retrieval timestamp.
        migrations.CreateModel(
            name="TrancoRecord",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("last_update", models.DateTimeField(auto_now=True)),
                ("version", models.IntegerField(default=0)),
                ("rank", models.IntegerField()),
                ("domain", models.CharField(max_length=512)),
                (
                    "retrieved_date",
                    models.DateTimeField(default=django.utils.timezone.now),
                ),
            ],
            options={
                "unique_together": {("rank", "domain", "retrieved_date")},
            },
        ),
        # IP ranges (ip_start..ip_end) tagged with a source and a category;
        # the four-column combination is unique.
        migrations.CreateModel(
            name="FireHolRecord",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("last_update", models.DateTimeField(auto_now=True)),
                ("version", models.IntegerField(default=1)),
                ("file_date", models.DateTimeField()),
                ("source", models.CharField(max_length=300)),
                ("ip_start", models.GenericIPAddressField()),
                ("ip_end", models.GenericIPAddressField()),
                ("category", models.CharField(max_length=300)),
            ],
            options={
                "unique_together": {("source", "ip_start", "ip_end", "category")},
            },
        ),
        # Files stored under "analyzers_source_files", linked to a
        # PythonModule and identified by (file_name, python_module).
        migrations.CreateModel(
            name="AnalyzerSourceFile",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("file_name", models.CharField(max_length=512)),
                ("file", models.FileField(upload_to="analyzers_source_files")),
                # NOTE(review): unique across ALL rows, not just per
                # (file_name, python_module) - confirm this is intended.
                ("sha256", models.CharField(max_length=64, unique=True)),
                ("last_update", models.DateTimeField(auto_now=True)),
                (
                    "python_module",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="source_files",
                        to="api_app.pythonmodule",
                    ),
                ),
            ],
            options={
                "unique_together": {("file_name", "python_module")},
            },
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from django.db import migrations


def migrate(apps, schema_editor):
    """Delete the ``list_names`` parameter of the FireHol_IPList python module.

    Args:
        apps: historical app registry supplied by ``RunPython``.
        schema_editor: supplied by ``RunPython``; unused.

    Raises:
        PythonModule.DoesNotExist: when the FireHol_IPList module row is
            missing.
    """
    PythonModule = apps.get_model("api_app", "PythonModule")
    Parameter = apps.get_model("api_app", "Parameter")
    pm = PythonModule.objects.get(
        module="firehol_iplist.FireHol_IPList",
        base_path="api_app.analyzers_manager.observable_analyzers",
    )
    # filter().delete() instead of get().delete(): the reverse step of this
    # migration does not recreate the parameter, so re-applying the migration
    # (or running it where the parameter was already removed) must not raise
    # DoesNotExist.
    Parameter.objects.filter(name="list_names", python_module=pm).delete()


def reverse_migrate(apps, schema_editor):
    """Reverse step that performs no work and returns ``None``.

    Both arguments are accepted to satisfy the ``RunPython`` callable
    signature and are ignored.
    """
    return None


class Migration(migrations.Migration):
    """Data migration running ``migrate`` forwards and ``reverse_migrate`` backwards."""

    dependencies = [
        ("api_app", "0071_delete_last_elastic_report"),
        (
            "analyzers_manager",
            "0152_torexitaddress_trancorecord_fireholrecord_and_more",
        ),
    ]

    operations = [
        migrations.RunPython(code=migrate, reverse_code=reverse_migrate),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from django.db import migrations


def migrate(apps, schema_editor):
    """Attach crontab update schedules to the FireHol and Tranco modules.

    FireHol_IPList gets the 18:10 crontab and Tranco the 01:00 crontab; each
    crontab is fetched or created, assigned to the module's
    ``update_schedule`` and the module is saved.

    Args:
        apps: historical app registry supplied by ``RunPython``.
        schema_editor: supplied by ``RunPython``; unused.

    Raises:
        PythonModule.DoesNotExist: when either python module row is missing.
    """
    PythonModule = apps.get_model("api_app", "PythonModule")
    CrontabSchedule = apps.get_model("django_celery_beat", "CrontabSchedule")

    # Pin the remaining crontab fields as well: looking up by minute/hour
    # alone matches any schedule sharing them (e.g. a weekly one) and raises
    # MultipleObjectsReturned when several such rows exist.
    every_day = {"day_of_week": "*", "day_of_month": "*", "month_of_year": "*"}
    cron_firehol, _ = CrontabSchedule.objects.get_or_create(
        minute=10, hour=18, **every_day
    )
    cron_tranco, _ = CrontabSchedule.objects.get_or_create(
        minute=0, hour=1, **every_day
    )

    base_path = "api_app.analyzers_manager.observable_analyzers"
    pm_firehol = PythonModule.objects.get(
        module="firehol_iplist.FireHol_IPList",
        base_path=base_path,
    )
    pm_tranco = PythonModule.objects.get(
        module="tranco.Tranco",
        base_path=base_path,
    )

    pm_firehol.update_schedule = cron_firehol
    pm_tranco.update_schedule = cron_tranco
    pm_firehol.save()
    pm_tranco.save()


def reverse_migrate(apps, schema_editor):
    """Reverse step that performs no work and returns ``None``.

    Both arguments are accepted to satisfy the ``RunPython`` callable
    signature and are ignored.
    """
    return None


class Migration(migrations.Migration):
    """Data migration running ``migrate`` forwards and ``reverse_migrate`` backwards."""

    dependencies = [
        ("api_app", "0071_delete_last_elastic_report"),
        ("analyzers_manager", "0153_remove_firehol_iplist_list_name_parameter"),
    ]

    operations = [
        migrations.RunPython(code=migrate, reverse_code=reverse_migrate),
    ]
3 changes: 3 additions & 0 deletions api_app/analyzers_manager/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# flake8: noqa
from .models import *
from .support_models import *
Original file line number Diff line number Diff line change
Expand Up @@ -347,3 +347,19 @@ def plugin_type(cls) -> str:
@property
def config_exception(cls):
    """Return the ``AnalyzerConfigurationException`` class (not an instance)."""
    return AnalyzerConfigurationException


class AnalyzerSourceFile(models.Model):
    """A stored file, with its SHA-256 digest, attached to a ``PythonModule``.

    Rows are identified by the ``(file_name, python_module)`` pair.
    """

    # Name the stored file is looked up by (together with python_module).
    file_name = models.CharField(max_length=512)
    # Owning module; deleting it cascades to its source files.
    python_module = models.ForeignKey(
        PythonModule, related_name="source_files", on_delete=models.CASCADE
    )
    # File content, uploaded under "analyzers_source_files".
    file = models.FileField(upload_to="analyzers_source_files")
    # Hex digest of the file content (64 characters).
    # NOTE(review): unique across ALL rows, so two rows cannot hold identical
    # content even for different modules - confirm this is intended.
    sha256 = models.CharField(unique=True, max_length=64)
    # Refreshed automatically on every save().
    last_update = models.DateTimeField(auto_now=True)

    class Meta:
        unique_together = ("file_name", "python_module")

    def __str__(self):
        """Return a short description containing the file name and digest."""
        return f"file_name: {self.file_name}, sha256:{self.sha256}"
76 changes: 76 additions & 0 deletions api_app/analyzers_manager/models/support_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from logging import getLogger

from django.db import models
from django.utils.timezone import now

logger = getLogger(__name__)


class SupportModel(models.Model):
    """Abstract base for tables that are bulk-loaded from record dicts.

    Provides ``generate`` to insert records in batches and ``reset`` to
    delete every row of the concrete model.
    """

    # Maximum number of instances sent to the database per bulk_create call.
    BATCH_SIZE = 10000

    # Refreshed automatically on every save().
    last_update = models.DateTimeField(auto_now=True)

    class Meta:
        abstract = True

    @classmethod
    def generate(cls, data):
        """Insert ``data`` in batches, ignoring rows that conflict.

        Args:
            data: iterable of dicts; each dict is expanded as keyword
                arguments to the model constructor. It is consumed lazily,
                so generators are supported.
        """
        records = []
        for record in data:
            records.append(cls(**record))
            # Flush on the batch length rather than the element index so that
            # every batch, including the first, holds exactly BATCH_SIZE rows.
            if len(records) >= cls.BATCH_SIZE:
                cls.objects.bulk_create(records, ignore_conflicts=True)
                records = []
        if records:
            # Remaining partial batch.
            cls.objects.bulk_create(records, ignore_conflicts=True)

    @classmethod
    def reset(cls):
        """Delete every row of this model."""
        cls.objects.all().delete()


class FireHolRecord(SupportModel):
    """An IP range (``ip_start``..``ip_end``) tagged with a source and category.

    The combination of source, ip_start, ip_end and category is unique.
    """

    version = models.IntegerField(default=1)
    file_date = models.DateTimeField()
    source = models.CharField(max_length=300)
    ip_start = models.GenericIPAddressField()
    ip_end = models.GenericIPAddressField()
    category = models.CharField(max_length=300)

    class Meta:
        unique_together = ("source", "ip_start", "ip_end", "category")

    @classmethod
    def generate(cls, data):
        """Upsert ``data`` in batches.

        Rows that collide on (source, ip_start, ip_end, category) are not
        inserted again; their ``file_date`` is updated instead.

        Args:
            data: iterable of dicts; each dict is expanded as keyword
                arguments to the model constructor. It is consumed lazily,
                so generators are supported.
        """
        batch_size = 10000
        # Defined once so the in-loop flush and the final flush cannot drift
        # apart.
        upsert_options = {
            "update_conflicts": True,
            "update_fields": ["file_date"],
            "unique_fields": ["source", "ip_start", "ip_end", "category"],
        }

        records = []
        for record in data:
            # Lazy formatting: the record is only rendered when DEBUG logging
            # is enabled, which matters for large inputs.
            logger.debug("Record is: %s", record)
            records.append(cls(**record))
            # Flush on the batch length rather than the element index so that
            # every batch, including the first, holds exactly batch_size rows.
            if len(records) >= batch_size:
                cls.objects.bulk_create(records, **upsert_options)
                records = []
        if records:
            # Remaining partial batch.
            cls.objects.bulk_create(records, **upsert_options)


class TorExitAddress(SupportModel):
    """A single IP address; each address is stored at most once.

    NOTE(review): presumably holds Tor exit node addresses, judging by the
    name - confirm against the analyzer that populates it.
    """

    ip = models.GenericIPAddressField(unique=True)


class TrancoRecord(SupportModel):
    """A ranked domain entry, unique per (rank, domain, retrieved_date).

    NOTE(review): presumably one row of a Tranco ranking list, judging by the
    name - confirm against the analyzer that populates it.
    """

    version = models.IntegerField(default=0)
    rank = models.IntegerField()
    domain = models.CharField(max_length=512)
    # Defaults to the current time when the row is created.
    retrieved_date = models.DateTimeField(default=now)

    class Meta:
        unique_together = ("rank", "domain", "retrieved_date")
Loading
Loading