Skip to content
This repository has been archived by the owner on Nov 21, 2024. It is now read-only.

Commit

Permalink
search: minor fixes on elastic search implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
DonHaul committed Jun 7, 2024
1 parent 3b0f7a8 commit 566ce2e
Show file tree
Hide file tree
Showing 21 changed files with 397 additions and 23 deletions.
4 changes: 4 additions & 0 deletions .envs/docker/.django
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ CELERY_RESULT_BACKEND=redis://redis:6379/2
# Flower
CELERY_FLOWER_USER=debug
CELERY_FLOWER_PASSWORD=debug

# OpenSearch
OPENSEARCH_HOST=opensearch:9200
OPENSEARCH_INDEX_PREFIX=backoffice-backend-local
4 changes: 4 additions & 0 deletions .envs/local/.django
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ CELERY_RESULT_BACKEND=redis://redis:6379/2
# Flower
CELERY_FLOWER_USER=debug
CELERY_FLOWER_PASSWORD=debug

# OpenSearch
OPENSEARCH_HOST=opensearch:9200
OPENSEARCH_INDEX_PREFIX=backoffice-backend-local
10 changes: 10 additions & 0 deletions .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ jobs:
test:
runs-on: ubuntu-latest
services:
# OpenSearch service container the integration tests connect to on port 9200.
opensearch:
  image: registry.cern.ch/cern-sis/inspire/opensearch
  env:
    # Environment values reach the container as strings; quote the
    # boolean-looking ones so YAML tooling does not retype them.
    bootstrap.memory_lock: "true"
    # OpenSearch reads JVM flags from OPENSEARCH_JAVA_OPTS (the variable the
    # local compose service also sets); ES_JAVA_OPTS is the Elasticsearch name.
    OPENSEARCH_JAVA_OPTS: -Xms1024m -Xmx1024m
    discovery.type: single-node
    DISABLE_SECURITY_PLUGIN: "true"
  ports:
    - "9200:9200"
rabbitmq:
image: rabbitmq:3-management
ports:
Expand Down Expand Up @@ -54,5 +63,6 @@ jobs:
--env POSTGRES_USER=inspire
--env POSTGRES_PASSWORD=inspire
--env POSTGRES_HOST=127.0.0.1
--env OPENSEARCH_HOST=127.0.0.1:9200
${{ inputs.image }}
run pytest
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
required: true
outputs:
image-id:
description: The ID of image that has been buit
description: The ID of image that has been built
value: ${{ jobs.build.outputs.image-id }}

jobs:
Expand Down
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.11
3.11.9
15 changes: 15 additions & 0 deletions backoffice/utils/pagination.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

#from django_elasticsearch_dsl_drf.pagination import QueryFriendlyPageNumberPagination
from rest_framework.pagination import PageNumberPagination


class StandardResultsSetPagination(PageNumberPagination):
    """Page-number pagination returning 10 results per page by default.

    Clients may request a different size through the ``page_size`` query
    parameter, capped at ``max_page_size``.
    """

    # Upper bound for a client-supplied page size.
    max_page_size = 100
    # Name of the query parameter that overrides the page size.
    page_size_query_param = "page_size"
    # Page size used when the client does not ask for one.
    page_size = 10


class OSStandardResultsSetPagination(StandardResultsSetPagination):
    """Pagination used by the OpenSearch-backed search views.

    Inherits ``page_size``, ``page_size_query_param`` and ``max_page_size``
    from ``StandardResultsSetPagination`` instead of repeating the same
    values, so the search and database endpoints keep identical paging
    limits. Override an attribute here if the search endpoints ever need a
    different limit.
    """
7 changes: 7 additions & 0 deletions backoffice/workflows/api/serializers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from rest_framework import serializers
from django_elasticsearch_dsl_drf.serializers import DocumentSerializer

from backoffice.workflows.documents import WorkflowDocument
from backoffice.workflows.models import Workflow, WorkflowTicket


Expand All @@ -13,3 +15,8 @@ class WorkflowTicketSerializer(serializers.ModelSerializer):
class Meta:
model = WorkflowTicket
fields = "__all__"

class WorkflowDocumentSerializer(DocumentSerializer):
    """Serializer for ``WorkflowDocument`` search results."""

    class Meta:
        # Configured with "__all__" rather than an explicit field list.
        fields = "__all__"
        document = WorkflowDocument
26 changes: 24 additions & 2 deletions backoffice/workflows/api/views.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from django.shortcuts import get_object_or_404
from rest_framework import status, viewsets
from rest_framework.response import Response
from django_elasticsearch_dsl_drf.viewsets import BaseDocumentViewSet

from backoffice.workflows.models import Workflow, WorkflowTicket

from .serializers import WorkflowSerializer, WorkflowTicketSerializer
from backoffice.workflows.documents import WorkflowDocument
from backoffice.utils.pagination import OSStandardResultsSetPagination
from .serializers import WorkflowSerializer, WorkflowTicketSerializer, WorkflowDocumentSerializer


class WorkflowViewSet(viewsets.ModelViewSet):
Expand Down Expand Up @@ -65,3 +67,23 @@ def create(self, request, *args, **kwargs):
return Response(serializer.data, status=status.HTTP_201_CREATED)
except Exception as e:
return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)


class WorkflowDocumentView(BaseDocumentViewSet):
    """Search endpoint backed by the ``WorkflowDocument`` index.

    Hits are serialized with ``WorkflowDocumentSerializer`` and paginated with
    ``OSStandardResultsSetPagination``.
    """

    document = WorkflowDocument
    # Must name the document serializer: get_serializer_class() returns it,
    # and tooling that inspects `serializer_class` directly should see the
    # same class rather than the model serializer.
    serializer_class = WorkflowDocumentSerializer
    pagination_class = OSStandardResultsSetPagination

    # A tuple (not a set literal) so the field order is deterministic.
    # NOTE(review): `is_update` is mapped as a boolean on WorkflowDocument;
    # confirm that free-text search against it behaves as intended.
    search_fields = (
        "workflow_type",
        "status",
        "is_update",
    )
    ordering = ["_updated_at"]

    def __init__(self, *args, **kwargs):
        """Build the view, then extend the base search with total-hit tracking."""
        super().__init__(*args, **kwargs)
        # Sent as an extra search-body option so the reported total is the
        # real hit count instead of the backend's default capped estimate.
        self.search = self.search.extra(track_total_hits=True)

    def get_serializer_class(self):
        """Return the serializer used for search hits."""
        return WorkflowDocumentSerializer
29 changes: 29 additions & 0 deletions backoffice/workflows/documents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from django.conf import settings
from django_opensearch_dsl import Document, fields
from django_opensearch_dsl.registries import registry

from backoffice.workflows.models import Workflow


@registry.register_document
class WorkflowDocument(Document):
    """Search document definition for the ``Workflow`` model."""

    # Explicitly typed document fields.
    id = fields.TextField()
    workflow_type = fields.KeywordField()
    data = fields.ObjectField()
    status = fields.KeywordField()
    is_update = fields.BooleanField()

    class Index:
        # Keep `name` first: the `settings` assignment below rebinds that
        # identifier inside this class body, so the Django settings object
        # has to be read before it is shadowed.
        name = settings.OPENSEARCH_INDEX_NAMES[__name__]
        settings = dict(
            number_of_shards=1,
            number_of_replicas=1,
            max_result_window=70000,
        )

    class Django:
        # Model this document is built from.
        model = Workflow
        # Model fields listed for indexing in addition to the ones above.
        fields = ["_created_at", "_updated_at"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 4.2.6 on 2024-06-03 06:25

from django.db import migrations, models
import django.utils.timezone


class Migration(migrations.Migration):
    """Add the ``_created_at`` and ``_updated_at`` timestamps to ``workflow``."""

    dependencies = [("workflows", "0005_workflowticket_ticket_type_alter_workflow_status")]

    operations = [
        # The default is only there to fill rows that already exist;
        # preserve_default=False keeps it out of the resulting field state.
        migrations.AddField(
            model_name="workflow",
            name="_created_at",
            field=models.DateTimeField(
                auto_now_add=True,
                default=django.utils.timezone.now,
            ),
            preserve_default=False,
        ),
        migrations.AddField(
            model_name="workflow",
            name="_updated_at",
            field=models.DateTimeField(auto_now=True),
        ),
    ]
3 changes: 3 additions & 0 deletions backoffice/workflows/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ class Workflow(models.Model):
core = models.BooleanField()
is_update = models.BooleanField()

_created_at = models.DateTimeField(auto_now_add=True)
_updated_at = models.DateTimeField(auto_now=True)


class WorkflowTicket(models.Model):
workflow_id = models.ForeignKey(Workflow, on_delete=models.CASCADE)
Expand Down
3 changes: 2 additions & 1 deletion compose/local/django/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ RUN curl -sSL https://install.python-poetry.org \
| python - --version "${POETRY_VERSION}" \
&& poetry --version

COPY poetry.lock pyproject.toml .
COPY poetry.lock ./poetry.lock
COPY pyproject.toml ./pyproject.toml
RUN poetry install --no-root

COPY . ${APP_HOME}
Expand Down
2 changes: 2 additions & 0 deletions compose/local/opensearch/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Local OpenSearch image: the upstream 2.14.0 release plus the ICU analysis plugin.
FROM opensearchproject/opensearch:2.14.0
# Install analysis-icu at build time so the plugin is already present when the node starts.
RUN bin/opensearch-plugin install analysis-icu
16 changes: 16 additions & 0 deletions config/search_router.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from django.conf import settings
from rest_framework.routers import DefaultRouter, SimpleRouter

from backoffice.workflows.api.views import WorkflowDocumentView

# DefaultRouter is used only when DEBUG is on; otherwise the plain SimpleRouter.
router = DefaultRouter() if settings.DEBUG else SimpleRouter()

# Workflow
router.register("workflow", WorkflowDocumentView, basename="workflow")

# URL namespace and patterns picked up by include("config.search_router").
app_name = "search"
urlpatterns = router.urls
21 changes: 21 additions & 0 deletions config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import platform
from pathlib import Path

from opensearch_dsl import connections
import dj_database_url
import environ

Expand Down Expand Up @@ -98,6 +99,8 @@
"drf_spectacular",
"allauth.socialaccount.providers.orcid",
"django_prometheus",
"django_opensearch_dsl",
"django_elasticsearch_dsl_drf"
]

LOCAL_APPS = ["backoffice.users", "backoffice.workflows", "backoffice.management"]
Expand Down Expand Up @@ -341,6 +344,7 @@
),
"DEFAULT_PERMISSION_CLASSES": ("backoffice.management.permissions.IsAdminOrCuratorUser",),
"DEFAULT_SCHEMA_CLASS": "drf_spectacular.openapi.AutoSchema",
"DEFAULT_PAGINATION_CLASS": "backoffice.utils.pagination.StandardResultsSetPagination",
}

# django-cors-headers - https://github.com/adamchainz/django-cors-headers#setup
Expand All @@ -367,3 +371,20 @@
}
}
}




# OpenSearch
# ------------------------------------------------------------------------------
# Index name for each document module, namespaced by OPENSEARCH_INDEX_PREFIX.
OPENSEARCH_INDEX_NAMES = {
    "backoffice.workflows.documents": "{}-workflows".format(env("OPENSEARCH_INDEX_PREFIX")),
}

# Connection settings; the host comes from the OPENSEARCH_HOST variable.
OPENSEARCH_DSL = {
    "default": {
        "hosts": env("OPENSEARCH_HOST"),
    },
}

# Workaround: the connection settings won't be added automatically, so the
# default connection is registered explicitly here.
connections.configure(default=OPENSEARCH_DSL["default"])
2 changes: 1 addition & 1 deletion config/settings/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
default="uBCAZjYhsVU3Zg8k96GM2c0GqgnTHyj0L3UhNQd4kQTktLyFztesAqb81jucXSMY",
)
# https://docs.djangoproject.com/en/dev/ref/settings/#allowed-hosts
ALLOWED_HOSTS = ["localhost", "0.0.0.0", "127.0.0.1"]
ALLOWED_HOSTS = ["localhost", "0.0.0.0", "127.0.0.1", "host.docker.internal"]

# CACHES
# ------------------------------------------------------------------------------
Expand Down
20 changes: 19 additions & 1 deletion config/settings/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
"DJANGO_SECRET_KEY",
default="0GcuNOm9KXvazfJKLWYOoTSIBznjRNj3qfioFMKtHBow8Sv7hOmjkBbOBRZDGZy2",
)
ALLOWED_HOSTS = ["127.0.0.1"]

# https://docs.djangoproject.com/en/dev/ref/settings/#test-runner
TEST_RUNNER = "django.test.runner.DiscoverRunner"

Expand All @@ -33,5 +35,21 @@
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#media-url
MEDIA_URL = "http://media.testserver"
# Your stuff...
# OpenSearch
# ------------------------------------------------------------------------------
# Fixed index name used by the test settings.
OPENSEARCH_INDEX_NAMES = {
    "backoffice.workflows.documents": "backoffice-backend-test-workflows",
}
# Force an index refresh with every save.
OPENSEARCH_DSL_AUTO_REFRESH = True

# Plain-HTTP connection without certificate checks and with a 30 second timeout.
OPENSEARCH_DSL = {
    "default": dict(
        hosts=[env("OPENSEARCH_HOST")],
        use_ssl=False,
        verify_certs=False,
        timeout=30,
    ),
}

1 change: 1 addition & 0 deletions config/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
SpectacularSwaggerView.as_view(url_name="api-schema"),
name="api-docs",
),
path("api/search/", include("config.search_router")),
]

if settings.DEBUG:
Expand Down
29 changes: 29 additions & 0 deletions local.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,35 @@ services:
networks:
- djangonetwork

# Single-node OpenSearch for local development, built from the local Dockerfile.
opensearch:
  container_name: backoffice_local_opensearch
  build:
    context: .
    dockerfile: ./compose/local/opensearch/Dockerfile
  restart: always
  environment:
    - cluster.name=opensearch-cluster
    - node.name=opensearch-node1
    - discovery.seed_hosts=opensearch-node1
    - bootstrap.memory_lock=true
    - discovery.type=single-node
    - DISABLE_SECURITY_PLUGIN=true
    - "OPENSEARCH_JAVA_OPTS=-Xms1024m -Xmx1024m"
  ulimits:
    # Unlimited memlock goes together with bootstrap.memory_lock=true above.
    memlock:
      soft: -1
      hard: -1
    nofile:
      soft: 65536
      hard: 65536
  mem_limit: 2g
  ports:
    # Quoted so every mapping is read as a string by any YAML parser.
    - "9200:9200"  # port the Django service targets (OPENSEARCH_HOST)
    - "9300:9300"
    - "9600:9600"  # performance analysis
  networks:
    - djangonetwork

networks:
djangonetwork:
driver: bridge
Loading

0 comments on commit 566ce2e

Please sign in to comment.