From 6c0d4671f7611afb78b008ae04f9a1080bde6627 Mon Sep 17 00:00:00 2001 From: Friedel Wolff Date: Fri, 2 Aug 2024 21:51:54 +0200 Subject: [PATCH] Rework filtering and searching for projects Two intertwined issues are fixed: - An empty search query would cause no projects to be returned. Now we only search if a query is given. - We weren't filtering projects at all. We're really going against the intention of django_filters with this one, but let's see... The test was simply wrong (and duplicated). --- app/general/filters.py | 46 +++++++++++++++++--------------- app/general/tests/test_filter.py | 10 ++----- 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/app/general/filters.py b/app/general/filters.py index e75f96bc..72fe7ef9 100644 --- a/app/general/filters.py +++ b/app/general/filters.py @@ -14,7 +14,7 @@ class DocumentFileFilter(django_filters.FilterSet): - search = django_filters.CharFilter(method="filter_search", label="Search") + search = django_filters.CharFilter(method="ignore", label="Search") institution = ModelMultipleChoiceFilter( queryset=Institution.objects.all(), widget=forms.CheckboxSelectMultiple @@ -37,7 +37,6 @@ class Meta: def filter_queryset(self, queryset): # More information about weighting and normalization in postgres: # https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-RANKING - queryset = super().filter_queryset(queryset) search = self.form.cleaned_data.get("search", "").strip() query = SearchQuery(search) @@ -60,20 +59,18 @@ def filter_queryset(self, queryset): project_search_vector = SearchVector("name", weight="A") + SearchVector( "description", weight="B" ) - project_query = ( - Project.objects.annotate( - heading=F("name"), - view=Value("project_detail"), - logo_url=F("logo"), - associated_url=F("url"), - search_headline=SearchHeadline("description", query, max_words=15, min_words=10), - rank=SearchRank(project_search_vector, query, normalization=16), - search=project_search_vector, - ) - 
.filter(search=query) - .values(*fields) + project_query = Project.objects.annotate( + heading=F("name"), + view=Value("project_detail"), + logo_url=F("logo"), + associated_url=F("url"), + search_headline=SearchHeadline("description", query, max_words=15, min_words=10), + rank=SearchRank(project_search_vector, query, normalization=16), ) + queryset = super().filter_queryset(queryset) + project_query = super().filter_queryset(project_query) + # We limit the headline to limit the performance impact. On very large # documents, this slows things down if unconstrained. search_headline = SearchHeadline( @@ -87,13 +84,18 @@ def filter_queryset(self, queryset): associated_url=F("url"), rank=search_rank, search_headline=search_headline, - ).values(*fields) - return queryset.union(project_query, all=True).order_by("-rank") + ) + if search: + # An empty search on Project filters out everything. + queryset = queryset.filter(search_vector=query) + project_query = project_query.annotate(search=project_search_vector).filter( + search=query + ) - def filter_search(self, queryset, name, value): - if value: - query = SearchQuery(value.strip()) - return queryset.filter(search_vector=query) + queryset = queryset.values(*fields) + project_query = project_query.values(*fields) + return queryset.union(project_query, all=True).order_by("-rank") - else: - return queryset + def ignore(self, queryset, name, value): + # All fields are handled in `.filter_queryset()` + return queryset diff --git a/app/general/tests/test_filter.py b/app/general/tests/test_filter.py index 9eea3982..bc328769 100644 --- a/app/general/tests/test_filter.py +++ b/app/general/tests/test_filter.py @@ -50,14 +50,8 @@ def test_institution_filter(self): data = {"institution": [self.institution1.id]} filter = DocumentFileFilter(data=data) qs = filter.qs - self.assertEqual(len(qs), 1) - self.assertEqual(qs[0]["id"], self.doc1.id) - - def test_institution_filter(self): - data = {"institution": [self.institution1.id]} - 
filter = DocumentFileFilter(data=data) - qs = filter.qs - self.assertEqual(len(qs), 1) + self.assertEqual(len(qs), 2) + # TODO: ordering between documents and projects is not yet defined self.assertEqual(qs[0]["id"], self.doc1.id) def test_subjects_filter(self):