Skip to content

Commit

Permalink
Merge pull request #108 from SADiLaR/bug/search-bugs
Browse files Browse the repository at this point in the history
Fix search bugs
  • Loading branch information
friedelwolff authored Aug 2, 2024
2 parents 3a16eab + 6c0d467 commit 0d52ebd
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 72 deletions.
50 changes: 24 additions & 26 deletions app/general/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,11 @@


class DocumentFileFilter(django_filters.FilterSet):
search = django_filters.CharFilter(method="filter_search", label="Search")
search = django_filters.CharFilter(method="ignore", label="Search")

institution = ModelMultipleChoiceFilter(
queryset=Institution.objects.all(), widget=forms.CheckboxSelectMultiple
)
document_type = MultipleChoiceFilter(
choices=DocumentFile.document_type_choices, widget=forms.CheckboxSelectMultiple
)
subjects = ModelMultipleChoiceFilter(
queryset=Subject.objects.all(), widget=forms.CheckboxSelectMultiple
)
Expand All @@ -32,7 +29,6 @@ class DocumentFileFilter(django_filters.FilterSet):
class Meta:
model = DocumentFile
fields = [
"document_type",
"institution",
"subjects",
"languages",
Expand All @@ -41,7 +37,6 @@ class Meta:
def filter_queryset(self, queryset):
# More information about weighting and normalization in postgres:
# https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-RANKING
queryset = super().filter_queryset(queryset)

search = self.form.cleaned_data.get("search", "").strip()
query = SearchQuery(search)
Expand All @@ -64,20 +59,18 @@ def filter_queryset(self, queryset):
project_search_vector = SearchVector("name", weight="A") + SearchVector(
"description", weight="B"
)
project_query = (
Project.objects.annotate(
heading=F("name"),
view=Value("project_detail"),
logo_url=F("logo"),
associated_url=F("url"),
search_headline=SearchHeadline("description", query, max_words=15, min_words=10),
rank=SearchRank(project_search_vector, query, normalization=16),
search=project_search_vector,
)
.filter(search=query)
.values(*fields)
project_query = Project.objects.annotate(
heading=F("name"),
view=Value("project_detail"),
logo_url=F("logo"),
associated_url=F("url"),
search_headline=SearchHeadline("description", query, max_words=15, min_words=10),
rank=SearchRank(project_search_vector, query, normalization=16),
)

queryset = super().filter_queryset(queryset)
project_query = super().filter_queryset(project_query)

# We limit the headline to limit the performance impact. On very large
# documents, this slows things down if unconstrained.
search_headline = SearchHeadline(
Expand All @@ -91,13 +84,18 @@ def filter_queryset(self, queryset):
associated_url=F("url"),
rank=search_rank,
search_headline=search_headline,
).values(*fields)
return queryset.union(project_query, all=True).order_by("-rank")
)
if search:
# An empty search on Project filters out everything.
queryset = queryset.filter(search_vector=query)
project_query = project_query.annotate(search=project_search_vector).filter(
search=query
)

def filter_search(self, queryset, name, value):
if value:
query = SearchQuery(value.strip())
return queryset.filter(search_vector=query)
queryset = queryset.values(*fields)
project_query = project_query.values(*fields)
return queryset.union(project_query, all=True).order_by("-rank")

else:
return queryset
def ignore(self, queryset, name, value):
# All fields are handled in `.filter_queryset()`
return queryset
18 changes: 2 additions & 16 deletions app/general/tests/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,22 +50,9 @@ def test_institution_filter(self):
data = {"institution": [self.institution1.id]}
filter = DocumentFileFilter(data=data)
qs = filter.qs
self.assertEqual(len(qs), 1)
self.assertEqual(qs[0]["id"], self.doc1.id)

def test_institution_filter(self):
data = {"institution": [self.institution1.id]}
filter = DocumentFileFilter(data=data)
qs = filter.qs
self.assertEqual(len(qs), 1)
self.assertEqual(qs[0]["id"], self.doc1.id)

def test_document_type_filter(self):
data = {"document_type": ["glossary"]}
filter = DocumentFileFilter(data=data)
qs = filter.qs
self.assertEqual(len(qs), 2)
self.assertCountEqual([qs[0]["id"], qs[1]["id"]], [self.doc1.id, self.doc2.id])
# TODO: ordering between documents and projects is not yet defined
self.assertEqual(qs[0]["id"], self.doc1.id)

def test_subjects_filter(self):
data = {"subjects": [self.subject1.id]}
Expand All @@ -84,7 +71,6 @@ def test_languages_filter(self):
def test_combined_filters(self):
data = {
"institution": [self.institution1.id],
"document_type": ["glossary"],
"subjects": [self.subject1.id],
"languages": [self.language1.id],
}
Expand Down
30 changes: 0 additions & 30 deletions app/templates/app/search.html
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,6 @@
</div>


<div class="form-group">
<label class="mt-3 mb-2 filter-label">{% trans "Document Type" %}</label>
<div class="checkbox-container">
{% for checkbox in filter.form.document_type %}
<div>
<label for="{{ checkbox.id_for_label }}">
{{ checkbox.tag }}
{{ checkbox.choice_label }}
</label>
<br>
</div>
{% endfor %}
</div>
</div>


<div class="form-group">
<label class="mt-3 mb-2 filter-label">{% trans "Subjects" %}</label>
<div class="checkbox-container">
Expand Down Expand Up @@ -208,20 +192,6 @@ <h3>
</div>
{% endfor %}
</div>
</div>
<div class="form-group">
<label class="mt-3 mb-2 filter-label">{% trans "Document Type" %}</label>
<div class="checkbox-container">
{% for checkbox in filter.form.document_type %}
<div>
<label for="{{ checkbox.id_for_label }}">
{{ checkbox.tag }}
{{ checkbox.choice_label }}
</label>
<br>
</div>
{% endfor %}
</div>
</div>
<div class="form-group">
<label class="mt-3 mb-2 filter-label">{% trans "Subjects" %}</label>
Expand Down

0 comments on commit 0d52ebd

Please sign in to comment.