Skip to content

Commit

Permalink
Merge branch 'develop' into feature/default-sort
Browse files Browse the repository at this point in the history
  • Loading branch information
lukavdplas authored Mar 12, 2024
2 parents c44f58b + e7045ac commit 68c199a
Show file tree
Hide file tree
Showing 29 changed files with 814 additions and 4 deletions.
38 changes: 38 additions & 0 deletions backend/addcorpus/citation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os
from django.template import Template, Context
from datetime import date

from django.conf import settings

from addcorpus.models import CorpusConfiguration
from addcorpus.load_corpus import corpus_dir


def render_citation(corpus_name):
    '''
    Render the citation text for the corpus named `corpus_name`.

    The raw template is fetched with `citation_template` and passed to
    `render_citation_context`; the rendered string is returned.
    '''
    # NOTE(review): `citation_template` returns None when the corpus has no
    # citation page; that value is handed to the renderer unchanged - confirm
    # this is the intended behaviour for corpora without a citation page.
    return render_citation_context(citation_template(corpus_name))


def citation_template(corpus_name):
    '''
    Read the raw (unrendered) citation template of a corpus.

    Looks up the `CorpusConfiguration` whose corpus has the name `corpus_name`.
    If it has a `citation_page`, that file is read from the `citation`
    subdirectory of the corpus directory.

    Returns the file contents as a string, or `None` when the corpus has no
    citation page configured.
    '''
    conf = CorpusConfiguration.objects.get(corpus__name=corpus_name)
    page = conf.citation_page

    if not page:
        return None

    path = os.path.join(corpus_dir(corpus_name), 'citation', page)
    # Read as UTF-8 explicitly: citation text may contain non-ASCII characters
    # and should not depend on the default encoding of the host's locale.
    with open(path, encoding='utf-8') as f:
        return f.read()


def render_citation_context(raw_template):
    '''
    Render a citation template string and return the resulting text.

    The template can use the following variables:
    - `frontend_url`: the value of `settings.BASE_URL`
    - `date.year`, `date.month`, `date.day`: today's date, where `month` is
      formatted with `%B` (the full month name)
    '''
    compiled = Template(raw_template)
    current = date.today()
    date_parts = {
        'year': current.year,
        'month': current.strftime('%B'),
        'day': current.day
    }
    variables = {
        'frontend_url': settings.BASE_URL,
        'date': date_parts,
    }
    return compiled.render(Context(variables))
21 changes: 21 additions & 0 deletions backend/addcorpus/json_corpora/validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os
import json
from jsonschema import validate as validate_schema

# Absolute path of the directory that contains this module.
here = os.path.split(os.path.abspath(__file__))[0]
# Directory holding the JSON schema files, located relative to this module.
schemas_dir = os.path.join(here, '../schemas')

def corpus_schema():
    '''
    Load the JSON schema for corpus definitions.

    Reads `corpus.schema.json` from `schemas_dir` and returns the parsed JSON.
    '''
    path = os.path.join(schemas_dir, 'corpus.schema.json')
    # Read as UTF-8 explicitly so that parsing the schema does not depend on
    # the default encoding of the host's locale.
    with open(path, encoding='utf-8') as f:
        return json.load(f)

def validate(instance):
    '''
    Check a JSON corpus definition against the corpus schema.

    For now the only check is conformance to corpus.schema.json, as loaded by
    `corpus_schema`. Nothing is returned; any error raised by the schema
    validation propagates to the caller.
    '''
    validate_schema(instance, corpus_schema())
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 4.2.7 on 2024-03-05 15:30

import addcorpus.validators
from django.db import migrations, models


class Migration(migrations.Migration):
    # Adds the `citation_page` field to the `corpusconfiguration` model.

    # Must be applied after the previous addcorpus migration.
    dependencies = [
        ('addcorpus', '0008_alter_field_display_type_geo'),
    ]

    operations = [
        migrations.AddField(
            model_name='corpusconfiguration',
            name='citation_page',
            field=models.CharField(
                blank=True,
                help_text='filename of the citation specification (in markdown) for this corpus',
                max_length=128,
                validators=[
                    addcorpus.validators.validate_markdown_filename_extension,
                ],
            ),
        ),
    ]
6 changes: 6 additions & 0 deletions backend/addcorpus/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ class CorpusConfiguration(models.Model):
validators=[validate_markdown_filename_extension],
help_text='filename of the markdown documentation file for this corpus',
)
citation_page = models.CharField(
max_length=128,
blank=True,
validators=[validate_markdown_filename_extension],
help_text='filename of the citation specification (in markdown) for this corpus',
)
description = models.CharField(
max_length=MAX_LENGTH_DESCRIPTION,
blank=True,
Expand Down
1 change: 1 addition & 0 deletions backend/addcorpus/save_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def _copy_corpus_attributes(corpus_definition: CorpusDefinition, configuration:
'allow_image_download',
'category',
'description_page',
'citation_page',
'document_context',
'es_alias',
'es_index',
Expand Down
226 changes: 226 additions & 0 deletions backend/addcorpus/schemas/corpus.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://github.com/UUDigitalHumanitieslab/I-analyzer/blob/develop/backend/addcorpus/schemas/corpus.schema.json",
"title": "Corpus",
"description": "A corpus on I-analyzer",
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Internal name. Used in the URL and the database."
},
"meta": {
"type": "object",
"description": "Metadata about the corpus",
"properties": {
"title": {
"type": "string",
"description": "Human-friendly name"
},
"description": {
"type": "string",
"description": "Longer description"
},
"languages": {
"type": "array",
"description": "IETF tags of languages used in the content",
"items": {
"type": "string"
},
"uniqueItems": true,
"minItems": 1
},
"category": {
"type": "string",
"description": "nature of the content",
"enum": [
"parliament",
"periodical",
"finance",
"ruling",
"review",
"inscription",
"oration",
"book",
"informative"
]
},
"date_range": {
"type": "object",
"description": "the date range of the content",
"properties": {
"min": {
"type": "string",
"format": "date"
},
"max": {
"type": "string",
"format": "date"
}
}
}
},
"required": [
"title",
"description",
"languages",
"category",
"date_range"
]
},
"source_data": {
"type": "object",
"properties": {
"type": {
"type": "string",
"description": "data type of the source files",
"enum": ["csv"]
},
"options": {
"type": "object",
"description": "additional options for source files",
"properties": {
"delimiter": {
"type": "string",
"description": "delimiter between values in the source files",
"enum": [",", ";", "\t"]
}
}
}
}
},
"fields": {
"type": "array",
"description": "list of fields",
"items": {
"type": "object",
"description": "A field in a corpus",
"properties": {
"name": {
"type": "string",
"description": "internal name"
},
"display_name": {
"type": "string",
"description": "human-friendly name"
},
"description": {
"type": "string",
"description": "longer description for users"
},
"type": {
"type": "string",
"enum": [
"text_content",
"text_metadata",
"url",
"integer",
"float",
"date",
"boolean",
"geo_json"
]
},
"options": {
"type": "object",
"properties": {
"search": {
"type": "boolean",
"description": "whether the field supports full-text search"
},
"filter": {
"type": "string",
"description": "search filter for the field",
"enum": ["show", "hide", "none"]
},
"preview": {
"type": "boolean",
"description": "whether the field is included in the preview of a document"
},
"visualize": {
"type": "boolean",
"description": "whether the field is visualised"
},
"sort": {
"type": "boolean",
"description": "whether search results can be sorted on this field"
},
"hidden": {
"type": "boolean",
"description": "whether the field is hidden from the interface"
}
},
"required": ["search", "filter", "preview", "visualize", "sort", "hidden"]
},
"language": {
"type": "string",
"description": "language of the field's content. Either an IETF tag, or \"dynamic\"."
},
"extract": {
"type": "object",
"description": "how to extract this field's value from source files",
"properties": {
"column": {
"type": "string",
"description": "name of the column in the CSV file"
}
},
"required": ["column"]
}
},
"required": ["name", "display_name", "type", "options", "extract"]
}
},
"options": {
"type": "object",
"properties": {
"default_sort": {
"description": "default sort settings for search results",
"$ref": "#sortSetting"
},
"language_field": {
"type": "string",
"description": "name of the field that contains the IETF tag of the document's content"
},
"document_context": {
"type": "object",
"description": "description of how documents can be grouped",
"properties": {
"context_field": {
"type": "string",
"description": "name of the field to group by"
},
"display_name": {
"type": "string",
"description": "display name of a group, e.g. 'book'"
},
"sort": {
"description": "when showing document context, sort them like this",
"$ref": "#sortSetting"
}
},
"required": ["context_field", "display_name"]
}
}
}
},
"required": ["name", "meta", "source_data", "fields"],
"$defs": {
"sortSetting": {
"$anchor": "sortSetting",
"type": "object",
"description": "Describes how to sort search results",
"properties": {
"field": {
"type": "string",
"description": "name of the field on which to sort"
},
"ascending": {
"type": "boolean",
"description": "whether the sort direction is ascending or descending"
}
},
"required": ["field", "ascending"]
}
}
}
1 change: 1 addition & 0 deletions backend/addcorpus/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class Meta:
'allow_image_download',
'category',
'description_page',
'citation_page',
'description',
'document_context',
'es_alias',
Expand Down
7 changes: 7 additions & 0 deletions backend/addcorpus/tests/citation/citation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
## APA

> Centre for Digital Humanities, Utrecht University (2024). *Example corpus* [data set]. URL: {{ frontend_url }}
## MLA

> Centre for Digital Humanities, Utrecht University. *Example corpus*, {{ frontend_url }}. Accessed {{ date.day }} {{ date.month }} {{ date.year }}.
2 changes: 2 additions & 0 deletions backend/addcorpus/tests/mock_csv_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ class MockCSVCorpus(CSVCorpusDefinition):
max_date = datetime.datetime(year=2022, month=12, day=31)
image = 'nothing.jpeg'
data_directory = os.path.join(here, 'csv_example')
citation_page = 'citation.md'

field_entry = 'character'

languages = ['en']
Expand Down
21 changes: 21 additions & 0 deletions backend/addcorpus/tests/test_citation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from addcorpus.citation import render_citation
from datetime import date
from unittest.mock import patch

# Rendered citation text that `test_citation_page` compares against, with
# "today" fixed to 1 January 2024.
# NOTE(review): the URL presumably comes from BASE_URL in the test settings - confirm.
expected = '''## APA
> Centre for Digital Humanities, Utrecht University (2024). *Example corpus* [data set]. URL: http://localhost:4200
## MLA
> Centre for Digital Humanities, Utrecht University. *Example corpus*, http://localhost:4200. Accessed 1 January 2024.
'''

def test_citation_page(mock_corpus):
    '''
    Rendering the citation of the mock corpus yields the `expected` text.

    `date` is patched in `addcorpus.citation` so that "today" is always
    1 January 2024 and the rendered access date is stable.
    '''
    fixed_today = date(2024, 1, 1)

    with patch('addcorpus.citation.date') as patched_date:
        patched_date.today.return_value = fixed_today
        rendered = render_citation(mock_corpus)

    assert rendered == expected
5 changes: 5 additions & 0 deletions backend/addcorpus/tests/test_corpus_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ def test_corpus_documentation_view(admin_client, mock_corpus):
response = admin_client.get(f'/api/corpus/documentation/{mock_corpus}/mock-csv-corpus.md')
assert response.status_code == 200

def test_corpus_citation_view(admin_client, mock_corpus):
    '''The citation endpoint responds with status 200 for the mock corpus.'''
    url = f'/api/corpus/citation/{mock_corpus}'
    assert admin_client.get(url).status_code == 200


def test_nonexistent_corpus(admin_client):
response = admin_client.get(f'/api/corpus/documentation/unknown-corpus/mock-csv-corpus.md')
assert response.status_code == 404
Expand Down
1 change: 1 addition & 0 deletions backend/addcorpus/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
path('', CorpusView.as_view()),
path('image/<str:corpus>/<str:filename>', CorpusImageView.as_view()),
path('documentation/<str:corpus>/<str:filename>', CorpusDocumentationView.as_view()),
path('citation/<str:corpus>', CorpusCitationView.as_view()),
path('document/<str:corpus>/<str:filename>', CorpusDocumentView.as_view())
]
Loading

0 comments on commit 68c199a

Please sign in to comment.