Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:UUDigitalHumanitieslab/I-analyze…
Browse files Browse the repository at this point in the history
…r into develop
  • Loading branch information
JeltevanBoheemen committed Oct 23, 2023
2 parents 844e192 + 26f331c commit e7cde60
Show file tree
Hide file tree
Showing 53 changed files with 797 additions and 128 deletions.
36 changes: 36 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
---
name: Bug report
about: Let us know about something that isn't working right
title: ''
labels: bug
assignees: ''

---

### What went wrong?

Describe what happened.

### Expected behavior

What did you expect to happen?

### Screenshots

If applicable, please add a screenshot of the problem!

### Which version?

Please specify where you encountered the issue:

- [ ] https://ianalyzer.hum.uu.nl
- [ ] https://peopleandparliament.hum.uu.nl
- [ ] https://peace.sites.uu.nl/
- [ ] a server hosted elsewhere (i.e. not by the research software lab)
- [ ] a local server

If this happened on a local or third-party server, it helps if you can be more specific about the version. Please include the version number (e.g. "3.2.4") or a commit hash if you know it!

### To reproduce

How can a developer replicate the issue? Please provide any information you can. For example: "I went to https://ianalyzer.hum.uu.nl/search/troonredes?date=1814-01-01:1972-01-01 and then clicked on *Download CSV*. I pressed *cancel* and then I clicked *Download CSV* again."
20 changes: 20 additions & 0 deletions .github/ISSUE_TEMPLATE/feature_request.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for something new
title: ''
labels: enhancement
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.
41 changes: 29 additions & 12 deletions backend/api/tests/test_api_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,11 @@
from addcorpus.models import Corpus
from rest_framework.status import is_success

def test_search_history_view(admin_user, admin_client):
corpus = Corpus.objects.create(name = 'mock-corpus')

# get search history
response = admin_client.get('/api/search_history/')
assert is_success(response.status_code)
assert len(response.data) == 0

# add a query to search history
data = {
def mock_query_data(user, corpus_name):
return {
'aborted': False,
'corpus': corpus.name,
'user': admin_user.id,
'corpus': corpus_name,
'user': user.id,
'started': datetime.now().isoformat(),
'completed': datetime.now().isoformat(),
'query_json': {
Expand All @@ -25,6 +17,17 @@ def test_search_history_view(admin_user, admin_client):
'total_results': 10,
'transferred': 0,
}

def test_search_history_view(admin_user, admin_client):
corpus = Corpus.objects.create(name = 'mock-corpus')

# get search history
response = admin_client.get('/api/search_history/')
assert is_success(response.status_code)
assert len(response.data) == 0

# add a query to search history
data = mock_query_data(admin_user, 'mock-corpus')
response = admin_client.post('/api/search_history/', data, content_type='application/json')
assert is_success(response.status_code)

Expand All @@ -34,6 +37,20 @@ def test_search_history_view(admin_user, admin_client):
assert len(response.data) == 1


def test_delete_search_history(auth_client, auth_user, db):
    '''The delete_all action removes every query in the user's search history.'''
    corpus_name = 'mock-corpus'
    Corpus.objects.create(name=corpus_name)
    payload = mock_query_data(auth_user, corpus_name)
    auth_client.post('/api/search_history/', payload, content_type='application/json')
    assert len(auth_user.queries.all()) == 1

    response = auth_client.post('/api/search_history/delete_all/')
    assert is_success(response.status_code)
    assert len(auth_user.queries.all()) == 0


def test_task_status_view(transactional_db, admin_client, celery_worker):
bad_request = {
'bad_key': 'data'
Expand Down
8 changes: 7 additions & 1 deletion backend/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from api.serializers import QuerySerializer
from rest_framework.permissions import IsAuthenticated
from rest_framework.exceptions import APIException
from rest_framework.decorators import action
import logging
from rest_framework.permissions import IsAuthenticated
from api.utils import check_json_keys
from celery import current_app as celery_app

Expand All @@ -23,6 +23,12 @@ class QueryViewset(viewsets.ModelViewSet):
def get_queryset(self):
return self.request.user.queries.all()

@action(detail=False, methods=['post'])
def delete_all(self, request):
    '''
    Delete the requesting user's entire search history
    (everything in the view's queryset).
    '''
    self.get_queryset().delete()
    return Response('success')

class TaskStatusView(APIView):
'''
Get the status of an array of backend tasks (working/done/failed),
Expand Down
10 changes: 7 additions & 3 deletions backend/download/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,22 @@ def index_ml_mock_corpus(es_client, ml_mock_corpus):
# Yields the active mock corpus after the size-specific index fixtures
# (small / large / ml) have all run, so dependents see an indexed corpus.
# NOTE(review): the @pytest.fixture decorator is presumably just above this
# line — confirm against the full conftest.
def index_mock_corpus(es_client, mock_corpus, index_small_mock_corpus, index_large_mock_corpus, index_ml_mock_corpus):
    yield mock_corpus

def save_all_results_csv(mock_corpus, mock_corpus_specs):
def all_results_request_json(mock_corpus, mock_corpus_specs):
    '''Build the request payload for downloading all results of the corpus' example query.'''
    example_query = mock_corpus_specs['example_query']
    return {
        'corpus': mock_corpus,
        'es_query': MATCH_ALL,
        'fields': mock_corpus_specs['fields'],
        'route': f'/search/{mock_corpus};query={example_query}',
    }

def save_all_results_csv(mock_corpus, mock_corpus_specs):
    '''
    Run a full search-results download for the mock corpus and return the csv path.

    Fix: removed the stale pre-merge two-argument call to tasks.make_csv — its
    result was immediately overwritten, and make_csv now requires a download/log
    ID as its third argument, so the old call would raise a TypeError.
    '''
    request_json = all_results_request_json(mock_corpus, mock_corpus_specs)
    results = tasks.download_scroll(request_json)
    fake_id = mock_corpus + '_all_results'  # stands in for a Download record ID
    filename = tasks.make_csv(results, request_json, fake_id)
    return filename

Expand Down
21 changes: 6 additions & 15 deletions backend/download/create_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,10 @@ def write_file(filename, fieldnames, rows, dialect = 'excel'):

return filepath

def create_filename(descriptive_part, essential_suffix = '.csv'):
    '''
    Truncate the descriptive part so the full path plus suffix stays
    within the common 255-character filename limit.
    '''
    budget = 255 - (len(essential_suffix) + len(settings.CSV_FILES_PATH))
    # slicing already clamps to the string's length, so no min() is needed
    return descriptive_part[:budget] + essential_suffix
def create_filename(download_id):
    '''Return the on-disk csv filename for a download, derived from its ID.'''
    return '{}.csv'.format(download_id)

def search_results_csv(results, fields, query):
def search_results_csv(results, fields, query, download_id):
entries = []
field_set = set(fields)
field_set.update(['query'])
Expand All @@ -50,14 +48,14 @@ def search_results_csv(results, fields, query):
entry.update({highlight_field_name: soup.get_text()})
entries.append(entry)

filename = create_filename(query)
filename = create_filename(download_id)
field_set.discard('context')
fieldnames = sorted(field_set)
filepath = write_file(filename, fieldnames, entries, dialect = 'resultsDialect')
return filepath


def term_frequency_csv(queries, results, field_name, unit = None):
def term_frequency_csv(queries, results, field_name, download_id, unit = None):
has_token_counts = results[0].get('token_count', None) != None
query_column = ['Query'] if len(queries) > 1 else []
freq_columns = ['Term frequency', 'Relative term frequency (by # documents)', 'Total documents']
Expand All @@ -66,17 +64,10 @@ def term_frequency_csv(queries, results, field_name, unit = None):

rows = term_frequency_csv_rows(queries, results, field_name, unit)

filename = term_frequency_filename(queries, field_name)
filename = create_filename(download_id)
filepath = write_file(filename, fieldnames, rows)
return filepath

def term_frequency_filename(queries, field_name):
    '''Build a descriptive csv filename for a term frequency download.'''
    joined_queries = '_'.join(queries)
    # timestamp keeps filenames unique across repeated downloads
    timestamp = datetime.now().isoformat(sep='_', timespec='minutes')
    description = f'term_frequency_{field_name}_{joined_queries}'
    return create_filename(description, '_' + timestamp + '.csv')

def term_frequency_csv_rows(queries, results, field_name, unit):
for result in results:
field_value = format_field_value(result['key'], unit)
Expand Down
4 changes: 2 additions & 2 deletions backend/download/mail.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ def send_csv_email(user_email, username, download_id):

subject = 'I-Analyzer CSV download'
from_email = settings.DEFAULT_FROM_EMAIL
path = Download.objects.get(id=download_id).filename
_, filename = os.path.split(path)
download = Download.objects.get(id=download_id)
filename = download.descriptive_filename()

context = {
'email_title': 'Download CSV',
Expand Down
7 changes: 7 additions & 0 deletions backend/download/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,10 @@ def complete(self, filename = None):
self.filename = filename
self.completed = datetime.now()
self.save()

def descriptive_filename(self):
    '''
    Human-readable filename for this download: its type, corpus name,
    and completion timestamp, joined with double underscores.
    '''
    type_label = self.download_type
    corpus_label = self.corpus.name
    completed_at = self.completed.strftime('%Y-%m-%d %H:%M')
    return f'{type_label}__{corpus_label}__{completed_at}.csv'
12 changes: 6 additions & 6 deletions backend/download/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ def download_scroll(request_json, download_size=10000):
return results

@shared_task()
def make_csv(results, request_json, log_id):
    '''
    Write search results to a csv file.

    Fix: the merge left both the old two-parameter signature line and the old
    three-argument search_results_csv call interleaved with the new ones,
    which is not valid Python; only the post-merge version is kept.

    :param results: search result hits to export
    :param request_json: the download request; provides 'fields' and the route
    :param log_id: ID of the Download record, used to name the file on disk
    :returns: path of the created csv file
    '''
    query = create_query(request_json)
    filepath = create_csv.search_results_csv(results, request_json['fields'], query, log_id)
    return filepath


Expand Down Expand Up @@ -82,20 +82,20 @@ def download_search_results(request_json, user):

make_chain = lambda: chain(
download_scroll.s(request_json, download_limit),
make_csv.s(request_json),
make_csv.s(request_json, download.id),
complete_download.s(download.id),
csv_data_email.s(user.email, user.username),
).on_error(complete_failed_download.s(download.id))

return try_download(make_chain, download)

@shared_task()
def make_term_frequency_csv(results_per_series, parameters_per_series, log_id):
    '''
    Export term frequency results to a csv.

    Fix: the merge left both the old signature/return and the new ones
    interleaved (duplicate def line, duplicate return); only the post-merge
    version, which passes the Download ID through for the filename, is kept.

    :param results_per_series: term frequency results, one list per query series
    :param parameters_per_series: request parameters, one set per query series
    :param log_id: ID of the Download record, used to name the file on disk
    :returns: path of the created csv file
    '''
    query_per_series, field_name, unit = extract_term_frequency_download_metadata(parameters_per_series)
    return create_csv.term_frequency_csv(query_per_series, results_per_series, field_name, log_id, unit = unit)


def term_frequency_full_data_tasks(parameters_per_series, visualization_type):
Expand Down Expand Up @@ -166,7 +166,7 @@ def download_full_data(request_json, user):

make_chain = lambda : chain(
task,
make_term_frequency_csv.s(parameters),
make_term_frequency_csv.s(parameters, download.id),
complete_download.s(download.id),
csv_data_email.s(user.email, user.username),
).on_error(complete_failed_download.s(download.id))
Expand Down
4 changes: 2 additions & 2 deletions backend/download/tests/test_csv_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
def result_csv_with_highlights(csv_directory):
    '''
    CSV file generated from mock ES results that include highlights.

    Fix: removed the stale pre-merge three-argument call to
    search_results_csv — its result was immediately overwritten, and the
    function now requires a download ID (0 serves as a fake ID here).
    '''
    route = 'parliament-netherlands_query=test'
    fields = ['speech']
    file = create_csv.search_results_csv(hits(mock_es_result), fields, route, 0)
    return file

def test_create_csv(result_csv_with_highlights):
Expand Down Expand Up @@ -190,7 +190,7 @@ def test_csv_encoding(ml_mock_corpus_results_csv):

@pytest.fixture()
def term_frequency_file(index_small_mock_corpus, csv_directory):
    '''
    Path of a term frequency csv generated from mock timeline results.

    Fix: removed the stale pre-merge call without a download ID — its result
    was immediately overwritten, and term_frequency_csv now requires an ID
    (0 serves as a fake ID here).
    '''
    filename = create_csv.term_frequency_csv(mock_queries, mock_timeline_result, 'date', 0, unit = 'year')
    return filename


Expand Down
16 changes: 11 additions & 5 deletions backend/download/tests/test_file_storage.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import os
from download import tasks
from download.conftest import all_results_request_json
from download.models import Download

def test_format_route_to_filename():
    '''create_query turns a search route into a flat query description.'''
    request_json = {'route': '/search/mock-corpus;query=test'}
    assert tasks.create_query(request_json) == 'mock-corpus_query=test'
def test_download_filename(auth_user, small_mock_corpus, index_small_mock_corpus, small_mock_corpus_specs):
    '''The stored csv file is named after the Download record's database ID.'''
    request_json = all_results_request_json(small_mock_corpus, small_mock_corpus_specs)
    tasks.download_search_results(request_json, auth_user).apply()
    latest = Download.objects.latest('completed')
    basename = os.path.basename(latest.filename)
    stem, extension = os.path.splitext(basename)
    assert stem == str(latest.id)
    assert extension == '.csv'
3 changes: 2 additions & 1 deletion backend/download/tests/test_full_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def test_timeline_full_data(small_mock_corpus, index_small_mock_corpus, small_mo

group = tasks.term_frequency_full_data_tasks(full_data_parameters, 'date_term_frequency')
results = group.apply().get()
filename = tasks.make_term_frequency_csv(results, full_data_parameters)
log_id = 0 # fake ID
filename = tasks.make_term_frequency_csv(results, full_data_parameters, log_id)

with open(filename) as f:
reader = csv.DictReader(f)
Expand Down
23 changes: 12 additions & 11 deletions backend/download/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@

logger = logging.getLogger()

def send_csv_file(download, directory, encoding, format=None):
    '''
    Perform final formatting and send a CSV file as a FileResponse.

    Fix: the merge left both the old signature (directory, filename,
    download_type, ...) and the new Download-based one interleaved, along
    with duplicated argument and return lines; only the post-merge version
    is kept.

    :param download: the Download record whose file should be sent
    :param directory: directory where the csv file is stored
    :param encoding: requested output encoding for the converted file
    :param format: optional table format passed to the converter
    :returns: FileResponse serving the converted file under a
        human-readable name (rather than the ID-based on-disk name)
    '''
    converted_filename = convert_csv.convert_csv(
        directory, download.filename, download.download_type, encoding, format)
    path = os.path.join(directory, converted_filename)
    return FileResponse(open(path, 'rb'), filename=download.descriptive_filename(), as_attachment=True)

class ResultsDownloadView(APIView):
'''
Expand All @@ -51,13 +52,13 @@ def post(self, request, *args, **kwargs):
handle_tags_in_request(request)
search_results = es_download.normal_search(
corpus_name, request.data['es_query'], request.data['size'])
csv_path = tasks.make_csv(search_results, request.data)
directory, filename = os.path.split(csv_path)
# Create download for download history
download = Download.objects.create(
download_type='search_results', corpus=corpus, parameters=request.data, user=request.user)
csv_path = tasks.make_csv(search_results, request.data, download.id)
directory, filename = os.path.split(csv_path)
# Create download for download history
download.complete(filename=filename)
return send_csv_file(directory, filename, 'search_results', request.data['encoding'])
return send_csv_file(download, directory, request.data['encoding'])
except Exception as e:
logger.error(e)
raise APIException(detail = 'Download failed: could not generate csv file')
Expand Down Expand Up @@ -138,13 +139,13 @@ def get(self, request, *args, **kwargs):
encoding = request.query_params.get('encoding', 'utf-8')
format = request.query_params.get('table_format', None)

record = Download.objects.get(id=id)
if not record.user == request.user:
download = Download.objects.get(id=id)
if not download.user == request.user:
raise PermissionDenied(detail='User has no access to this download')

directory = settings.CSV_FILES_PATH

if not os.path.isfile(os.path.join(directory, record.filename)):
if not os.path.isfile(os.path.join(directory, download.filename)):
raise NotFound(detail='File does not exist')

return send_csv_file(directory, record.filename, record.download_type, encoding, format)
return send_csv_file(download, directory, encoding, format)
Loading

0 comments on commit e7cde60

Please sign in to comment.