Skip to content

Commit

Permalink
Merge branch 'develop' into feature/privacy-settings-1
Browse files Browse the repository at this point in the history
  • Loading branch information
lukavdplas authored Oct 19, 2023
2 parents 983d28b + 82f557a commit eefc68e
Show file tree
Hide file tree
Showing 17 changed files with 211 additions and 48 deletions.
36 changes: 36 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
---
name: Bug report
about: Let us know about something that isn't working right
title: ''
labels: bug
assignees: ''

---

### What went wrong?

Describe what happened.

### Expected behavior

What did you expect to happen?

### Screenshots

If applicable, please add a screenshot of the problem!

### Which version?

Please specify where you encountered the issue:

- [ ] https://ianalyzer.hum.uu.nl
- [ ] https://peopleandparliament.hum.uu.nl
- [ ] https://peace.sites.uu.nl/
- [ ] a server hosted elsewhere (i.e. not by the research software lab)
- [ ] a local server

If this happened on a local or third-party server, it helps if you can be more specific about the version. Please include the version number (e.g. "3.2.4") or a commit hash if you know it!

### To reproduce

How can a developer replicate the issue? Please provide any information you can. For example: "I went to https://ianalyzer.hum.uu.nl/search/troonredes?date=1814-01-01:1972-01-01 and then clicked on *Download CSV*. I pressed *cancel* and then I clicked *Download CSV* again."
20 changes: 20 additions & 0 deletions .github/ISSUE_TEMPLATE/feature_request.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for something new
title: ''
labels: enhancement
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.
8 changes: 1 addition & 7 deletions backend/addcorpus/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,7 @@ class VisualizationType(Enum):
'visualize',
'visualizedField',
'normalize',
'size',
'positions',
'freqCompensation',
'analysis',
'maxDocuments',
'numberOfNgrams',
'dateField',
'ngramSettings'
]
'''
Field names that cannot be used because they are also query parameters in frontend routes.
Expand Down
8 changes: 7 additions & 1 deletion backend/addcorpus/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from addcorpus.validators import validate_language_code, validate_image_filename_extension, \
validate_markdown_filename_extension, validate_es_mapping, validate_mimetype, validate_search_filter, \
validate_name_is_not_a_route_parameter, validate_search_filter_with_mapping, validate_searchable_field_has_full_text_search, \
validate_visualizations_with_mapping, validate_implication
validate_visualizations_with_mapping, validate_implication, any_date_fields, visualisations_require_date_field

MAX_LENGTH_NAME = 126
MAX_LENGTH_DESCRIPTION = 254
Expand Down Expand Up @@ -269,3 +269,9 @@ def clean(self):
validate_implication(self.search_field_core, self.searchable, "Core search fields must be searchable")
except ValidationError as e:
warnings.warn(e.message)

validate_implication(
self.visualizations, self.corpus_configuration.fields.all(),
'The ngram visualisation requires a date field on the corpus',
visualisations_require_date_field, any_date_fields,
)
32 changes: 31 additions & 1 deletion backend/addcorpus/tests/test_validators.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from addcorpus.es_mappings import int_mapping, text_mapping, keyword_mapping
from addcorpus.models import Field
from addcorpus.es_mappings import int_mapping, text_mapping, keyword_mapping, main_content_mapping, date_mapping
from addcorpus.validators import *

def test_validate_mimetype():
Expand Down Expand Up @@ -71,3 +72,32 @@ def test_filename_validation():
with pytest.raises(ValidationError):
validate_image_filename_extension('image.txt')

def test_validate_ngram_has_date_field():
    '''
    A field with the ngram visualisation only validates when the list of
    fields it is checked against also contains a date field.
    '''
    content = Field(
        name='content',
        es_mapping=main_content_mapping(),
        visualizations=['wordcloud', 'ngram'],
    )
    date = Field(name='date', es_mapping=date_mapping())

    def validate_against(fields):
        # same premise and predicates each time; only the field list varies
        validate_implication(
            content.visualizations, fields,
            '',
            visualisations_require_date_field,
            any_date_fields,
        )

    # a date field is present: no error expected
    validate_against([content, date])

    # no date field is present: validation must fail
    with pytest.raises(ValidationError):
        validate_against([content])
7 changes: 7 additions & 0 deletions backend/addcorpus/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,10 @@ def validate_markdown_filename_extension(filename):
def validate_image_filename_extension(filename):
    '''
    Check `filename` against the accepted image extensions.

    The actual check is delegated to `validate_filename_extension`.
    '''
    validate_filename_extension(filename, ['.jpeg', '.jpg', '.png', '.JPG'])

def any_date_fields(fields):
    '''
    Return whether at least one of `fields` is a date field.

    A field counts as a date field when the primary type of its
    `es_mapping` is `'date'`. An empty collection yields `False`.
    '''
    return any(
        primary_mapping_type(field.es_mapping) == 'date'
        for field in fields
    )

def visualisations_require_date_field(visualisations):
    '''
    Return whether the given visualisations need a date field on the corpus.

    Only the `'ngram'` visualisation has this requirement. `visualisations`
    may be `None` or empty, in which case the result is `False`. The result
    is always a real boolean rather than the (falsy) input value.
    '''
    return bool(visualisations) and 'ngram' in visualisations
2 changes: 1 addition & 1 deletion backend/corpora/dbnl/dbnl.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def _xml_files(self):
transform_soup_func=utils.pad_content,
),
es_mapping=main_content_mapping(token_counts=True),
visualizations=['wordcloud', 'ngram'],
visualizations=['wordcloud'],
)

has_content = FieldDefinition(
Expand Down
4 changes: 2 additions & 2 deletions backend/corpora/parliament/finland-old.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def sources(self, start, end):
yield csv_file, {}

languages = ['sv', 'fi']
description_page = 'finland.md'
image = 'finland.jpg'
description_page = 'finland-old.md'
image = 'finland-old.jpg'

document_context = document_context()

Expand Down
Binary file modified backend/corpora/parliament/images/finland-old.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion documentation/Defining-corpus-fields.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ The following properties determine how a field appears in the interface.

`search_filter` can be set if the interface should include a search filter widget for the field. I-analyzer includes date filters, multiplechoice filters (used for keyword data), range filters, and boolean filters. See [filters.py](../backend/addcorpus/filters.py).

`visualizations` optionally specifies a list of visualisations that apply for the field. Generally speaking, this is based on the type of data. For date fields and categorical/ordinal fields (usually keyword type), you can use `['resultscount', 'termfrequency']`. For text fields, you can use `['wordcloud', 'ngram']`.
`visualizations` optionally specifies a list of visualisations that apply for the field. Generally speaking, this is based on the type of data. For date fields and categorical/ordinal fields (usually keyword type), you can use `['resultscount', 'termfrequency']`. For text fields, you can use `['wordcloud', 'ngram']`. However, the ngram visualisation also requires that your corpus has a date field.

If a field includes the `'resultscount'` and/or `'termfrequency'` visualisations and it is not a date field, you can also specify `visualisation_sort`, which determines how to sort the x-axis of the graph. Default is `'value'`, where categories are sorted based on the y-axis value (i.e., frequency). You may specify that they should be sorted on `'key'`, so that categories are sorted alphabetically (for keywords) or small-to-large (for numbers).

Expand Down
3 changes: 3 additions & 0 deletions frontend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
FROM node:14-alpine

RUN apk update && apk add --no-cache --virtual .gyp python3 make g++
# Install Chrome
RUN apk add chromium
ENV CHROME_BIN='/usr/bin/chromium-browser'

# create directory frontend on container
WORKDIR /frontend
Expand Down
1 change: 1 addition & 0 deletions frontend/karma.conf.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ module.exports = function (config) {
// '--disable-gpu', this might not be needed http://cvuorinen.net/2017/05/running-angular-tests-in-headless-chrome/
// Without a remote debugging port, Google Chrome exits immediately.
'--remote-debugging-port=9222',
'--no-sandbox'
],
}
}
Expand Down
37 changes: 37 additions & 0 deletions frontend/src/app/models/visualization.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { NgramParameters } from './visualization';

describe('NgramParameters', () => {
    let ngramParameters: NgramParameters;

    // every spec starts from the same known set of parameters
    beforeEach(() => {
        ngramParameters = new NgramParameters(2, 'any', false, 'none', 50, 10, 'date');
    });

    it('should convert itself to a param string', () => {
        expect(ngramParameters.toRouteParam()).toEqual(
            's:2,p:any,c:false,a:none,m:50,n:10,f:date'
        );
    });

    it('should set itself from a param string', () => {
        const paramString = 's:3,p:first,c:true,a:none,m:50,n:20,f:date';
        ngramParameters.fromRouteParam(paramString);

        // size, positions, freqCompensation and numberOfNgrams differ from the initial values
        const {
            size, positions, freqCompensation, analysis,
            maxDocuments, numberOfNgrams, dateField,
        } = ngramParameters;
        expect(size).toEqual(3);
        expect(positions).toEqual('first');
        expect(freqCompensation).toEqual(true);
        expect(analysis).toEqual('none');
        expect(maxDocuments).toEqual(50);
        expect(numberOfNgrams).toEqual(20);
        expect(dateField).toEqual('date');
    });
});
44 changes: 42 additions & 2 deletions frontend/src/app/models/visualization.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { AggregateResult, DateResult } from '.';
import { EsQuery, EsQuerySorted } from './elasticsearch';
import { QueryParameters } from './search-requests';

export interface TermFrequencyResult {
Expand Down Expand Up @@ -111,14 +110,55 @@ export type NGramRequestParameters = {
date_field: string;
} & QueryParameters;

export interface NgramParameters {
/**
 * Settings of the ngram visualisation, which can be serialised to and restored
 * from a single route parameter such as
 * `s:2,p:any,c:false,a:none,m:50,n:10,f:date`.
 */
export class NgramParameters {
    size: number;
    positions: string;
    freqCompensation: boolean;
    analysis: string;
    maxDocuments: number;
    numberOfNgrams: number;
    dateField: string;

    // the `key:value` items of the string most recently given to fromRouteParam;
    // undefined until fromRouteParam has been called
    ngramSettings: string [];

    constructor(size: number,
        positions: string,
        freqCompensation: boolean,
        analysis: string,
        maxDocuments: number,
        numberOfNgrams: number,
        dateField: string
    ) {
        this.size = size;
        this.positions = positions;
        this.freqCompensation = freqCompensation;
        this.analysis = analysis;
        this.maxDocuments = maxDocuments;
        this.numberOfNgrams = numberOfNgrams;
        this.dateField = dateField;
    }

    /** Serialise all settings as comma-separated `abbreviation:value` items. */
    toRouteParam(): string {
        return [
            `s:${this.size}`,
            `p:${this.positions}`,
            `c:${this.freqCompensation}`,
            `a:${this.analysis}`,
            `m:${this.maxDocuments}`,
            `n:${this.numberOfNgrams}`,
            `f:${this.dateField}`,
        ].join(',');
    }

    /**
     * Update the settings from a string in the format of `toRouteParam`.
     *
     * A setting that is absent from the string, or a numeric setting whose
     * value is not a number, leaves the current value untouched.
     */
    fromRouteParam(paramString: string) {
        this.ngramSettings = paramString.split(',');

        const text = (abbreviation: string, fallback: string): string => {
            const value = this.findSetting(abbreviation);
            return value === undefined ? fallback : value;
        };
        const integer = (abbreviation: string, fallback: number): number => {
            const value = this.findSetting(abbreviation);
            const parsed = value === undefined ? NaN : parseInt(value, 10);
            return isNaN(parsed) ? fallback : parsed;
        };

        this.size = integer('s', this.size);
        this.positions = text('p', this.positions);
        // the boolean is serialised as the literal strings 'true' / 'false'
        this.freqCompensation = text('c', String(this.freqCompensation)) === 'true';
        this.analysis = text('a', this.analysis);
        this.maxDocuments = integer('m', this.maxDocuments);
        this.numberOfNgrams = integer('n', this.numberOfNgrams);
        this.dateField = text('f', this.dateField);
    }

    /**
     * Look up the raw value stored for `abbreviation` in `ngramSettings`.
     *
     * @returns everything after the first `:` of the matching item, or
     * `undefined` when there is no such item (or nothing has been parsed yet).
     */
    findSetting(abbreviation: string): string | undefined {
        const prefix = `${abbreviation}:`;
        const setting = (this.ngramSettings || []).find(s => s.startsWith(prefix));
        // slice rather than split, so a value that itself contains ':' stays intact
        return setting === undefined ? undefined : setting.slice(prefix.length);
    }
}

export interface FieldCoverage {
Expand Down
8 changes: 0 additions & 8 deletions frontend/src/app/visualization/ngram/ngram.component.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,4 @@ describe('NgramComponent', () => {
expect(component).toBeTruthy();
});

// Checks that setParameters stores the `size` route parameter (supplied as a string)
// as a number on currentParameters, and that a second call replaces the earlier value.
it('should set the currentParameters with the right type', () => {
const params = convertToParamMap({size: '5'});
component.setParameters(params);
expect(component.currentParameters.size).toEqual(5);
const newParams = convertToParamMap({size: '2'});
component.setParameters(newParams);
expect(component.currentParameters.size).toEqual(2);
});
});
Loading

0 comments on commit eefc68e

Please sign in to comment.