Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
malakhovks committed Jul 8, 2019
2 parents ac9c4fc + d19d4f1 commit fd8fde1
Show file tree
Hide file tree
Showing 40 changed files with 191 additions and 101 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM python:2.7-slim

LABEL maintainer "Kyrylo Malakhov <[email protected]> and Vitalii Velychko <[email protected]>"
LABEL description "ken (konspekt English) is a natural language processing API service for contextual and semantic analysis with document taxonomy building feature (python:2.7-slim + Nginx + uWSGI + Flask)"
LABEL description "KEn (konspekt English) is a natural language processing API service for contextual and semantic analysis with document taxonomy building feature (python:2.7-slim + Nginx + uWSGI + Flask)"

COPY . /srv/ken
WORKDIR /srv/ken
Expand Down
160 changes: 142 additions & 18 deletions README.md

Large diffs are not rendered by default.

15 changes: 1 addition & 14 deletions ken.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ def get_terms_list():
if len(doc_for_tokens) == 2:

'''
# extract one-word terms from 2-words statements (excluding articles DET)
# Extract one-word terms from 2-words statements (excluding articles DET)
'''
if doc_for_tokens[0].pos_ in ['DET', 'PUNCT']:

Expand Down Expand Up @@ -666,7 +666,6 @@ def get_terms_list():


# If the two-word term does not exist in two_word_terms_help_list
# if chunk.lower_ not in two_word_terms_help_list:
if chunk.lemma_ not in two_word_terms_help_list:

# update two_word_terms_help_list with the new two-word term
Expand Down Expand Up @@ -1015,19 +1014,7 @@ def get_ner():
# ------------------------------------------------------------------------------------------------------
TODO - in pdf
TODO 2 files, comparable
TODO exception handling in a good way
TODO Languagetool in a separate container for spelling correction
TODO in production on Windows
Done in production on Linux with uWSGI, Nginx, Docker
Done Handling NER in terms
# ------------------------------------------------------------------------------------------------------
"""

Expand Down
115 changes: 47 additions & 68 deletions static/javascripts/ken-recap.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ $newProjectAndClearAll.click(function () {
});

$(document).ready(function () {
// Load last recapped file data
if (localStorage.getItem("recapForLastFile")) {
console.log("Load last recapped file data");
resJSON = JSON.parse(localStorage.getItem("recapForLastFile"));
Expand All @@ -65,6 +66,15 @@ $(document).ready(function () {
text: element.tname
}));
}

// add text from last recapped file to textarea id="sents_from_text"
// Clear textarea id="sents_from_text"
$sents_from_text.text('');
// add to textarea id="sents_from_text"
for (let sent_element of resJSON.termsintext.sentences.sent) {
$sents_from_text.append(sent_element + '\n\n')
}

}

if (localStorage.getItem("projectFiles")) {
Expand All @@ -79,7 +89,6 @@ $(document).ready(function () {

$("#displacy").hide();
$("#displacy-ner").hide();
// $("#displacy-label").center();

iziToast.info({
title: 'Вітаємо!',
Expand Down Expand Up @@ -288,7 +297,7 @@ function fetchFileToRecapService() {
$upload_button.css('display', 'none');
$('.tabs').css('display', 'block');

// Очистка списка терминов, поля textArea и input choose file
// Clear terms list, textArea, input choose file
$recapOverviewButton.val("");
$('option', $uploadResultList).remove();
$('option', $uploadUnknownTerms).remove();
Expand All @@ -301,7 +310,7 @@ function fetchFileToRecapService() {
method: 'post',
body: form
})
.then(function (response) {
.then(response => {

if (response.status == 503) {
$("body").css("cursor", "default");
Expand All @@ -311,20 +320,16 @@ function fetchFileToRecapService() {
message: 'Статус: ' + response.status,
position: 'bottomLeft'
});
alert('Сервіс зайнятий, спробуйте ще раз.' + '\n' + 'Статус: ' + response.status);
return;
}
// return response.json().then(function (result) {
return response.text().then(function (result) {

return response.text().then(result => {

dom = new DOMParser().parseFromString(result, "text/xml");
resJSON = xmlToJson(dom);
// console.log(JSON.stringify(resJSON));
// console.log(JSON.stringify(Object.values(resJSON.termsintext.sentences.sent)))

// add to local storage recap of the last uploaded file
localStorage["recapForLastFile"] = JSON.stringify(resJSON);
// add to local storage recap of the last uploaded file

// add to local storage recap of this file for #projectFileList
localStorage[uploadFileName.split('\\').pop()] = JSON.stringify(resJSON);
Expand All @@ -337,30 +342,28 @@ function fetchFileToRecapService() {
}));
}

// Clear textarea id="sents_from_text"
$sents_from_text.text('');
// add to textarea id="sents_from_text"
for (let sent_element of resJSON.termsintext.sentences.sent) {
$sents_from_text.append(sent_element + '\n\n')
}
// hide progress bar
// $("body").css("cursor", "default");
// $(".loader").hide();
});
})
// fetch to parce.xml for NER
.then(function (next) {
.then(next => {
return fetch('/ken/api/v1.0/en/file/parcexml', {
method: 'post',
body: form
})
.then(function (response) {
return response.text().then(function (result) {
.then(response => {
return response.text().then(result => {

dom = new DOMParser().parseFromString(result, "text/xml");
resParceJSON = xmlToJson(dom);

for (let sentElement of resParceJSON.text.sentence) {

// console.log(JSON.stringify(sentElement));

if (sentElement.hasOwnProperty('ner')) {
if (Array.isArray(sentElement.ner.entity)) {
for (let entityElement of sentElement.ner.entity) {
Expand All @@ -382,15 +385,15 @@ function fetchFileToRecapService() {
})
})
// fetch to /ken/api/v1.0/en/html/ner for NER
.then(function (next) {
.then(next => {

sentencesData = JSON.stringify(Object.values(resJSON.termsintext.sentences.sent));
console.log(sentencesData);

return fetch('/ken/api/v1.0/en/html/ner', {
method: 'post',
body: sentencesData
})
.then(function (response) {
.then(response => {
return response.text().then(function (result) {
// htmlWithNER = new DOMParser().parseFromString(result, "text/html");
annotation = '<center><p><a target="_blank" href="https://spacy.io/api/annotation#named-entities">Named Entity Recognition annotations</a></p></center>'
Expand All @@ -406,7 +409,7 @@ function fetchFileToRecapService() {
});
})
})
.catch(function (error) {
.catch(error => {
$("body").css("cursor", "default");
$(".loader").hide();
iziToast.warning({
Expand Down Expand Up @@ -435,8 +438,12 @@ function forUploadResultListClickAndEnterPressEvents() {

// inserting sentences with selected terms in textArea #textContent
if (Array.isArray(resJSON.termsintext.exporterms.term[valOfSelectedElementInUploadResultList].sentpos)) {
let sentIndex = [];
for (let elementForUploadResultListDbClickAndEnterPress of resJSON.termsintext.exporterms.term[valOfSelectedElementInUploadResultList].sentpos) {
$textContent.append('\n' + resJSON.termsintext.sentences.sent[elementForUploadResultListDbClickAndEnterPress.substring(0, elementForUploadResultListDbClickAndEnterPress.indexOf("/")) - 1] + '\n');
if (!sentIndex.includes(parseInt(elementForUploadResultListDbClickAndEnterPress.substring(0, elementForUploadResultListDbClickAndEnterPress.indexOf("/")) - 1))) {
$textContent.append('\n' + resJSON.termsintext.sentences.sent[elementForUploadResultListDbClickAndEnterPress.substring(0, elementForUploadResultListDbClickAndEnterPress.indexOf("/")) - 1] + '\n');
sentIndex.push(parseInt(elementForUploadResultListDbClickAndEnterPress.substring(0, elementForUploadResultListDbClickAndEnterPress.indexOf("/")) - 1));
}
}
}

Expand Down Expand Up @@ -511,26 +518,6 @@ function forUploadResultListClickAndEnterPressEvents() {
}
$textContent.highlightWithinTextarea(onInput);

/* function multiSearchOr(text, searchWord) {
var regex = RegExp('\\b(\\w*' + searchWord + '\\w*)\\b', 'ig');
let m;
let foundWords = [];
while ((m = regex.exec(text)) !== null) {
// This is necessary to avoid infinite loops with zero-width matches
if (m.index === regex.lastIndex) {
regex.lastIndex++;
}
// The result can be accessed through the `m`-variable.
m.forEach((match, groupIndex) => {
// console.log(match);
foundWords.push(match)
});
}
return foundWords[0];
} */


// visualize noun chunk / term
let displacy = new displaCy('/ken/api/v1.0/en/html/depparse/nounchunk', {
container: '#displacy'
Expand Down Expand Up @@ -565,10 +552,26 @@ function forProjectFileListClickAndEnterPressEvents() {
}));
}

alert('Разбор файлу "' + $projectFileList.prop('value') + '" завантажено');
// add text from last recapped file to textarea id="sents_from_text"
// Clear textarea id="sents_from_text"
$sents_from_text.text('');
// add to textarea id="sents_from_text"
for (let sent_element of resJSON.termsintext.sentences.sent) {
$sents_from_text.append(sent_element + '\n\n')
}

iziToast.info({
title: 'Разбор файлу',
message: $projectFileList.prop('value') + ' завантажено!',
position: 'bottomLeft'
});

} else {
alert('Разбору файлу "' + $projectFileList.prop('value') + '" не існує');
iziToast.warning({
title: 'Разбор файлу',
message: $projectFileList.prop('value') + ' не існує!',
position: 'bottomLeft'
});
}

}
Expand Down Expand Up @@ -642,26 +645,6 @@ function xmlToJson(xml) {
return obj;
}

/**
 * Ask the Yandex Translate "detect" endpoint which language a text is in
 * and log the detected language code to the console.
 *
 * The text is sent as the `text` query parameter of a POST request. It is
 * URL-encoded first so that characters such as `&`, `#`, `+`, `=` or spaces
 * cannot truncate or corrupt the query string.
 *
 * Nothing is returned; on a request failure, or when the browser has no
 * `fetch`, the user is notified through `alert`.
 *
 * @param {string} ofText - Text whose language should be detected.
 */
function getLanguage(ofText) {
    // NOTE(review): the API key is hard-coded in client-side code and is
    // therefore visible to every visitor - consider proxying this request
    // through the backend instead.
    let text = 'https://translate.yandex.net/api/v1.5/tr.json/detect?hint=ru,en&key=trnsl.1.1.20160517T143002Z.e9fc37c7a484c5f4.8cba036cc3eb084c401f3766ed5b2b389b6dc9fc&text=' + encodeURIComponent(ofText);

    // Guard clause: without fetch support there is nothing we can do.
    if (!self.fetch) {
        alert('Ваш браузер застарів. Встановіть актуальну версію Google Chrome');
        return;
    }

    fetch(text, {
        method: 'post'
    })
        .then(function (response) {
            return response.json().then(function (result) {
                console.log(result.lang);
            });
        })
        .catch(function (error) {
            alert('Виникла помилка на стороні серевера.' + '\n' + 'Помилка: ' + error + '\n' + ' Cпробуйте ще раз.');
        });
}

// CHANGE TABS
$('.nav-tabs a').click(function (e) {
e.preventDefault();
Expand All @@ -670,10 +653,6 @@ $('.nav-tabs a').click(function (e) {

$('a[data-toggle="data"]').on('shown.bs.tab', function (e) {

// if ($(e.target).attr("href") == '#new_term_tab'){
// alert('target');
// }

if ($("#new_term_tab").is(".tab-pane.active")) {
$("#displacy").hide();
$("#displacy-ner").show();
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file removed wiki/work-plan-requirements/ken-pipeline.graffle
Binary file not shown.
Binary file removed wiki/work-plan-requirements/ken-pipeline.pdf
Binary file not shown.
Binary file removed wiki/work-plan-requirements/requirements.docx
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit fd8fde1

Please sign in to comment.