diff --git a/.github/workflows/push-to-protected.yml b/.github/workflows/push-to-protected.yml index d37311ac8..62da3c1cd 100644 --- a/.github/workflows/push-to-protected.yml +++ b/.github/workflows/push-to-protected.yml @@ -20,8 +20,8 @@ jobs: # Steps represent a sequence of tasks that will be executed as part of the job steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 + - uses: actions/checkout@v4 + - uses: actions/setup-java@v4 with: java-version: '17' distribution: 'adopt' diff --git a/.gitignore b/.gitignore index bbaee0ca5..60137c0a3 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ catalog-v001.xml **/bundle.js **/generated-sources **/java-generated -**/npm-debug.log +termit-doc.md diff --git a/pom.xml b/pom.xml index 5594520f1..94154f29c 100644 --- a/pom.xml +++ b/pom.xml @@ -7,11 +7,11 @@ org.springframework.boot spring-boot-starter-parent - 3.2.3 + 3.2.5 termit - 3.0.4 + 3.1.0 TermIt Terminology manager based on Semantic Web technologies. ${packaging} @@ -20,39 +20,19 @@ kbss https://kbss.felk.cvut.cz/m2repo - - - central-snapshots - https://oss.sonatype.org/content/repositories/snapshots - - false - true - - - central-snapshots - https://oss.sonatype.org/content/repositories/snapshots - - false - - - true - - - 17 2.7.0 1.5.5.Final 2.2.0 - 2.0.0-SNAPSHOT - 0.14.2 - 1.9.20 + 2.0.0 + 0.14.3 DEV @@ -132,14 +112,14 @@ org.eclipse.rdf4j rdf4j-rio-rdfxml - 4.3.8 + 4.3.11 com.github.ledsoft jopa-spring-transaction - 0.3.0-SNAPSHOT + 0.3.0 @@ -177,11 +157,19 @@ org.springframework spring-aspects + + org.springframework.retry + spring-retry + org.apache.httpcomponents.client5 httpclient5 5.2.1 + + org.aspectj + aspectjrt + @@ -427,7 +415,18 @@ mapstruct-processor ${org.mapstruct.version} + + cz.lukaskabc.cvut.processor + spring-boot-configuration-docgen-processor + 1.0 + + + -Aconfigurationdoc.output_file=termit-doc.md + -Aconfigurationdoc.format=MD + -Aconfigurationdoc.configuration_package=cz.cvut.kbss.termit + -Aconfigurationdoc.prepend_required=true + ${java.version} ${java.version} @@ -448,13 +447,6 @@ true - - - org.aspectj - aspectjtools - ${org.aspectj.version} - - main diff --git a/src/main/java/cz/cvut/kbss/termit/aspect/ChangeTrackingAspect.java b/src/main/java/cz/cvut/kbss/termit/aspect/ChangeTrackingAspect.java index 348f8ef31..241b25f46 100644 --- a/src/main/java/cz/cvut/kbss/termit/aspect/ChangeTrackingAspect.java +++ b/src/main/java/cz/cvut/kbss/termit/aspect/ChangeTrackingAspect.java @@ -46,7 +46,7 @@ public class ChangeTrackingAspect { @Autowired private ChangeTrackingHelperDao helperDao; - @Pointcut(value = "execution(public void persist(..)) && target(cz.cvut.kbss.termit.persistence.dao.GenericDao) " + + @Pointcut(value = "execution(public void persist(..)) && (target(cz.cvut.kbss.termit.persistence.dao.BaseAssetDao)) " + "&& @args(cz.cvut.kbss.termit.model.changetracking.Audited)") public void persistOperation() { } @@ -56,7 +56,7 @@ public void persistOperation() { public void persistTermOperation() { } - @Pointcut(value = "execution(public * update(..)) && target(cz.cvut.kbss.termit.persistence.dao.GenericDao) " + + @Pointcut(value = "execution(public * update(..)) && target(cz.cvut.kbss.termit.persistence.dao.BaseAssetDao) " + "&& @args(cz.cvut.kbss.termit.model.changetracking.Audited)") public void updateOperation() { } diff --git a/src/main/java/cz/cvut/kbss/termit/config/AppConfig.java b/src/main/java/cz/cvut/kbss/termit/config/AppConfig.java index ec216d2a3..9a6ec39d7 100644 --- a/src/main/java/cz/cvut/kbss/termit/config/AppConfig.java +++ b/src/main/java/cz/cvut/kbss/termit/config/AppConfig.java @@ -23,6 +23,7 @@ import org.springframework.context.annotation.EnableAspectJAutoProxy; import org.springframework.context.annotation.EnableMBeanExport; import org.springframework.context.annotation.aspectj.EnableSpringConfigured; +import org.springframework.retry.annotation.EnableRetry; import org.springframework.scheduling.annotation.AsyncConfigurer; import org.springframework.scheduling.annotation.EnableAsync; import org.springframework.scheduling.annotation.EnableScheduling; @@ -33,6 +34,7 @@ @EnableSpringConfigured @EnableAsync @EnableScheduling +@EnableRetry public class AppConfig implements AsyncConfigurer { @Override diff --git a/src/main/java/cz/cvut/kbss/termit/config/WebAppConfig.java b/src/main/java/cz/cvut/kbss/termit/config/WebAppConfig.java index fe961c17d..da01d8ece 100644 --- a/src/main/java/cz/cvut/kbss/termit/config/WebAppConfig.java +++ b/src/main/java/cz/cvut/kbss/termit/config/WebAppConfig.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.databind.module.SimpleModule; import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; import cz.cvut.kbss.jopa.model.MultilingualString; -import cz.cvut.kbss.jopa.sessions.UnitOfWorkImpl; import cz.cvut.kbss.jsonld.JsonLd; import cz.cvut.kbss.jsonld.jackson.JsonLdModule; import cz.cvut.kbss.jsonld.jackson.serialization.SerializationConstants; @@ -32,7 +31,6 @@ import cz.cvut.kbss.termit.util.AdjustedUriTemplateProxyServlet; import cz.cvut.kbss.termit.util.ConfigParam; import cz.cvut.kbss.termit.util.Constants; -import cz.cvut.kbss.termit.util.json.ManageableIgnoreMixin; import cz.cvut.kbss.termit.util.json.MultilingualStringDeserializer; import cz.cvut.kbss.termit.util.json.MultilingualStringSerializer; import io.swagger.v3.oas.models.Components; @@ -94,8 +92,6 @@ public static ObjectMapper createJsonObjectMapper() { multilingualStringModule.addSerializer(MultilingualString.class, new MultilingualStringSerializer()); multilingualStringModule.addDeserializer(MultilingualString.class, new MultilingualStringDeserializer()); objectMapper.registerModule(multilingualStringModule); - // Ignore UoW references injected into entities - objectMapper.addMixIn(UnitOfWorkImpl.class, ManageableIgnoreMixin.class); // JSR 310 (Java 8 DateTime API) objectMapper.registerModule(new JavaTimeModule()); // Serialize datetime as ISO strings diff --git a/src/main/java/cz/cvut/kbss/termit/dto/listing/TermDto.java b/src/main/java/cz/cvut/kbss/termit/dto/listing/TermDto.java index d46c851b6..550516537 100644 --- a/src/main/java/cz/cvut/kbss/termit/dto/listing/TermDto.java +++ b/src/main/java/cz/cvut/kbss/termit/dto/listing/TermDto.java @@ -35,7 +35,7 @@ * Contains fewer data than a regular {@link cz.cvut.kbss.termit.model.Term}. */ @OWLClass(iri = SKOS.CONCEPT) -@JsonLdAttributeOrder({"uri", "label", "subTerms"}) +@JsonLdAttributeOrder({"uri", "label", "parentTerms", "subTerms"}) @JsonIgnoreProperties({"definition", "persistenceContext"}) public class TermDto extends AbstractTerm { diff --git a/src/main/java/cz/cvut/kbss/termit/exception/AnnotationGenerationException.java b/src/main/java/cz/cvut/kbss/termit/exception/AnnotationGenerationException.java index 2b50fd9d4..a07ced43d 100644 --- a/src/main/java/cz/cvut/kbss/termit/exception/AnnotationGenerationException.java +++ b/src/main/java/cz/cvut/kbss/termit/exception/AnnotationGenerationException.java @@ -20,7 +20,7 @@ /** * Indicates a failure during document annotation generation. */ -public class AnnotationGenerationException extends TermItException { +public class AnnotationGenerationException extends FileContentProcessingException { public AnnotationGenerationException(String message) { super(message); diff --git a/src/main/java/cz/cvut/kbss/termit/exception/FileContentProcessingException.java b/src/main/java/cz/cvut/kbss/termit/exception/FileContentProcessingException.java new file mode 100644 index 000000000..521f87fb9 --- /dev/null +++ b/src/main/java/cz/cvut/kbss/termit/exception/FileContentProcessingException.java @@ -0,0 +1,15 @@ +package cz.cvut.kbss.termit.exception; + +/** + * Indicates an error when processing file content. + */ +public class FileContentProcessingException extends TermItException { + + public FileContentProcessingException(String message) { + super(message); + } + + public FileContentProcessingException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/src/main/java/cz/cvut/kbss/termit/model/assignment/OccurrenceTarget.java b/src/main/java/cz/cvut/kbss/termit/model/assignment/OccurrenceTarget.java index 3edc118c2..a46dc56f2 100644 --- a/src/main/java/cz/cvut/kbss/termit/model/assignment/OccurrenceTarget.java +++ b/src/main/java/cz/cvut/kbss/termit/model/assignment/OccurrenceTarget.java @@ -23,6 +23,8 @@ import cz.cvut.kbss.termit.model.Asset; import cz.cvut.kbss.termit.model.selector.Selector; import cz.cvut.kbss.termit.util.Vocabulary; +import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.NotNull; import java.net.URI; import java.util.Objects; @@ -32,10 +34,12 @@ @OWLClass(iri = Vocabulary.s_c_cil_vyskytu) public abstract class OccurrenceTarget extends AbstractEntity { + @NotNull @ParticipationConstraints(nonEmpty = true) @OWLObjectProperty(iri = Vocabulary.s_p_ma_zdroj) private URI source; + @NotEmpty @ParticipationConstraints(nonEmpty = true) @OWLObjectProperty(iri = Vocabulary.s_p_ma_selektor, cascade = CascadeType.ALL, fetch = FetchType.EAGER) private Set selectors; diff --git a/src/main/java/cz/cvut/kbss/termit/model/assignment/TermOccurrence.java b/src/main/java/cz/cvut/kbss/termit/model/assignment/TermOccurrence.java index 0f7f52eab..e2902d87a 100644 --- a/src/main/java/cz/cvut/kbss/termit/model/assignment/TermOccurrence.java +++ b/src/main/java/cz/cvut/kbss/termit/model/assignment/TermOccurrence.java @@ -18,11 +18,18 @@ package cz.cvut.kbss.termit.model.assignment; import com.fasterxml.jackson.annotation.JsonTypeInfo; -import cz.cvut.kbss.jopa.model.annotations.*; +import cz.cvut.kbss.jopa.model.annotations.CascadeType; +import cz.cvut.kbss.jopa.model.annotations.FetchType; +import cz.cvut.kbss.jopa.model.annotations.OWLClass; +import cz.cvut.kbss.jopa.model.annotations.OWLDataProperty; +import cz.cvut.kbss.jopa.model.annotations.OWLObjectProperty; +import cz.cvut.kbss.jopa.model.annotations.ParticipationConstraints; +import cz.cvut.kbss.jopa.model.annotations.Types; import cz.cvut.kbss.jopa.vocabulary.DC; import cz.cvut.kbss.termit.model.AbstractEntity; import cz.cvut.kbss.termit.model.util.HasTypes; import cz.cvut.kbss.termit.util.Vocabulary; +import jakarta.validation.constraints.NotNull; import java.net.URI; import java.util.Objects; @@ -37,10 +44,12 @@ public abstract class TermOccurrence extends AbstractEntity implements HasTypes */ public static final String CONTEXT_SUFFIX = "occurrences"; + @NotNull @ParticipationConstraints(nonEmpty = true) @OWLObjectProperty(iri = Vocabulary.s_p_je_prirazenim_termu) private URI term; + @NotNull @ParticipationConstraints(nonEmpty = true) @OWLObjectProperty(iri = Vocabulary.s_p_ma_cil, cascade = {CascadeType.MERGE}, fetch = FetchType.EAGER) OccurrenceTarget target; @@ -110,6 +119,35 @@ public void setScore(Double score) { this.score = score; } + /** + * Marks this term occurrence as suggested by automation. + *

+ * Corresponds to classifying with {@link Vocabulary#s_c_navrzeny_vyskyt_termu}. + */ + public void markSuggested() { + addType(Vocabulary.s_c_navrzeny_vyskyt_termu); + } + + /** + * Marks this term occurrence as approved. + *

+ * Corresponds to removing the {@link Vocabulary#s_c_navrzeny_vyskyt_termu} type. + */ + public void markApproved() { + removeType(Vocabulary.s_c_navrzeny_vyskyt_termu); + } + + /** + * Checks whether this term occurrence is marked as suggested by automation. + *

+ * Suggested in this context means classified with {@link Vocabulary#s_c_navrzeny_vyskyt_termu}. + * + * @return {@code true} when this instance is marked as suggested by automation + */ + public boolean isSuggested() { + return hasType(Vocabulary.s_c_navrzeny_vyskyt_termu); + } + @Override public String toString() { return "TermOccurrence{<" + @@ -131,7 +169,8 @@ public URI resolveContext() { } /** - * Resolves identifier of the repository context in which term occurrences targeting the specified source should be stored. + * Resolves identifier of the repository context in which term occurrences targeting the specified source should be + * stored. *

* The context is based on the specified source and {@link #CONTEXT_SUFFIX}. * diff --git a/src/main/java/cz/cvut/kbss/termit/model/selector/TextQuoteSelector.java b/src/main/java/cz/cvut/kbss/termit/model/selector/TextQuoteSelector.java index e669f4f3c..c1568019a 100644 --- a/src/main/java/cz/cvut/kbss/termit/model/selector/TextQuoteSelector.java +++ b/src/main/java/cz/cvut/kbss/termit/model/selector/TextQuoteSelector.java @@ -53,6 +53,12 @@ public TextQuoteSelector(@NotBlank String exactMatch) { this.exactMatch = exactMatch; } + public TextQuoteSelector(@NotBlank String exactMatch, String prefix, String suffix) { + this.exactMatch = exactMatch; + this.prefix = prefix; + this.suffix = suffix; + } + public String getExactMatch() { return exactMatch; } diff --git a/src/main/java/cz/cvut/kbss/termit/persistence/dao/BaseDao.java b/src/main/java/cz/cvut/kbss/termit/persistence/dao/BaseDao.java index fc260fbf5..a87fb845d 100644 --- a/src/main/java/cz/cvut/kbss/termit/persistence/dao/BaseDao.java +++ b/src/main/java/cz/cvut/kbss/termit/persistence/dao/BaseDao.java @@ -78,6 +78,12 @@ public Optional getReference(URI id) { } } + @Override + public void detach(T entity) { + Objects.requireNonNull(entity); + em.detach(entity); + } + @ModifiesData @Override public void persist(T entity) { diff --git a/src/main/java/cz/cvut/kbss/termit/persistence/dao/GenericDao.java b/src/main/java/cz/cvut/kbss/termit/persistence/dao/GenericDao.java index a7af03fe3..376918051 100644 --- a/src/main/java/cz/cvut/kbss/termit/persistence/dao/GenericDao.java +++ b/src/main/java/cz/cvut/kbss/termit/persistence/dao/GenericDao.java @@ -60,6 +60,15 @@ public interface GenericDao { */ Optional getReference(URI id); + /** + * Detaches the specified entity from the current persistence context. + *

+ * Does nothing if the specified entity is not managed. + * + * @param entity Entity to detach + */ + void detach(T entity); + /** * Persists the specified entity. * diff --git a/src/main/java/cz/cvut/kbss/termit/persistence/dao/TermDao.java b/src/main/java/cz/cvut/kbss/termit/persistence/dao/TermDao.java index 18d5ceaa9..3d12e1bed 100644 --- a/src/main/java/cz/cvut/kbss/termit/persistence/dao/TermDao.java +++ b/src/main/java/cz/cvut/kbss/termit/persistence/dao/TermDao.java @@ -95,11 +95,6 @@ private void postLoad(Term r) { r.setInverseExactMatchTerms(loadInverseExactMatchTerms(r)); } - public void detach(Term term) { - Objects.requireNonNull(term); - em.detach(term); - } - /** * Loads terms whose relatedness to the specified term is inferred due to the symmetry of SKOS related. * diff --git a/src/main/java/cz/cvut/kbss/termit/persistence/dao/TermOccurrenceDao.java b/src/main/java/cz/cvut/kbss/termit/persistence/dao/TermOccurrenceDao.java index ee8c75ccf..e3529e477 100644 --- a/src/main/java/cz/cvut/kbss/termit/persistence/dao/TermOccurrenceDao.java +++ b/src/main/java/cz/cvut/kbss/termit/persistence/dao/TermOccurrenceDao.java @@ -23,12 +23,12 @@ import cz.cvut.kbss.jopa.model.query.Query; import cz.cvut.kbss.jopa.vocabulary.DC; import cz.cvut.kbss.jopa.vocabulary.RDFS; -import cz.cvut.kbss.termit.asset.provenance.ModifiesData; import cz.cvut.kbss.termit.dto.assignment.TermOccurrences; import cz.cvut.kbss.termit.exception.PersistenceException; import cz.cvut.kbss.termit.model.Asset; import cz.cvut.kbss.termit.model.Term; import cz.cvut.kbss.termit.model.assignment.TermOccurrence; +import cz.cvut.kbss.termit.persistence.dao.util.ScheduledContextRemover; import cz.cvut.kbss.termit.persistence.dao.util.SparqlResultToTermOccurrenceMapper; import cz.cvut.kbss.termit.util.Configuration; import cz.cvut.kbss.termit.util.Vocabulary; @@ -52,12 +52,13 @@ public class TermOccurrenceDao extends BaseDao { */ private static final String FIND_ALL_TARGETING_QUERY = "SELECT ?occ ?type ?term ?target ?suggested ?selector ?exactMatch ?prefix ?suffix ?startPosition ?endPosition WHERE {" + - "?occ a ?occurrence ;" + - " a ?type ;" + + "?occ a ?occurrence ." + + "GRAPH ?g { " + + "?occ a ?type ;" + " ?hasTarget ?target ." + "OPTIONAL {" + " ?occ ?assignmentOfTerm ?term ." + - "}" + + "} }" + "?target a ?occurrenceTarget ;" + " ?hasSource ?source ." + "OPTIONAL {" + @@ -70,17 +71,20 @@ public class TermOccurrenceDao extends BaseDao { " } UNION {" + " ?selector ?hasStart ?startPosition ;" + " ?hasEnd ?endPosition ." + - " }" + - "}" + + " } " + + "} " + "FILTER (?type = ?fileOccurrence || ?type = ?definitionalOccurrence)" + "BIND(EXISTS { ?occ a ?suggestedType . } as ?suggested)" + "} GROUP BY ?occ ?type ?term ?target ?suggested ?selector ?exactMatch ?prefix ?suffix ?startPosition ?endPosition"; private final Configuration.Persistence config; - public TermOccurrenceDao(EntityManager em, Configuration config) { + private final ScheduledContextRemover contextRemover; + + public TermOccurrenceDao(EntityManager em, Configuration config, ScheduledContextRemover contextRemover) { super(TermOccurrence.class, em); this.config = config.getPersistence(); + this.contextRemover = contextRemover; } /** @@ -92,8 +96,8 @@ public TermOccurrenceDao(EntityManager em, Configuration config) { public List findAllOf(Term term) { Objects.requireNonNull(term); return em.createNativeQuery("SELECT ?x WHERE {" + - "?x a ?type ;" + - "?hasTerm ?term . }", TermOccurrence.class) + "?x a ?type ;" + + "?hasTerm ?term . }", TermOccurrence.class) .setParameter("type", typeUri) .setParameter("hasTerm", URI.create(Vocabulary.s_p_je_prirazenim_termu)) .setParameter("term", term.getUri()).getResultList(); @@ -123,6 +127,7 @@ public List findAllDefinitionalOf(Term term) { public List findAllTargeting(Asset target) { Objects.requireNonNull(target); final Query query = em.createNativeQuery(FIND_ALL_TARGETING_QUERY) + .setParameter("g", TermOccurrence.resolveContext(target.getUri())) .setParameter("occurrence", URI.create(Vocabulary.s_c_vyskyt_termu)) .setParameter("hasTarget", URI.create(Vocabulary.s_p_ma_cil)) .setParameter("assignmentOfTerm", URI.create(Vocabulary.s_p_je_prirazenim_termu)) @@ -150,26 +155,26 @@ public List findAllTargeting(Asset target) { */ public List getOccurrenceInfo(Term term) { return em.createNativeQuery("SELECT ?term ?resource ?label (count(?x) as ?cnt) ?type ?suggested WHERE {" + - "BIND (?t AS ?term)" + - "{" + - " ?x a ?suggestedOccurrence ." + - " BIND (true as ?suggested)" + - "} UNION {" + - " ?x a ?occurrence ." + - " FILTER NOT EXISTS {" + - " ?x a ?suggestedOccurrence ." + - " }" + - " BIND (false as ?suggested)" + - "} " + - " ?x ?hasTerm ?term ;" + - " ?hasTarget ?target . " + - " { ?target ?hasSource ?resource . FILTER NOT EXISTS { ?resource a ?fileType . } } " + - " UNION { ?target ?hasSource ?file . ?resource ?isDocumentOf ?file . } " + - "BIND (IF(EXISTS { ?resource a ?termType }, ?termDefOcc, ?fileOcc) as ?type)" + - "{ ?resource rdfs:label ?label . } UNION { ?resource ?hasTitle ?label . } " + - "FILTER langMatches(lang(?label), ?lang)" + - "} GROUP BY ?resource ?term ?label ?type ?suggested HAVING (?cnt > 0) ORDER BY ?label", - "TermOccurrences") + "BIND (?t AS ?term)" + + "{" + + " ?x a ?suggestedOccurrence ." + + " BIND (true as ?suggested)" + + "} UNION {" + + " ?x a ?occurrence ." + + " FILTER NOT EXISTS {" + + " ?x a ?suggestedOccurrence ." + + " }" + + " BIND (false as ?suggested)" + + "} " + + " ?x ?hasTerm ?term ;" + + " ?hasTarget ?target . " + + " { ?target ?hasSource ?resource . FILTER NOT EXISTS { ?resource a ?fileType . } } " + + " UNION { ?target ?hasSource ?file . ?resource ?isDocumentOf ?file . } " + + "BIND (IF(EXISTS { ?resource a ?termType }, ?termDefOcc, ?fileOcc) as ?type)" + + "{ ?resource rdfs:label ?label . } UNION { ?resource ?hasTitle ?label . } " + + "FILTER langMatches(lang(?label), ?lang)" + + "} GROUP BY ?resource ?term ?label ?type ?suggested HAVING (?cnt > 0) ORDER BY ?label", + "TermOccurrences") .setParameter("suggestedOccurrence", URI.create(Vocabulary.s_c_navrzeny_vyskyt_termu)) .setParameter("hasTerm", URI.create(Vocabulary.s_p_je_prirazenim_termu)) .setParameter("hasTarget", URI.create(Vocabulary.s_p_ma_cil)) @@ -185,7 +190,6 @@ public List getOccurrenceInfo(Term term) { .setParameter("t", term.getUri()).getResultList(); } - @ModifiesData @Override public void persist(TermOccurrence entity) { Objects.requireNonNull(entity); @@ -201,6 +205,18 @@ public void persist(TermOccurrence entity) { } } + @Override + public TermOccurrence update(TermOccurrence entity) { + Objects.requireNonNull(entity); + try { + // Evict possibly cached references with default context (cached by this.find) + em.getEntityManagerFactory().getCache().evict(TermOccurrence.class, entity.getUri(), null); + return em.merge(entity, new EntityDescriptor(entity.resolveContext())); + } catch (RuntimeException e) { + throw new PersistenceException(e); + } + } + /** * Removes all suggested term occurrences whose target points to the specified asset. * @@ -213,14 +229,14 @@ public void removeSuggested(Asset target) { private void removeAll(URI assetUri, URI toType) { em.createNativeQuery("DELETE WHERE {" + - "?x a ?toType ;" + - "?hasTarget ?target ;" + - "?y ?z ." + - "?target a ?occurrenceTarget ;" + - "?hasSelector ?selector ;" + - "?hasSource ?asset ." + - "?target ?tY ?tZ ." + - "?selector ?sY ?sZ . }") + "?x a ?toType ;" + + "?hasTarget ?target ;" + + "?y ?z ." + + "?target a ?occurrenceTarget ;" + + "?hasSelector ?selector ;" + + "?hasSource ?asset ." + + "?target ?tY ?tZ ." + + "?selector ?sY ?sZ . }") .setParameter("toType", toType) .setParameter("hasTarget", URI.create(Vocabulary.s_p_ma_cil)) .setParameter("occurrenceTarget", URI.create(Vocabulary.s_c_cil_vyskytu)) @@ -231,15 +247,22 @@ private void removeAll(URI assetUri, URI toType) { /** * Removes all term occurrences whose target points to the specified asset. + *

+ * Note that the removal may not be immediate. Rather, the occurrences are moved into a context that is scheduled + * for removal later. * * @param target Asset for which term occurrences will be removed */ public void removeAll(Asset target) { Objects.requireNonNull(target); - em.createNativeQuery("DROP GRAPH ?g") - .setParameter("g", TermOccurrence.resolveContext(target.getUri())) + final URI sourceContext = TermOccurrence.resolveContext(target.getUri()); + final URI targetContext = URI.create(sourceContext + "-for-removal-" + System.currentTimeMillis()); + em.createNativeQuery("MOVE GRAPH ?g TO ?targetContext") + .setParameter("g", sourceContext) + .setParameter("targetContext", targetContext) .executeUpdate(); + contextRemover.scheduleForRemoval(targetContext); } /** @@ -252,14 +275,14 @@ public void removeAll(Asset target) { */ public void removeAllOrphans() { em.createNativeQuery("SELECT DISTINCT ?source WHERE {" + - "?t a ?target ;" + - "?hasSource ?source ." + - // If an asset does not have a label, it does not exist - "FILTER NOT EXISTS { " + - "{ ?source ?hasLabel ?label . } " + - "UNION" + - "{ ?source ?hasTitle ?label . } " + - "}}", URI.class) + "?t a ?target ;" + + "?hasSource ?source ." + + // If an asset does not have a label, it does not exist + "FILTER NOT EXISTS { " + + "{ ?source ?hasLabel ?label . } " + + "UNION" + + "{ ?source ?hasTitle ?label . } " + + "}}", URI.class) .setParameter("target", URI.create(Vocabulary.s_c_cil_vyskytu)) .setParameter("hasSource", URI.create(Vocabulary.s_p_ma_zdroj)) .setParameter("hasLabel", URI.create(RDFS.LABEL)) diff --git a/src/main/java/cz/cvut/kbss/termit/persistence/dao/util/ScheduledContextRemover.java b/src/main/java/cz/cvut/kbss/termit/persistence/dao/util/ScheduledContextRemover.java new file mode 100644 index 000000000..fb82b4228 --- /dev/null +++ b/src/main/java/cz/cvut/kbss/termit/persistence/dao/util/ScheduledContextRemover.java @@ -0,0 +1,65 @@ +package cz.cvut.kbss.termit.persistence.dao.util; + +import cz.cvut.kbss.jopa.model.EntityManager; +import cz.cvut.kbss.termit.util.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.lang.NonNull; +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Component; +import org.springframework.transaction.annotation.Transactional; + +import java.net.URI; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +/** + * Drops registered repository contexts at scheduled moments. + *

+ * This allows to move time-consuming removal of repository contexts containing a lot of data to times of low system + * activity. + */ +@Component +public class ScheduledContextRemover { + + private static final Logger LOG = LoggerFactory.getLogger(ScheduledContextRemover.class); + + private final EntityManager em; + + private final Set contextsToRemove = new HashSet<>(); + + public ScheduledContextRemover(EntityManager em) { + this.em = em; + } + + /** + * Schedules the specified context identifier for removal at the next execution of the context cleanup. + * + * @param contextUri Identifier of the context to remove + * @see #runContextRemoval() + */ + public synchronized void scheduleForRemoval(@NonNull URI contextUri) { + LOG.debug("Scheduling context {} for removal.", Utils.uriToString(contextUri)); + contextsToRemove.add(Objects.requireNonNull(contextUri)); + } + + /** + * Runs the removal of the registered repository contexts. + *

+ * This method is scheduled and should not be invoked manually. + * + * @see #scheduleForRemoval(URI) + */ + @Transactional + @Scheduled(fixedRate = 1, timeUnit = TimeUnit.MINUTES) + public void runContextRemoval() { + LOG.trace("Running scheduled repository context removal."); + contextsToRemove.forEach(g -> { + LOG.trace("Dropping repository context {}.", Utils.uriToString(g)); + em.createNativeQuery("DROP GRAPH ?g").setParameter("g", g).executeUpdate(); + }); + contextsToRemove.clear(); + } +} diff --git a/src/main/java/cz/cvut/kbss/termit/rest/ResourceController.java b/src/main/java/cz/cvut/kbss/termit/rest/ResourceController.java index 0841fb375..ef3c155c2 100644 --- a/src/main/java/cz/cvut/kbss/termit/rest/ResourceController.java +++ b/src/main/java/cz/cvut/kbss/termit/rest/ResourceController.java @@ -27,6 +27,7 @@ import cz.cvut.kbss.termit.security.SecurityConstants; import cz.cvut.kbss.termit.service.IdentifierResolver; import cz.cvut.kbss.termit.service.business.ResourceService; +import cz.cvut.kbss.termit.service.document.ResourceRetrievalSpecification; import cz.cvut.kbss.termit.util.Configuration; import cz.cvut.kbss.termit.util.Constants.QueryParams; import cz.cvut.kbss.termit.util.TypeAwareResource; @@ -44,7 +45,17 @@ import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.security.access.prepost.PreAuthorize; -import org.springframework.web.bind.annotation.*; +import org.springframework.web.bind.annotation.DeleteMapping; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.PutMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestMethod; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.ResponseStatus; +import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile; import java.io.IOException; @@ -129,16 +140,16 @@ public ResponseEntity getContent( @RequestParam(name = "attachment", required = false) boolean asAttachment, @Parameter( description = "Datetime (ISO-format) at which the content is expected to be valid. Allows getting older revisions of the resource content.") - @RequestParam(name = "at", required = false) Optional at) { + @RequestParam(name = "at", required = false) Optional at, + @Parameter(description = "Whether to return the content without unconfirmed term occurrences.") + @RequestParam(name = "withoutUnconfirmedOccurrences", + required = false) boolean withoutUnconfirmedOccurrences) { final Resource resource = getResource(localName, namespace); try { - final TypeAwareResource content; - if (at.isPresent()) { - final Instant timestamp = RestUtils.parseTimestamp(at.get()); - content = resourceService.getContent(resource, timestamp); - } else { - content = resourceService.getContent(resource); - } + final Optional timestamp = at.map(RestUtils::parseTimestamp); + final TypeAwareResource content = resourceService.getContent(resource, + new ResourceRetrievalSpecification(timestamp, + withoutUnconfirmedOccurrences)); final ResponseEntity.BodyBuilder builder = ResponseEntity.ok() .contentLength(content.contentLength()) .contentType(MediaType.parseMediaType( @@ -200,7 +211,10 @@ public ResponseEntity hasContent(@Parameter(description = ResourceControll if (!hasContent) { return ResponseEntity.notFound().build(); } else { - final String contentType = resourceService.getContent(r).getMediaType().orElse(null); + final String contentType = resourceService.getContent(r, + new ResourceRetrievalSpecification(Optional.empty(), + false)) + .getMediaType().orElse(null); return ResponseEntity.noContent().header(HttpHeaders.CONTENT_TYPE, contentType).build(); } } diff --git a/src/main/java/cz/cvut/kbss/termit/rest/TermOccurrenceController.java b/src/main/java/cz/cvut/kbss/termit/rest/TermOccurrenceController.java index ae8a9e241..ddfa5057a 100644 --- a/src/main/java/cz/cvut/kbss/termit/rest/TermOccurrenceController.java +++ b/src/main/java/cz/cvut/kbss/termit/rest/TermOccurrenceController.java @@ -17,6 +17,8 @@ */ package cz.cvut.kbss.termit.rest; +import cz.cvut.kbss.jsonld.JsonLd; +import cz.cvut.kbss.termit.model.assignment.TermOccurrence; import cz.cvut.kbss.termit.security.SecurityConstants; import cz.cvut.kbss.termit.service.IdentifierResolver; import cz.cvut.kbss.termit.service.business.TermOccurrenceService; @@ -31,8 +33,16 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.http.HttpStatus; +import org.springframework.http.MediaType; import org.springframework.security.access.prepost.PreAuthorize; -import org.springframework.web.bind.annotation.*; +import org.springframework.web.bind.annotation.DeleteMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PutMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.ResponseStatus; +import org.springframework.web.bind.annotation.RestController; import java.net.URI; @@ -57,6 +67,22 @@ public TermOccurrenceController(IdentifierResolver idResolver, Configuration con this.occurrenceService = occurrenceService; } + @Operation(security = {@SecurityRequirement(name = "bearer-key")}, + description = "Creates or updates a term occurrence.") + @ApiResponses({ + @ApiResponse(responseCode = "202", description = "Term occurrence saved"), + @ApiResponse(responseCode = "409", + description = "The occurrence is not valid, e.g., the term or target asset do not exist") + }) + @PutMapping(consumes = {JsonLd.MEDIA_TYPE, MediaType.APPLICATION_JSON_VALUE}) + @ResponseStatus(HttpStatus.NO_CONTENT) + @PreAuthorize("hasRole('" + SecurityConstants.ROLE_FULL_USER + "')") + public void saveOccurrence(@Parameter(description = "Term occurrence to save") + @RequestBody TermOccurrence occurrence) { + occurrenceService.persistOrUpdate(occurrence); + LOG.debug("Saved term occurrence {}.", occurrence); + } + @Operation(security = {@SecurityRequirement(name = "bearer-key")}, description = "Approves a suggested term occurrence with the specified identifier.") @ApiResponses({ @@ -64,7 +90,7 @@ public TermOccurrenceController(IdentifierResolver idResolver, Configuration con @ApiResponse(responseCode = "404", description = "Term occurrence not found.") }) @PutMapping(value = "/{localName}") - @ResponseStatus(HttpStatus.NO_CONTENT) + @ResponseStatus(HttpStatus.ACCEPTED) @PreAuthorize("hasRole('" + SecurityConstants.ROLE_FULL_USER + "')") public void approveOccurrence( @Parameter(description = TermOccurrenceControllerDoc.ID_LOCAL_NAME_DESCRIPTION, @@ -75,7 +101,7 @@ public void approveOccurrence( @RequestParam(name = Constants.QueryParams.NAMESPACE) String namespace) { final URI identifier = idResolver.resolveIdentifier(namespace, localName); - occurrenceService.approve(occurrenceService.getRequiredReference(identifier)); + occurrenceService.approve(identifier); LOG.debug("Occurrence with identifier <{}> approved.", identifier); } @@ -95,7 +121,7 @@ public void removeOccurrence(@Parameter(description = TermOccurrenceControllerDo example = TermOccurrenceControllerDoc.ID_NAMESPACE_EXAMPLE) @RequestParam(name = Constants.QueryParams.NAMESPACE) String namespace) { final URI identifier = idResolver.resolveIdentifier(namespace, localName); - occurrenceService.remove(occurrenceService.getRequiredReference(identifier)); + occurrenceService.remove(identifier); LOG.debug("Occurrence with identifier <{}> removed.", identifier); } @@ -106,6 +132,6 @@ private static final class TermOccurrenceControllerDoc { private static final String ID_LOCAL_NAME_DESCRIPTION = "Locally (in the context of the specified namespace) unique part of the term occurrence identifier."; private static final String ID_LOCAL_NAME_EXAMPLE = "instance-12345"; private static final String ID_NAMESPACE_DESCRIPTION = "Term occurrence identifier namespace."; - private static final String ID_NAMESPACE_EXAMPLE = "http://onto.fel.cvut.cz/ontologies/application/termit/pojem/v\u00fdskyt-termu/"; + private static final String ID_NAMESPACE_EXAMPLE = "http://onto.fel.cvut.cz/ontologies/application/termit/pojem/výskyt-termu/"; } } diff --git a/src/main/java/cz/cvut/kbss/termit/service/business/ResourceService.java b/src/main/java/cz/cvut/kbss/termit/service/business/ResourceService.java index 03a8dc1c8..1a11ba6d7 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/business/ResourceService.java +++ b/src/main/java/cz/cvut/kbss/termit/service/business/ResourceService.java @@ -32,10 +32,13 @@ import cz.cvut.kbss.termit.model.resource.Resource; import cz.cvut.kbss.termit.service.changetracking.ChangeRecordProvider; import cz.cvut.kbss.termit.service.document.DocumentManager; +import cz.cvut.kbss.termit.service.document.ResourceRetrievalSpecification; import cz.cvut.kbss.termit.service.document.TextAnalysisService; +import cz.cvut.kbss.termit.service.document.html.UnconfirmedTermOccurrenceRemover; import cz.cvut.kbss.termit.service.repository.ChangeRecordService; import cz.cvut.kbss.termit.service.repository.ResourceRepositoryService; import cz.cvut.kbss.termit.util.TypeAwareResource; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -48,8 +51,14 @@ import java.io.InputStream; import java.net.URI; -import java.time.Instant; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; /** * Interface of business logic concerning resources. @@ -120,16 +129,31 @@ public boolean hasContent(Resource resource) { /** * Gets content of the specified resource. + *

+ * The {@link ResourceRetrievalSpecification} argument provides further parameterization of the content to + * retrieve. + *

+ * If the timestamp specified by {@code retrievalSpecification} is older than the first version of the specified + * resource, this version is returned. Similarly, if the timestamp is later than the most recent backup of the + * resource, the current version is returned. * - * @param resource Resource whose content should be retrieved + * @param resource Resource whose content should be retrieved + * @param retrievalSpecification Specification of the result * @return Representation of the resource content * @throws UnsupportedAssetOperationException When content of the specified resource cannot be retrieved * @throws NotFoundException When the specified resource has no content stored */ - public TypeAwareResource getContent(Resource resource) { + public TypeAwareResource getContent(Resource resource, ResourceRetrievalSpecification retrievalSpecification) { Objects.requireNonNull(resource); verifyFileOperationPossible(resource, "Content retrieval"); - return documentManager.getAsResource((File) resource); + final File file = (File) resource; + TypeAwareResource result = retrievalSpecification.at() + .map(instant -> documentManager.getAsResource(file, instant)) + .orElseGet(() -> documentManager.getAsResource(file)); + if (retrievalSpecification.withoutUnconfirmedOccurrences()) { + result = new UnconfirmedTermOccurrenceRemover().removeUnconfirmedOccurrences(result); + } + return result; } private void verifyFileOperationPossible(Resource resource, String operation) { @@ -138,25 +162,6 @@ private void verifyFileOperationPossible(Resource resource, String operation) { } } - /** - * Gets content of the specified resource valid at the specified timestamp. - *

- * This method provides access to backups of the specified resource. If the specified timestamp is older than the - * first version of the specified resource, this version is returned. Similarly, if the timestamp is later than the - * most recent backup of the resource, the current version is returned. - * - * @param resource Resource whose content should be retrieved - * @param at Timestamp of the version of the retrieved resource - * @return Representation of the resource content - * @throws UnsupportedAssetOperationException When content of the specified resource cannot be retrieved - * @throws NotFoundException When the specified resource has no content stored - */ - public TypeAwareResource getContent(Resource resource, Instant at) { - Objects.requireNonNull(resource); - verifyFileOperationPossible(resource, "Content retrieval"); - return documentManager.getAsResource((File) resource, at); - } - /** * Saves content of the specified resource. * @@ -190,10 +195,9 @@ public void saveContent(Resource resource, InputStream content) { public List getFiles(Resource document) { Objects.requireNonNull(document); final Resource instance = findRequired(document.getUri()); - if (!(instance instanceof Document)) { + if (!(instance instanceof Document doc)) { throw new UnsupportedAssetOperationException("Cannot get files from resource which is not a document."); } - final Document doc = (Document) instance; if (doc.getFiles() != null) { final List list = new ArrayList<>(doc.getFiles()); list.sort(Comparator.comparing(File::getLabel)); @@ -215,10 +219,9 @@ public List getFiles(Resource document) { public void addFileToDocument(Resource document, File file) { Objects.requireNonNull(document); Objects.requireNonNull(file); - if (!(document instanceof Document)) { + if (!(document instanceof Document doc)) { throw new UnsupportedAssetOperationException("Cannot add file to the specified resource " + document); } - final Document doc = (Document) document; doc.addFile(file); if (doc.getVocabulary() != null) { final Vocabulary vocabulary = vocabularyService.getRequiredReference(doc.getVocabulary()); @@ -359,7 +362,7 @@ public List getChanges(Resource asset) { } @Override - public void setApplicationEventPublisher(ApplicationEventPublisher eventPublisher) { + public void setApplicationEventPublisher(@NotNull ApplicationEventPublisher eventPublisher) { this.eventPublisher = eventPublisher; } } diff --git a/src/main/java/cz/cvut/kbss/termit/service/business/TermOccurrenceService.java b/src/main/java/cz/cvut/kbss/termit/service/business/TermOccurrenceService.java index 90f0500be..f391608ea 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/business/TermOccurrenceService.java +++ b/src/main/java/cz/cvut/kbss/termit/service/business/TermOccurrenceService.java @@ -45,18 +45,27 @@ public interface TermOccurrenceService { void persist(TermOccurrence occurrence); /** - * Approves the specified term occurrence. + * Saves the specified term occurrence, either persisting it or updating if it already exists. + *

+ * If the occurrence already exists, it is assumed that the term has changed and only this attribute is updated. + * + * @param occurrence Occurrence to save + */ + void persistOrUpdate(TermOccurrence occurrence); + + /** + * Approves term occurrence with the specified identifier. *

* This removes the suggested classification of the occurrence if it were present. * - * @param occurrence Occurrence to approve + * @param occurrenceId Identifier of the occurrence to approve */ - void approve(TermOccurrence occurrence); + void approve(URI occurrenceId); /** - * Removes the specified term occurrence. + * Removes term occurrence with the specified identifier. * - * @param occurrence Occurrence to remove + * @param occurrenceId Identifier of the occurrence to remove */ - void remove(TermOccurrence occurrence); + void remove(URI occurrenceId); } diff --git a/src/main/java/cz/cvut/kbss/termit/service/business/TermService.java b/src/main/java/cz/cvut/kbss/termit/service/business/TermService.java index 53ff8eea0..073bf0e78 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/business/TermService.java +++ b/src/main/java/cz/cvut/kbss/termit/service/business/TermService.java @@ -491,7 +491,7 @@ public void setTermDefinitionSource(Term term, TermDefinitionSource definitionSo Objects.requireNonNull(definitionSource); definitionSource.setTerm(term.getUri()); if (term.getDefinitionSource() != null) { - termOccurrenceService.remove(term.getDefinitionSource()); + termOccurrenceService.remove(term.getDefinitionSource().getUri()); } termOccurrenceService.persist(definitionSource); } @@ -511,7 +511,7 @@ public void setTermDefinitionSource(Term term, TermDefinitionSource definitionSo public void removeTermDefinitionSource(Term term) { Objects.requireNonNull(term); if (term.getDefinitionSource() != null) { - termOccurrenceService.remove(term.getDefinitionSource()); + termOccurrenceService.remove(term.getDefinitionSource().getUri()); } } diff --git a/src/main/java/cz/cvut/kbss/termit/service/document/AnnotationGenerator.java b/src/main/java/cz/cvut/kbss/termit/service/document/AnnotationGenerator.java index b4d3be2fa..4333be04a 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/document/AnnotationGenerator.java +++ b/src/main/java/cz/cvut/kbss/termit/service/document/AnnotationGenerator.java @@ -67,6 +67,7 @@ public void generateAnnotations(InputStream content, File source) { final TermOccurrenceResolver occurrenceResolver = findResolverFor(source); LOG.debug("Resolving annotations of file {}.", source); occurrenceResolver.parseContent(content, source); + occurrenceResolver.setExistingOccurrences(occurrenceSaver.getExistingOccurrences(source)); final List occurrences = occurrenceResolver.findTermOccurrences(); saveAnnotatedContent(source, occurrenceResolver.getContent()); occurrenceSaver.saveOccurrences(occurrences, source); diff --git a/src/main/java/cz/cvut/kbss/termit/service/document/AsynchronousTermOccurrenceSaver.java b/src/main/java/cz/cvut/kbss/termit/service/document/AsynchronousTermOccurrenceSaver.java index ae064e45c..a12186af0 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/document/AsynchronousTermOccurrenceSaver.java +++ b/src/main/java/cz/cvut/kbss/termit/service/document/AsynchronousTermOccurrenceSaver.java @@ -34,4 +34,9 @@ public void saveOccurrences(List occurrences, Asset source) { synchronousSaver.saveOccurrences(occurrences, source); LOG.trace("Finished saving term occurrences for asset {}.", source); } + + @Override + public List getExistingOccurrences(Asset source) { + return synchronousSaver.getExistingOccurrences(source); + } } diff --git a/src/main/java/cz/cvut/kbss/termit/service/document/ResourceRetrievalSpecification.java b/src/main/java/cz/cvut/kbss/termit/service/document/ResourceRetrievalSpecification.java new file mode 100644 index 000000000..e26d433e0 --- /dev/null +++ b/src/main/java/cz/cvut/kbss/termit/service/document/ResourceRetrievalSpecification.java @@ -0,0 +1,13 @@ +package cz.cvut.kbss.termit.service.document; + +import java.time.Instant; +import java.util.Optional; + +/** + * Specifies what resource content should be retrieved. + * + * @param at Timestamp indicating the version of the resource to retrieve + * @param withoutUnconfirmedOccurrences Whether the content should not contain unconfirmed term occurrences + */ +public record ResourceRetrievalSpecification(Optional at, boolean withoutUnconfirmedOccurrences) { +} diff --git a/src/main/java/cz/cvut/kbss/termit/service/document/SynchronousTermOccurrenceSaver.java b/src/main/java/cz/cvut/kbss/termit/service/document/SynchronousTermOccurrenceSaver.java index 46e9e769f..e8ec00613 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/document/SynchronousTermOccurrenceSaver.java +++ b/src/main/java/cz/cvut/kbss/termit/service/document/SynchronousTermOccurrenceSaver.java @@ -1,9 +1,7 @@ package cz.cvut.kbss.termit.service.document; import cz.cvut.kbss.termit.model.Asset; -import cz.cvut.kbss.termit.model.assignment.OccurrenceTarget; import cz.cvut.kbss.termit.model.assignment.TermOccurrence; -import cz.cvut.kbss.termit.model.selector.Selector; import cz.cvut.kbss.termit.persistence.dao.TermOccurrenceDao; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -11,7 +9,6 @@ import org.springframework.transaction.annotation.Transactional; import java.util.List; -import java.util.Set; /** * Saves occurrences synchronously. @@ -32,43 +29,15 @@ public SynchronousTermOccurrenceSaver(TermOccurrenceDao termOccurrenceDao) { @Transactional @Override public void saveOccurrences(List occurrences, Asset source) { - LOG.trace("Saving term occurrences for asset {}.", source); - final List existing = termOccurrenceDao.findAllTargeting(source); - occurrences.stream().filter(o -> isNew(o, existing)) - .filter(o -> !o.getTerm().equals(source.getUri())).forEach(o -> { - o.addType(cz.cvut.kbss.termit.util.Vocabulary.s_c_navrzeny_vyskyt_termu); - termOccurrenceDao.persist(o); - }); + LOG.debug("Saving term occurrences for asset {}.", source); + LOG.trace("Removing all existing occurrences in asset {}.", source); + termOccurrenceDao.removeAll(source); + LOG.trace("Persisting new occurrences in {}.", source); + occurrences.stream().filter(o -> !o.getTerm().equals(source.getUri())).forEach(termOccurrenceDao::persist); } - /** - * Checks whether the specified term occurrence is new or if there already exists an equivalent one. - *

- * Two occurrences are considered equivalent iff they represent the same term, they have a target with the same - * source file, and the target contains at least one equal selector. - * - * @param occurrence The supposedly new occurrence to check - * @param existing Existing occurrences relevant to the specified file - * @return Whether the occurrence is truly new - */ - private static boolean isNew(TermOccurrence occurrence, List existing) { - final OccurrenceTarget target = occurrence.getTarget(); - assert target != null; - final Set selectors = target.getSelectors(); - for (TermOccurrence to : existing) { - if (!to.getTerm().equals(occurrence.getTerm())) { - continue; - } - final OccurrenceTarget fileTarget = to.getTarget(); - assert fileTarget != null; - assert fileTarget.getSource().equals(target.getSource()); - // Same term, contains at least one identical selector - if (fileTarget.getSelectors().stream().anyMatch(selectors::contains)) { - LOG.trace("Skipping occurrence {} because another one with matching term and selectors exists.", - occurrence); - return false; - } - } - return true; + @Override + public List getExistingOccurrences(Asset source) { + return termOccurrenceDao.findAllTargeting(source); } } diff --git a/src/main/java/cz/cvut/kbss/termit/service/document/TermOccurrenceResolver.java b/src/main/java/cz/cvut/kbss/termit/service/document/TermOccurrenceResolver.java index d25ca40f8..55964f5b3 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/document/TermOccurrenceResolver.java +++ b/src/main/java/cz/cvut/kbss/termit/service/document/TermOccurrenceResolver.java @@ -19,12 +19,17 @@ import cz.cvut.kbss.termit.model.AbstractTerm; import cz.cvut.kbss.termit.model.Asset; -import cz.cvut.kbss.termit.model.assignment.*; +import cz.cvut.kbss.termit.model.assignment.DefinitionalOccurrenceTarget; +import cz.cvut.kbss.termit.model.assignment.FileOccurrenceTarget; +import cz.cvut.kbss.termit.model.assignment.TermDefinitionalOccurrence; +import cz.cvut.kbss.termit.model.assignment.TermFileOccurrence; +import cz.cvut.kbss.termit.model.assignment.TermOccurrence; import cz.cvut.kbss.termit.model.resource.File; import cz.cvut.kbss.termit.service.repository.TermRepositoryService; import java.io.InputStream; import java.net.URI; +import java.util.Collections; import java.util.List; /** @@ -34,6 +39,8 @@ public abstract class TermOccurrenceResolver { protected final TermRepositoryService termService; + protected List existingOccurrences = Collections.emptyList(); + protected TermOccurrenceResolver(TermRepositoryService termService) { this.termService = termService; } @@ -49,6 +56,15 @@ protected TermOccurrenceResolver(TermRepositoryService termService) { */ public abstract void parseContent(InputStream input, Asset source); + /** + * Sets occurrences that already existed on previous analyses. + * + * @param existingOccurrences Term occurrences from the previous analysis run + */ + public void setExistingOccurrences(List existingOccurrences) { + this.existingOccurrences = existingOccurrences; + } + /** * Gets the content which was previously parsed and processed by this instance. *

@@ -95,6 +111,7 @@ protected TermOccurrence createOccurrence(URI termUri, Asset source) { } else { throw new IllegalArgumentException("Unsupported term occurrence source " + source); } + occurrence.markSuggested(); return occurrence; } } diff --git a/src/main/java/cz/cvut/kbss/termit/service/document/TermOccurrenceSaver.java b/src/main/java/cz/cvut/kbss/termit/service/document/TermOccurrenceSaver.java index 7a005b2e7..85286d4bb 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/document/TermOccurrenceSaver.java +++ b/src/main/java/cz/cvut/kbss/termit/service/document/TermOccurrenceSaver.java @@ -19,4 +19,12 @@ public interface TermOccurrenceSaver { * @param source Asset in which the terms occur */ void saveOccurrences(List occurrences, Asset source); + + /** + * Gets a list of existing term occurrences in the specified asset. + * + * @param source Asset in which the terms occur + * @return List of existing term occurrences + */ + List getExistingOccurrences(Asset source); } diff --git a/src/main/java/cz/cvut/kbss/termit/service/document/html/HtmlSelectorGenerators.java b/src/main/java/cz/cvut/kbss/termit/service/document/html/HtmlSelectorGenerators.java index 8d0779d5b..bf09c6ba2 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/document/html/HtmlSelectorGenerators.java +++ b/src/main/java/cz/cvut/kbss/termit/service/document/html/HtmlSelectorGenerators.java @@ -18,10 +18,10 @@ package cz.cvut.kbss.termit.service.document.html; import cz.cvut.kbss.termit.model.selector.Selector; +import cz.cvut.kbss.termit.util.Configuration; import org.jsoup.nodes.Element; import org.springframework.stereotype.Service; -import java.util.Arrays; import java.util.List; import java.util.Set; import java.util.stream.Collectors; @@ -37,8 +37,13 @@ @Service public class HtmlSelectorGenerators { - private final List generators = Arrays - .asList(new TextQuoteSelectorGenerator(), new TextPositionSelectorGenerator()); + private final List generators; + + public HtmlSelectorGenerators(Configuration config) { + this.generators = List.of( + new TextQuoteSelectorGenerator(config.getTextAnalysis().getTextQuoteSelectorContextLength()), + new TextPositionSelectorGenerator()); + } /** * Generates selectors for the specified HTML/XML elements. diff --git a/src/main/java/cz/cvut/kbss/termit/service/document/html/HtmlTermOccurrenceResolver.java b/src/main/java/cz/cvut/kbss/termit/service/document/html/HtmlTermOccurrenceResolver.java index 9e83a45cf..c67c466ca 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/document/html/HtmlTermOccurrenceResolver.java +++ b/src/main/java/cz/cvut/kbss/termit/service/document/html/HtmlTermOccurrenceResolver.java @@ -20,8 +20,10 @@ import cz.cvut.kbss.termit.exception.AnnotationGenerationException; import cz.cvut.kbss.termit.model.Asset; import cz.cvut.kbss.termit.model.Term; +import cz.cvut.kbss.termit.model.assignment.OccurrenceTarget; import cz.cvut.kbss.termit.model.assignment.TermOccurrence; import cz.cvut.kbss.termit.model.resource.File; +import cz.cvut.kbss.termit.model.selector.Selector; import cz.cvut.kbss.termit.service.document.DocumentManager; import cz.cvut.kbss.termit.service.document.TermOccurrenceResolver; import cz.cvut.kbss.termit.service.repository.TermRepositoryService; @@ -62,6 +64,9 @@ @Scope(value = ConfigurableBeanFactory.SCOPE_PROTOTYPE) public class HtmlTermOccurrenceResolver extends TermOccurrenceResolver { + private static final String BNODE_PREFIX = "_:"; + private static final String SCORE_ATTRIBUTE = "score"; + private static final Logger LOG = LoggerFactory.getLogger(HtmlTermOccurrenceResolver.class); private final HtmlSelectorGenerators selectorGenerators; @@ -166,11 +171,23 @@ public List findTermOccurrences() { LOG.trace("Processing RDFa annotated element {}.", element); final Optional occurrence = resolveAnnotation(element, source); occurrence.ifPresent(to -> { - if (to.getScore() != null && to.getScore() > scoreThreshold) { - LOG.trace("Found term occurrence {}.", to); + if (!to.isSuggested()) { + // Occurrence already approved in content (from previous manual approval) + result.add(to); + } else if (existsApproved(to)) { + LOG.trace("Found term occurrence {} with matching existing approved occurrence.", to); + to.markApproved(); + // Annotation without score is considered approved by the frontend + element.removeAttr(SCORE_ATTRIBUTE); result.add(to); } else { - LOG.trace("The score of this occurrence {} is lower than the specified threshold", to); + if (to.getScore() > scoreThreshold) { + LOG.trace("Found term occurrence {}.", to); + result.add(to); + } else { + LOG.trace("The confidence score of occurrence {} is lower than the configured threshold {}.", + to, scoreThreshold); + } } }); } @@ -187,7 +204,8 @@ private Optional resolveAnnotation(Element rdfaElem, Asset so verifyTermExists(rdfaElem, termUri, termId); final TermOccurrence occurrence = createOccurrence(termUri, source); occurrence.getTarget().setSelectors(selectorGenerators.generateSelectors(rdfaElem)); - final String strScore = rdfaElem.attr("score"); + occurrence.setUri(resolveOccurrenceId(rdfaElem, source)); + final String strScore = rdfaElem.attr(SCORE_ATTRIBUTE); if (!strScore.isEmpty()) { try { final Double score = Double.parseDouble(strScore); @@ -196,6 +214,9 @@ private Optional resolveAnnotation(Element rdfaElem, Asset so occurrence.setScore(0.0); LOG.error("Unable to parse score.", e); } + } else { + // Occurrence already approved in text analysis output (probably from a previous processing of the content) + occurrence.markApproved(); } return Optional.of(occurrence); } @@ -212,6 +233,34 @@ private void verifyTermExists(Element rdfaElem, URI termUri, String termId) { existingTermIds.add(termId); } + private URI resolveOccurrenceId(Element rdfaElem, Asset source) { + final String base = TermOccurrence.resolveContext(source.getUri()) + "/"; + String about = rdfaElem.attr("about"); + if (about.startsWith(BNODE_PREFIX)) { + about = about.substring(BNODE_PREFIX.length()); + } + return URI.create(base + about); + } + + private boolean existsApproved(TermOccurrence newOccurrence) { + final OccurrenceTarget target = newOccurrence.getTarget(); + assert target != null; + final Set selectors = target.getSelectors(); + for (TermOccurrence to : existingOccurrences) { + if (!to.getTerm().equals(newOccurrence.getTerm())) { + continue; + } + final OccurrenceTarget existingTarget = to.getTarget(); + assert existingTarget != null; + assert existingTarget.getSource().equals(target.getSource()); + // Same term, contains at least one identical selector + if (existingTarget.getSelectors().stream().anyMatch(selectors::contains) && !to.isSuggested()) { + return true; + } + } + return false; + } + @Override public boolean supports(Asset source) { if (source instanceof Term) { diff --git a/src/main/java/cz/cvut/kbss/termit/service/document/html/TextQuoteSelectorGenerator.java b/src/main/java/cz/cvut/kbss/termit/service/document/html/TextQuoteSelectorGenerator.java index 529501d97..a0dacbb03 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/document/html/TextQuoteSelectorGenerator.java +++ b/src/main/java/cz/cvut/kbss/termit/service/document/html/TextQuoteSelectorGenerator.java @@ -33,10 +33,11 @@ */ class TextQuoteSelectorGenerator implements SelectorGenerator { - /** - * Length of the generated prefix and suffix - */ - static final int CONTEXT_LENGTH = 32; + private final int contextLength; + + TextQuoteSelectorGenerator(int contextLength) { + this.contextLength = contextLength; + } @Override public TextQuoteSelector generateSelector(Element... elements) { @@ -55,13 +56,12 @@ private Optional extractPrefix(Element start) { current = current.parent(); final List previousSiblings = current.childNodes().subList(0, previous.siblingIndex()); sb = extractNodeText(previousSiblings).append(sb); - if (sb.length() >= CONTEXT_LENGTH) { + if (sb.length() >= contextLength) { break; } previous = current; } - return sb.length() > 0 ? Optional.of(sb.substring(Math.max(0, sb.length() - CONTEXT_LENGTH))) : - Optional.empty(); + return !sb.isEmpty() ? Optional.of(sb.substring(Math.max(0, sb.length() - contextLength))) : Optional.empty(); } private Optional extractSuffix(Element end) { @@ -73,11 +73,11 @@ private Optional extractSuffix(Element end) { final List previousSiblings = current.childNodes() .subList(previous.siblingIndex() + 1, current.childNodeSize()); sb.append(extractNodeText(previousSiblings)); - if (sb.length() >= CONTEXT_LENGTH) { + if (sb.length() >= contextLength) { break; } previous = current; } - return sb.length() > 0 ? Optional.of(sb.substring(0, Math.min(sb.length(), CONTEXT_LENGTH))) : Optional.empty(); + return !sb.isEmpty() ? Optional.of(sb.substring(0, Math.min(sb.length(), contextLength))) : Optional.empty(); } } diff --git a/src/main/java/cz/cvut/kbss/termit/service/document/html/UnconfirmedTermOccurrenceRemover.java b/src/main/java/cz/cvut/kbss/termit/service/document/html/UnconfirmedTermOccurrenceRemover.java new file mode 100644 index 000000000..0d3055ef3 --- /dev/null +++ b/src/main/java/cz/cvut/kbss/termit/service/document/html/UnconfirmedTermOccurrenceRemover.java @@ -0,0 +1,51 @@ +package cz.cvut.kbss.termit.service.document.html; + +import cz.cvut.kbss.termit.exception.FileContentProcessingException; +import cz.cvut.kbss.termit.service.export.util.TypeAwareByteArrayResource; +import cz.cvut.kbss.termit.util.TypeAwareResource; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Node; +import org.jsoup.select.Elements; + +import java.io.BufferedWriter; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; + +/** + * Removes unconfirmed term occurrences from content. + */ +public class UnconfirmedTermOccurrenceRemover { + + + /** + * Removes unconfirmed term occurrences from the specified input. + *

+ * Removing such occurrences means the corresponding elements are replaced with their text content. + *

+ * An occurrence is considered unconfirmed when it has a confidence score, confirmed occurrences do not have + * scores. + * + * @param input Input to process + * @return Processed content + */ + public TypeAwareResource removeUnconfirmedOccurrences(TypeAwareResource input) { + try { + final Document doc = Jsoup.parse(input.getInputStream(), StandardCharsets.UTF_8.name(), ""); + doc.outputSettings().prettyPrint(false); + final Elements spanElements = doc.select("span[score]"); + spanElements.forEach(Node::unwrap); + + final ByteArrayOutputStream out = new ByteArrayOutputStream(); + final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out)); + writer.write(doc.toString()); // Write modified HTML to output stream + writer.close(); + return new TypeAwareByteArrayResource(out.toByteArray(), input.getMediaType().orElse(null), + input.getFileExtension().orElse(null)); + } catch (IOException e) { + throw new FileContentProcessingException("Unable to read resource for unconfirmed occurrence removal.", e); + } + } +} diff --git a/src/main/java/cz/cvut/kbss/termit/service/document/util/TypeAwareFileSystemResource.java b/src/main/java/cz/cvut/kbss/termit/service/document/util/TypeAwareFileSystemResource.java index fce88b122..01913f836 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/document/util/TypeAwareFileSystemResource.java +++ b/src/main/java/cz/cvut/kbss/termit/service/document/util/TypeAwareFileSystemResource.java @@ -46,13 +46,12 @@ public boolean equals(Object o) { if (this == o) { return true; } - if (!(o instanceof TypeAwareFileSystemResource)) { + if (!(o instanceof TypeAwareFileSystemResource that)) { return false; } if (!super.equals(o)) { return false; } - TypeAwareFileSystemResource that = (TypeAwareFileSystemResource) o; return Objects.equals(mediaType, that.mediaType); } diff --git a/src/main/java/cz/cvut/kbss/termit/service/repository/TermOccurrenceRepositoryService.java b/src/main/java/cz/cvut/kbss/termit/service/repository/TermOccurrenceRepositoryService.java index a3db006b9..20c19851c 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/repository/TermOccurrenceRepositoryService.java +++ b/src/main/java/cz/cvut/kbss/termit/service/repository/TermOccurrenceRepositoryService.java @@ -18,18 +18,24 @@ package cz.cvut.kbss.termit.service.repository; import cz.cvut.kbss.termit.exception.NotFoundException; +import cz.cvut.kbss.termit.exception.ValidationException; import cz.cvut.kbss.termit.model.assignment.TermOccurrence; import cz.cvut.kbss.termit.persistence.dao.TermOccurrenceDao; import cz.cvut.kbss.termit.service.business.TermOccurrenceService; +import cz.cvut.kbss.termit.util.Utils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.retry.annotation.Backoff; +import org.springframework.retry.annotation.Retryable; +import org.springframework.scheduling.annotation.Async; import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import java.net.URI; import java.util.Objects; +import java.util.Optional; import static cz.cvut.kbss.termit.util.Constants.SCHEDULING_PATTERN; @@ -40,9 +46,16 @@ public class TermOccurrenceRepositoryService implements TermOccurrenceService { private final TermOccurrenceDao termOccurrenceDao; + private final TermRepositoryService termService; + + private final ResourceRepositoryService resourceService; + @Autowired - public TermOccurrenceRepositoryService(TermOccurrenceDao termOccurrenceDao) { + public TermOccurrenceRepositoryService(TermOccurrenceDao termOccurrenceDao, TermRepositoryService termService, + ResourceRepositoryService resourceService) { this.termOccurrenceDao = termOccurrenceDao; + this.termService = termService; + this.resourceService = resourceService; } @Override @@ -54,25 +67,58 @@ public TermOccurrence getRequiredReference(URI id) { @Override public void persist(TermOccurrence occurrence) { Objects.requireNonNull(occurrence); + checkTermExists(occurrence); + if (!termService.exists(occurrence.getTarget().getSource()) && !resourceService.exists( + occurrence.getTarget().getSource())) { + throw new ValidationException( + "Occurrence references an unknown asset " + Utils.uriToString(occurrence.getTarget().getSource())); + } termOccurrenceDao.persist(occurrence); } + private void checkTermExists(TermOccurrence occurrence) { + if (!termService.exists(occurrence.getTerm())) { + throw new ValidationException( + "Occurrence references an unknown term " + Utils.uriToString(occurrence.getTerm())); + } + } + @Transactional @Override - public void approve(TermOccurrence occurrence) { + public void persistOrUpdate(TermOccurrence occurrence) { Objects.requireNonNull(occurrence); - final TermOccurrence toApprove = termOccurrenceDao.find(occurrence.getUri()).orElseThrow( - () -> NotFoundException.create(TermOccurrence.class, occurrence.getUri())); + if (termOccurrenceDao.exists(occurrence.getUri())) { + final Optional existingWrapped = termOccurrenceDao.find(occurrence.getUri()); + assert existingWrapped.isPresent(); + final TermOccurrence existing = existingWrapped.get(); + termOccurrenceDao.detach(existing); + checkTermExists(occurrence); + existing.setTerm(occurrence.getTerm()); + termOccurrenceDao.update(existing); + } else { + persist(occurrence); + } + } + + @Async + // Retry in case the occurrence has not been persisted, yet (see AsynchronousTermOccurrenceSaver) + @Retryable(retryFor = NotFoundException.class, maxAttempts = 3, backoff = @Backoff(delay = 30000L)) + @Transactional + @Override + public void approve(URI occurrenceId) { + Objects.requireNonNull(occurrenceId); + final TermOccurrence toApprove = termOccurrenceDao.find(occurrenceId).orElseThrow( + () -> NotFoundException.create(TermOccurrence.class, occurrenceId)); LOG.trace("Approving term occurrence {}", toApprove); - toApprove.removeType(cz.cvut.kbss.termit.util.Vocabulary.s_c_navrzeny_vyskyt_termu); + toApprove.markApproved(); } @Transactional @Override - public void remove(TermOccurrence occurrence) { - Objects.requireNonNull(occurrence); - LOG.trace("Removing term occurrence {}.", occurrence); - termOccurrenceDao.remove(occurrence); + public void remove(URI occurrenceId) { + Objects.requireNonNull(occurrenceId); + LOG.trace("Removing term occurrence {}.", occurrenceId); + termOccurrenceDao.getReference(occurrenceId).ifPresent(termOccurrenceDao::remove); } /** diff --git a/src/main/java/cz/cvut/kbss/termit/service/repository/UserGroupRepositoryService.java b/src/main/java/cz/cvut/kbss/termit/service/repository/UserGroupRepositoryService.java index f89f1cee7..01130a855 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/repository/UserGroupRepositoryService.java +++ b/src/main/java/cz/cvut/kbss/termit/service/repository/UserGroupRepositoryService.java @@ -70,7 +70,7 @@ protected UserGroup mapToDto(UserGroup entity) { public void addMembers(UserGroup target, Collection toAdd) { Objects.requireNonNull(target); Objects.requireNonNull(toAdd); - if (toAdd.size() == 0) { + if (toAdd.isEmpty()) { return; } LOG.debug("Adding users {} to group {}.", toAdd, target); @@ -84,7 +84,7 @@ public void addMembers(UserGroup target, Collection toAdd) { public void removeMembers(UserGroup target, Collection toRemove) { Objects.requireNonNull(target); Objects.requireNonNull(toRemove); - if (toRemove.size() == 0) { + if (toRemove.isEmpty()) { return; } LOG.debug("Removing users {} from group {}.", toRemove, target); diff --git a/src/main/java/cz/cvut/kbss/termit/util/Configuration.java b/src/main/java/cz/cvut/kbss/termit/util/Configuration.java index e54d2d667..1c624b46a 100644 --- a/src/main/java/cz/cvut/kbss/termit/util/Configuration.java +++ b/src/main/java/cz/cvut/kbss/termit/util/Configuration.java @@ -19,9 +19,11 @@ import cz.cvut.kbss.termit.model.acl.AccessLevel; import jakarta.validation.Valid; +import jakarta.validation.constraints.Min; import jakarta.validation.constraints.NotNull; import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.context.annotation.Primary; +import org.springframework.validation.annotation.Validated; import java.util.Optional; import java.util.Set; @@ -37,7 +39,7 @@ */ @ConfigurationProperties("termit") @Primary -@Valid +@Validated public class Configuration { /** * TermIt frontend URL. @@ -52,23 +54,41 @@ public class Configuration { * server. */ private String jmxBeanName = "TermItAdminBean"; + @Valid private Persistence persistence = new Persistence(); + @Valid private Repository repository = new Repository(); + @Valid private ChangeTracking changetracking = new ChangeTracking(); + @Valid private Comments comments = new Comments(); + @Valid private Namespace namespace = new Namespace(); + @Valid private Admin admin = new Admin(); + @Valid private File file = new File(); + @Valid private Jwt jwt = new Jwt(); + @Valid private TextAnalysis textAnalysis = new TextAnalysis(); + @Valid private Glossary glossary = new Glossary(); + @Valid private PublicView publicView = new PublicView(); + @Valid private Workspace workspace = new Workspace(); + @Valid private Cors cors = new Cors(); + @Valid private Schedule schedule = new Schedule(); + @Valid private ACL acl = new ACL(); + @Valid private Mail mail = new Mail(); + @Valid private Security security = new Security(); + @Valid private Language language = new Language(); public String getUrl() { @@ -231,7 +251,7 @@ public void setLanguage(Language language) { this.language = language; } - + @Validated public static class Persistence { /** * OntoDriver class for the repository. @@ -261,6 +281,7 @@ public void setLanguage(String language) { } } + @Validated public static class Repository { /** * URL of the main application repository. @@ -315,7 +336,10 @@ public void setPassword(String password) { } } + @Validated public static class ChangeTracking { + + @Valid Context context = new Context(); public Context getContext() { @@ -344,6 +368,7 @@ public void setExtension(String extension) { } } + @Validated public static class Comments { /** * IRI of the repository context used to store comments (discussion to assets). @@ -360,6 +385,7 @@ public void setContext(String context) { } } + @Validated public static class Namespace { /** * Namespace for vocabulary identifiers. @@ -387,6 +413,7 @@ public static class Namespace { * http://www.example.org/ontologies/vocabularies/metropolitan-plan/SEPARATOR/inhabited-area}, where 'SEPARATOR' * is the value of this configuration parameter. */ + @Valid private NamespaceDetail term = new NamespaceDetail(); /** * Separator of File namespace from the parent Document identifier. @@ -399,15 +426,17 @@ public static class Namespace { * http://www.example.org/ontologies/resources/metropolitan-plan/document/SEPARATOR/main-file}, where * 'SEPARATOR' is the value of this configuration parameter. */ + @Valid private NamespaceDetail file = new NamespaceDetail(); /** * Separator of snapshot timestamp and original asset identifier. *

* For example, if we have a Vocabulary with IRI {@code http://www.example.org/ontologies/vocabularies/metropolitan-plan} - * and the snapshot separator is configured to {@code version}, a snapshot will IRI will look something like + * and the snapshot separator is configured to {@code version}, a snapshot IRI will look something like * {@code http://www.example.org/ontologies/vocabularies/metropolitan-plan/version/20220530T202317Z}. */ + @Valid private NamespaceDetail snapshot = new NamespaceDetail(); public String getVocabulary() { @@ -458,7 +487,9 @@ public void setSnapshot(NamespaceDetail snapshot) { this.snapshot = snapshot; } + @Validated public static class NamespaceDetail { + @NotNull String separator; @@ -472,6 +503,7 @@ public void setSeparator(String separator) { } } + @Validated public static class Admin { /** * Specifies the folder in which admin credentials are saved when its account is generated. @@ -501,6 +533,7 @@ public void setCredentialsLocation(String credentialsLocation) { } } + @Validated public static class File { /** * Specifies root directory in which document files are stored. @@ -517,6 +550,7 @@ public void setStorage(String storage) { } } + @Validated public static class Jwt { /** * Secret key used when hashing a JWT. @@ -532,29 +566,24 @@ public void setSecretKey(String secretKey) { } } + @Validated public static class TextAnalysis { /** * URL of the text analysis service. */ - String url; + private String url; /** - * Minimal match score of a term occurrence for which a term assignment should be automatically generated. - *

- * More specifically, when annotated file content is being processed, term occurrences with sufficient score - * will cause creation of corresponding term assignments to the file. - * - * @deprecated This configuration is currently not used. + * Score threshold for a term occurrence for it to be saved into the repository. */ - @Deprecated @NotNull - String termAssignmentMinScore; + private String termOccurrenceMinScore = Constants.SCORE_THRESHOLD.toString(); /** - * Score threshold for a term occurrence for it to be saved into the repository. + * Maximum of the prefix and suffix of a text quote selector. */ - @NotNull - String termOccurrenceMinScore = Constants.SCORE_THRESHOLD.toString(); + @Min(8) + private int textQuoteSelectorContextLength = 32; public String getUrl() { return url; @@ -564,14 +593,6 @@ public void setUrl(String url) { this.url = url; } - public String getTermAssignmentMinScore() { - return termAssignmentMinScore; - } - - public void setTermAssignmentMinScore(String termAssignmentMinScore) { - this.termAssignmentMinScore = termAssignmentMinScore; - } - public String getTermOccurrenceMinScore() { return termOccurrenceMinScore; } @@ -579,14 +600,23 @@ public String getTermOccurrenceMinScore() { public void setTermOccurrenceMinScore(String termOccurrenceMinScore) { this.termOccurrenceMinScore = termOccurrenceMinScore; } + + public int getTextQuoteSelectorContextLength() { + return textQuoteSelectorContextLength; + } + + public void setTextQuoteSelectorContextLength(int textQuoteSelectorContextLength) { + this.textQuoteSelectorContextLength = textQuoteSelectorContextLength; + } } + @Validated public static class Glossary { /** * IRI path to append to vocabulary IRI to get glossary identifier. */ @NotNull - String fragment; + private String fragment; public String getFragment() { return fragment; @@ -597,12 +627,13 @@ public void setFragment(String fragment) { } } + @Validated public static class PublicView { /** - * Unmapped properties allowed to appear in the SKOS export. + * Unmapped properties allowed to appear in the public term access API. */ @NotNull - private Set whiteListProperties; + private Set whiteListProperties = Set.of(); public Set getWhiteListProperties() { return whiteListProperties; @@ -613,6 +644,7 @@ public void setWhiteListProperties(final Set whiteListProperties) { } } + @Validated public static class Workspace { /** @@ -635,6 +667,7 @@ public void setAllVocabulariesEditable(boolean allVocabulariesEditable) { } } + @Validated public static class Cors { /** * A comma-separated list of allowed origins for CORS. @@ -668,8 +701,10 @@ public void setAllowedOriginPatterns(String allowedOriginPatterns) { } } + @Validated public static class Schedule { + @Valid private Cron cron = new Cron(); public Cron getCron() { @@ -682,6 +717,7 @@ public void setCron(Cron cron) { public static class Cron { + @Valid private Notification notification = new Notification(); public Notification getNotification() { @@ -711,6 +747,7 @@ public void setComments(String comments) { } } + @Validated public static class Mail { /** @@ -730,15 +767,16 @@ public void setSender(String sender) { /** * Configuration for initialization of new {@link cz.cvut.kbss.termit.model.acl.AccessControlList}s. */ + @Validated public static class ACL { /** - * Default access level for users in editor role. + * Default access level for users in the editor role. */ private AccessLevel defaultEditorAccessLevel = AccessLevel.READ; /** - * Default access level for users in editor role. + * Default access level for users in the reader role. */ private AccessLevel defaultReaderAccessLevel = AccessLevel.READ; @@ -759,6 +797,7 @@ public void setDefaultReaderAccessLevel(AccessLevel defaultReaderAccessLevel) { } } + @Validated public static class Security { public enum ProviderType { @@ -766,7 +805,7 @@ public enum ProviderType { } /** - * Determines whether an internal security mechanism or an external OIDC service will be used for + * Determines whether the internal security mechanism or an external OIDC service will be used for * authentication. *

* In case na OIDC service is selected, it should be configured using standard Spring Boot OAuth2 properties. @@ -797,6 +836,7 @@ public void setRoleClaim(String roleClaim) { } } + @Validated public static class Language { /** @@ -805,13 +845,15 @@ public static class Language { * The file must be in Turtle format. The term definitions must use SKOS terminology for attributes (prefLabel, * scopeNote and broader/narrower). */ + @Valid private LanguageSource types = new LanguageSource(); /** - * Path to a file containing definition of the language of states terms can be in with. The file must be in + * Path to a file containing definition of the language of states terms can be in. The file must be in * Turtle format. The term definitions must use SKOS terminology for attributes (prefLabel, scopeNote and * broader/narrower). */ + @Valid private LanguageSource states = new LanguageSource(); public LanguageSource getTypes() { diff --git a/src/test/java/cz/cvut/kbss/termit/environment/config/TestServiceConfig.java b/src/test/java/cz/cvut/kbss/termit/environment/config/TestServiceConfig.java index bb0cc6221..41874802e 100644 --- a/src/test/java/cz/cvut/kbss/termit/environment/config/TestServiceConfig.java +++ b/src/test/java/cz/cvut/kbss/termit/environment/config/TestServiceConfig.java @@ -25,6 +25,7 @@ import cz.cvut.kbss.termit.model.selector.Selector; import cz.cvut.kbss.termit.service.document.html.DummySelectorGenerator; import cz.cvut.kbss.termit.service.document.html.HtmlSelectorGenerators; +import cz.cvut.kbss.termit.util.Configuration; import org.aspectj.lang.Aspects; import org.jsoup.nodes.Element; import org.springframework.boot.test.context.TestConfiguration; @@ -76,8 +77,8 @@ public LocalValidatorFactoryBean validatorFactoryBean() { @Bean @Primary - public HtmlSelectorGenerators htmlSelectorGenerators() { - return new HtmlSelectorGenerators() { + public HtmlSelectorGenerators htmlSelectorGenerators(Configuration configuration) { + return new HtmlSelectorGenerators(configuration) { @Override public Set generateSelectors(Element... elements) { return Collections.singleton(new DummySelectorGenerator().generateSelector(elements)); diff --git a/src/test/java/cz/cvut/kbss/termit/model/assignment/TermOccurrenceTest.java b/src/test/java/cz/cvut/kbss/termit/model/assignment/TermOccurrenceTest.java index 306890e8c..5066b6d95 100644 --- a/src/test/java/cz/cvut/kbss/termit/model/assignment/TermOccurrenceTest.java +++ b/src/test/java/cz/cvut/kbss/termit/model/assignment/TermOccurrenceTest.java @@ -18,13 +18,18 @@ package cz.cvut.kbss.termit.model.assignment; import cz.cvut.kbss.termit.environment.Generator; +import cz.cvut.kbss.termit.util.Vocabulary; import org.junit.jupiter.api.Test; import java.net.URI; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.endsWith; +import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.startsWith; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; class TermOccurrenceTest { @@ -44,4 +49,22 @@ void resolveContextSupportsSourceIdentifiersEndingWithSlash() { assertThat(ctx.toString(), startsWith(source.toString())); assertThat(ctx.toString(), endsWith("/" + TermOccurrence.CONTEXT_SUFFIX)); } + + @Test + void markSuggestedAddsSuggestedTypeToTypes() { + final TermOccurrence sut = new TermFileOccurrence(); + sut.markSuggested(); + assertTrue(sut.isSuggested()); + assertThat(sut.getTypes(), hasItem(Vocabulary.s_c_navrzeny_vyskyt_termu)); + } + + @Test + void markApprovedRemovesSuggestedTypeFromTypes() { + final TermOccurrence sut = new TermFileOccurrence(); + sut.addType(Vocabulary.s_c_navrzeny_vyskyt_termu); + assertTrue(sut.isSuggested()); + sut.markApproved(); + assertFalse(sut.isSuggested()); + assertThat(sut.getTypes(), not(hasItem(Vocabulary.s_c_navrzeny_vyskyt_termu))); + } } diff --git a/src/test/java/cz/cvut/kbss/termit/persistence/dao/BaseDaoTest.java b/src/test/java/cz/cvut/kbss/termit/persistence/dao/BaseDaoTest.java index ebc0bafe9..f677393f3 100644 --- a/src/test/java/cz/cvut/kbss/termit/persistence/dao/BaseDaoTest.java +++ b/src/test/java/cz/cvut/kbss/termit/persistence/dao/BaseDaoTest.java @@ -171,6 +171,25 @@ void getReferenceReturnsEmptyOptionalWhenNoMatchingInstanceExists() { assertFalse(result.isPresent()); } + @Test + void detachDetachesInstanceFromPersistenceContext() { + final Term term = Generator.generateTermWithId(); + transactional(() -> sut.persist(term)); + transactional(() -> { + final Optional instance = sut.find(term.getUri()); + assertTrue(instance.isPresent()); + assertTrue(em.contains(instance.get())); + sut.detach(instance.get()); + assertFalse(em.contains(instance.get())); + }); + } + + @Test + void detachDoesNothingWhenEntityIsNotManaged() { + final Term term = Generator.generateTermWithId(); + assertDoesNotThrow(() -> sut.detach(term)); + } + private static class BaseDaoImpl extends BaseDao { BaseDaoImpl(EntityManager em) { diff --git a/src/test/java/cz/cvut/kbss/termit/persistence/dao/ResourceDaoTest.java b/src/test/java/cz/cvut/kbss/termit/persistence/dao/ResourceDaoTest.java index fe22efecf..4a96ae7c4 100644 --- a/src/test/java/cz/cvut/kbss/termit/persistence/dao/ResourceDaoTest.java +++ b/src/test/java/cz/cvut/kbss/termit/persistence/dao/ResourceDaoTest.java @@ -236,9 +236,9 @@ void detachDetachesInstanceFromPersistenceContext() { transactional(() -> { final Resource toDetach = sut.find(resource.getUri()).get(); - assertTrue(sut.em.contains(toDetach)); + assertTrue(em.contains(toDetach)); sut.detach(toDetach); - assertFalse(sut.em.contains(toDetach)); + assertFalse(em.contains(toDetach)); }); } @@ -247,9 +247,9 @@ void detachDoesNothingForNonManagedInstance() { final Resource resource = Generator.generateResourceWithId(); transactional(() -> { - assertFalse(sut.em.contains(resource)); + assertFalse(em.contains(resource)); sut.detach(resource); - assertFalse(sut.em.contains(resource)); + assertFalse(em.contains(resource)); }); } diff --git a/src/test/java/cz/cvut/kbss/termit/persistence/dao/TermOccurrenceDaoTest.java b/src/test/java/cz/cvut/kbss/termit/persistence/dao/TermOccurrenceDaoTest.java index c3bbef884..4bb2356b4 100644 --- a/src/test/java/cz/cvut/kbss/termit/persistence/dao/TermOccurrenceDaoTest.java +++ b/src/test/java/cz/cvut/kbss/termit/persistence/dao/TermOccurrenceDaoTest.java @@ -21,6 +21,7 @@ import cz.cvut.kbss.jopa.model.JOPAPersistenceProperties; import cz.cvut.kbss.jopa.model.descriptors.Descriptor; import cz.cvut.kbss.jopa.model.descriptors.EntityDescriptor; +import cz.cvut.kbss.jopa.model.query.TypedQuery; import cz.cvut.kbss.termit.dto.assignment.TermOccurrences; import cz.cvut.kbss.termit.environment.Environment; import cz.cvut.kbss.termit.environment.Generator; @@ -31,6 +32,7 @@ import cz.cvut.kbss.termit.model.resource.Document; import cz.cvut.kbss.termit.model.resource.File; import cz.cvut.kbss.termit.model.selector.TextQuoteSelector; +import cz.cvut.kbss.termit.persistence.dao.util.ScheduledContextRemover; import cz.cvut.kbss.termit.util.Vocabulary; import org.eclipse.rdf4j.model.ValueFactory; import org.eclipse.rdf4j.model.vocabulary.RDFS; @@ -59,6 +61,9 @@ class TermOccurrenceDaoTest extends BaseDaoTestRunner { @Autowired private EntityManager em; + @Autowired + private ScheduledContextRemover contextRemover; + @Autowired private TermOccurrenceDao sut; @@ -159,7 +164,7 @@ void findAllInFileReturnsTermOccurrencesWithTargetFile() { final Map> allOccurrences = generateOccurrences(false, fOne, fTwo); final List matching = allOccurrences.values().stream().flatMap( l -> l.stream().filter(to -> to.getTarget().getSource().equals(fOne.getUri()))) - .collect(Collectors.toList()); + .toList(); em.getEntityManagerFactory().getCache().evictAll(); final List result = sut.findAllTargeting(fOne); @@ -251,7 +256,10 @@ void removeAllRemovesSuggestedAndConfirmedOccurrences() { to.removeType(Vocabulary.s_c_navrzeny_vyskyt_termu); em.merge(to); }))); - transactional(() -> sut.removeAll(file)); + transactional(() -> { + sut.removeAll(file); + contextRemover.runContextRemoval(); + }); assertTrue(sut.findAllTargeting(file).isEmpty()); assertFalse(em.createNativeQuery("ASK { ?x a ?termOccurrence . }", Boolean.class).setParameter("termOccurrence", URI.create( @@ -270,7 +278,10 @@ void removeAllRemovesAlsoOccurrenceTargets() { to.removeType(Vocabulary.s_c_navrzeny_vyskyt_termu); em.merge(to); }))); - transactional(() -> sut.removeAll(file)); + transactional(() -> { + sut.removeAll(file); + contextRemover.runContextRemoval(); + }); assertFalse(em.createNativeQuery("ASK { ?x a ?target . }", Boolean.class).setParameter("target", URI.create( @@ -497,4 +508,30 @@ void getOccurrenceInfoByTermReturnsDistinguishableFileAndDefinitionalOccurrences } } } + + @Test + void updateSavesTermOccurrenceInContext() { + final File file = Generator.generateFileWithId(FILE_LABEL); + transactional(() -> em.persist(file)); + final TermOccurrence occurrence = new TermFileOccurrence(Generator.generateUri(), + new FileOccurrenceTarget(file)); + occurrence.getTarget().setSelectors(Collections.singleton(new TextQuoteSelector("test"))); + + transactional(() -> sut.persist(occurrence)); + assertTrue(em.createNativeQuery("ASK WHERE { GRAPH ?g { ?x a ?occurrence ; ?hasTerm ?term .} }", Boolean.class) + .setParameter("g", TermOccurrence.resolveContext(file.getUri())) + .setParameter("x", occurrence.getUri()) + .setParameter("occurrence", URI.create(Vocabulary.s_c_souborovy_vyskyt_termu)) + .setParameter("hasTerm", URI.create(Vocabulary.s_p_je_prirazenim_termu)) + .getSingleResult()); + final URI newTermUri = Generator.generateUri(); + occurrence.setTerm(newTermUri); + transactional(() -> sut.update(occurrence)); + assertTrue(em.createNativeQuery("ASK WHERE { GRAPH ?g { ?x a ?occurrence ; ?hasTerm ?term .} }", Boolean.class) + .setParameter("g", TermOccurrence.resolveContext(file.getUri())) + .setParameter("x", occurrence.getUri()) + .setParameter("occurrence", URI.create(Vocabulary.s_c_souborovy_vyskyt_termu)) + .setParameter("hasTerm", URI.create(Vocabulary.s_p_je_prirazenim_termu)) + .getSingleResult()); + } } diff --git a/src/test/java/cz/cvut/kbss/termit/persistence/dao/util/ScheduledContextRemoverTest.java b/src/test/java/cz/cvut/kbss/termit/persistence/dao/util/ScheduledContextRemoverTest.java new file mode 100644 index 000000000..cb2a78a92 --- /dev/null +++ b/src/test/java/cz/cvut/kbss/termit/persistence/dao/util/ScheduledContextRemoverTest.java @@ -0,0 +1,48 @@ +package cz.cvut.kbss.termit.persistence.dao.util; + +import cz.cvut.kbss.jopa.model.EntityManager; +import cz.cvut.kbss.jopa.vocabulary.RDFS; +import cz.cvut.kbss.termit.environment.Generator; +import cz.cvut.kbss.termit.persistence.dao.BaseDaoTestRunner; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; + +import java.net.URI; +import java.util.HashSet; +import java.util.Set; + +import static org.junit.jupiter.api.Assertions.assertFalse; + +class ScheduledContextRemoverTest extends BaseDaoTestRunner { + + @Autowired + private EntityManager em; + + @Autowired + private ScheduledContextRemover sut; + + @Test + void runContextRemovalDropsContextsRegisteredForRemoval() { + final Set graphs = generateGraphs(); + graphs.forEach(sut::scheduleForRemoval); + + sut.runContextRemoval(); + graphs.forEach(g -> assertFalse( + em.createNativeQuery("ASK { ?g ?y ?z . }", Boolean.class).setParameter("g", g).getSingleResult())); + } + + private Set generateGraphs() { + final Set result = new HashSet<>(); + transactional(() -> { + for (int i = 0; i < 5; i++) { + final URI graphUri = Generator.generateUri(); + em.createNativeQuery("INSERT DATA { GRAPH ?g { ?g a ?type } }", Void.class) + .setParameter("g", graphUri) + .setParameter("type", URI.create(RDFS.RESOURCE)) + .executeUpdate(); + result.add(graphUri); + } + }); + return result; + } +} diff --git a/src/test/java/cz/cvut/kbss/termit/rest/ResourceControllerTest.java b/src/test/java/cz/cvut/kbss/termit/rest/ResourceControllerTest.java index 9f2d466c7..c6aa208cf 100644 --- a/src/test/java/cz/cvut/kbss/termit/rest/ResourceControllerTest.java +++ b/src/test/java/cz/cvut/kbss/termit/rest/ResourceControllerTest.java @@ -31,6 +31,7 @@ import cz.cvut.kbss.termit.rest.handler.ErrorInfo; import cz.cvut.kbss.termit.service.IdentifierResolver; import cz.cvut.kbss.termit.service.business.ResourceService; +import cz.cvut.kbss.termit.service.document.ResourceRetrievalSpecification; import cz.cvut.kbss.termit.service.document.util.TypeAwareFileSystemResource; import cz.cvut.kbss.termit.util.Configuration; import cz.cvut.kbss.termit.util.Constants; @@ -58,6 +59,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -154,7 +156,7 @@ void getContentReturnsContentOfRequestedFile() throws Exception { .thenReturn(file.getUri()); when(resourceServiceMock.findRequired(file.getUri())).thenReturn(file); final java.io.File content = createTemporaryHtmlFile(); - when(resourceServiceMock.getContent(file)) + when(resourceServiceMock.getContent(eq(file), any(ResourceRetrievalSpecification.class))) .thenReturn(new TypeAwareFileSystemResource(content, MediaType.TEXT_HTML_VALUE)); final MvcResult mvcResult = mockMvc .perform(get(PATH + "/" + FILE_NAME + "/content")) @@ -162,6 +164,7 @@ void getContentReturnsContentOfRequestedFile() throws Exception { final String resultContent = mvcResult.getResponse().getContentAsString(); assertEquals(HTML_CONTENT, resultContent); assertEquals(MediaType.TEXT_HTML_VALUE, mvcResult.getResponse().getHeader(HttpHeaders.CONTENT_TYPE)); + verify(resourceServiceMock).getContent(file, new ResourceRetrievalSpecification(Optional.empty(), false)); } private static java.io.File createTemporaryHtmlFile() throws Exception { @@ -344,7 +347,7 @@ void hasContentChecksForContentExistenceInService() throws Exception { when(resourceServiceMock.findRequired(file.getUri())).thenReturn(file); when(resourceServiceMock.hasContent(file)).thenReturn(true); final java.io.File content = createTemporaryHtmlFile(); - when(resourceServiceMock.getContent(file)) + when(resourceServiceMock.getContent(eq(file), any(ResourceRetrievalSpecification.class))) .thenReturn(new TypeAwareFileSystemResource(content, MediaType.TEXT_HTML_VALUE)); mockMvc.perform(head(PATH + "/" + FILE_NAME + "/content").param(QueryParams.NAMESPACE, RESOURCE_NAMESPACE)) .andExpect(status().isNoContent()); @@ -358,7 +361,7 @@ void hasContentReturnsMimeType() throws Exception { when(resourceServiceMock.findRequired(file.getUri())).thenReturn(file); when(resourceServiceMock.hasContent(file)).thenReturn(true); final java.io.File content = createTemporaryHtmlFile(); - when(resourceServiceMock.getContent(file)) + when(resourceServiceMock.getContent(eq(file), any(ResourceRetrievalSpecification.class))) .thenReturn(new TypeAwareFileSystemResource(content, MediaType.TEXT_HTML_VALUE)); mockMvc.perform(head(PATH + "/" + FILE_NAME + "/content") .param(QueryParams.NAMESPACE, RESOURCE_NAMESPACE)) @@ -374,7 +377,7 @@ void getContentSupportsReturningContentAsAttachment() throws Exception { .thenReturn(file.getUri()); when(resourceServiceMock.findRequired(file.getUri())).thenReturn(file); final java.io.File content = createTemporaryHtmlFile(); - when(resourceServiceMock.getContent(file)) + when(resourceServiceMock.getContent(eq(file), any(ResourceRetrievalSpecification.class))) .thenReturn(new TypeAwareFileSystemResource(content, MediaType.TEXT_HTML_VALUE)); final MvcResult mvcResult = mockMvc .perform(get(PATH + "/" + FILE_NAME + "/content").param("attachment", Boolean.toString(true))) @@ -415,7 +418,7 @@ void getContentWithTimestampReturnsContentOfRequestedFileAtSpecifiedTimestamp() when(resourceServiceMock.findRequired(file.getUri())).thenReturn(file); final java.io.File content = createTemporaryHtmlFile(); final Instant at = Utils.timestamp().truncatedTo(ChronoUnit.SECONDS); - when(resourceServiceMock.getContent(eq(file), any(Instant.class))) + when(resourceServiceMock.getContent(eq(file), any(ResourceRetrievalSpecification.class))) .thenReturn(new TypeAwareFileSystemResource(content, MediaType.TEXT_HTML_VALUE)); final MvcResult mvcResult = mockMvc .perform(get(PATH + "/" + FILE_NAME + "/content") @@ -424,7 +427,7 @@ void getContentWithTimestampReturnsContentOfRequestedFileAtSpecifiedTimestamp() final String resultContent = mvcResult.getResponse().getContentAsString(); assertEquals(HTML_CONTENT, resultContent); assertEquals(MediaType.TEXT_HTML_VALUE, mvcResult.getResponse().getHeader(HttpHeaders.CONTENT_TYPE)); - verify(resourceServiceMock).getContent(file, at); + verify(resourceServiceMock).getContent(file, new ResourceRetrievalSpecification(Optional.of(at), false)); } /** @@ -440,4 +443,24 @@ void updateResourceHandlesDeserializationOfDocumentFromJsonLd() throws Exception .andExpect(status().isNoContent()); verify(resourceServiceMock).update(document); } + + @Test + void getContentWithoutUnconfirmedOccurrencesReturnsContentOfRequestedFileAtWithoutUnconfirmedTermOccurrences() + throws Exception { + final File file = generateFile(); + when(identifierResolverMock.resolveIdentifier(any(), eq(FILE_NAME))) + .thenReturn(file.getUri()); + when(resourceServiceMock.findRequired(file.getUri())).thenReturn(file); + final java.io.File content = createTemporaryHtmlFile(); + when(resourceServiceMock.getContent(eq(file), any(ResourceRetrievalSpecification.class))) + .thenReturn(new TypeAwareFileSystemResource(content, MediaType.TEXT_HTML_VALUE)); + final MvcResult mvcResult = mockMvc + .perform(get(PATH + "/" + FILE_NAME + "/content") + .queryParam("withoutUnconfirmedOccurrences", Boolean.toString(true))) + .andExpect(status().isOk()).andReturn(); + final String resultContent = mvcResult.getResponse().getContentAsString(); + assertEquals(HTML_CONTENT, resultContent); + assertEquals(MediaType.TEXT_HTML_VALUE, mvcResult.getResponse().getHeader(HttpHeaders.CONTENT_TYPE)); + verify(resourceServiceMock).getContent(file, new ResourceRetrievalSpecification(Optional.empty(), true)); + } } diff --git a/src/test/java/cz/cvut/kbss/termit/rest/TermOccurrenceControllerTest.java b/src/test/java/cz/cvut/kbss/termit/rest/TermOccurrenceControllerTest.java index 7976f034e..46bac49e5 100644 --- a/src/test/java/cz/cvut/kbss/termit/rest/TermOccurrenceControllerTest.java +++ b/src/test/java/cz/cvut/kbss/termit/rest/TermOccurrenceControllerTest.java @@ -18,7 +18,6 @@ package cz.cvut.kbss.termit.rest; import cz.cvut.kbss.termit.environment.Generator; -import cz.cvut.kbss.termit.exception.NotFoundException; import cz.cvut.kbss.termit.model.Term; import cz.cvut.kbss.termit.model.assignment.TermOccurrence; import cz.cvut.kbss.termit.service.IdentifierResolver; @@ -30,13 +29,17 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Answers; +import org.mockito.ArgumentCaptor; import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.springframework.http.MediaType; import java.net.URI; -import static org.mockito.Mockito.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.delete; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.put; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; @@ -67,13 +70,12 @@ void setUp() { @Test void approveOccurrenceApprovesTermOccurrenceViaService() throws Exception { - final TermOccurrence to = generateTermOccurrence(); - when(occurrenceService.getRequiredReference(OCCURRENCE_URI)).thenReturn(to); when(idResolverMock.resolveIdentifier(NAMESPACE, LOCAL_NAME)).thenReturn(OCCURRENCE_URI); - mockMvc.perform(put(TermOccurrenceController.PATH + "/" + LOCAL_NAME).queryParam(Constants.QueryParams.NAMESPACE, NAMESPACE)) - .andExpect(status().isNoContent()); - verify(occurrenceService).getRequiredReference(OCCURRENCE_URI); - verify(occurrenceService).approve(to); + mockMvc.perform( + put(TermOccurrenceController.PATH + "/" + LOCAL_NAME).queryParam(Constants.QueryParams.NAMESPACE, + NAMESPACE)) + .andExpect(status().is2xxSuccessful()); + verify(occurrenceService).approve(OCCURRENCE_URI); } private TermOccurrence generateTermOccurrence() { @@ -85,22 +87,24 @@ private TermOccurrence generateTermOccurrence() { } @Test - void approveOccurrenceReturnsNotFoundWhenOccurrenceIsNotFoundByService() throws Exception { - when(occurrenceService.getRequiredReference(OCCURRENCE_URI)).thenThrow(NotFoundException.class); + void removeOccurrenceRemovesTermOccurrenceViaService() throws Exception { when(idResolverMock.resolveIdentifier(NAMESPACE, LOCAL_NAME)).thenReturn(OCCURRENCE_URI); - mockMvc.perform(put(TermOccurrenceController.PATH + "/" + LOCAL_NAME).queryParam(Constants.QueryParams.NAMESPACE, NAMESPACE)) - .andExpect(status().isNotFound()); - verify(occurrenceService, never()).approve(any()); + mockMvc.perform( + delete(TermOccurrenceController.PATH + "/" + LOCAL_NAME).queryParam(Constants.QueryParams.NAMESPACE, + NAMESPACE)) + .andExpect(status().isNoContent()); + verify(occurrenceService).remove(OCCURRENCE_URI); } @Test - void removeOccurrenceRemovesTermOccurrenceViaService() throws Exception { + void saveOccurrenceSavesSpecifiedTermOccurrence() throws Exception { final TermOccurrence to = generateTermOccurrence(); - when(occurrenceService.getRequiredReference(OCCURRENCE_URI)).thenReturn(to); - when(idResolverMock.resolveIdentifier(NAMESPACE, LOCAL_NAME)).thenReturn(OCCURRENCE_URI); - mockMvc.perform(delete(TermOccurrenceController.PATH + "/" + LOCAL_NAME).queryParam(Constants.QueryParams.NAMESPACE, NAMESPACE)) + mockMvc.perform(put(TermOccurrenceController.PATH).content(toJson(to)).contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isNoContent()); - verify(occurrenceService).getRequiredReference(OCCURRENCE_URI); - verify(occurrenceService).remove(to); + final ArgumentCaptor captor = ArgumentCaptor.forClass(TermOccurrence.class); + verify(occurrenceService).persistOrUpdate(captor.capture()); + assertEquals(to.getUri(), captor.getValue().getUri()); + assertEquals(to.getTerm(), captor.getValue().getTerm()); + assertEquals(to.getTarget().getSource(), captor.getValue().getTarget().getSource()); } } diff --git a/src/test/java/cz/cvut/kbss/termit/service/business/ResourceServiceTest.java b/src/test/java/cz/cvut/kbss/termit/service/business/ResourceServiceTest.java index 64d20d4e1..9c02130a7 100644 --- a/src/test/java/cz/cvut/kbss/termit/service/business/ResourceServiceTest.java +++ b/src/test/java/cz/cvut/kbss/termit/service/business/ResourceServiceTest.java @@ -17,6 +17,7 @@ */ package cz.cvut.kbss.termit.service.business; +import cz.cvut.kbss.termit.environment.Environment; import cz.cvut.kbss.termit.environment.Generator; import cz.cvut.kbss.termit.event.DocumentRenameEvent; import cz.cvut.kbss.termit.event.FileRenameEvent; @@ -31,20 +32,27 @@ import cz.cvut.kbss.termit.model.resource.File; import cz.cvut.kbss.termit.model.resource.Resource; import cz.cvut.kbss.termit.service.document.DocumentManager; +import cz.cvut.kbss.termit.service.document.ResourceRetrievalSpecification; import cz.cvut.kbss.termit.service.document.TextAnalysisService; +import cz.cvut.kbss.termit.service.export.util.TypeAwareByteArrayResource; import cz.cvut.kbss.termit.service.repository.ChangeRecordService; import cz.cvut.kbss.termit.service.repository.ResourceRepositoryService; +import cz.cvut.kbss.termit.util.TypeAwareResource; import cz.cvut.kbss.termit.util.Utils; +import org.jsoup.Jsoup; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.*; import org.mockito.junit.jupiter.MockitoExtension; import org.springframework.context.ApplicationEventPublisher; +import org.springframework.http.MediaType; import org.springframework.transaction.TransactionSystemException; import java.io.ByteArrayInputStream; +import java.io.InputStream; import java.net.URI; +import java.nio.charset.StandardCharsets; import java.time.Instant; import java.util.*; @@ -118,14 +126,15 @@ void removeEnsuresAttributesForDocumentManagerArePresent() { @Test void getContentLoadsContentOfFileFromDocumentManager() { final File file = Generator.generateFileWithId("test.html"); - sut.getContent(file); + sut.getContent(file, new ResourceRetrievalSpecification(Optional.empty(), false)); verify(documentManager).getAsResource(file); } @Test void getContentThrowsUnsupportedAssetOperationWhenResourceIsNotFile() { final Resource resource = Generator.generateResourceWithId(); - assertThrows(UnsupportedAssetOperationException.class, () -> sut.getContent(resource)); + assertThrows(UnsupportedAssetOperationException.class, + () -> sut.getContent(resource, new ResourceRetrievalSpecification(Optional.empty(), false))); verify(documentManager, never()).getAsResource(any()); } @@ -180,7 +189,7 @@ void runTextAnalysisInvokesTextAnalysisWithVocabularyRelatedToFilesDocument() { void runTextAnalysisThrowsUnsupportedAssetOperationWhenResourceIsNotFile() { final Resource resource = Generator.generateResourceWithId(); assertThrows(UnsupportedAssetOperationException.class, - () -> sut.runTextAnalysis(resource, Collections.emptySet())); + () -> sut.runTextAnalysis(resource, Collections.emptySet())); verify(textAnalysisService, never()).analyzeFile(any(), anySet()); } @@ -188,7 +197,7 @@ void runTextAnalysisThrowsUnsupportedAssetOperationWhenResourceIsNotFile() { void runTextAnalysisThrowsUnsupportedAssetOperationWhenFileHasNoVocabularyAndNoVocabulariesAreSpecifiedEither() { final File file = Generator.generateFileWithId("test.html"); assertThrows(UnsupportedAssetOperationException.class, - () -> sut.runTextAnalysis(file, Collections.emptySet())); + () -> sut.runTextAnalysis(file, Collections.emptySet())); verify(textAnalysisService, never()).analyzeFile(any(), anySet()); } @@ -474,7 +483,23 @@ void removeThrowsAssetRemovalExceptionWhenNonEmptyDocumentIsRemoved() { void getContentAtTimestampLoadsContentOfFileAtTimestampFromDocumentManager() { final File file = Generator.generateFileWithId("test.hml"); final Instant at = Utils.timestamp(); - sut.getContent(file, at); + sut.getContent(file, new ResourceRetrievalSpecification(Optional.of(at), false)); verify(documentManager).getAsResource(file, at); } + + @Test + void getContentWithoutUnconfirmedOccurrencesRemovesUnconfirmedOccurrencesFromFileContentBeforeReturningIt() + throws Exception { + final File file = Generator.generateFileWithId("test.hml"); + TypeAwareResource content; + try (final InputStream is = Environment.loadFile("data/rdfa-simple.html")) { + content = new TypeAwareByteArrayResource(is.readAllBytes(), MediaType.TEXT_HTML_VALUE, ".html"); + } + when(documentManager.getAsResource(file)).thenReturn(content); + + final TypeAwareResource result = sut.getContent(file, + new ResourceRetrievalSpecification(Optional.empty(), true)); + final org.jsoup.nodes.Document doc = Jsoup.parse(result.getInputStream(), StandardCharsets.UTF_8.name(), ""); + assertTrue(doc.select("span[score]").isEmpty()); + } } diff --git a/src/test/java/cz/cvut/kbss/termit/service/business/TermServiceTest.java b/src/test/java/cz/cvut/kbss/termit/service/business/TermServiceTest.java index bb0923e80..a7d26dde4 100644 --- a/src/test/java/cz/cvut/kbss/termit/service/business/TermServiceTest.java +++ b/src/test/java/cz/cvut/kbss/termit/service/business/TermServiceTest.java @@ -380,7 +380,7 @@ void setTermDefinitionReplacesExistingTermDefinition() { sut.setTermDefinitionSource(term, definitionSource); assertEquals(term.getUri(), definitionSource.getTerm()); - verify(termOccurrenceRepositoryService).remove(existingSource); + verify(termOccurrenceRepositoryService).remove(existingSource.getUri()); verify(termOccurrenceRepositoryService).persist(definitionSource); } @@ -494,14 +494,12 @@ void updateInvokesTextAnalysisOnAllTermsInTermsVocabularyWhenLabelHasChanged() { void removeTermDefinitionSourceRemovesOccurrenceRepresentingSourceOfDefinitionOfSpecifiedTerm() { final Term term = generateTermWithId(); final TermDefinitionSource defSource = new TermDefinitionSource(term.getUri(), - new FileOccurrenceTarget( - Generator.generateFileWithId( - "test.html"))); + new FileOccurrenceTarget(Generator.generateFileWithId("test.html"))); defSource.setUri(Generator.generateUri()); term.setDefinitionSource(defSource); sut.removeTermDefinitionSource(term); - verify(termOccurrenceRepositoryService).remove(defSource); + verify(termOccurrenceRepositoryService).remove(defSource.getUri()); } @Test diff --git a/src/test/java/cz/cvut/kbss/termit/service/document/AnnotationGeneratorTest.java b/src/test/java/cz/cvut/kbss/termit/service/document/AnnotationGeneratorTest.java index b770fd143..e94fca73f 100644 --- a/src/test/java/cz/cvut/kbss/termit/service/document/AnnotationGeneratorTest.java +++ b/src/test/java/cz/cvut/kbss/termit/service/document/AnnotationGeneratorTest.java @@ -20,6 +20,7 @@ import cz.cvut.kbss.jopa.model.EntityManager; import cz.cvut.kbss.jopa.model.MultilingualString; import cz.cvut.kbss.jopa.model.descriptors.Descriptor; +import cz.cvut.kbss.jopa.model.descriptors.EntityDescriptor; import cz.cvut.kbss.termit.environment.Generator; import cz.cvut.kbss.termit.exception.AnnotationGenerationException; import cz.cvut.kbss.termit.model.Term; @@ -386,15 +387,14 @@ void generateAnnotationsCreatesTermOccurrenceWhenItHasExistingSelectorButReferen transactional(() -> { em.persist(t); em.persist(otherTerm); - em.persist(to); + em.persist(to, new EntityDescriptor(to.resolveContext())); }); final InputStream content = loadFile("data/rdfa-simple.html"); generateFile(); sut.generateAnnotations(content, file); final List allOccurrences = termOccurrenceDao.findAllTargeting(file); - assertEquals(2, allOccurrences.size()); - assertTrue(allOccurrences.stream().anyMatch(o -> o.getTerm().equals(otherTerm.getUri()))); - assertTrue(allOccurrences.stream().anyMatch(o -> o.getTerm().equals(term.getUri()))); + assertEquals(1, allOccurrences.size()); + assertEquals(term.getUri(), allOccurrences.get(0).getTerm()); } @Test @@ -406,8 +406,9 @@ void repeatedAnnotationGenerationDoesNotIncreaseTotalNumberOfTermOccurrencesForR final List occurrencesTwo = termOccurrenceDao.findAllTargeting(file); assertEquals(occurrencesOne.size(), occurrencesTwo.size()); final int instanceCount = em.createNativeQuery("SELECT (count(*) as ?count) WHERE {" + - "?x a ?termOccurrence ." + + "GRAPH ?g { ?x a ?termOccurrence . }" + "}", Integer.class) + .setParameter("g", TermOccurrence.resolveContext(file.getUri())) .setParameter("termOccurrence", URI.create(Vocabulary.s_c_vyskyt_termu)) .getSingleResult(); assertEquals(occurrencesTwo.size(), instanceCount); diff --git a/src/test/java/cz/cvut/kbss/termit/service/document/SynchronousTermOccurrenceSaverTest.java b/src/test/java/cz/cvut/kbss/termit/service/document/SynchronousTermOccurrenceSaverTest.java new file mode 100644 index 000000000..c335e27b5 --- /dev/null +++ b/src/test/java/cz/cvut/kbss/termit/service/document/SynchronousTermOccurrenceSaverTest.java @@ -0,0 +1,42 @@ +package cz.cvut.kbss.termit.service.document; + +import cz.cvut.kbss.termit.environment.Generator; +import cz.cvut.kbss.termit.model.Term; +import cz.cvut.kbss.termit.model.assignment.TermOccurrence; +import cz.cvut.kbss.termit.model.resource.File; +import cz.cvut.kbss.termit.persistence.dao.TermOccurrenceDao; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InOrder; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.util.List; + +import static org.mockito.Mockito.inOrder; + +@ExtendWith(MockitoExtension.class) +class SynchronousTermOccurrenceSaverTest { + + @Mock + private TermOccurrenceDao occurrenceDao; + + @InjectMocks + private SynchronousTermOccurrenceSaver sut; + + @Test + void saveOccurrencesRemovesAllExistingOccurrencesAndPersistsSpecifiedOnes() { + final Term t = Generator.generateTermWithId(); + final File asset = Generator.generateFileWithId("test.html"); + final List occurrences = List.of( + Generator.generateTermOccurrence(t, asset, true), + Generator.generateTermOccurrence(t, asset, true) + ); + sut.saveOccurrences(occurrences, asset); + + final InOrder inOrder = inOrder(occurrenceDao); + inOrder.verify(occurrenceDao).removeAll(asset); + occurrences.forEach(to -> inOrder.verify(occurrenceDao).persist(to)); + } +} diff --git a/src/test/java/cz/cvut/kbss/termit/service/document/html/HtmlTermOccurrenceResolverTest.java b/src/test/java/cz/cvut/kbss/termit/service/document/html/HtmlTermOccurrenceResolverTest.java index 6cff00955..627a24feb 100644 --- a/src/test/java/cz/cvut/kbss/termit/service/document/html/HtmlTermOccurrenceResolverTest.java +++ b/src/test/java/cz/cvut/kbss/termit/service/document/html/HtmlTermOccurrenceResolverTest.java @@ -18,12 +18,19 @@ package cz.cvut.kbss.termit.service.document.html; import cz.cvut.kbss.termit.environment.Generator; +import cz.cvut.kbss.termit.model.Term; import cz.cvut.kbss.termit.model.assignment.TermOccurrence; import cz.cvut.kbss.termit.model.resource.Document; import cz.cvut.kbss.termit.model.resource.File; +import cz.cvut.kbss.termit.model.selector.Selector; +import cz.cvut.kbss.termit.model.selector.TextPositionSelector; +import cz.cvut.kbss.termit.model.selector.TextQuoteSelector; import cz.cvut.kbss.termit.service.document.DocumentManager; import cz.cvut.kbss.termit.service.repository.TermRepositoryService; import cz.cvut.kbss.termit.util.Configuration; +import cz.cvut.kbss.termit.util.Vocabulary; +import org.jsoup.Jsoup; +import org.jsoup.select.Elements; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.InjectMocks; @@ -34,12 +41,22 @@ import java.io.InputStream; import java.net.URI; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Optional; +import java.util.Set; import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.endsWith; import static org.hamcrest.Matchers.greaterThan; -import static org.junit.jupiter.api.Assertions.*; +import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.startsWith; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.when; @ExtendWith(MockitoExtension.class) @@ -56,7 +73,7 @@ class HtmlTermOccurrenceResolverTest { @SuppressWarnings("unused") @Spy - private HtmlSelectorGenerators selectorGenerators = new HtmlSelectorGenerators(); + private HtmlSelectorGenerators selectorGenerators = new HtmlSelectorGenerators(config); @Mock private DocumentManager documentManager; @@ -94,8 +111,7 @@ void supportsReturnsTrueForHtmlFileWithoutExtension() { @Test void findTermOccurrencesExtractsAlsoScoreFromRdfa() { when(termService.exists(TERM_URI)).thenReturn(true); - final File file = new File(); - file.setLabel("rdfa-simple.html"); + final File file = initFile(); final InputStream is = cz.cvut.kbss.termit.environment.Environment.loadFile("data/rdfa-simple.html"); sut.parseContent(is, file); final List result = sut.findTermOccurrences(); @@ -105,11 +121,17 @@ void findTermOccurrencesExtractsAlsoScoreFromRdfa() { }); } + private static File initFile() { + final File file = new File(); + file.setLabel("rdfa-simple.html"); + file.setUri(URI.create(Vocabulary.s_c_soubor + "/" + file.getLabel())); + return file; + } + @Test void findTermOccurrencesHandlesRdfaWithoutScore() { when(termService.exists(TERM_URI)).thenReturn(true); - final File file = new File(); - file.setLabel("rdfa-simple.html"); + final File file = initFile(); final InputStream is = cz.cvut.kbss.termit.environment.Environment.loadFile("data/rdfa-simple-no-score.html"); sut.parseContent(is, file); final List result = sut.findTermOccurrences(); @@ -119,8 +141,7 @@ void findTermOccurrencesHandlesRdfaWithoutScore() { @Test void findTermOccurrencesHandlesInvalidScoreInRdfa() { when(termService.exists(TERM_URI)).thenReturn(true); - final File file = new File(); - file.setLabel("rdfa-simple.html"); + final File file = initFile(); final InputStream is = cz.cvut.kbss.termit.environment.Environment .loadFile("data/rdfa-simple-invalid-score.html"); sut.parseContent(is, file); @@ -132,4 +153,47 @@ void findTermOccurrencesHandlesInvalidScoreInRdfa() { void supportsReturnsTrueForTerm() { assertTrue(sut.supports(Generator.generateTermWithId())); } + + @Test + void findTermOccurrencesGeneratesOccurrenceUriBasedOnAnnotationAbout() { + when(termService.exists(TERM_URI)).thenReturn(true); + final File file = initFile(); + final InputStream is = cz.cvut.kbss.termit.environment.Environment.loadFile("data/rdfa-simple.html"); + sut.parseContent(is, file); + final List result = sut.findTermOccurrences(); + assertEquals(1, result.size()); + assertThat(result.get(0).getUri().toString(), startsWith(file.getUri() + "/" + TermOccurrence.CONTEXT_SUFFIX)); + assertThat(result.get(0).getUri().toString(), endsWith("1")); + } + + @Test + void findTermOccurrencesMarksOccurrencesAsSuggested() { + when(termService.exists(TERM_URI)).thenReturn(true); + final File file = initFile(); + final InputStream is = cz.cvut.kbss.termit.environment.Environment.loadFile("data/rdfa-simple.html"); + sut.parseContent(is, file); + final List result = sut.findTermOccurrences(); + result.forEach(to -> assertThat(to.getTypes(), hasItem(Vocabulary.s_c_navrzeny_vyskyt_termu))); + } + + @Test + void findTermOccurrencesSetsFoundOccurrencesAsApprovedWhenCorrespondingExistingOccurrenceWasApproved() throws Exception { + when(termService.exists(TERM_URI)).thenReturn(true); + final File file = initFile(); + final TermOccurrence existing = Generator.generateTermOccurrence(new Term(TERM_URI), file, false); + final Selector quoteSelector = new TextQuoteSelector("Územní plán", "RDFa simple", "hlavního města Prahy."); + final Selector posSelector = new TextPositionSelector(21, 32); + existing.getTarget().setSelectors(Set.of(quoteSelector, posSelector)); + final InputStream is = cz.cvut.kbss.termit.environment.Environment.loadFile("data/rdfa-simple.html"); + sut.parseContent(is, file); + sut.setExistingOccurrences(List.of(existing)); + + final List result = sut.findTermOccurrences(); + assertEquals(1, result.size()); + assertThat(result.get(0).getTypes(), not(hasItem(Vocabulary.s_c_navrzeny_vyskyt_termu))); + final org.jsoup.nodes.Document document = Jsoup.parse(sut.getContent(), StandardCharsets.UTF_8.name(), ""); + final Elements annotations = document.select("span[about]"); + assertEquals(1, annotations.size()); + assertFalse(annotations.get(0).hasAttr("score")); + } } diff --git a/src/test/java/cz/cvut/kbss/termit/service/document/html/TextQuoteSelectorGeneratorTest.java b/src/test/java/cz/cvut/kbss/termit/service/document/html/TextQuoteSelectorGeneratorTest.java index ac88220b8..809953279 100644 --- a/src/test/java/cz/cvut/kbss/termit/service/document/html/TextQuoteSelectorGeneratorTest.java +++ b/src/test/java/cz/cvut/kbss/termit/service/document/html/TextQuoteSelectorGeneratorTest.java @@ -25,11 +25,14 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import static cz.cvut.kbss.termit.service.document.html.TextQuoteSelectorGenerator.CONTEXT_LENGTH; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; class TextQuoteSelectorGeneratorTest { + private static final int CONTEXT_LENGTH = 32; + private TextQuoteSelectorGenerator sut; private Document document; @@ -37,7 +40,7 @@ class TextQuoteSelectorGeneratorTest { @BeforeEach void setUp() { this.document = new Document(""); - this.sut = new TextQuoteSelectorGenerator(); + this.sut = new TextQuoteSelectorGenerator(CONTEXT_LENGTH); } @Test diff --git a/src/test/java/cz/cvut/kbss/termit/service/document/html/UnconfirmedTermOccurrenceRemoverTest.java b/src/test/java/cz/cvut/kbss/termit/service/document/html/UnconfirmedTermOccurrenceRemoverTest.java new file mode 100644 index 000000000..994228be5 --- /dev/null +++ b/src/test/java/cz/cvut/kbss/termit/service/document/html/UnconfirmedTermOccurrenceRemoverTest.java @@ -0,0 +1,42 @@ +package cz.cvut.kbss.termit.service.document.html; + +import cz.cvut.kbss.termit.environment.Environment; +import cz.cvut.kbss.termit.service.export.util.TypeAwareByteArrayResource; +import cz.cvut.kbss.termit.util.TypeAwareResource; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.junit.jupiter.api.Test; +import org.springframework.http.MediaType; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class UnconfirmedTermOccurrenceRemoverTest { + + @Test + void removeUnconfirmedOccurrencesReturnsContentWithoutSpansWithScoreThatIndicateOccurrenceIsUnconfirmed() + throws Exception { + TypeAwareResource input; + try (final InputStream is = Environment.loadFile("data/rdfa-simple.html")) { + input = new TypeAwareByteArrayResource(is.readAllBytes(), MediaType.TEXT_HTML_VALUE, ".html"); + } + final TypeAwareResource result = new UnconfirmedTermOccurrenceRemover().removeUnconfirmedOccurrences(input); + final Document doc = Jsoup.parse(result.getInputStream(), StandardCharsets.UTF_8.name(), ""); + assertTrue(doc.select("span[score]").isEmpty()); + } + + @Test + void removeUnconfirmedOccurrencesPreservesSpansWithoutScoreRepresentingConfirmedOccurrences() throws Exception { + TypeAwareResource input; + try (final InputStream is = Environment.loadFile("data/rdfa-simple-no-score.html")) { + input = new TypeAwareByteArrayResource(is.readAllBytes(), MediaType.TEXT_HTML_VALUE, ".html"); + } + final TypeAwareResource result = new UnconfirmedTermOccurrenceRemover().removeUnconfirmedOccurrences(input); + final Document doc = Jsoup.parse(result.getInputStream(), StandardCharsets.UTF_8.name(), ""); + assertFalse( + doc.select("span[resource='http://onto.fel.cvut.cz/ontologies/mpp/domains/uzemni-plan']").isEmpty()); + } +} diff --git a/src/test/java/cz/cvut/kbss/termit/service/repository/TermOccurrenceRepositoryServiceTest.java b/src/test/java/cz/cvut/kbss/termit/service/repository/TermOccurrenceRepositoryServiceTest.java index 98850015e..4bdc9825c 100644 --- a/src/test/java/cz/cvut/kbss/termit/service/repository/TermOccurrenceRepositoryServiceTest.java +++ b/src/test/java/cz/cvut/kbss/termit/service/repository/TermOccurrenceRepositoryServiceTest.java @@ -17,60 +17,122 @@ */ package cz.cvut.kbss.termit.service.repository; -import cz.cvut.kbss.jopa.model.EntityManager; -import cz.cvut.kbss.termit.environment.Environment; import cz.cvut.kbss.termit.environment.Generator; +import cz.cvut.kbss.termit.exception.ValidationException; import cz.cvut.kbss.termit.model.Term; -import cz.cvut.kbss.termit.model.User; import cz.cvut.kbss.termit.model.assignment.FileOccurrenceTarget; import cz.cvut.kbss.termit.model.assignment.TermDefinitionSource; +import cz.cvut.kbss.termit.model.assignment.TermFileOccurrence; +import cz.cvut.kbss.termit.model.assignment.TermOccurrence; import cz.cvut.kbss.termit.model.resource.File; import cz.cvut.kbss.termit.model.selector.TextQuoteSelector; -import cz.cvut.kbss.termit.service.BaseServiceTestRunner; -import org.junit.jupiter.api.BeforeEach; +import cz.cvut.kbss.termit.persistence.dao.TermOccurrenceDao; import org.junit.jupiter.api.Test; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.test.annotation.DirtiesContext; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.ArgumentCaptor; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; import java.util.Collections; +import java.util.Optional; +import java.util.Set; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; -@DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD) -class TermOccurrenceRepositoryServiceTest extends BaseServiceTestRunner { +@ExtendWith(MockitoExtension.class) +class TermOccurrenceRepositoryServiceTest { - @Autowired - private EntityManager em; + @Mock + private TermOccurrenceDao dao; - @Autowired + @Mock + private TermRepositoryService termService; + + @Mock + private ResourceRepositoryService resourceService; + + @InjectMocks private TermOccurrenceRepositoryService sut; - private User user; + @Test + void persistOccurrenceSavesSpecifiedOccurrenceIntoRepository() { + final Term term = Generator.generateTermWithId(); + when(termService.exists(term.getUri())).thenReturn(true); + final File resource = Generator.generateFileWithId("test.html"); + when(resourceService.exists(resource.getUri())).thenReturn(true); + final TermDefinitionSource definitionSource = new TermDefinitionSource(term.getUri(), + new FileOccurrenceTarget(resource)); + definitionSource.getTarget().setSelectors(Collections.singleton(new TextQuoteSelector("test"))); - @BeforeEach - void setUp() { - this.user = Generator.generateUserWithId(); - transactional(() -> em.persist(user)); - Environment.setCurrentUser(user); + sut.persist(definitionSource); + verify(dao).persist(definitionSource); } @Test - void persistOccurrenceSavesSpecifiedOccurrenceIntoRepository() { + void persistThrowsValidationExceptionWhenReferencedTermDoesNotExist() { + final File resource = Generator.generateFileWithId("test.html"); + final TermOccurrence occurrence = new TermFileOccurrence(Generator.generateUri(), + new FileOccurrenceTarget(resource)); + occurrence.getTarget().setSelectors(Set.of(new TextQuoteSelector("test text"))); + + final ValidationException ex = assertThrows(ValidationException.class, () -> sut.persist(occurrence)); + assertThat(ex.getMessage(), containsString("references an unknown term")); + assertThat(ex.getMessage(), containsString(occurrence.getTerm().toString())); + } + + @Test + void persistThrowsValidationExceptionWhenTargetAssetDoesNotExist() { + final Term term = Generator.generateTermWithId(); + when(termService.exists(term.getUri())).thenReturn(true); + final TermOccurrence occurrence = new TermFileOccurrence(term.getUri(), new FileOccurrenceTarget()); + occurrence.getTarget().setSource(Generator.generateUri()); + occurrence.getTarget().setSelectors(Set.of(new TextQuoteSelector("test text"))); + + final ValidationException ex = assertThrows(ValidationException.class, () -> sut.persist(occurrence)); + assertThat(ex.getMessage(), containsString("references an unknown asset")); + assertThat(ex.getMessage(), containsString(occurrence.getTarget().getSource().toString())); + } + + @Test + void persistOrUpdatePersistsOccurrenceWhenItDoesNotExist() { final Term term = Generator.generateTermWithId(); + when(termService.exists(term.getUri())).thenReturn(true); final File resource = Generator.generateFileWithId("test.html"); - transactional(() -> { - em.persist(term); - em.persist(resource); - }); + when(resourceService.exists(resource.getUri())).thenReturn(true); final TermDefinitionSource definitionSource = new TermDefinitionSource(term.getUri(), - new FileOccurrenceTarget(resource)); + new FileOccurrenceTarget(resource)); definitionSource.getTarget().setSelectors(Collections.singleton(new TextQuoteSelector("test"))); - transactional(() -> sut.persist(definitionSource)); - final TermDefinitionSource result = em.find(TermDefinitionSource.class, definitionSource.getUri()); - assertNotNull(result); - assertEquals(term.getUri(), result.getTerm()); - assertEquals(resource.getUri(), result.getTarget().getSource()); + sut.persistOrUpdate(definitionSource); + verify(dao).persist(definitionSource); + } + + @Test + void persistOrUpdateSetsTermOnExistingOccurrenceWhenItExists() { + final Term originalTerm = Generator.generateTermWithId(); + final Term newTerm = Generator.generateTermWithId(); + when(termService.exists(newTerm.getUri())).thenReturn(true); + final File resource = Generator.generateFileWithId("test.html"); + final TermDefinitionSource original = new TermDefinitionSource(originalTerm.getUri(), + new FileOccurrenceTarget(resource)); + original.getTarget().setSelectors(Collections.singleton(new TextQuoteSelector("test"))); + original.setUri(Generator.generateUri()); + final TermDefinitionSource update = new TermDefinitionSource(newTerm.getUri(), + new FileOccurrenceTarget(resource)); + original.getTarget().setSelectors(Collections.singleton(new TextQuoteSelector("test"))); + update.setUri(original.getUri()); + when(dao.exists(original.getUri())).thenReturn(true); + when(dao.find(original.getUri())).thenReturn(Optional.of(original)); + + sut.persistOrUpdate(update); + final ArgumentCaptor captor = ArgumentCaptor.forClass(TermOccurrence.class); + verify(dao).update(captor.capture()); + assertEquals(newTerm.getUri(), captor.getValue().getTerm()); } } diff --git a/src/test/resources/application.yml b/src/test/resources/application.yml index 3d73082a4..5a95845a1 100644 --- a/src/test/resources/application.yml +++ b/src/test/resources/application.yml @@ -30,7 +30,6 @@ termit: storage: /tmp/termit textAnalysis: url: http://localhost/annotace - termAssignmentMinScore: 1 termOccurrenceMinScore: 0.49 comments: context: http://onto.fel.cvut.cz/ontologies/komentare diff --git a/src/test/resources/data/rdfa-simple.html b/src/test/resources/data/rdfa-simple.html index 121a41c26..e57ed4f0a 100644 --- a/src/test/resources/data/rdfa-simple.html +++ b/src/test/resources/data/rdfa-simple.html @@ -6,6 +6,6 @@ Územní plán hlavního města Prahy. + typeof="termit:výskyt-termu">Územní plán hlavního města Prahy. - \ No newline at end of file + diff --git a/src/test/resources/rdf4j-memory-rdfs.ttl b/src/test/resources/rdf4j-memory-rdfs.ttl index 05da956eb..52439df5c 100644 --- a/src/test/resources/rdf4j-memory-rdfs.ttl +++ b/src/test/resources/rdf4j-memory-rdfs.ttl @@ -1,29 +1,18 @@ @prefix rdfs: . -@prefix rep: . -@prefix sr: . -@prefix sail: . -@prefix cgqi: . -@prefix spin: . -@prefix ms: . -@prefix sp: . -@prefix sb: . +@prefix config: . - -[] a rep:Repository ; - rep:repositoryID "mem:termit" ; +[] a config:Repository ; + config:rep.id "mem:termit" ; rdfs:label "Memory store with RDFS support" ; - rep:repositoryImpl [ - rep:repositoryType "openrdf:SailRepository" ; - sr:sailImpl [ - sail:sailType "rdf4j:SchemaCachingRDFSInferencer" ; - sail:delegate [ - sail:sailType "openrdf:DedupingInferencer" ; - sail:delegate [ - sail:sailType "openrdf:MemoryStore" ; - sail:iterationCacheSyncThreshold "10000"; - ms:persist false ; - sb:evaluationStrategyFactory "org.eclipse.rdf4j.query.algebra.evaluation.impl.StrictEvaluationStrategyFactory" - ] - ] - ] -]. + config:rep.impl [ + config:rep.type "openrdf:SailRepository" ; + config:sail.impl [ + config:sail.type "rdf4j:SchemaCachingRDFSInferencer"; + config:delegate [ + config:sail.type "openrdf:MemoryStore" ; + config:sail.iterationCacheSyncThreshold "10000"; + config:mem.persist false; + config:sail.evaluationStrategyFactory "org.eclipse.rdf4j.query.algebra.evaluation.impl.StrictEvaluationStrategyFactory" + ]; + ] + ].