From f6ecc941968cfd3c8eeecb8af08ac60443703795 Mon Sep 17 00:00:00 2001 From: ndc-dxc <162444006+ndc-dxc@users.noreply.github.com> Date: Sat, 23 Nov 2024 19:34:34 +0100 Subject: [PATCH] Req_tracing (#149) * harvester will stop for fatal errors * adds accepts header for url check * adding mock stats * adding mock stats * added committed at to the HarvesterRun * added committed while harvesting * collecting stats during harvester --- build.gradle | 3 +- .../ndc/integration/BaseIntegrationTest.java | 5 + .../ndc/controller/SemanticAssetStats.java | 35 +++++++ .../controller/SemanticAssetsController.java | 29 ++++++ .../event/HarvesterUpdateCommitDateEvent.java | 13 +++ ...arvesterCommitDateUpdateEventListener.java | 42 +++++++++ .../harvester/AgencyRepositoryService.java | 16 +++- .../context/HarvestExecutionContextUtils.java | 18 ++++ .../BaseSemanticAssetHarvester.java | 10 +- .../ControlledVocabularyHarvester.java | 15 ++- .../harvesters/OntologyHarvester.java | 14 ++- .../harvester/harvesters/SchemaHarvester.java | 14 ++- .../harvester/model/HarvesterStatsHolder.java | 12 +++ .../harvester/model/SemanticAssetModel.java | 3 + .../SemanticAssetModelValidationContext.java | 28 ++++++ .../BaseSemanticAssetPathProcessor.java | 28 +++++- .../ControlledVocabularyPathProcessor.java | 7 +- .../SemanticAssetPathProcessor.java | 3 +- .../service/HarvesterRunService.java | 15 ++- .../service/SemanticContentStatsService.java | 91 +++++++++++++++++++ .../service/startupjob/StartupJobsRunner.java | 3 +- .../startupjob/UpdateRevisionCommittedAt.java | 79 ++++++++++++++++ .../ndc/harvester/util/GitUtils.java | 90 ++++++++++++++++++ .../ndc/model/harvester/HarvesterRun.java | 13 +++ .../model/harvester/SemanticContentStats.java | 25 +++++ .../service/logging/NDCHarvesterLogger.java | 5 + .../logging/NDCHarvesterLoggerUtils.java | 2 +- .../resources/application-local.properties | 2 +- src/main/resources/application.properties | 2 + .../resources/db/migration/V8__tracing.sql | 2 + .../migration/V9__semantic_content_stats.sql | 13 +++ .../AgencyRepositoryServiceTest.java | 9 +- .../BaseSemanticAssetHarvesterTest.java | 11 ++- .../ControlledVocabularyHarvesterTest.java | 4 + .../harvesters/OntologyHarvesterTest.java | 3 + .../harvesters/SchemaHarvesterTest.java | 3 + ...ControlledVocabularyPathProcessorTest.java | 3 + .../OntologyPathProcessorTest.java | 3 + .../SchemaPathProcessorTest.java | 4 +- .../SemanticAssetPathProcessorTest.java | 3 + 40 files changed, 646 insertions(+), 34 deletions(-) create mode 100644 src/main/java/it/gov/innovazione/ndc/controller/SemanticAssetStats.java create mode 100644 src/main/java/it/gov/innovazione/ndc/eventhandler/event/HarvesterUpdateCommitDateEvent.java create mode 100644 src/main/java/it/gov/innovazione/ndc/eventhandler/handler/HarvesterCommitDateUpdateEventListener.java create mode 100644 src/main/java/it/gov/innovazione/ndc/harvester/model/HarvesterStatsHolder.java create mode 100644 src/main/java/it/gov/innovazione/ndc/harvester/service/SemanticContentStatsService.java create mode 100644 src/main/java/it/gov/innovazione/ndc/harvester/service/startupjob/UpdateRevisionCommittedAt.java create mode 100644 src/main/java/it/gov/innovazione/ndc/model/harvester/SemanticContentStats.java create mode 100644 src/main/resources/db/migration/V8__tracing.sql create mode 100644 src/main/resources/db/migration/V9__semantic_content_stats.sql diff --git a/build.gradle b/build.gradle index 21fa04b5..6acd9c7f 100644 --- a/build.gradle +++ b/build.gradle @@ -175,6 +175,7 @@ jacocoTestCoverageVerification { 'it.gov.innovazione.ndc.eventhandler.*', 'it.gov.innovazione.ndc.harvester.service.RepositoryService', 'it.gov.innovazione.ndc.harvester.service.HarvesterRunService', + 'it.gov.innovazione.ndc.harvester.service.SemanticContentStatsService', 'it.gov.innovazione.ndc.harvester.SecurityUtils', 'it.gov.innovazione.ndc.*Exception', 'it.gov.innovazione.ndc.harvester.context.*', @@ -195,7 +196,7 @@ jacocoTestCoverageVerification { 'it.gov.innovazione.ndc.repository.TripleStoreRepository', 'it.gov.innovazione.ndc.service.EventCleaner', 'it.gov.innovazione.ndc.service.TemplateService', - 'it.gov.innovazione.ndc.harvester.service.startupjob.TempEraserStartupJob', + 'it.gov.innovazione.ndc.harvester.service.startupjob.*', 'it.gov.innovazione.ndc.alerter.*' ] } diff --git a/src/integration/java/it/gov/innovazione/ndc/integration/BaseIntegrationTest.java b/src/integration/java/it/gov/innovazione/ndc/integration/BaseIntegrationTest.java index 59e76b11..2a778b51 100644 --- a/src/integration/java/it/gov/innovazione/ndc/integration/BaseIntegrationTest.java +++ b/src/integration/java/it/gov/innovazione/ndc/integration/BaseIntegrationTest.java @@ -5,6 +5,7 @@ import it.gov.innovazione.ndc.harvester.HarvesterService; import it.gov.innovazione.ndc.harvester.model.index.SemanticAssetMetadata; import it.gov.innovazione.ndc.harvester.service.RepositoryService; +import it.gov.innovazione.ndc.harvester.service.SemanticContentStatsService; import it.gov.innovazione.ndc.harvester.service.startupjob.StartupJobsRunner; import it.gov.innovazione.ndc.repository.TripleStoreProperties; import org.elasticsearch.client.RestClient; @@ -66,6 +67,9 @@ public class BaseIntegrationTest { @SpyBean RepositoryService repositoryService; + @SpyBean + SemanticContentStatsService semanticContentStatsService; + @Autowired TripleStoreProperties virtuosoProps; @@ -96,6 +100,7 @@ private void dataIsHarvested() throws IOException { doReturn(cloneDir).when(agencyRepositoryService).cloneRepo(REPO_URL, null); doNothing().when(agencyRepositoryService).removeClonedRepo(cloneDir); doNothing().when(repositoryService).storeRightsHolders(any(), any()); + doNothing().when(semanticContentStatsService).saveStats(); harvesterService.harvest(asRepo(REPO_URL)); diff --git a/src/main/java/it/gov/innovazione/ndc/controller/SemanticAssetStats.java b/src/main/java/it/gov/innovazione/ndc/controller/SemanticAssetStats.java new file mode 100644 index 00000000..bce6bcb1 --- /dev/null +++ b/src/main/java/it/gov/innovazione/ndc/controller/SemanticAssetStats.java @@ -0,0 +1,35 @@ +package it.gov.innovazione.ndc.controller; + +import lombok.Builder; +import lombok.Data; + +import java.math.BigDecimal; +import java.math.RoundingMode; + +@Data +@Builder +public class SemanticAssetStats { + private final SemanticAssetTypeStats totalStats; + private final SemanticAssetTypeStats controlledVocabularyStats; + private final SemanticAssetTypeStats ontologyStats; + private final SemanticAssetTypeStats schemaStats; + + @Data + @Builder + public static class SemanticAssetTypeStats { + private final long current; + private final long lastYear; + + public long getIncrementOverLastYear() { + return current - lastYear; + } + + public double getIncrementPercentageOverLastYear() { + BigDecimal bigDecimal = BigDecimal.valueOf( + lastYear == 0 + ? 0 + : ((double) current - lastYear) / lastYear * 100); + return bigDecimal.setScale(1, RoundingMode.HALF_UP).doubleValue(); + } + } +} diff --git a/src/main/java/it/gov/innovazione/ndc/controller/SemanticAssetsController.java b/src/main/java/it/gov/innovazione/ndc/controller/SemanticAssetsController.java index ea46a96a..15044b9f 100644 --- a/src/main/java/it/gov/innovazione/ndc/controller/SemanticAssetsController.java +++ b/src/main/java/it/gov/innovazione/ndc/controller/SemanticAssetsController.java @@ -56,6 +56,35 @@ List getRightsHolders() { } + @Operation(tags = {"semantic-assets"}, + summary = "Retrieves the statistics", + description = "Retrieves the statistics of the semantic assets.", + operationId = "getStats", + responses = {@ApiResponse(responseCode = "200", description = "OK", content = { + @Content(mediaType = "application/json", schema = @Schema(implementation = SemanticAssetStats.class)) + })}) + @GetMapping(value = "/semantic-assets/stats", produces = {"application/json"}) + SemanticAssetStats getStats() { + return SemanticAssetStats.builder() + .totalStats(SemanticAssetStats.SemanticAssetTypeStats.builder() + .current(118) + .lastYear(115) + .build()) + .controlledVocabularyStats(SemanticAssetStats.SemanticAssetTypeStats.builder() + .current(120) + .lastYear(118) + .build()) + .ontologyStats(SemanticAssetStats.SemanticAssetTypeStats.builder() + .current(100) + .lastYear(98) + .build()) + .schemaStats(SemanticAssetStats.SemanticAssetTypeStats.builder() + .current(80) + .lastYear(80) + .build()) + .build(); + } + @Override public ResponseEntity search( String q, diff --git a/src/main/java/it/gov/innovazione/ndc/eventhandler/event/HarvesterUpdateCommitDateEvent.java b/src/main/java/it/gov/innovazione/ndc/eventhandler/event/HarvesterUpdateCommitDateEvent.java new file mode 100644 index 00000000..3dbedf90 --- /dev/null +++ b/src/main/java/it/gov/innovazione/ndc/eventhandler/event/HarvesterUpdateCommitDateEvent.java @@ -0,0 +1,13 @@ +package it.gov.innovazione.ndc.eventhandler.event; + +import lombok.Builder; +import lombok.Data; + +import java.time.Instant; + +@Builder +@Data +public class HarvesterUpdateCommitDateEvent { + private final String runId; + private final Instant commitDate; +} diff --git a/src/main/java/it/gov/innovazione/ndc/eventhandler/handler/HarvesterCommitDateUpdateEventListener.java b/src/main/java/it/gov/innovazione/ndc/eventhandler/handler/HarvesterCommitDateUpdateEventListener.java new file mode 100644 index 00000000..d5ebce36 --- /dev/null +++ b/src/main/java/it/gov/innovazione/ndc/eventhandler/handler/HarvesterCommitDateUpdateEventListener.java @@ -0,0 +1,42 @@ +package it.gov.innovazione.ndc.eventhandler.handler; + +import it.gov.innovazione.ndc.eventhandler.NdcEventHandler; +import it.gov.innovazione.ndc.eventhandler.NdcEventWrapper; +import it.gov.innovazione.ndc.eventhandler.event.HarvesterUpdateCommitDateEvent; +import it.gov.innovazione.ndc.harvester.service.HarvesterRunService; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; + +import java.util.Collection; +import java.util.List; +import java.util.Objects; + +@Service +@RequiredArgsConstructor +@Slf4j +public class HarvesterCommitDateUpdateEventListener implements NdcEventHandler { + + private static final Collection> SUPPORTED_EVENTS = List.of(HarvesterUpdateCommitDateEvent.class); + + private final HarvesterRunService harvesterRunService; + + @Override + public boolean canHandle(NdcEventWrapper event) { + return SUPPORTED_EVENTS.contains(event.getPayload().getClass()); + } + + @Override + public void handle(NdcEventWrapper event) { + HarvesterUpdateCommitDateEvent payload = (HarvesterUpdateCommitDateEvent) event.getPayload(); + if (Objects.isNull(payload) || Objects.isNull(payload.getCommitDate())) { + log.warn("Received invalid HarvesterUpdateCommitDateEvent: {}", payload); + return; + } + harvesterRunService.getAllRuns().stream() + .filter(harvesterRun -> payload.getRunId().equals(harvesterRun.getId())) + .findAny() + .map(harvesterRun -> harvesterRun.withRevisionCommittedAt(payload.getCommitDate())) + .ifPresent(harvesterRunService::updateHarvesterRunCommittedAt); + } +} diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/AgencyRepositoryService.java b/src/main/java/it/gov/innovazione/ndc/harvester/AgencyRepositoryService.java index 2dff9f7c..b780b790 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/AgencyRepositoryService.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/AgencyRepositoryService.java @@ -1,5 +1,7 @@ package it.gov.innovazione.ndc.harvester; +import it.gov.innovazione.ndc.eventhandler.NdcEventPublisher; +import it.gov.innovazione.ndc.eventhandler.event.HarvesterUpdateCommitDateEvent; import it.gov.innovazione.ndc.harvester.exception.InvalidAssetFolderException; import it.gov.innovazione.ndc.harvester.model.CvPath; import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; @@ -11,6 +13,7 @@ import it.gov.innovazione.ndc.harvester.util.GitUtils; import it.gov.innovazione.ndc.harvester.util.PropertiesUtils; import it.gov.innovazione.ndc.harvester.util.Version; +import it.gov.innovazione.ndc.model.harvester.HarvesterRun; import it.gov.innovazione.ndc.service.logging.HarvesterStage; import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.SneakyThrows; @@ -19,6 +22,7 @@ import java.io.IOException; import java.nio.file.Path; +import java.time.Instant; import java.util.Collections; import java.util.Comparator; import java.util.List; @@ -40,19 +44,22 @@ public class AgencyRepositoryService { private final ControlledVocabularyFolderScanner controlledVocabularyFolderScanner; private final SchemaFolderScanner schemaFolderScanner; private final List lowerSkipWords; + private final NdcEventPublisher eventPublisher; public AgencyRepositoryService(FileUtils fileUtils, GitUtils gitUtils, OntologyFolderScanner ontologyFolderScanner, ControlledVocabularyFolderScanner controlledVocabularyFolderScanner, SchemaFolderScanner schemaFolderScanner, - AgencyRepositoryServiceProperties agencyRepositoryServiceProperties) { + AgencyRepositoryServiceProperties agencyRepositoryServiceProperties, + NdcEventPublisher eventPublisher) { this.fileUtils = fileUtils; this.gitUtils = gitUtils; this.ontologyFolderScanner = ontologyFolderScanner; this.controlledVocabularyFolderScanner = controlledVocabularyFolderScanner; this.schemaFolderScanner = schemaFolderScanner; this.lowerSkipWords = PropertiesUtils.lowerSkipWords(agencyRepositoryServiceProperties.getSkipWords(), MIN_SKIP_WORD_LENGTH); + this.eventPublisher = eventPublisher; } public Path cloneRepo(String repoUrl) throws IOException { @@ -62,7 +69,12 @@ public Path cloneRepo(String repoUrl) throws IOException { public Path cloneRepo(String repoUrl, String revision) throws IOException { Path cloneDir = fileUtils.createTempDirectory(TEMP_DIR_PREFIX); log.info("Cloning repo {} @ revision {}, at location {}", repoUrl, revision, cloneDir); - gitUtils.cloneRepo(repoUrl, cloneDir.toFile(), revision); + Instant instant = gitUtils.cloneRepoAndGetLastCommitDate(repoUrl, cloneDir.toFile(), revision); + eventPublisher.publishEvent("harvester", "harvester.get.commit.date", null, "harvester", + HarvesterUpdateCommitDateEvent.builder() + .runId(HarvesterRun.getCurrentRunId()) + .commitDate(instant) + .build()); return cloneDir; } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/context/HarvestExecutionContextUtils.java b/src/main/java/it/gov/innovazione/ndc/harvester/context/HarvestExecutionContextUtils.java index d0bca59f..7b159c51 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/context/HarvestExecutionContextUtils.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/context/HarvestExecutionContextUtils.java @@ -1,11 +1,16 @@ package it.gov.innovazione.ndc.harvester.context; +import it.gov.innovazione.ndc.model.harvester.SemanticContentStats; import lombok.NoArgsConstructor; +import java.util.ArrayList; +import java.util.List; + @NoArgsConstructor(access = lombok.AccessLevel.PRIVATE) public class HarvestExecutionContextUtils { private static final ThreadLocal CONTEXT_HOLDER = new ThreadLocal<>(); + private static final ThreadLocal> SEMANTIC_CONTENT_STATS_HOLDER = ThreadLocal.withInitial(ArrayList::new); public static HarvestExecutionContext getContext() { return CONTEXT_HOLDER.get(); @@ -17,5 +22,18 @@ public static void setContext(HarvestExecutionContext context) { public static void clearContext() { CONTEXT_HOLDER.remove(); + clearSemanticContentStats(); + } + + public static List getSemanticContentStats() { + return SEMANTIC_CONTENT_STATS_HOLDER.get(); + } + + public static void addSemanticContentStat(SemanticContentStats stats) { + SEMANTIC_CONTENT_STATS_HOLDER.get().add(stats); + } + + public static void clearSemanticContentStats() { + SEMANTIC_CONTENT_STATS_HOLDER.get().clear(); } } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java index 06addfe7..88e622c7 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java @@ -13,9 +13,12 @@ import it.gov.innovazione.ndc.harvester.context.HarvestExecutionContextUtils; import it.gov.innovazione.ndc.harvester.exception.SinglePathProcessingException; import it.gov.innovazione.ndc.harvester.harvesters.utils.PathUtils; +import it.gov.innovazione.ndc.harvester.model.HarvesterStatsHolder; import it.gov.innovazione.ndc.harvester.model.Instance; import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; +import it.gov.innovazione.ndc.harvester.service.SemanticContentStatsService; import it.gov.innovazione.ndc.model.harvester.Repository; +import it.gov.innovazione.ndc.model.harvester.SemanticContentStats; import it.gov.innovazione.ndc.service.logging.HarvesterStage; import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.RequiredArgsConstructor; @@ -45,6 +48,7 @@ public abstract class BaseSemanticAssetHarvester

im private final SemanticAssetType type; private final NdcEventPublisher eventPublisher; private final ConfigService configService; + private final SemanticContentStatsService semanticContentStatsService; @Override public SemanticAssetType getType() { @@ -70,7 +74,8 @@ public void harvest(Repository repository, Path rootPath) { for (P path : paths) { try { - processPath(repository.getUrl(), path); + HarvesterStatsHolder harvesterStatsHolder = processPath(repository.getUrl(), path); + semanticContentStatsService.updateStats(harvesterStatsHolder); log.debug("Path {} processed correctly for {}", path, type); } catch (SinglePathProcessingException e) { boolean isInfrastuctureError = checkInfrastructureError(e); @@ -112,6 +117,7 @@ public void harvest(Repository repository, Path rootPath) { } } } + semanticContentStatsService.saveStats(); } private boolean checkInfrastructureError(SinglePathProcessingException e) { @@ -190,7 +196,7 @@ public void cleanUpBeforeHarvesting(String repoUrl, Instance instance) { // by default nothing specific } - protected abstract void processPath(String repoUrl, P path); + protected abstract HarvesterStatsHolder processPath(String repoUrl, P path); protected abstract List

scanForPaths(Path rootPath); } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvester.java b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvester.java index 63574e66..17e3563c 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvester.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvester.java @@ -5,8 +5,10 @@ import it.gov.innovazione.ndc.harvester.AgencyRepositoryService; import it.gov.innovazione.ndc.harvester.SemanticAssetType; import it.gov.innovazione.ndc.harvester.model.CvPath; +import it.gov.innovazione.ndc.harvester.model.HarvesterStatsHolder; import it.gov.innovazione.ndc.harvester.model.Instance; import it.gov.innovazione.ndc.harvester.pathprocessors.ControlledVocabularyPathProcessor; +import it.gov.innovazione.ndc.harvester.service.SemanticContentStatsService; import org.springframework.stereotype.Component; import java.nio.file.Path; @@ -17,15 +19,20 @@ public class ControlledVocabularyHarvester extends BaseSemanticAssetHarvester SemanticAssetMetadata tryExtractMe } } - protected void processWithModel(String repoUrl, P path, M model) { + protected HarvesterStatsHolder processWithModel(String repoUrl, P path, M model) { log.debug("Enriching model before persisting"); enrichModelBeforePersisting(model, path); - indexMetadataForSearch(model); + SemanticAssetModelValidationContext.ValidationContextStats statsBefore = getStats(model); + SemanticAssetMetadata meta = indexMetadataForSearch(model); + SemanticAssetModelValidationContext.ValidationContextStats statsAfter = getStats(model); persistModelToTripleStore(repoUrl, path, model); collectRightsHolderInContext(repoUrl, model); + return HarvesterStatsHolder.builder() + .metadata(meta) + .validationContextStats(SemanticAssetModelValidationContext.difference(statsBefore, statsAfter)) + .build(); + } + + private static SemanticAssetModelValidationContext.ValidationContextStats getStats(M model) { + return Optional.ofNullable(model) + .map(SemanticAssetModel::getValidationContext) + .map(SemanticAssetModelValidationContext.ValidationContextStats::of) + .orElse(SemanticAssetModelValidationContext.ValidationContextStats.empty()); } private void collectRightsHolderInContext(String repoUrl, M model) { @@ -79,7 +94,7 @@ protected void enrichModelBeforePersisting(M model, P path) { } @Override - public void process(String repoUrl, P path) { + public HarvesterStatsHolder process(String repoUrl, P path) { try { log.info("Processing path {}", path); @@ -90,8 +105,10 @@ public void process(String repoUrl, P path) { Resource resource = model.getMainResource(); log.info("Found resource {}", resource); - processWithModel(repoUrl, path, model); + HarvesterStatsHolder harvesterStatsHolder = processWithModel(repoUrl, path, model); log.info("Path {} processed", path); + + return harvesterStatsHolder; } catch (Exception e) { log.error("Error processing {}", path, e); if (e instanceof SinglePathProcessingException singlePathProcessingException) { @@ -113,7 +130,7 @@ public void process(String repoUrl, P path) { } } - private void indexMetadataForSearch(M model) { + private SemanticAssetMetadata indexMetadataForSearch(M model) { log.debug("Indexing {} for search", model.getMainResource()); SemanticAssetMetadata metadata = tryExtractMetadata(model); postProcessMetadata(metadata); @@ -125,6 +142,7 @@ private void indexMetadataForSearch(M model) { .harvesterStatus(HarvesterRun.Status.RUNNING) .additionalInfo("metadata", metadata) .build()); + return metadata; } catch (Exception e) { log.error("Error saving metadata for {}", model.getMainResource(), e); throw new SinglePathProcessingException("Cannot save metadata", e, true); diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessor.java b/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessor.java index 03f4a60c..2b4ee07c 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessor.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessor.java @@ -4,6 +4,7 @@ import it.gov.innovazione.ndc.harvester.csv.CsvParser.CsvData; import it.gov.innovazione.ndc.harvester.model.ControlledVocabularyModel; import it.gov.innovazione.ndc.harvester.model.CvPath; +import it.gov.innovazione.ndc.harvester.model.HarvesterStatsHolder; import it.gov.innovazione.ndc.harvester.model.Instance; import it.gov.innovazione.ndc.harvester.model.SemanticAssetModelFactory; import it.gov.innovazione.ndc.harvester.model.index.SemanticAssetMetadata; @@ -44,8 +45,8 @@ public ControlledVocabularyPathProcessor(TripleStoreRepository tripleStoreReposi } @Override - protected void processWithModel(String repoUrl, CvPath path, ControlledVocabularyModel model) { - super.processWithModel(repoUrl, path, model); + protected HarvesterStatsHolder processWithModel(String repoUrl, CvPath path, ControlledVocabularyModel model) { + HarvesterStatsHolder harvesterStatsHolder = super.processWithModel(repoUrl, path, model); path.getCsvPath().ifPresent(p -> { String keyConcept = model.getKeyConcept(); @@ -64,6 +65,8 @@ protected void processWithModel(String repoUrl, CvPath path, ControlledVocabular parseAndIndexCsv(vocabularyIdentifier, p); }); + + return harvesterStatsHolder; } @Override diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/SemanticAssetPathProcessor.java b/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/SemanticAssetPathProcessor.java index 0fcd1a5e..76e0fcd7 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/SemanticAssetPathProcessor.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/SemanticAssetPathProcessor.java @@ -1,7 +1,8 @@ package it.gov.innovazione.ndc.harvester.pathprocessors; +import it.gov.innovazione.ndc.harvester.model.HarvesterStatsHolder; import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; public interface SemanticAssetPathProcessor

{ - void process(String repoUrl, P path); + HarvesterStatsHolder process(String repoUrl, P path); } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/service/HarvesterRunService.java b/src/main/java/it/gov/innovazione/ndc/harvester/service/HarvesterRunService.java index e39635b7..f26b9842 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/service/HarvesterRunService.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/service/HarvesterRunService.java @@ -41,11 +41,12 @@ public int saveHarvesterRun(HarvesterRun harvesterRun) { + "REPOSITORY_URL, " + "INSTANCE, " + "REVISION, " + + "REVISION_COMMITTED_AT, " + "STARTED, " + "STARTED_BY, " + "FINISHED, " + "STATUS, " - + "REASON) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"; + + "REASON) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"; return jdbcTemplate.update(query, harvesterRun.getId(), harvesterRun.getCorrelationId(), @@ -53,6 +54,7 @@ public int saveHarvesterRun(HarvesterRun harvesterRun) { harvesterRun.getRepositoryUrl(), harvesterRun.getInstance(), harvesterRun.getRevision(), + harvesterRun.getRevisionCommittedAt(), harvesterRun.getStartedAt(), harvesterRun.getStartedBy(), harvesterRun.getEndedAt(), @@ -107,6 +109,15 @@ public int updateHarvesterRun(HarvesterRun harvesterRun) { harvesterRun.getId()); } + public int updateHarvesterRunCommittedAt(HarvesterRun harvesterRun) { + String query = "UPDATE HARVESTER_RUN SET " + + "REVISION_COMMITTED_AT = ? " + + "WHERE ID = ?"; + return jdbcTemplate.update(query, + harvesterRun.getRevisionCommittedAt(), + harvesterRun.getId()); + } + public Stream getRecentRuns(Long days) { return getAllRuns().stream() .filter(harvesterRun -> isMoreRecentThan(harvesterRun, days)); @@ -120,6 +131,7 @@ public List getAllRuns() { + "REPOSITORY_URL, " + "INSTANCE, " + "REVISION, " + + "REVISION_COMMITTED_AT, " + "STARTED, " + "STARTED_BY, " + "FINISHED, " @@ -135,6 +147,7 @@ public List getAllRuns() { .repositoryUrl(rs.getString("REPOSITORY_URL")) .instance(rs.getString("INSTANCE")) .revision(rs.getString("REVISION")) + .revisionCommittedAt(getInstant(rs, "REVISION_COMMITTED_AT")) .startedAt(getInstant(rs, "STARTED")) .startedBy(rs.getString("STARTED_BY")) .endedAt(getInstant(rs, "FINISHED")) diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/service/SemanticContentStatsService.java b/src/main/java/it/gov/innovazione/ndc/harvester/service/SemanticContentStatsService.java new file mode 100644 index 00000000..a345adde --- /dev/null +++ b/src/main/java/it/gov/innovazione/ndc/harvester/service/SemanticContentStatsService.java @@ -0,0 +1,91 @@ +package it.gov.innovazione.ndc.harvester.service; + +import com.fasterxml.jackson.databind.ObjectMapper; +import it.gov.innovazione.ndc.harvester.context.HarvestExecutionContext; +import it.gov.innovazione.ndc.harvester.context.HarvestExecutionContextUtils; +import it.gov.innovazione.ndc.harvester.model.HarvesterStatsHolder; +import it.gov.innovazione.ndc.model.harvester.SemanticContentStats; +import it.gov.innovazione.ndc.service.logging.LoggingContext; +import it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections4.CollectionUtils; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.stereotype.Service; + +import java.util.List; +import java.util.Objects; +import java.util.UUID; + +@Service +@Slf4j +@RequiredArgsConstructor +public class SemanticContentStatsService { + + private final JdbcTemplate jdbcTemplate; + private final ObjectMapper objectMapper; + + public void saveStats() { + List semanticContentStats = HarvestExecutionContextUtils.getSemanticContentStats(); + semanticContentStats.forEach(this::save); + HarvestExecutionContextUtils.clearSemanticContentStats(); + } + + public int save(SemanticContentStats semanticContentStats) { + String statement = "INSERT INTO SEMANTIC_CONTENT_STATS (" + + "ID, " + + "HARVESTER_RUN_ID, " + + "RESOURCE_URI, " + + "RESOURCE_TYPE, " + + "RIGHT_HOLDER, " + + "ISSUED_ON, " + + "MODIFIED_ON, " + + "HAS_ERRORS," + + "HAS_WARNINGS, " + + "STATUS ) " + + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"; + return jdbcTemplate.update(statement, + UUID.randomUUID().toString(), + semanticContentStats.getHarvesterRunId(), + semanticContentStats.getResourceUri(), + semanticContentStats.getResourceType().name(), + semanticContentStats.getRightHolder(), + semanticContentStats.getIssuedOn(), + semanticContentStats.getModifiedOn(), + semanticContentStats.isHasErrors(), + semanticContentStats.isHasWarnings(), + toJsonArray(semanticContentStats.getStatus())); + } + + private String toJsonArray(List status) { + try { + return objectMapper.writeValueAsString(CollectionUtils.emptyIfNull(status)); + } catch (Exception e) { + NDCHarvesterLogger.logApplicationError(LoggingContext.builder() + .message("Error converting status to json array") + .details(e.getMessage()) + .additionalInfo("status", status) + .build()); + log.error("Error converting status to json", e); + return "[]"; + } + } + + public void updateStats(HarvesterStatsHolder harvesterStatsHolder) { + HarvestExecutionContext context = HarvestExecutionContextUtils.getContext(); + if (Objects.nonNull(context)) { + SemanticContentStats stats = SemanticContentStats.builder() + .harvesterRunId(context.getRunId()) + .resourceUri(harvesterStatsHolder.getMetadata().getIri()) + .hasErrors(harvesterStatsHolder.getValidationContextStats().getErrors() > 0) + .hasWarnings(harvesterStatsHolder.getValidationContextStats().getWarnings() > 0) + .status(harvesterStatsHolder.getMetadata().getStatus()) + .rightHolder(harvesterStatsHolder.getMetadata().getAgencyId()) + .issuedOn(harvesterStatsHolder.getMetadata().getIssuedOn()) + .modifiedOn(harvesterStatsHolder.getMetadata().getModifiedOn()) + .resourceType(harvesterStatsHolder.getMetadata().getType()) + .build(); + HarvestExecutionContextUtils.addSemanticContentStat(stats); + } + } +} diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/service/startupjob/StartupJobsRunner.java b/src/main/java/it/gov/innovazione/ndc/harvester/service/startupjob/StartupJobsRunner.java index 6d96be82..87b6f68d 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/service/startupjob/StartupJobsRunner.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/service/startupjob/StartupJobsRunner.java @@ -1,6 +1,7 @@ package it.gov.innovazione.ndc.harvester.service.startupjob; import lombok.RequiredArgsConstructor; +import org.springframework.boot.context.event.ApplicationReadyEvent; import org.springframework.boot.context.event.ApplicationStartedEvent; import org.springframework.context.event.EventListener; import org.springframework.stereotype.Service; @@ -13,7 +14,7 @@ public class StartupJobsRunner { private final List startupJobs; - @EventListener(ApplicationStartedEvent.class) + @EventListener(ApplicationReadyEvent.class) public void run() { startupJobs.forEach(StartupJob::run); } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/service/startupjob/UpdateRevisionCommittedAt.java b/src/main/java/it/gov/innovazione/ndc/harvester/service/startupjob/UpdateRevisionCommittedAt.java new file mode 100644 index 00000000..99c9a72f --- /dev/null +++ b/src/main/java/it/gov/innovazione/ndc/harvester/service/startupjob/UpdateRevisionCommittedAt.java @@ -0,0 +1,79 @@ +package it.gov.innovazione.ndc.harvester.service.startupjob; + +import it.gov.innovazione.ndc.harvester.service.HarvesterRunService; +import it.gov.innovazione.ndc.harvester.util.GitUtils; +import it.gov.innovazione.ndc.model.harvester.HarvesterRun; +import it.gov.innovazione.ndc.service.logging.LoggingContext; +import it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger; +import lombok.RequiredArgsConstructor; +import org.apache.commons.lang3.tuple.Pair; +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Service; + +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + +import static java.util.Objects.isNull; + +@Service +@RequiredArgsConstructor +public class UpdateRevisionCommittedAt implements StartupJob { + + private final GitUtils gitUtils; + private final HarvesterRunService harvesterRunService; + + @Override + public void run() { + List harvesterRunsToUpdate = harvesterRunService.getAllRuns().stream() + .filter(harvesterRun -> isNull(harvesterRun.getRevisionCommittedAt())) + .toList(); + NDCHarvesterLogger.logApplicationInfo( + LoggingContext.builder() + .component("UpdateRevisionCommittedAt") + .message(String.format("Found %d harvester runs to update", harvesterRunsToUpdate.size())) + .build()); + Map, List> byRepositoryAndRevision = harvesterRunsToUpdate.stream() + .collect(Collectors.groupingBy(harvesterRun -> Pair.of(harvesterRun.getRepositoryUrl(), harvesterRun.getRevision()))); + + byRepositoryAndRevision.entrySet().stream() + .map(this::withUpdatedRevisionCommittedAt) + .flatMap(List::stream) + .forEach(this::logAndUpdate); + + } + + @Scheduled(cron = "${ndc.harvester.update-revision-committed-at.cron}") + public void scheduledRun() { + NDCHarvesterLogger.logApplicationInfo( + LoggingContext.builder() + .component("UpdateRevisionCommittedAt") + .message("Scheduled run of UpdateRevisionCommittedAt") + .build()); + run(); + } + + private void logAndUpdate(HarvesterRun harvesterRun) { + Instant revisionCommittedAt = harvesterRun.getRevisionCommittedAt(); + String repositoryUrl = harvesterRun.getRepositoryUrl(); + String revision = harvesterRun.getRevision(); + String harvesterRunId = harvesterRun.getId(); + NDCHarvesterLogger.logApplicationInfo( + LoggingContext.builder() + .message(String.format("Updated revisionCommittedAt for harvester run %s with revision %s and repository %s to %s", + harvesterRunId, revision, repositoryUrl, revisionCommittedAt)) + .build()); + harvesterRunService.updateHarvesterRunCommittedAt(harvesterRun); + } + + private List withUpdatedRevisionCommittedAt(Map.Entry, List> byRepositoryAndRevision) { + Pair repositoryAndRevision = byRepositoryAndRevision.getKey(); + Optional commitDate = gitUtils.getCommitDate(repositoryAndRevision.getLeft(), repositoryAndRevision.getRight()); + return commitDate.map(instant -> byRepositoryAndRevision.getValue().stream() + .map(harvesterRun -> harvesterRun.withRevisionCommittedAt(instant)) + .toList()).orElseGet(byRepositoryAndRevision::getValue); + } +} + diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/util/GitUtils.java b/src/main/java/it/gov/innovazione/ndc/harvester/util/GitUtils.java index 883a77da..1cb8e3b1 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/util/GitUtils.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/util/GitUtils.java @@ -1,20 +1,37 @@ package it.gov.innovazione.ndc.harvester.util; +import it.gov.innovazione.ndc.service.logging.LoggingContext; +import it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger; import lombok.RequiredArgsConstructor; import org.apache.commons.lang3.StringUtils; import org.eclipse.jgit.api.Git; import org.eclipse.jgit.api.errors.GitAPIException; import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.PersonIdent; import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.revwalk.RevCommit; import org.springframework.stereotype.Component; import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.time.Instant; +import java.util.Optional; +import java.util.stream.StreamSupport; + +import static it.gov.innovazione.ndc.harvester.AgencyRepositoryService.TEMP_DIR_PREFIX; @Component @RequiredArgsConstructor public class GitUtils { + private final FileUtils fileUtils; + public void cloneRepo(String repoUrl, File destination, String revision) { + cloneRepoAndGetLastCommitDate(repoUrl, destination, revision); + } + + public Instant cloneRepoAndGetLastCommitDate(String repoUrl, File destination, String revision) { try { Git call = Git.cloneRepository() .setURI(repoUrl) @@ -24,6 +41,7 @@ public void cloneRepo(String repoUrl, File destination, String revision) { if (StringUtils.isNotBlank(revision)) { call.checkout().setName(revision).call(); } + return safelyGetLastCommitDate(call); } catch (GitAPIException e) { throw new GitRepoCloneException(String.format("Cannot clone repo '%s'", repoUrl), e); } @@ -48,4 +66,76 @@ public String getHeadRemoteRevision(String url) { throw new GitRepoCloneException(String.format("Cannot get latest revision from repo '%s'", url), e); } } + + public Optional getCommitDate(String repositoryUrl, String revision) { + Optional tempDirectory = safelyGetTempDirectory(repositoryUrl, revision); + if (tempDirectory.isEmpty()) { + return Optional.empty(); + } + + Optional gitOpt = cloneSafely(repositoryUrl, tempDirectory.get(), revision); + + if (gitOpt.isEmpty()) { + return Optional.empty(); + } + + Optional instant = gitOpt.map(this::safelyGetLastCommitDate); + tryRemoveDirectory(tempDirectory.get()); + return instant; + } + + private void tryRemoveDirectory(Path path) { + try { + fileUtils.removeDirectory(path); + } catch (IOException e) { + NDCHarvesterLogger.logApplicationError(LoggingContext.builder() + .message("Error removing temp directory") + .details(e.getMessage()) + .build()); + } + } + + private Optional safelyGetTempDirectory(String repositoryUrl, String revision) { + try { + return Optional.of(fileUtils.createTempDirectory(TEMP_DIR_PREFIX)); + } catch (IOException e) { + NDCHarvesterLogger.logApplicationError(LoggingContext.builder() + .message("Error creating temp directory while retrieving commit date") + .details(e.getMessage()) + .additionalInfo("repositoryUrl", repositoryUrl) + .additionalInfo("revision", revision) + .build()); + return Optional.empty(); + } + } + + private Instant safelyGetLastCommitDate(Git git) { + try { + return StreamSupport.stream(git.log().call().spliterator(), false) + .findFirst() + .map(RevCommit::getAuthorIdent) + .map(PersonIdent::getWhenAsInstant) + .orElse(null); + } catch (GitAPIException e) { + NDCHarvesterLogger.logApplicationError(LoggingContext.builder() + .message("Error getting commit date") + .details(e.getMessage()) + .build()); + return null; + } + } + + private Optional cloneSafely(String repositoryUrl, Path tempDirectory, String revision) { + try { + Git git = Git.cloneRepository() + .setURI(repositoryUrl) + .setDirectory(tempDirectory.toFile()) + .call(); + git.checkout().setName(revision).call(); + + return Optional.of(git); + } catch (GitAPIException e) { + return Optional.empty(); + } + } } diff --git a/src/main/java/it/gov/innovazione/ndc/model/harvester/HarvesterRun.java b/src/main/java/it/gov/innovazione/ndc/model/harvester/HarvesterRun.java index 7ab554f4..5910b2bd 100644 --- a/src/main/java/it/gov/innovazione/ndc/model/harvester/HarvesterRun.java +++ b/src/main/java/it/gov/innovazione/ndc/model/harvester/HarvesterRun.java @@ -1,8 +1,11 @@ package it.gov.innovazione.ndc.model.harvester; +import it.gov.innovazione.ndc.harvester.context.HarvestExecutionContext; +import it.gov.innovazione.ndc.harvester.context.HarvestExecutionContextUtils; import lombok.Builder; import lombok.Data; import lombok.RequiredArgsConstructor; +import lombok.With; import java.time.Instant; @@ -19,9 +22,19 @@ public class HarvesterRun { private final String startedBy; private final Instant endedAt; private final String revision; + @With + private final Instant revisionCommittedAt; private final Status status; private final String reason; + public static String getCurrentRunId() { + HarvestExecutionContext context = HarvestExecutionContextUtils.getContext(); + if (context == null) { + return null; + } + return context.getRunId(); + } + public enum Status { SUCCESS, UNCHANGED, ALREADY_RUNNING, RUNNING, NDC_ISSUES_PRESENT, FAILURE } diff --git a/src/main/java/it/gov/innovazione/ndc/model/harvester/SemanticContentStats.java b/src/main/java/it/gov/innovazione/ndc/model/harvester/SemanticContentStats.java new file mode 100644 index 00000000..0314a1f7 --- /dev/null +++ b/src/main/java/it/gov/innovazione/ndc/model/harvester/SemanticContentStats.java @@ -0,0 +1,25 @@ +package it.gov.innovazione.ndc.model.harvester; + +import it.gov.innovazione.ndc.harvester.SemanticAssetType; +import lombok.Builder; +import lombok.Data; +import lombok.RequiredArgsConstructor; + +import java.time.LocalDate; +import java.util.List; + +@Data +@Builder +@RequiredArgsConstructor +public class SemanticContentStats { + private final String id; + private final String harvesterRunId; + private final String resourceUri; + private final SemanticAssetType resourceType; + private final String rightHolder; + private final LocalDate issuedOn; + private final LocalDate modifiedOn; + private final boolean hasErrors; + private final boolean hasWarnings; + private final List status; +} diff --git a/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLogger.java b/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLogger.java index b500bc6a..72a9d1c5 100644 --- a/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLogger.java +++ b/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLogger.java @@ -3,6 +3,7 @@ import lombok.extern.slf4j.Slf4j; import org.springframework.boot.logging.LogLevel; +import java.io.IOException; import java.util.Map; import java.util.function.Consumer; @@ -69,4 +70,8 @@ public static void logApplicationInfo(LoggingContext loggingContext) { public static void logApplicationWarn(LoggingContext build) { log(build.withLevel(LogLevel.WARN).application()); } + + public static void logApplicationError(LoggingContext build) { + log(build.withLevel(LogLevel.ERROR).application()); + } } diff --git a/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLoggerUtils.java b/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLoggerUtils.java index 68a9de58..b96453e5 100644 --- a/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLoggerUtils.java +++ b/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLoggerUtils.java @@ -34,7 +34,7 @@ public static void overrideContext(LoggingContext context) { public static void clearContext() { SEEN_MESSAGES.get().clear(); CONTEXT_HOLDER.remove(); - log.info("Context cleared"); + log.info("Contexts cleared"); } public static boolean notSeen(String message) { diff --git a/src/main/resources/application-local.properties b/src/main/resources/application-local.properties index d838ca84..7713894a 100644 --- a/src/main/resources/application-local.properties +++ b/src/main/resources/application-local.properties @@ -1,4 +1,4 @@ -harvester.repositories=https://github.com/FrankMaverick/Leo-OpenData +harvester.repositories=https://github.com/FrankMaverick/Leo-OpenData,https://github.com/italia/daf-ontologie-vocabolari-controllati,https://github.com/InailUfficio5/inail-ndc virtuoso.sparql=http://localhost:8890/sparql-auth virtuoso.sparql-graph-store=http://localhost:8890/sparql-graph-crud-auth diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index cbb858c6..d57e3c74 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -54,3 +54,5 @@ elasticsearch.port=${ELASTICSEARCH_PORT:9200} elasticsearch.scheme=${ELASTICSEARCH_SCHEME:https} elasticsearch.username=${ELASTICSEARCH_USERNAME:elastic} elasticsearch.password=${ELASTICSEARCH_PASSWORD:changeme} +# The following properties is used to set the cron scheduled to once a week to update the revision of the committed at field +ndc.harvester.update-revision-committed-at.cron=${NDC_HARVESTER_UPDATE_REVISION_COMMITTED_AT_CRON:0 0 0 * * SUN} diff --git a/src/main/resources/db/migration/V8__tracing.sql b/src/main/resources/db/migration/V8__tracing.sql new file mode 100644 index 00000000..9e44ebaa --- /dev/null +++ b/src/main/resources/db/migration/V8__tracing.sql @@ -0,0 +1,2 @@ +alter table HARVESTER_RUN + add REVISION_COMMITTED_AT TIMESTAMP null; \ No newline at end of file diff --git a/src/main/resources/db/migration/V9__semantic_content_stats.sql b/src/main/resources/db/migration/V9__semantic_content_stats.sql new file mode 100644 index 00000000..7bdb851e --- /dev/null +++ b/src/main/resources/db/migration/V9__semantic_content_stats.sql @@ -0,0 +1,13 @@ +create table SEMANTIC_CONTENT_STATS +( + ID VARCHAR(256) PRIMARY KEY, + HARVESTER_RUN_ID VARCHAR(256) NOT NULL REFERENCES HARVESTER_RUN (ID), + RESOURCE_URI VARCHAR(255) NOT NULL, + RESOURCE_TYPE VARCHAR(64) NOT NULL, + RIGHT_HOLDER VARCHAR(64) NOT NULL, + ISSUED_ON TIMESTAMP NOT NULL, + MODIFIED_ON TIMESTAMP NOT NULL, + HAS_ERRORS BOOLEAN NOT NULL, + HAS_WARNINGS BOOLEAN NOT NULL, + STATUS TEXT NOT NULL +) ENGINE = InnoDB; diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/AgencyRepositoryServiceTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/AgencyRepositoryServiceTest.java index c7ce2c1b..9551e617 100644 --- a/src/test/java/it/gov/innovazione/ndc/harvester/AgencyRepositoryServiceTest.java +++ b/src/test/java/it/gov/innovazione/ndc/harvester/AgencyRepositoryServiceTest.java @@ -1,5 +1,6 @@ package it.gov.innovazione.ndc.harvester; +import it.gov.innovazione.ndc.eventhandler.NdcEventPublisher; import it.gov.innovazione.ndc.harvester.model.CvPath; import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; import it.gov.innovazione.ndc.harvester.scanners.ControlledVocabularyFolderScanner; @@ -23,6 +24,7 @@ import static it.gov.innovazione.ndc.harvester.SemanticAssetType.SCHEMA; import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -31,17 +33,20 @@ public class AgencyRepositoryServiceTest { FileUtils fileUtils; GitUtils gitUtils; AgencyRepositoryService agencyRepoService; + NdcEventPublisher eventPublisher; @BeforeEach public void setup() { fileUtils = mock(FileUtils.class); + eventPublisher = mock(NdcEventPublisher.class); when(fileUtils.getLowerCaseFileName(any())).thenCallRealMethod(); + doNothing().when(eventPublisher).publishEvent(any(), any(), any(), any(), any()); gitUtils = mock(GitUtils.class); OntologyFolderScanner ontologyScanner = new OntologyFolderScanner(fileUtils, OntologyFolderScannerProperties.forWords("aligns")); ControlledVocabularyFolderScanner cvScanner = new ControlledVocabularyFolderScanner(fileUtils, ControlledVocabularyFolderScannerProperties.forWords()); SchemaFolderScanner schemaScanner = new SchemaFolderScanner(fileUtils); agencyRepoService = new AgencyRepositoryService(fileUtils, gitUtils, ontologyScanner, - cvScanner, schemaScanner, AgencyRepositoryServiceProperties.forWords("scriptR2RML")); + cvScanner, schemaScanner, AgencyRepositoryServiceProperties.forWords("scriptR2RML"), eventPublisher); } @Test @@ -52,7 +57,7 @@ void shouldCloneTheRepoInTempDir() throws IOException { assertThat(clonedTempDir).isEqualTo(Path.of("temp")); verify(fileUtils).createTempDirectory(TEMP_DIR_PREFIX); - verify(gitUtils).cloneRepo("someURI", new File("temp"), null); + verify(gitUtils).cloneRepoAndGetLastCommitDate("someURI", new File("temp"), null); } /** diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvesterTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvesterTest.java index c62bc50d..e2abccd2 100644 --- a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvesterTest.java +++ b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvesterTest.java @@ -6,7 +6,9 @@ import it.gov.innovazione.ndc.harvester.context.HarvestExecutionContext; import it.gov.innovazione.ndc.harvester.context.HarvestExecutionContextUtils; import it.gov.innovazione.ndc.harvester.exception.InvalidAssetException; +import it.gov.innovazione.ndc.harvester.model.HarvesterStatsHolder; import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; +import it.gov.innovazione.ndc.harvester.service.SemanticContentStatsService; import it.gov.innovazione.ndc.model.harvester.Repository; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -35,6 +37,8 @@ class BaseSemanticAssetHarvesterTest { private NdcEventPublisher eventPublisher; @Mock private ConfigService configService; + @Mock + private SemanticContentStatsService semanticContentStatsService; @Test void shouldMoveOnToNextOntologyIfProcessingOneFails() { @@ -74,6 +78,8 @@ void shouldNotifyIfSizeExceed() { .build(); doNothing().when(processor).accept(repoUrl, path); + doNothing().when(semanticContentStatsService).updateStats(any()); + when(configService.getFromRepoOrGlobalOrDefault(any(), any(), any())).thenReturn(1L); try (MockedStatic contextUtils = mockStatic(HarvestExecutionContextUtils.class)) { @@ -105,13 +111,14 @@ private class TestHarvester extends BaseSemanticAssetHarvester paths; public TestHarvester(List paths) { - super(SemanticAssetType.ONTOLOGY, eventPublisher, configService); + super(SemanticAssetType.ONTOLOGY, eventPublisher, configService, semanticContentStatsService); this.paths = paths; } @Override - protected void processPath(String repoUrl, SemanticAssetPath path) { + protected HarvesterStatsHolder processPath(String repoUrl, SemanticAssetPath path) { processor.accept(repoUrl, path); + return HarvesterStatsHolder.builder().build(); } @Override diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvesterTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvesterTest.java index 251c2819..809b588f 100644 --- a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvesterTest.java +++ b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvesterTest.java @@ -5,6 +5,7 @@ import it.gov.innovazione.ndc.harvester.model.CvPath; import it.gov.innovazione.ndc.harvester.model.Instance; import it.gov.innovazione.ndc.harvester.pathprocessors.ControlledVocabularyPathProcessor; +import it.gov.innovazione.ndc.harvester.service.SemanticContentStatsService; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.InjectMocks; @@ -18,6 +19,7 @@ import static it.gov.innovazione.ndc.harvester.service.RepositoryUtils.asRepo; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -29,6 +31,8 @@ class ControlledVocabularyHarvesterTest { ControlledVocabularyPathProcessor pathProcessor; @Mock ConfigService configService; + @Mock + SemanticContentStatsService semanticContentStatsService; @InjectMocks ControlledVocabularyHarvester harvester; diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/OntologyHarvesterTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/OntologyHarvesterTest.java index dede1450..e06101e2 100644 --- a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/OntologyHarvesterTest.java +++ b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/OntologyHarvesterTest.java @@ -4,6 +4,7 @@ import it.gov.innovazione.ndc.harvester.AgencyRepositoryService; import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; import it.gov.innovazione.ndc.harvester.pathprocessors.OntologyPathProcessor; +import it.gov.innovazione.ndc.harvester.service.SemanticContentStatsService; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.InjectMocks; @@ -26,6 +27,8 @@ class OntologyHarvesterTest { OntologyPathProcessor pathProcessor; @Mock ConfigService configService; + @Mock + SemanticContentStatsService semanticContentStatsService; @InjectMocks OntologyHarvester harvester; diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/SchemaHarvesterTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/SchemaHarvesterTest.java index c8b93439..e39d5b95 100644 --- a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/SchemaHarvesterTest.java +++ b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/SchemaHarvesterTest.java @@ -4,6 +4,7 @@ import it.gov.innovazione.ndc.harvester.AgencyRepositoryService; import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; import it.gov.innovazione.ndc.harvester.pathprocessors.SchemaPathProcessor; +import it.gov.innovazione.ndc.harvester.service.SemanticContentStatsService; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.InjectMocks; @@ -26,6 +27,8 @@ class SchemaHarvesterTest { SchemaPathProcessor pathProcessor; @Mock ConfigService configService; + @Mock + SemanticContentStatsService semanticContentStatsService; @InjectMocks SchemaHarvester harvester; diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessorTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessorTest.java index ac32d60e..6466036e 100644 --- a/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessorTest.java +++ b/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessorTest.java @@ -8,6 +8,7 @@ import it.gov.innovazione.ndc.harvester.model.SemanticAssetModelFactory; import it.gov.innovazione.ndc.harvester.model.index.RightsHolder; import it.gov.innovazione.ndc.harvester.model.index.SemanticAssetMetadata; +import it.gov.innovazione.ndc.harvester.service.SemanticContentStatsService; import it.gov.innovazione.ndc.repository.SemanticAssetMetadataRepository; import it.gov.innovazione.ndc.repository.TripleStoreRepository; import it.gov.innovazione.ndc.service.VocabularyDataService; @@ -45,6 +46,8 @@ class ControlledVocabularyPathProcessorTest { @Mock SemanticAssetMetadataRepository metadataRepository; @Mock + SemanticContentStatsService semanticContentStatsService; + @Mock Model jenaModel; String baseUrl = "http://ndc"; diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/OntologyPathProcessorTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/OntologyPathProcessorTest.java index 59f14b09..8df7d2ea 100644 --- a/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/OntologyPathProcessorTest.java +++ b/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/OntologyPathProcessorTest.java @@ -4,6 +4,7 @@ import it.gov.innovazione.ndc.harvester.model.SemanticAssetModelFactory; import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; import it.gov.innovazione.ndc.harvester.model.index.SemanticAssetMetadata; +import it.gov.innovazione.ndc.harvester.service.SemanticContentStatsService; import it.gov.innovazione.ndc.repository.SemanticAssetMetadataRepository; import it.gov.innovazione.ndc.repository.TripleStoreRepository; import org.apache.jena.rdf.model.Resource; @@ -28,6 +29,8 @@ class OntologyPathProcessorTest { @Mock TripleStoreRepository repository; @Mock + SemanticContentStatsService semanticContentStatsService; + @Mock SemanticAssetMetadataRepository metadataRepository; @InjectMocks diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/SchemaPathProcessorTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/SchemaPathProcessorTest.java index b319d070..d13010c7 100644 --- a/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/SchemaPathProcessorTest.java +++ b/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/SchemaPathProcessorTest.java @@ -4,6 +4,7 @@ import it.gov.innovazione.ndc.harvester.model.SemanticAssetModelFactory; import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; import it.gov.innovazione.ndc.harvester.model.index.SemanticAssetMetadata; +import it.gov.innovazione.ndc.harvester.service.SemanticContentStatsService; import it.gov.innovazione.ndc.repository.SemanticAssetMetadataRepository; import it.gov.innovazione.ndc.repository.TripleStoreRepository; import org.apache.jena.rdf.model.ResourceFactory; @@ -30,7 +31,8 @@ class SchemaPathProcessorTest { SemanticAssetMetadataRepository metadataRepository; @Mock TripleStoreRepository repository; - + @Mock + SemanticContentStatsService semanticContentStatsService; @InjectMocks SchemaPathProcessor schemaPathProcessor; diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/SemanticAssetPathProcessorTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/SemanticAssetPathProcessorTest.java index 23a1003d..6bfe29cf 100644 --- a/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/SemanticAssetPathProcessorTest.java +++ b/src/test/java/it/gov/innovazione/ndc/harvester/pathprocessors/SemanticAssetPathProcessorTest.java @@ -5,6 +5,7 @@ import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; import it.gov.innovazione.ndc.harvester.model.exception.InvalidModelException; import it.gov.innovazione.ndc.harvester.model.index.SemanticAssetMetadata; +import it.gov.innovazione.ndc.harvester.service.SemanticContentStatsService; import it.gov.innovazione.ndc.repository.SemanticAssetMetadataRepository; import it.gov.innovazione.ndc.repository.TripleStoreRepository; import it.gov.innovazione.ndc.repository.TripleStoreRepositoryException; @@ -56,6 +57,8 @@ protected void enrichModelBeforePersisting(OntologyModel model, SemanticAssetPat @Mock private Model model; @Mock + SemanticContentStatsService semanticContentStatsService; + @Mock private Consumer modelEnricher; @Test