Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: improved data integrity execution [DHIS2-16223] #15789

Merged
merged 9 commits into from
Nov 29, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,13 @@
*/
package org.hisp.dhis.dataintegrity;

import static java.util.Comparator.comparingLong;
import static java.util.stream.Collectors.joining;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.io.Serializable;
import java.util.Comparator;
import java.util.function.Function;
import java.util.stream.Stream;
import javax.annotation.Nonnull;
Expand All @@ -54,25 +57,35 @@
@Builder
@AllArgsConstructor(access = AccessLevel.PRIVATE)
public final class DataIntegrityCheck implements Serializable {
@JsonProperty private final String name;

@JsonProperty private final String displayName;
public static final Comparator<DataIntegrityCheck> FAST_TO_SLOW =
comparingLong(DataIntegrityCheck::getExecutionTimeIndicator);

@JsonProperty private final String name;
@JsonProperty private final String displayName;
@JsonProperty private final String section;

@JsonProperty private final int sectionOrder;

@JsonProperty private final DataIntegritySeverity severity;

@JsonProperty private final String description;

@JsonProperty private final String introduction;

@JsonProperty private final String recommendation;

@JsonProperty private final String issuesIdType;

@JsonProperty private final boolean isSlow;
@JsonProperty private final boolean isProgrammatic;

private long executionTime;
private int executionCount;

public @JsonProperty Long getAverageExecutionTime() {
return executionTime <= 0L ? null : executionTime / executionCount;
}

@JsonIgnore
long getExecutionTimeIndicator() {
Long time = getAverageExecutionTime();
if (time != null) return time;
return isSlow ? Long.MAX_VALUE : 1000;
}

@JsonProperty
public String getCode() {
Expand All @@ -83,6 +96,12 @@ public String getCode() {

private final transient Function<DataIntegrityCheck, DataIntegrityDetails> runDetailsCheck;

public DataIntegrityCheck addExecution(long time) {
executionCount++;
executionTime += time;
return this;
}

/**
* Method that takes in a name of a {@link DataIntegrityCheck} and converts it to an acronym of
* its name using the first letter from each word e.g. my_data_integrity_check -> MDIC
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.hisp.dhis.scheduling.parameters.AggregateDataExchangeJobParameters;
import org.hisp.dhis.scheduling.parameters.AnalyticsJobParameters;
import org.hisp.dhis.scheduling.parameters.ContinuousAnalyticsJobParameters;
import org.hisp.dhis.scheduling.parameters.DataIntegrityDetailsJobParameters;
import org.hisp.dhis.scheduling.parameters.DataIntegrityJobParameters;
import org.hisp.dhis.scheduling.parameters.DataSynchronizationJobParameters;
import org.hisp.dhis.scheduling.parameters.DisableInactiveUsersJobParameters;
Expand Down Expand Up @@ -71,6 +72,7 @@ public enum JobType {
User defined jobs
*/
DATA_INTEGRITY(DataIntegrityJobParameters.class),
DATA_INTEGRITY_DETAILS(DataIntegrityDetailsJobParameters.class),
RESOURCE_TABLE(),
ANALYTICS_TABLE(AnalyticsJobParameters.class),
CONTINUOUS_ANALYTICS_TABLE(ContinuousAnalyticsJobParameters.class),
Expand Down Expand Up @@ -248,7 +250,7 @@ public boolean isUserDefined() {

public Map<String, String> getRelativeApiElements() {
return switch (this) {
case DATA_INTEGRITY -> Map.of("checks", "/api/dataIntegrity");
case DATA_INTEGRITY, DATA_INTEGRITY_DETAILS -> Map.of("checks", "/api/dataIntegrity");
case ANALYTICS_TABLE -> Map.of(
"skipTableTypes", "/api/analytics/tableTypes",
"skipPrograms", "/api/programs");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright (c) 2004-2023, University of Oslo
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of the HISP project nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.hisp.dhis.scheduling.parameters;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty;
import java.util.Set;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hisp.dhis.common.DxfNamespaces;
import org.hisp.dhis.scheduling.JobParameters;

/**
* @author Jan Bernitt
*/
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class DataIntegrityDetailsJobParameters implements JobParameters {

@JsonProperty(required = false)
@JacksonXmlProperty(namespace = DxfNamespaces.DXF_2_0)
private Set<String> checks;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Copyright (c) 2004-2023, University of Oslo
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of the HISP project nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.hisp.dhis.dataintegrity;

import static org.junit.jupiter.api.Assertions.assertEquals;

import java.util.List;
import org.junit.jupiter.api.Test;

/**
* @author Jan Bernitt
*/
class DataIntegrityCheckTest {

@Test
void testSortingFastToSlow_NoExecutionTime() {
List<DataIntegrityCheck> checks =
List.of(
DataIntegrityCheck.builder().name("a").isSlow(true).build(),
DataIntegrityCheck.builder().name("b").isSlow(false).build(),
DataIntegrityCheck.builder().name("c").isSlow(true).build());
List<DataIntegrityCheck> actual =
checks.stream().sorted(DataIntegrityCheck.FAST_TO_SLOW).toList();
assertEquals(List.of("b", "a", "c"), actual.stream().map(DataIntegrityCheck::getName).toList());
}

@Test
void testSortingFastToSlow() {
List<DataIntegrityCheck> checks =
List.of(
DataIntegrityCheck.builder().name("a").isSlow(true).build().addExecution(12345),
DataIntegrityCheck.builder().name("b").isSlow(false).build().addExecution(20),
DataIntegrityCheck.builder().name("c").isSlow(true).build().addExecution(500));
List<DataIntegrityCheck> actual =
checks.stream().sorted(DataIntegrityCheck.FAST_TO_SLOW).toList();
assertEquals(List.of("b", "c", "a"), actual.stream().map(DataIntegrityCheck::getName).toList());
}

@Test
void testSortingFastToSlow_MixedExecutionTime() {
List<DataIntegrityCheck> checks =
List.of(
DataIntegrityCheck.builder().name("a").isSlow(true).build().addExecution(12345),
DataIntegrityCheck.builder().name("b").isSlow(false).build(),
DataIntegrityCheck.builder().name("c").isSlow(true).build().addExecution(500));
List<DataIntegrityCheck> actual =
checks.stream().sorted(DataIntegrityCheck.FAST_TO_SLOW).toList();
assertEquals(List.of("c", "b", "a"), actual.stream().map(DataIntegrityCheck::getName).toList());
}

@Test
void testAddExecution() {
DataIntegrityCheck check = DataIntegrityCheck.builder().name("a").build();

check.addExecution(1000).addExecution(1000); // sum is 2000, 2 times

assertEquals(1000L, check.getAverageExecutionTime());

check.addExecution(4000); // sum is now 6000, 3 times

assertEquals(2000L, check.getAverageExecutionTime());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,6 @@ private List<DataIntegrityIssue> getInvalidValidationRuleExpressions(
issues.add(toIssue(rule, i18n.getString(result.getKey())));
}
}

return issues;
}

Expand All @@ -508,11 +507,13 @@ private void registerNonDatabaseIntegrityCheck(
try {
Schema issueSchema = issueIdType == null ? null : schemaService.getDynamicSchema(issueIdType);
String issueIdTypeName = issueSchema == null ? null : issueSchema.getPlural();
checksByName.put(
checksByName.putIfAbsent(
name,
DataIntegrityCheck.builder()
.name(name)
.displayName(info.apply("name", name.replace('_', ' ')))
.isSlow(true)
.isProgrammatic(true)
.severity(
DataIntegritySeverity.valueOf(
info.apply("severity", DataIntegritySeverity.WARNING.name()).toUpperCase()))
Expand Down Expand Up @@ -942,7 +943,10 @@ private <T> void runDataIntegrityChecks(
progress.startingProcess("Data Integrity check");
progress.startingStage(stageDesc, checks.size(), SKIP_ITEM);
progress.runStage(
checks.stream().map(checksByName::get).filter(Objects::nonNull),
checks.stream()
.map(checksByName::get)
.filter(Objects::nonNull)
.sorted(DataIntegrityCheck.FAST_TO_SLOW),
DataIntegrityCheck::getDescription,
check -> {
Date startTime = new Date();
Expand All @@ -956,6 +960,7 @@ private <T> void runDataIntegrityChecks(
running.remove(check.getName());
}
if (res != null) {
check.addExecution(currentTimeMillis() - startTime.getTime());
cache.put(check.getName(), res);
}
});
Expand Down Expand Up @@ -1005,12 +1010,12 @@ private Set<String> getDefaultChecks() {

private void ensureConfigurationsAreLoaded() {
if (configurationsAreLoaded.compareAndSet(false, true)) {
// programmatic checks
initIntegrityChecks();

// load system-packaged data integrity checks
loadChecks(CLASS_PATH, "data-integrity-checks.yaml", "data-integrity-checks");

// programmatic checks
initIntegrityChecks();

// load user-packaged custom data integrity checks
try {
String dhis2Home = locationManager.getExternalDirectoryPath();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright (c) 2004-2022, University of Oslo
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of the HISP project nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.hisp.dhis.dataintegrity.jobs;

import java.util.Set;
import lombok.AllArgsConstructor;
import org.hisp.dhis.dataintegrity.DataIntegrityService;
import org.hisp.dhis.scheduling.Job;
import org.hisp.dhis.scheduling.JobConfiguration;
import org.hisp.dhis.scheduling.JobProgress;
import org.hisp.dhis.scheduling.JobType;
import org.hisp.dhis.scheduling.parameters.DataIntegrityDetailsJobParameters;
import org.springframework.stereotype.Component;

/**
* @author Jan Bernitt
*/
@Component
@AllArgsConstructor
public class DataIntegrityDetailsJob implements Job {

private final DataIntegrityService dataIntegrityService;

@Override
public JobType getJobType() {
return JobType.DATA_INTEGRITY_DETAILS;
}

@Override
public void execute(JobConfiguration config, JobProgress progress) {
DataIntegrityDetailsJobParameters parameters =
(DataIntegrityDetailsJobParameters) config.getJobParameters();
Set<String> checks = parameters == null ? Set.of() : parameters.getChecks();

dataIntegrityService.runDetailsChecks(checks, progress);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@
import org.springframework.stereotype.Component;

/**
* @author Halvdan Hoem Grelland <[email protected]>
* @author Halvdan Hoem Grelland <[email protected]> (original)
* @author Jan Bernitt (refactored)
*/
@Component
@AllArgsConstructor
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ default boolean getIsSlow() {
return getBoolean("isSlow").booleanValue();
}

default boolean getIsProgrammatic() {
return getBoolean("isProgrammatic").booleanValue();
}

default String getCode() {
return getString("code").string();
}
Expand Down
Loading
Loading