Skip to content

Commit

Permalink
Merge pull request #126 from cyenyxe/feature/filter-variants-by-fileid
Browse files Browse the repository at this point in the history
EVA-880 Reduce chance of timeouts in (re-)annotation search, restricts it by file (being loaded) in addition to the study
  • Loading branch information
Cristina Yenyxe Gonzalez Garcia authored and cyenyxe committed Sep 23, 2017
2 parents 22a3c07 + 2ab10e6 commit 9154295
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ public VariantsMongoReader variantsMongoReader(MongoOperations mongoOperations,
annotationParameters.getVepVersion(),
annotationParameters.getVepCacheVersion(),
inputParameters.getStudyId(),
inputParameters.getVcfId(),
excludeAnnotated);
variantsMongoReader.setSaveState(false);
return variantsMongoReader;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,22 +54,34 @@ public class VariantsMongoReader

private static final String STUDY_KEY = VariantDocument.FILES_FIELD + "." + VariantSourceEntryMongo.STUDYID_FIELD;

private static final String FILE_KEY = VariantDocument.FILES_FIELD + "." + VariantSourceEntryMongo.FILEID_FIELD;

/**
* @param studyId Can be the empty string or null, meaning to bring all non-annotated variants in the collection.
* If the studyId string is not empty, bring only non-annotated variants from that study.
* If the studyId string is not empty, bring only non-annotated variants from that study.
* @param fileId File identifier that is checked inside a study. If the study identifier is not defined, the
* file is ignored. This is mainly due to performance reasons.
* Can be the empty string or null, meaning to bring all non-annotated variants in a study.
* If the fileId string is not empty (and a studyId is given), bring only non-annotated variants from that
* study and file.
* @param excludeAnnotated bring only non-annotated variants.
*/
public VariantsMongoReader(MongoOperations mongoOperations, String collectionVariantsName, String vepVersion,
String vepCacheVersion, String studyId, boolean excludeAnnotated) {
String vepCacheVersion, String studyId, String fileId, boolean excludeAnnotated) {
setName(ClassUtils.getShortName(VariantsMongoReader.class));
delegateReader = new MongoDbCursorItemReader();
delegateReader.setTemplate(mongoOperations);
delegateReader.setCollection(collectionVariantsName);

BasicDBObjectBuilder queryBuilder = BasicDBObjectBuilder.start();

if (studyId != null && !studyId.isEmpty()) {
queryBuilder.add(STUDY_KEY, studyId);

if (fileId != null && !fileId.isEmpty()) {
queryBuilder.add(FILE_KEY, fileId);
}
}

if (excludeAnnotated) {
BasicDBObject exists = new BasicDBObject("$exists", 1);
BasicDBObject annotationSubdocument = new BasicDBObject(VariantAnnotation.SO_ACCESSION_FIELD, exists)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ public class VariantsMongoReaderTest {

private static final String COLLECTION_VARIANTS_NAME = "variants";

private static final int EXPECTED_NO_VARIANTS = 0;

private static final int EXPECTED_NON_ANNOTATED_VARIANTS_IN_STUDY = 1;

private static final int EXPECTED_NON_ANNOTATED_VARIANTS_IN_DB = 2;
Expand All @@ -65,7 +67,9 @@ public class VariantsMongoReaderTest {

private static final String STUDY_ID = "7";

private static final String ALL_STUDIES = "";
private static final String FILE_ID = "5";

private static final String ALL_IDS = "";

private static final String VEP_VERSION = "78";

Expand All @@ -82,73 +86,65 @@ public class VariantsMongoReaderTest {

@Test
public void shouldReadVariantsWithoutAnnotationFieldInAStudy() throws Exception {
checkNonAnnotatedVariantsRead(EXPECTED_NON_ANNOTATED_VARIANTS_IN_STUDY, STUDY_ID);
checkVariantsRead(EXPECTED_NON_ANNOTATED_VARIANTS_IN_STUDY, STUDY_ID, FILE_ID, true);
}

@Test
public void shouldReadVariantsWithoutAnnotationFieldInAllStudies() throws Exception {
checkNonAnnotatedVariantsRead(EXPECTED_NON_ANNOTATED_VARIANTS_IN_DB, ALL_STUDIES);
checkVariantsRead(EXPECTED_NON_ANNOTATED_VARIANTS_IN_DB, ALL_IDS, ALL_IDS, true);
}

@Test
public void shouldReadVariantsWithoutAnnotationFieldInAllStudiesWhenNoStudySpecified() throws Exception {
checkNonAnnotatedVariantsRead(EXPECTED_NON_ANNOTATED_VARIANTS_IN_DB, null);
checkVariantsRead(EXPECTED_NON_ANNOTATED_VARIANTS_IN_DB, null, null, true);
}

private void checkNonAnnotatedVariantsRead(int expectedNonAnnotatedVariants, String study) throws Exception {
ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext();
String databaseName = mongoRule.createDBAndInsertDocuments(COLLECTION_VARIANTS_NAME, Arrays.asList(
VariantData.getVariantWithAnnotation(),
VariantData.getVariantWithoutAnnotation(),
VariantData.getVariantWithoutAnnotationOtherStudy()));

MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection,
mongoMappingContext);

boolean excludeAnnotated = true;
VariantsMongoReader mongoItemReader = new VariantsMongoReader(
mongoOperations, COLLECTION_VARIANTS_NAME, VEP_VERSION, VEP_CACHE_VERSION, study, excludeAnnotated);
mongoItemReader.open(executionContext);

int itemCount = 0;
EnsemblVariant ensemblVariant;
while ((ensemblVariant = mongoItemReader.read()) != null) {
itemCount++;
assertFalse(ensemblVariant.getChr().isEmpty());
assertNotEquals(0, ensemblVariant.getStart());
}
assertEquals(expectedNonAnnotatedVariants, itemCount);
mongoItemReader.close();
/**
 * Reads variants filtered by {@code STUDY_ID} and {@code FILE_ID} with {@code excludeAnnotated} set to
 * {@code false}, and expects {@code EXPECTED_VARIANTS_IN_STUDY} items to be returned.
 */
@Test
public void shouldReadVariantsInAStudy() throws Exception {
checkVariantsRead(EXPECTED_VARIANTS_IN_STUDY, STUDY_ID, FILE_ID, false);
}

@Test
public void shouldReadVariantsInAStudy() throws Exception {
checkAllVariantsRead(EXPECTED_VARIANTS_IN_STUDY, STUDY_ID);
public void shouldReadVariantsInAStudyWhenNoFileSpecified() throws Exception {
checkVariantsRead(EXPECTED_VARIANTS_IN_STUDY, STUDY_ID, null, false);
}

@Test
public void shouldReadVariantsInAllStudies() throws Exception {
checkAllVariantsRead(EXPECTED_VARIANTS_IN_DB, ALL_STUDIES);
checkVariantsRead(EXPECTED_VARIANTS_IN_DB, ALL_IDS, ALL_IDS, false);
}

@Test
public void shouldReadVariantsInAllStudiesWhenNoStudySpecified() throws Exception {
checkAllVariantsRead(EXPECTED_VARIANTS_IN_DB, null);
checkVariantsRead(EXPECTED_VARIANTS_IN_DB, null, null, false);
checkVariantsRead(EXPECTED_VARIANTS_IN_DB, null, FILE_ID, false);
}

private void checkAllVariantsRead(int expectedVariants, String study) throws Exception {
/**
 * Requests variants for the study identifier "nonExistingStudy" with no file filter and with
 * {@code excludeAnnotated} set to {@code false}, and expects {@code EXPECTED_NO_VARIANTS} items.
 * NOTE(review): assumes none of the inserted fixture documents belongs to a study with that identifier;
 * confirm against VariantData.
 */
@Test
public void shouldNotReadVariantsWhenStudyDoesNotExist() throws Exception {
checkVariantsRead(EXPECTED_NO_VARIANTS, "nonExistingStudy", null, false);
}

/**
 * Requests variants for {@code STUDY_ID} restricted to the file identifier "nonExistingFile" and expects
 * {@code EXPECTED_NO_VARIANTS} items, i.e. the file filter is applied in addition to the study filter.
 * NOTE(review): assumes no fixture document in that study references a file with that identifier;
 * confirm against VariantData.
 */
@Test
public void shouldNotReadVariantsInAStudyWhenFileDoesNotExist() throws Exception {
checkVariantsRead(EXPECTED_NO_VARIANTS, STUDY_ID, "nonExistingFile", false);
}


private void checkVariantsRead(int expectedVariants, String study, String file, boolean excludeAnnotated)
throws Exception {
ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext();
String databaseName = mongoRule.createDBAndInsertDocuments(COLLECTION_VARIANTS_NAME, Arrays.asList(
VariantData.getVariantWithAnnotation(),
VariantData.getVariantWithoutAnnotation(),
VariantData.getVariantWithoutAnnotationOtherStudy()));

MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection,
mongoMappingContext);
mongoMappingContext);

boolean excludeAnnotated = false;
VariantsMongoReader mongoItemReader = new VariantsMongoReader(
mongoOperations, COLLECTION_VARIANTS_NAME, VEP_VERSION, VEP_CACHE_VERSION, study, excludeAnnotated);
mongoOperations, COLLECTION_VARIANTS_NAME, VEP_VERSION, VEP_CACHE_VERSION, study, file,
excludeAnnotated);
mongoItemReader.open(executionContext);

int itemCount = 0;
Expand All @@ -161,4 +157,5 @@ private void checkAllVariantsRead(int expectedVariants, String study) throws Exc
assertEquals(expectedVariants, itemCount);
mongoItemReader.close();
}

}

0 comments on commit 9154295

Please sign in to comment.