Skip to content

Commit

Permalink
upgrade : java 21, spring 6, spring boot 3, spring batch 5
Browse files Browse the repository at this point in the history
- switch to new cbioportal maf repo
- update docker to openjdk-21
- rather than upgrading databaseAnnotator, delete it due to:
	- infrequent use
	- testing challenges
	- difficult maintenance
- if the databaseAnnotator module is desired by any users, an updated and tested version of the module can be re-introduced by a new PR.
- prevent JobParameters from containing null values (not allowed in spring batch 5)

Co-authored-by: Manda Wilson <[email protected]>
Co-authored-by: Robert Sheridan <[email protected]>
  • Loading branch information
mandawilson and sheridancbio committed Mar 22, 2024
1 parent 5d48eba commit fcc131a
Show file tree
Hide file tree
Showing 28 changed files with 84 additions and 1,517 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
build:
docker:
# specify the version you desire here
- image: cimg/openjdk:11.0.20-browsers
- image: cimg/openjdk:21.0.0-browsers

# Specify service dependencies here if necessary
# CircleCI maintains a library of pre-built images
Expand Down
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
FROM maven:3-openjdk-11
FROM openjdk:21-jdk-slim

ENV GN_HOME=/genome-nexus-annotation-pipeline
COPY . $GN_HOME
WORKDIR $GN_HOME

COPY annotationPipeline/src/main/resources/log4j.properties.EXAMPLE $GN_HOME/annotationPipeline/src/main/resources/log4j.properties
RUN apt-get update && apt-get install -y maven && apt-get clean;
# set log4j file in properties
RUN sed -i "s|log4j\.appender\.a\.File.*|log4j.appender.a.File = $GN_HOME/logs/genome-nexus-annotation-pipeline.log|" $GN_HOME/annotationPipeline/src/main/resources/log4j.properties

ARG mvnprofiles=''
RUN mvn -DskipTests clean install $mvnprofiles


FROM openjdk:11-slim
FROM openjdk:21-jdk-slim

ENV GN_HOME=/genome-nexus-annotation-pipeline

Expand Down
22 changes: 5 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -204,26 +204,14 @@ The output file was generated with:


## Direct Database Annotation
If you have data already loaded into a cBioPortal database but did not properly
annotate it or discovered issues later, you can use the `databaseAnnotator`
utility to fix it.

You will need to specify some database connection parameters inside the
`application.properties` file located in
`databaseAnnotator/src/main/resources`. Once this is done, build the project
using maven and run like so:

$JAVA_HOME/bin/java -jar databaseAnnotator/target/databaseAnnotator-*.jar \
--isoform <mskcc or uniprot>

As with the above tool, running the jar without any arguments or by providing
the optional parameter `-h` will bring up the full usage statement. You can
also specify a single study or set of studies to annotate by using the
`--studies` parameter.
There used to be a utility/module called databaseAnnotator which could be
used to annotate database records which were already loaded into a
cBioPortal database. This module was not being maintained and has been
removed (code is still reachable through git history).

## Annotator
The `annotator` module is the client code that makes calls to the Genome Nexus
server and interprets the response. The other two modules use this as a
server and interprets the response. The `annotationPipeline` module uses this as a
dependency.

## Updating the Genome Nexus Annotation Pipeline
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016 Memorial Sloan-Kettering Cancer Center.
* Copyright (c) 2016, 2024 Memorial Sloan-Kettering Cancer Center.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS
Expand Down Expand Up @@ -67,33 +67,40 @@ public class AnnotationPipeline {
private static final Logger LOG = LoggerFactory.getLogger(AnnotationPipeline.class);

private static void annotateJob(String[] args, String filename, String outputFilename, String outputFormat, String isoformOverride,
String errorReportLocation, boolean replace, String postIntervalSize, String stripMatchingBases, Boolean ignoreOriginalGenomicLocation, Boolean addOriginalGenomicLocation, Boolean noteColumn) throws Exception {
String errorReportLocation, boolean replace, String postIntervalSize, String stripMatchingBases,
Boolean ignoreOriginalGenomicLocation, Boolean addOriginalGenomicLocation, Boolean noteColumn) throws Exception {
SpringApplication app = new SpringApplication(AnnotationPipeline.class);
app.setWebApplicationType(WebApplicationType.NONE);
app.setAllowBeanDefinitionOverriding(Boolean.TRUE);
ConfigurableApplicationContext ctx = app.run(args);
JobLauncher jobLauncher = ctx.getBean(JobLauncher.class);

Job annotationJob = ctx.getBean(BatchConfiguration.ANNOTATION_JOB, Job.class);
JobParameters jobParameters = new JobParametersBuilder()
.addString("filename", filename)
.addString("outputFilename", outputFilename)
.addString("outputFormat", outputFormat)
.addString("replace", String.valueOf(replace))
.addString("isoformOverride", isoformOverride)
.addString("errorReportLocation", errorReportLocation)
.addString("postIntervalSize", postIntervalSize)
.addString("stripMatchingBases", stripMatchingBases)
.addString("ignoreOriginalGenomicLocation", String.valueOf(ignoreOriginalGenomicLocation))
.addString("addOriginalGenomicLocation", String.valueOf(addOriginalGenomicLocation))
.addString("noteColumn", String.valueOf(noteColumn))
.toJobParameters();
// Build the job parameters for the annotation job. Spring Batch 5 does not allow
// null-valued parameters, so every value goes through addJobParameterIfValueIsNotNull,
// which leaves the parameter out when the value is null or blank.
JobParametersBuilder jobParametersBuilder = new JobParametersBuilder();
addJobParameterIfValueIsNotNull(jobParametersBuilder, "filename", filename);
addJobParameterIfValueIsNotNull(jobParametersBuilder, "outputFilename", outputFilename);
addJobParameterIfValueIsNotNull(jobParametersBuilder, "outputFormat", outputFormat);
addJobParameterIfValueIsNotNull(jobParametersBuilder, "replace", String.valueOf(replace));
addJobParameterIfValueIsNotNull(jobParametersBuilder, "isoformOverride", isoformOverride);
addJobParameterIfValueIsNotNull(jobParametersBuilder, "errorReportLocation", errorReportLocation);
addJobParameterIfValueIsNotNull(jobParametersBuilder, "postIntervalSize", postIntervalSize);
addJobParameterIfValueIsNotNull(jobParametersBuilder, "stripMatchingBases", stripMatchingBases);
// Each flag must be stored under its own key: reusing "ignoreOriginalGenomicLocation" for
// addOriginalGenomicLocation would overwrite the first flag and never pass the second one.
addJobParameterIfValueIsNotNull(jobParametersBuilder, "ignoreOriginalGenomicLocation", String.valueOf(ignoreOriginalGenomicLocation));
addJobParameterIfValueIsNotNull(jobParametersBuilder, "addOriginalGenomicLocation", String.valueOf(addOriginalGenomicLocation));
addJobParameterIfValueIsNotNull(jobParametersBuilder, "noteColumn", String.valueOf(noteColumn));
JobParameters jobParameters = jobParametersBuilder.toJobParameters();
JobExecution jobExecution = jobLauncher.run(annotationJob, jobParameters);
if (!jobExecution.getExitStatus().equals(ExitStatus.COMPLETED)) {
System.exit(2);
}
}

// Spring Batch 5.x does not allow null valued JobParameters
// (java.lang.IllegalArgumentException: value must not be null), so the parameter is
// only added to the builder when its value is non-null and not blank after trimming.
private static void addJobParameterIfValueIsNotNull(JobParametersBuilder jobParametersBuilder, String key, String jobParameter) {
    if (jobParameter == null) {
        return;
    }
    boolean hasContent = !jobParameter.trim().isEmpty();
    if (hasContent) {
        jobParametersBuilder.addString(key, jobParameter);
    }
}

public static void subMain(String[] args) throws NoSubcommandFoundException, ParseException, MergeFailedException, AnnotationFailedException {
boolean help = false;
for (String arg : args) {
Expand Down Expand Up @@ -219,7 +226,7 @@ private static void annotate(Subcommand subcommand, String[] args) throws Annota
subcommand.printHelp();
throw new AnnotationFailedException("required option: output-filename");
}
String outputFormat = null;
String outputFormat = "";
if (subcommand.hasOption("output-format")) {
String outputFormatFile = subcommand.getOptionValue("output-format");
if ("extended".equals(outputFormatFile)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016 Memorial Sloan-Kettering Cancer Center.
* Copyright (c) 2016 - 2024 Memorial Sloan-Kettering Cancer Center.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS
Expand Down Expand Up @@ -36,37 +36,33 @@
import org.cbioportal.models.AnnotatedRecord;

import org.springframework.batch.core.*;
import org.springframework.batch.item.*;
import org.springframework.batch.core.configuration.annotation.*;
import org.springframework.context.annotation.*;
import org.springframework.batch.core.job.builder.JobBuilder;
import org.springframework.batch.core.repository.JobRepository;
import org.springframework.batch.core.step.builder.StepBuilder;
import org.springframework.batch.item.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.batch.core.configuration.annotation.StepScope;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.*;
import org.springframework.transaction.PlatformTransactionManager;

/**
* @author Zachary Heins
*/
@Configuration
@EnableBatchProcessing
@ComponentScan(basePackages="org.cbioportal.annotator")
public class BatchConfiguration
{
public static final String ANNOTATION_JOB = "annotationJob";

@Autowired
public JobBuilderFactory jobBuilderFactory;

@Autowired
public StepBuilderFactory stepBuilderFactory;

@Value("${chunk:1000000}")
private String chunk;
private String chunkSize;

@Bean
public Job annotationJob()
public Job annotationJob(JobRepository jobRepository, Step step)
{
return jobBuilderFactory.get(ANNOTATION_JOB)
.start(step())
return new JobBuilder(ANNOTATION_JOB, jobRepository)
.start(step)
.build();
}

Expand All @@ -76,10 +72,10 @@ public AnnotationUtil annotationUtil() {
}

@Bean
public Step step()
public Step step(JobRepository jobRepository, PlatformTransactionManager transactionManager)
{
return stepBuilderFactory.get("step")
.<AnnotatedRecord, String> chunk(Integer.parseInt(chunk))
return new StepBuilder("step", jobRepository)
.<AnnotatedRecord, String> chunk(Integer.parseInt(chunkSize), transactionManager)
.reader(reader())
.processor(processor())
.writer(writer())
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016 - 2020 Memorial Sloan-Kettering Cancer Center.
* Copyright (c) 2016 - 2024 Memorial Sloan-Kettering Cancer Center.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS
Expand Down Expand Up @@ -66,7 +66,7 @@ public class MutationRecordReader implements ItemStreamReader<AnnotatedRecord> {
private String filename;

@Value("#{jobParameters[replace]}")
private boolean replace;
private Boolean replace;

@Value("#{jobParameters[isoformOverride]}")
private String isoformOverride;
Expand Down Expand Up @@ -115,7 +115,7 @@ public void open(ExecutionContext ec) throws ItemStreamException {
allAnnotatedRecords = annotator.annotateRecordsUsingGET(summaryStatistics, mutationRecords, isoformOverride, replace, true, stripMatchingBases, ignoreOriginalGenomicLocation, addOriginalGenomicLocation, noteColumn);
}
// if output-format option is supplied, we only need to convert its data into header
if (outputFormat != null) {
if (outputFormat != null && !outputFormat.equals("")) {
if ("extended".equals(outputFormat)) {
header.addAll(ExtendedMafFormat.headers);
} else if ("minimal".equals(outputFormat)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ public void close() throws ItemStreamException {
}

@Override
public void write(List<? extends String> items) throws Exception {
public void write(Chunk<? extends String> items) throws Exception {
if (recordsToWriteCount > 0) {
flatFileItemWriter.write(items);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ public void check_if_maf_file_still_the_same_when_annotating_with_uniprot_transc
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "uniprot")
.addString("errorReportLocation", null)
.toJobParameters();
testWith(jobParameters, expectedFile, actualFile);
}
Expand All @@ -87,7 +86,6 @@ public void check_if_maf_file_still_the_same_when_annotating_with_mskcc_transcri
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "mskcc")
.addString("errorReportLocation", null)
.toJobParameters();
testWith(jobParameters, expectedFile, actualFile);
}
Expand All @@ -104,7 +102,6 @@ public void check_if_minimal_example_maf_file_still_the_same_when_annotating_wit
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "uniprot")
.addString("errorReportLocation", null)
.toJobParameters();
testWith(jobParameters, expectedFile, actualFile);
}
Expand All @@ -121,7 +118,6 @@ public void check_if_corner_cases_example_maf_file_still_the_same_when_annotatin
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "uniprot")
.addString("errorReportLocation", null)
.toJobParameters();
testWith(jobParameters, expectedFile, actualFile);
}
Expand All @@ -138,7 +134,6 @@ public void check_if_corner_cases_example_maf_file_still_the_same_when_annotatin
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "mskcc")
.addString("errorReportLocation", null)
.toJobParameters();
testWith(jobParameters, expectedFile, actualFile);
}
Expand All @@ -155,7 +150,6 @@ public void check_if_corner_cases_example_maf_file_still_the_same_when_annotatin
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "uniprot")
.addString("errorReportLocation", null)
.toJobParameters();
testWith(jobParameters, expectedFile, actualFile);
}
Expand All @@ -172,7 +166,6 @@ public void check_if_corner_cases_example_maf_file_still_the_same_when_annotatin
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "mskcc")
.addString("errorReportLocation", null)
.toJobParameters();
testWith(jobParameters, expectedFile, actualFile);
}
Expand All @@ -194,7 +187,6 @@ public void run_vcf2maf_test_case_mskcc() throws Exception {
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "mskcc")
.addString("errorReportLocation", null)
.toJobParameters();
testWith(jobParameters, expectedFile, actualFile);
}
Expand All @@ -216,7 +208,6 @@ public void run_vcf2maf_test_case_uniprot() throws Exception {
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "uniprot")
.addString("errorReportLocation", null)
.toJobParameters();
testWith(jobParameters, expectedFile, actualFile);
}
Expand All @@ -233,7 +224,6 @@ public void check_if_my_variant_info_provides_gnomad_annotations() throws Except
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "uniprot")
.addString("errorReportLocation", null)
.toJobParameters();
testWith(jobParameters, expectedFile, actualFile);
}
Expand All @@ -250,7 +240,6 @@ public void check_if_nucleotide_context_provides_Ref_Tri_and_Var_Tri_columns() t
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "uniprot")
.addString("errorReportLocation", null)
.toJobParameters();
testWith(jobParameters, expectedFile, actualFile);
}
Expand All @@ -268,7 +257,6 @@ public void test_output_format_extended() throws Exception {
.addString("outputFormat", "extended")
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "uniprot")
.addString("errorReportLocation", null)
.addString("postIntervalSize", String.valueOf(-1))
.toJobParameters();
testWith(jobParameters, expectedFile, actualFile);
Expand All @@ -287,7 +275,6 @@ public void test_output_format_minimal() throws Exception {
.addString("outputFormat", "minimal")
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "uniprot")
.addString("errorReportLocation", null)
.addString("postIntervalSize", String.valueOf(-1))
.toJobParameters();
testWith(jobParameters, expectedFile, actualFile);
Expand All @@ -310,7 +297,6 @@ public void test_output_format_with_formatFileHeaders() throws Exception {
.addString("outputFormat", "Hugo_Symbol,Entrez_Gene_Id,Center,NCBI_Build,Chromosome,Annotation_Status")
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "uniprot")
.addString("errorReportLocation", null)
.addString("postIntervalSize", String.valueOf(-1))
.toJobParameters();
testWith(jobParameters, expectedFile, actualFile);
Expand All @@ -328,7 +314,6 @@ public void test_if_output_contains_original_genomic_location() throws Exception
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "mskcc")
.addString("errorReportLocation", null)
.addString("postIntervalSize", String.valueOf(-1))
.addString("addOriginalGenomicLocation", String.valueOf(true))
.toJobParameters();
Expand All @@ -347,7 +332,6 @@ public void test_strip_off_first_matching_base() throws Exception {
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "mskcc")
.addString("errorReportLocation", null)
.addString("postIntervalSize", String.valueOf(-1))
.addString("stripMatchingBases", "first")
.toJobParameters();
Expand All @@ -366,7 +350,6 @@ public void test_no_strip_off_matching_bases() throws Exception {
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "mskcc")
.addString("errorReportLocation", null)
.addString("postIntervalSize", String.valueOf(-1))
.addString("stripMatchingBases", "none")
.toJobParameters();
Expand All @@ -385,7 +368,6 @@ public void test_ignore_original_genomic_location_in_input() throws Exception {
.addString("outputFilename", actualFile)
.addString("replace", String.valueOf(true))
.addString("isoformOverride", "mskcc")
.addString("errorReportLocation", null)
.addString("postIntervalSize", String.valueOf(-1))
.addString("stripMatchingBases", "none")
.addString("ignoreOriginalGenomicLocation", String.valueOf(true))
Expand All @@ -404,4 +386,4 @@ private void testWith(JobParameters jobParameters, String expectedPath, String a
assertEquals("COMPLETED", actualJobExitStatus.getExitCode());
AssertFile.assertFileEquals(expectedResult, actualResult);
}
}
}
Loading

0 comments on commit fcc131a

Please sign in to comment.