Skip to content

Commit

Permalink
feat: adding more protobuf files and generated code (#49)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Jan 30, 2024
1 parent 1cc2b6d commit fca2ec0
Show file tree
Hide file tree
Showing 38 changed files with 16,879 additions and 2 deletions.
20 changes: 18 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,27 @@ PROTO_BASE := https://raw.githubusercontent.com/varfish-org/annonars/main

# Download the annonars `.proto` definitions from $(PROTO_BASE) into the local
# `protos/annonars/` tree, one `wget -O <local path> <remote URL>` per file.
# NOTE(review): the `mkdir -p ...{a,b}` lines rely on shell brace expansion,
# which a plain POSIX `sh` does not perform -- confirm that this Makefile sets
# SHELL to bash (not visible in this excerpt).
.PHONY: proto-fetch
proto-fetch:
mkdir -p protos/annonars/{genes,clinvar}
wget -O protos/annonars/genes/base.proto $(PROTO_BASE)/protos/annonars/genes/base.proto
mkdir -p protos/annonars/{clinvar,cons,dbsnp,functional,genes,gnomad,helixmtdb,regions}
wget -O protos/annonars/clinvar/minimal.proto $(PROTO_BASE)/protos/annonars/clinvar/minimal.proto
wget -O protos/annonars/clinvar/per_gene.proto $(PROTO_BASE)/protos/annonars/clinvar/per_gene.proto
wget -O protos/annonars/clinvar/sv.proto $(PROTO_BASE)/protos/annonars/clinvar/sv.proto
wget -O protos/annonars/cons/base.proto $(PROTO_BASE)/protos/annonars/cons/base.proto
wget -O protos/annonars/dbsnp/base.proto $(PROTO_BASE)/protos/annonars/dbsnp/base.proto
wget -O protos/annonars/functional/refseq.proto $(PROTO_BASE)/protos/annonars/functional/refseq.proto
wget -O protos/annonars/gnomad/exac_cnv.proto $(PROTO_BASE)/protos/annonars/gnomad/exac_cnv.proto
wget -O protos/annonars/gnomad/gnomad2.proto $(PROTO_BASE)/protos/annonars/gnomad/gnomad2.proto
wget -O protos/annonars/gnomad/gnomad3.proto $(PROTO_BASE)/protos/annonars/gnomad/gnomad3.proto
wget -O protos/annonars/gnomad/gnomad4.proto $(PROTO_BASE)/protos/annonars/gnomad/gnomad4.proto
wget -O protos/annonars/gnomad/gnomad_cnv4.proto $(PROTO_BASE)/protos/annonars/gnomad/gnomad_cnv4.proto
wget -O protos/annonars/gnomad/gnomad_sv2.proto $(PROTO_BASE)/protos/annonars/gnomad/gnomad_sv2.proto
wget -O protos/annonars/gnomad/gnomad_sv4.proto $(PROTO_BASE)/protos/annonars/gnomad/gnomad_sv4.proto
wget -O protos/annonars/gnomad/mtdna.proto $(PROTO_BASE)/protos/annonars/gnomad/mtdna.proto
wget -O protos/annonars/gnomad/vep_common.proto $(PROTO_BASE)/protos/annonars/gnomad/vep_common.proto
wget -O protos/annonars/gnomad/vep_gnomad2.proto $(PROTO_BASE)/protos/annonars/gnomad/vep_gnomad2.proto
wget -O protos/annonars/gnomad/vep_gnomad3.proto $(PROTO_BASE)/protos/annonars/gnomad/vep_gnomad3.proto
wget -O protos/annonars/gnomad/vep_gnomad4.proto $(PROTO_BASE)/protos/annonars/gnomad/vep_gnomad4.proto
wget -O protos/annonars/helixmtdb/base.proto $(PROTO_BASE)/protos/annonars/helixmtdb/base.proto
wget -O protos/annonars/regions/clingen.proto $(PROTO_BASE)/protos/annonars/regions/clingen.proto

.PHONY: proto-ts
proto-ts:
Expand Down
29 changes: 29 additions & 0 deletions protos/annonars/cons/base.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
syntax = "proto3";

package annonars.cons.base;

// Container message holding a sequence of `Record` entries.
message RecordList {
  // All records contained in this list.
  repeated Record records = 1;
}

// A single UCSC conservation record.
message Record {
  // Name of the chromosome.
  string chrom = 1;
  // Start position (1-based, inclusive).
  int32 start = 2;
  // Stop position (1-based, inclusive).
  int32 stop = 3;
  // Identifier assigned by HGNC.
  string hgnc_id = 4;
  // The ENST identifier.
  string enst_id = 5;
  // Number of the exon, counted from 1.
  int32 exon_num = 6;
  // Number of exons.
  int32 exon_count = 7;
  // The alignment.
  string alignment = 8;
}
17 changes: 17 additions & 0 deletions protos/annonars/dbsnp/base.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
syntax = "proto3";

package annonars.dbsnp.base;

// A single record from the dbSNP VCF.
message Record {
  // Name of the chromosome.
  string chrom = 1;
  // Start position (1-based).
  int32 pos = 2;
  // The reference allele.
  string ref_allele = 3;
  // The alternate allele.
  string alt_allele = 4;
  // The rs ID of the record.
  int32 rs_id = 5;
}
Empty file.
101 changes: 101 additions & 0 deletions protos/annonars/functional/refseq.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Protocol Buffers definitions for RefSeq functional elements.

syntax = "proto3";

package annonars.functional.refseq;

// Feature categories, mirroring the values of the `gbkey` field.
enum Category {
  // Category is not known.
  CATEGORY_UNKNOWN = 0;
  // Miscellaneous feature.
  CATEGORY_MISC_FEATURE = 1;
  // Miscellaneous recombination feature.
  CATEGORY_MISC_RECOMB = 2;
  // Miscellaneous structure.
  CATEGORY_MISC_STRUCTURE = 3;
  // Mobile element.
  CATEGORY_MOBILE_ELEMENT = 4;
  // Protein binding annotation.
  CATEGORY_PROTEIN_BIND = 5;
  // Region.
  CATEGORY_REGION = 6;
  // Regulatory element.
  CATEGORY_REGULATORY = 7;
  // Repeat region.
  CATEGORY_REPEAT_REGION = 8;
  // Origin of replication.
  CATEGORY_REP_ORIGIN = 9;
}

// Regulatory classes, mirroring the values of the `regulatory_class` field.
enum RegulatoryClass {
  // Regulatory class is not known.
  //
  // Unlike the other values, this name does not carry the full
  // `REGULATORY_CLASS_` prefix; renaming it would change generated code.
  REGULATORY_UNKNOWN = 0;
  // CCAAT signal.
  REGULATORY_CLASS_CAAT_SIGNAL = 1;
  // DNase I hypersensitive site.
  REGULATORY_CLASS_DNASE_I_HYPERSENSITIVE_SITE = 2;
  // Enhancer.
  REGULATORY_CLASS_ENHANCER = 3;
  // Enhancer blocking element.
  REGULATORY_CLASS_ENHANCER_BLOCKING_ELEMENT = 4;
  // Epigenetically modified region.
  REGULATORY_CLASS_EPIGENETICALLY_MODIFIED_REGION = 5;
  // GC signal.
  REGULATORY_CLASS_GC_SIGNAL = 6;
  // Imprinting control region.
  REGULATORY_CLASS_IMPRINTING_CONTROL_REGION = 7;
  // Insulator.
  REGULATORY_CLASS_INSULATOR = 8;
  // Locus control region.
  REGULATORY_CLASS_LOCUS_CONTROL_REGION = 9;
  // Matrix attachment region.
  REGULATORY_CLASS_MATRIX_ATTACHMENT_REGION = 10;
  // Micrococcal nuclease hypersensitive site.
  REGULATORY_CLASS_MICROCOCCAL_NUCLEASE_HYPERSENSITIVE_SITE = 11;
  // Promoter.
  REGULATORY_CLASS_PROMOTER = 12;
  // Replication regulatory region.
  REGULATORY_CLASS_REPLICATION_REGULATORY_REGION = 13;
  // Response element.
  REGULATORY_CLASS_RESPONSE_ELEMENT = 14;
  // Silencer.
  REGULATORY_CLASS_SILENCER = 15;
  // TATA box.
  REGULATORY_CLASS_TATA_BOX = 16;
  // Transcriptional cis regulatory region.
  REGULATORY_CLASS_TRANSCRIPTIONAL_CIS_REGULATORY_REGION = 17;
}

// A single RefSeq regulatory element.
message Record {
  // The chromosome.
  string chromosome = 1;
  // 1-based start position.
  int32 start = 2;
  // 1-based stop position.
  int32 stop = 3;

  // ID of the record.
  string id = 4;
  // The Dbxref value.
  string dbxref = 5;

  // The record's category.
  Category category = 6;
  // The record's regulatory class (explicit presence).
  optional RegulatoryClass regulatory_class = 7;
  // The note (explicit presence).
  optional string note = 8;
  // The experiment (explicit presence).
  optional string experiment = 9;
  // The function (explicit presence).
  optional string function = 10;
}

// A list of Dbxref IDs.
message DbxrefList {
  // The Dbxref IDs held by this list.
  repeated string dbxref = 1;
}
52 changes: 52 additions & 0 deletions protos/annonars/gnomad/exac_cnv.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Protocol buffers for representing ExAC CNV data.

syntax = "proto3";

package annonars.gnomad.exac_cnv;

// The kinds of CNV that can be represented.
enum CnvType {
  // CNV type is not known.
  CNV_TYPE_UNKNOWN = 0;
  // A deletion.
  CNV_TYPE_DEL = 1;
  // A duplication.
  CNV_TYPE_DUP = 2;
}

// The populations used in ExAC.
enum Population {
  // Population is not known.
  POPULATION_UNKNOWN = 0;
  // African.
  POPULATION_AFR = 1;
  // Admixed American.
  POPULATION_AMR = 2;
  // East Asian.
  POPULATION_EAS = 3;
  // Finnish.
  POPULATION_FIN = 4;
  // Non-Finnish European.
  POPULATION_NFE = 5;
  // South Asian.
  POPULATION_SAS = 6;
  // Other.
  POPULATION_OTHER = 7;
}

// A single record of the ExAC CNV data.
message Record {
  // Name of the chromosome.
  string chrom = 1;
  // Start position (1-based).
  int32 start = 2;
  // Stop position (1-based).
  int32 stop = 3;
  // Type of the variant.
  CnvType sv_type = 4;
  // Population of the record.
  Population population = 5;
}
Loading

0 comments on commit fca2ec0

Please sign in to comment.