diff --git a/deploy.py b/deploy.py index ae132cc5a6..998fc66107 100755 --- a/deploy.py +++ b/deploy.py @@ -207,7 +207,7 @@ def generate_configs(from_live=False): generate_config(helm_chart, 'templates/loculus-website-config.yaml', runtime_config_path, codespace_name, from_live) ingest_configmap_path = TEMP_DIR / 'config.yaml' - ingest_template_path = 'templates/loculus-ingest-config.yaml' + ingest_template_path = 'templates/ingest-config.yaml' ingest_configout_path = TEMP_DIR / 'ingest-config.yaml' generate_config(helm_chart, ingest_template_path, ingest_configmap_path, codespace_name, from_live, ingest_configout_path) diff --git a/docs/src/content/docs/guides/getting-started.md b/docs/src/content/docs/guides/getting-started.md index 18091fa44c..6888c75585 100644 --- a/docs/src/content/docs/guides/getting-started.md +++ b/docs/src/content/docs/guides/getting-started.md @@ -25,14 +25,13 @@ Helm is a package manager for Kubernetes that simplifies the deployment and mana To deploy Loculus, you'll need to have Helm installed. Helm will be used to manage the dependencies and deploy the Loculus application using the provided Helm chart. - ## External Database By default, the provided Helm chart will create temporary databases for testing and development purposes. These temporary databases are suitable for initial setup and experimentation. However, for a production deployment, you must use a permanent database. We recommend using a managed database service like Amazon RDS, Google Cloud SQL, or DigitalOcean Managed Databases, or you can run your own database server, but you must not use the built in database for production. -To use an external database, you'll need to provide the necessary connection details, such as the database URL, username, and password. +To use an external database, you'll need to provide the necessary connection details, such as the database URL, username, and password. These details are configured in the `secrets` section of the `values.yaml` file. 
@@ -53,8 +52,8 @@ secrets: password: "unsecure" port: "5432" ``` -You can also use sealed secrets, see the [Sealed Secrets](#sealed-secrets) section for more information. +You can also use sealed secrets, see the [Sealed Secrets](#sealed-secrets) section for more information. ## Clone the repository @@ -107,7 +106,7 @@ organisms: displayName: INSDC accession customDisplay: type: link - url: "https://www.ncbi.nlm.nih.gov/nuccore/{{value}}" + url: "https://www.ncbi.nlm.nih.gov/nuccore/__value__" website: tableColumns: - country @@ -142,9 +141,13 @@ Additionally, the `tableColumns` section defines which metadata fields are shown You can add multiple organisms under the organisms section, each with its own unique configuration. ## Secrets + Our secrets configuration supports three types of secrets. + ### `raw` + This is the simplest type of secret, it is just a key value pair. + ```yaml secrets: database: @@ -154,8 +157,11 @@ secrets: username: "postgres" password: "password" ``` + ### `sealedsecret` + This is a sealed secret, it is encrypted and can only be decrypted by the cluster. + ```yaml secrets: database: @@ -168,7 +174,9 @@ secrets: ``` ### `autogen` + This is a secret that is automatically generated by the helm chart. 
+ ```yaml secrets: secretKey: diff --git a/ingest/Snakefile b/ingest/Snakefile index 3c0a6a610a..988d827de2 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -149,9 +149,10 @@ rule get_previous_submissions: hashes="results/previous_submissions.json", params: log_level=LOG_LEVEL, + sleep=config["post_start_sleep"], shell: """ - sleep 120 # Run only once keycloak is up and database wiped + sleep {params.sleep} python scripts/call_loculus.py \ --mode get-submitted \ --config-file {input.config} \ diff --git a/ingest/config/defaults.yaml b/ingest/config/defaults.yaml index 4c658a7843..e76d30312f 100644 --- a/ingest/config/defaults.yaml +++ b/ingest/config/defaults.yaml @@ -1,5 +1,6 @@ # Values here are defaults for the `config` variable in the Snakefile # Purpose is to keep the `values.yaml` config file clean +post_start_sleep: 0 log_level: DEBUG compound_country_field: ncbi_geo_location fasta_id_field: genbank_accession @@ -32,6 +33,7 @@ keep: - ncbi_virus_name - ncbi_virus_tax_id - sequence_md5 + - genbank_accession all_fields: - accession - bioprojects diff --git a/ingest/scripts/prepare_metadata.py b/ingest/scripts/prepare_metadata.py index 5b3debaa94..d6634d4d80 100644 --- a/ingest/scripts/prepare_metadata.py +++ b/ingest/scripts/prepare_metadata.py @@ -100,7 +100,10 @@ def main(config_file: str, input: str, sequence_hashes: str, output: str, log_le # Calculate overall hash of metadata + sequence for record in metadata: - sequence_hash = sequence_hashes.get(record[config.rename[config.fasta_id_field]], "") + fasta_id_field = config.fasta_id_field + if config.fasta_id_field in config.rename: + fasta_id_field = config.rename[config.fasta_id_field] + sequence_hash = sequence_hashes.get(record[fasta_id_field], "") if sequence_hash == "": raise ValueError(f"No hash found for {record[config.fasta_id_field]}") @@ -109,7 +112,7 @@ def main(config_file: str, input: str, sequence_hashes: str, output: str, log_le record["hash"] = 
hashlib.md5(prehash.encode()).hexdigest() - meta_dict = {rec[config.rename[config.fasta_id_field]]: rec for rec in metadata} + meta_dict = {rec[fasta_id_field]: rec for rec in metadata} Path(output).write_text(json.dumps(meta_dict, indent=4)) diff --git a/kubernetes/loculus/templates/_common-metadata.tpl b/kubernetes/loculus/templates/_common-metadata.tpl index 6bf8e5c6b9..b15cfe8f0c 100644 --- a/kubernetes/loculus/templates/_common-metadata.tpl +++ b/kubernetes/loculus/templates/_common-metadata.tpl @@ -65,9 +65,20 @@ fields: type: string notSearchable: true header: Data Use Terms + customDisplay: + type: link + url: "__value__" {{- end}} {{- end}} +{{/* Patches schema by adding to it */}} +{{- define "loculus.patchMetadataSchema" -}} +{{- $patchedSchema := deepCopy . }} +{{- $toAdd := . | dig "metadataAdd" list -}} +{{- $patchedMetadata := concat .metadata $toAdd -}} +{{- set $patchedSchema "metadata" $patchedMetadata | toYaml -}} +{{- end -}} + {{/* Generate website config from passed config object */}} {{- define "loculus.generateWebsiteConfig" }} name: {{ quote $.Values.name }} @@ -84,9 +95,8 @@ accessionPrefix: {{ quote $.Values.accessionPrefix }} organisms: {{- range $key, $instance := (.Values.organisms | default .Values.defaultOrganisms) }} {{ $key }}: - schema: - {{- with $instance.schema }} + {{- with ($instance.schema | include "loculus.patchMetadataSchema" | fromYaml) }} instanceName: {{ quote .instanceName }} loadSequencesAutomatically: {{ .loadSequencesAutomatically | default false }} {{ if .image }} @@ -96,8 +106,7 @@ organisms: description: {{ quote .description }} {{ end }} primaryKey: accessionVersion - inputFields: - {{ $instance.schema.inputFields | toYaml | nindent 8}} + inputFields: {{- include "loculus.inputFields" . | nindent 8 }} metadata: {{ $metadata := concat $commonMetadata .metadata | include "loculus.generateWebsiteMetadata" @@ -116,7 +125,7 @@ organisms: fields: {{- range . 
}} - name: {{ quote .name }} - type: {{ quote .type }} + type: {{ .type | default "string" | quote }} {{- if .autocomplete }} autocomplete: {{ .autocomplete }} {{- end }} @@ -140,9 +149,7 @@ fields: type: {{ quote .customDisplay.type }} url: {{ .customDisplay.url }} {{- end }} - {{- if .header }} - header: {{ .header }} - {{- end }} + header: {{ default "Other" .header }} {{- end}} {{- end}} @@ -159,7 +166,10 @@ organisms: {{- with $instance.schema }} instanceName: {{ quote .instanceName }} metadata: - {{ $metadata := include "loculus.generateBackendMetadata" .metadata | fromYaml }} + {{ $metadata := (include "loculus.patchMetadataSchema" . + | fromYaml).metadata + | include "loculus.generateBackendMetadata" + | fromYaml }} {{ $metadata.fields | toYaml | nindent 8 }} {{- end }} referenceGenomes: @@ -172,7 +182,7 @@ organisms: fields: {{- range . }} - name: {{ quote .name }} - type: {{ quote .type }} + type: {{ .type | default "string" | quote }} {{- if .required }} required: {{ .required }} {{- end }} diff --git a/kubernetes/loculus/templates/_ingestRenameFromValues.tpl b/kubernetes/loculus/templates/_ingestRenameFromValues.tpl new file mode 100644 index 0000000000..08c8be45bb --- /dev/null +++ b/kubernetes/loculus/templates/_ingestRenameFromValues.tpl @@ -0,0 +1,11 @@ +{{- define "loculus.ingestRename" -}} +{{- $metadata := . }} +{{- $ingestRename := dict }} +{{- range $field := $metadata }} + {{- if hasKey $field "ingest" }} + {{- $_ := set $ingestRename (index $field "ingest") (index $field "name") }} + {{- end }} +{{- end }} +{{- $output := dict "rename" $ingestRename }} +{{- toYaml $output }} +{{- end -}} \ No newline at end of file diff --git a/kubernetes/loculus/templates/_inputFieldsFromValues.tpl b/kubernetes/loculus/templates/_inputFieldsFromValues.tpl new file mode 100644 index 0000000000..90a16ed164 --- /dev/null +++ b/kubernetes/loculus/templates/_inputFieldsFromValues.tpl @@ -0,0 +1,48 @@ +{{- define "loculus.inputFields" -}} +{{- $data := . 
}} +{{- $metadata := $data.metadata }} +{{- $extraFields := $data.extraInputFields }} +{{- $TO_KEEP := list "name" "displayName" "definition" "guidance" "example" "required" }} + + +{{- $fieldsDict := dict }} +{{- $index := 0 }} + +{{- /* Add fields with position "first" to the dict */}} +{{- range $field := $extraFields }} + {{- if eq $field.position "first" }} + {{- $_ := set $fieldsDict (printf "%03d" $index) $field }} + {{- $index = add $index 1 }} + {{- end }} +{{- end }} + +{{- /* Add filtered metadata fields to the dict */}} +{{- range $field := $metadata }} + {{- if not (hasKey $field "noInput") }} + {{- $_ := set $fieldsDict (printf "%03d" $index) $field }} + {{- $index = add $index 1 }} + {{- end }} +{{- end }} + +{{- /* Add fields with position "last" to the dict */}} +{{- range $field := $extraFields }} + {{- if eq $field.position "last" }} + {{- $_ := set $fieldsDict (printf "%03d" $index) $field }} + {{- $index = add $index 1 }} + {{- end }} +{{- end }} + +{{- /* Iterate over sorted index to get list of values (sorted by key) */}} +{{- $inputFields := list }} +{{- range $k:= keys $fieldsDict | sortAlpha }} + {{- $toAdd := dict }} + {{- range $k, $v := (index $fieldsDict $k) }} + {{- if has $k $TO_KEEP }} + {{- $_ := set $toAdd $k $v }} + {{- end }} + {{- end }} + {{- $inputFields = append $inputFields $toAdd }} +{{- end }} + +{{- toYaml $inputFields }} +{{- end -}} \ No newline at end of file diff --git a/kubernetes/loculus/templates/_preprocessingFromValues.tpl b/kubernetes/loculus/templates/_preprocessingFromValues.tpl new file mode 100644 index 0000000000..77631af85f --- /dev/null +++ b/kubernetes/loculus/templates/_preprocessingFromValues.tpl @@ -0,0 +1,39 @@ +{{- define "loculus.preprocessingSpecs" -}} +{{- $metadata := . 
}} +{{- $specs := dict }} + +{{- range $field := $metadata }} +{{- $name := index $field "name" }} +{{- $spec := dict "function" "identity" "inputs" (dict "input" $name) }} + +{{- if hasKey $field "type" }} + {{- $type := index $field "type" }} + {{- if eq $type "int" }} + {{- $_ := set $spec "args" (dict "type" "int") }} + {{- else if eq $type "float" }} + {{- $_ := set $spec "args" (dict "type" "float") }} + {{- end }} +{{- end }} + +{{- if hasKey $field "preprocessing" }} + {{- $preprocessing := index $field "preprocessing" }} + {{- if eq (typeOf $preprocessing) "string" }} + {{- $_ := set $spec "inputs" (dict "input" $preprocessing) }} + {{- else }} + {{- if hasKey $preprocessing "function" }} + {{- $_ := set $spec "function" (index $preprocessing "function") }} + {{- end }} + {{- if hasKey $preprocessing "args" }} + {{- $_ := set $spec "args" (index $preprocessing "args") }} + {{- end }} + {{- if hasKey $preprocessing "inputs" }} + {{- $_ := set $spec "inputs" (index $preprocessing "inputs") }} + {{- end }} + {{- end }} +{{- end }} + +{{- $_ := set $specs $name $spec }} +{{- end }} + +{{- toYaml $specs }} +{{- end -}} \ No newline at end of file diff --git a/kubernetes/loculus/templates/ingest-config.yaml b/kubernetes/loculus/templates/ingest-config.yaml new file mode 100644 index 0000000000..472e728540 --- /dev/null +++ b/kubernetes/loculus/templates/ingest-config.yaml @@ -0,0 +1,20 @@ +{{- $testconfig := .Values.testconfig | default false }} +{{- $backendHost := .Values.environment | eq "server" | ternary (printf "https://backend-%s" $.Values.host) ($testconfig | ternary "http://localhost:8079" "http://loculus-backend-service:8079") }} +{{- $keycloakHost := .Values.environment | eq "server" | ternary (printf "https://authentication-%s" $.Values.host) ($testconfig | ternary "http://localhost:8083" "http://loculus-keycloak-service:8083") }} +{{- range $key, $values := (.Values.organisms | default .Values.defaultOrganisms) }} +{{- if $values.ingest }} +{{- 
$metadata := (include "loculus.patchMetadataSchema" $values.schema | fromYaml).metadata }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: loculus-ingest-config-{{ $key }} +data: + config.yaml: | + {{- $values.ingest.configFile | toYaml | nindent 4 }} + organism: {{ $key }} + backend_url: {{ $backendHost }} + keycloak_token_url: {{ $keycloakHost -}}/realms/loculus/protocol/openid-connect/token + {{- include "loculus.ingestRename" $metadata | nindent 4 }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kubernetes/loculus/templates/loculus-ingest-deployment.yaml b/kubernetes/loculus/templates/ingest-deployment.yaml similarity index 100% rename from kubernetes/loculus/templates/loculus-ingest-deployment.yaml rename to kubernetes/loculus/templates/ingest-deployment.yaml diff --git a/kubernetes/loculus/templates/lapis-silo-database-config.yaml b/kubernetes/loculus/templates/lapis-silo-database-config.yaml index 8eaff7258d..e64edd4160 100644 --- a/kubernetes/loculus/templates/lapis-silo-database-config.yaml +++ b/kubernetes/loculus/templates/lapis-silo-database-config.yaml @@ -9,7 +9,7 @@ kind: ConfigMap metadata: name: lapis-silo-database-config-{{ $key }} data: - {{- with $instance.schema }} + {{- with ($instance.schema | include "loculus.patchMetadataSchema" | fromYaml) }} database_config.yaml: | schema: instanceName: {{ .instanceName }} @@ -17,7 +17,8 @@ data: metadata: {{- range (concat $commonMetadata .metadata) }} - name: {{ .name }} - type: {{ (.type | eq "timestamp") | ternary "int" ((.type | eq "authors") | ternary "string" .type) }} + {{- $type := default "string" .type }} + type: {{ ($type | eq "timestamp") | ternary "int" (($type | eq "authors") | ternary "string" $type) }} {{- if .generateIndex }} generateIndex: {{ .generateIndex }} {{- end }} @@ -46,5 +47,6 @@ data: {{ range $importScriptWrapperLines }} {{ . 
}}{{ end }} - pangolineage_alias.json: "{}" + pangolineage_alias.json: | + {{ $instance.pangolineage_alias | default dict | toJson }} {{- end }} \ No newline at end of file diff --git a/kubernetes/loculus/templates/loculus-ingest-config.yaml b/kubernetes/loculus/templates/loculus-ingest-config.yaml deleted file mode 100644 index b2fec62127..0000000000 --- a/kubernetes/loculus/templates/loculus-ingest-config.yaml +++ /dev/null @@ -1,23 +0,0 @@ -{{ $backendHost := .Values.disableBackend | ternary - "http://host.k3d.internal:8079" - "http://loculus-backend-service:8079" -}} -{{- $keycloakHost := $.Values.environment | eq "server" | ternary - (printf "https://authentication-%s" $.Values.host) - "http://loculus-keycloak-service:8083" -}} -{{- range $key, $values := (.Values.organisms | default .Values.defaultOrganisms) }} -{{- if $values.ingest }} ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: loculus-ingest-config-{{ $key }} -data: - config.yaml: | - {{- $values.ingest.configFile | toYaml | nindent 4 }} - organism: {{ $key }} - backend_url: {{ $backendHost }} - keycloak_token_url: {{ $keycloakHost -}}/realms/loculus/protocol/openid-connect/token -{{- end }} -{{- end }} \ No newline at end of file diff --git a/kubernetes/loculus/templates/loculus-preprocessing-config.yaml b/kubernetes/loculus/templates/loculus-preprocessing-config.yaml index 348a278485..12cf2d3779 100644 --- a/kubernetes/loculus/templates/loculus-preprocessing-config.yaml +++ b/kubernetes/loculus/templates/loculus-preprocessing-config.yaml @@ -1,4 +1,5 @@ {{- range $organism, $organismConfig := (.Values.organisms | default .Values.defaultOrganisms) }} +{{- $metadata := ($organismConfig.schema | include "loculus.patchMetadataSchema" | fromYaml).metadata }} {{- range $processingIndex, $processingConfig := $organismConfig.preprocessing }} {{- if $processingConfig.configFile }} --- @@ -9,6 +10,8 @@ metadata: data: preprocessing-config.yaml: | {{- $processingConfig.configFile | toYaml | nindent 4 
}} + processing_spec: + {{- include "loculus.preprocessingSpecs" $metadata | nindent 6 }} organism: {{ $organism }} {{- end }} {{- end }} diff --git a/kubernetes/loculus/templates/loculus-website-config.yaml b/kubernetes/loculus/templates/loculus-website-config.yaml index 808104858f..544a4a985d 100644 --- a/kubernetes/loculus/templates/loculus-website-config.yaml +++ b/kubernetes/loculus/templates/loculus-website-config.yaml @@ -16,7 +16,6 @@ metadata: data: website_config.json: | {{ include "loculus.generateWebsiteConfig" . | fromYaml | toJson }} - runtime_config.json: | { "name" : "{{ $.Values.name }}", @@ -38,7 +37,4 @@ data: {{- template "loculus.publicRuntimeConfig" dict "Values" .Values "externalLapisUrlConfig" $externalLapisUrlConfig -}} }, "backendKeycloakClientSecret" : "[[backendKeycloakClientSecret]]" - } - - - + } \ No newline at end of file diff --git a/kubernetes/loculus/values.yaml b/kubernetes/loculus/values.yaml index 2e52779551..7a7c01593d 100644 --- a/kubernetes/loculus/values.yaml +++ b/kubernetes/loculus/values.yaml @@ -18,25 +18,1019 @@ logo: url: "/favicon.svg" width: 100 height: 100 +defaultOrganismConfig: &defaultOrganismConfig + schema: &schema + loadSequencesAutomatically: true + instanceName: "Ebola Zaire" + description: "Zaire ebolavirus is a species of the genus Ebolavirus, which is a member of the Filoviridae family. It is the most dangerous of the known Ebola viruses, and is associated with the highest case-fatality rate." 
+ image: "https://cdn.britannica.com/01/179201-050-FED1B381/filamentous-ebolavirus-particles-scanning-electron-micrograph-cell.jpg?w=400&h=300&c=crop" + ### Field list + ## General fields + # name: Key used across app to refer to this field (required) + # displayName: Name displayed to users (optional) | used by website (for input and details) + # type: Type of field (default: "string") + # required: Whether the field is required by backend (default false) | used by input, backend, preprocessing + ## Input related + # noInput: Whether the field should not be input by the user (only the presence of the key is checked, so omit it instead of setting it to false) + # definition + # guidance + # example + # ontology_id + ## Sequence details UI related + # header: Grouping of fields in sequence details UI (default: "Other") + # hideOnSequenceDetailsPage: Whether the field should be hidden on the sequence details page (default false) + # customDisplay: Custom display dict for the field + # type: Enum with values "link" + # url: URL to link to with template __value__ for the field value: "https://www.ncbi.nlm.nih.gov/nuccore/__value__" + ## SearchUI related + # initiallyVisible: Whether it appears in the search UI by default (default false) + # generateIndex: Whether the field should be indexed for search (default false, only allowed for string fields) + # autocomplete: Whether the field should be used for search autocomplete (default false, only allowed for string fields and probably generateIndex should be true) + ## Ingest related + # ingest: Which NCBI field to map to this field (optional) + ## Preprocessing related + # preprocessing: Preprocessing function to run on the field (optional: defaults to identity function on input field with same name) + + ## TODO: Allow extra metadata field by particular organisms (e.g. 
clade/lineage) + metadata: + - name: sample_collection_date + displayName: Collection date + type: date + initiallyVisible: true + header: Sample details + ingest: ncbi_collection_date + preprocessing: + function: process_date + inputs: + date: sample_collection_date + release_date: ncbi_release_date + - name: ncbi_release_date + displayName: NCBI release date + type: date + header: "INSDC" + preprocessing: + function: parse_timestamp + inputs: + timestamp: ncbi_release_date + noInput: true + - name: ncbi_update_date + type: date + displayName: NCBI update date + header: "INSDC" + preprocessing: + function: parse_timestamp + inputs: + timestamp: ncbi_update_date + noInput: true + - name: geo_loc_country + displayName: Collection country + generateIndex: true + autocomplete: true + initiallyVisible: true + header: Sample details + ingest: country + - name: geo_loc_admin_1 + displayName: Collection subdivision level 1 + generateIndex: true + autocomplete: true + initiallyVisible: true + header: Sample details + ingest: division + - name: geo_loc_admin_2 + displayName: Collection subdivision level 2 + generateIndex: true + autocomplete: true + header: Sample details + - name: geo_loc_city + displayName: Collection city + generateIndex: true + autocomplete: true + header: Sample details + - name: geo_loc_site + ontology_id: GENEPIO:0100436 + definition: The name of a specific geographical location e.g. Credit River (rather + than river). + guidance: Provide the name of the specific geographical site using a specific noun + (a word that names a certain place, thing). 
+ example: Credit River + displayName: Collection site + header: Sample details + - name: specimen_collector_sample_id + displayName: Isolate name + header: Sample details + ingest: ncbi_isolate_name + - name: authors + displayName: Authors + type: authors + header: Authors + truncateColumnDisplayTo: 15 + ingest: ncbi_submitter_names + - name: author_affiliations + displayName: Author affiliations + generateIndex: true + autocomplete: true + truncateColumnDisplayTo: 15 + header: Authors + ingest: ncbi_submitter_affiliation + - name: ncbi_submitter_country + displayName: NCBI submitter country + generateIndex: true + autocomplete: true + hideOnSequenceDetailsPage: true + noInput: true + header: "INSDC" + - name: insdc_accession_base + header: "INSDC" + hideOnSequenceDetailsPage: true + noInput: true + - name: insdc_version + type: int + header: "INSDC" + hideOnSequenceDetailsPage: true + noInput: true + - name: insdc_accession_full + displayName: INSDC accession + customDisplay: + type: link + url: "https://www.ncbi.nlm.nih.gov/nuccore/__value__" + header: "INSDC" + ingest: genbank_accession + noInput: true + - name: bioproject_accessions + customDisplay: + type: link + url: "https://www.ncbi.nlm.nih.gov/bioproject/__value__" + header: "INSDC" + ingest: bioprojects + - name: biosample_accession + customDisplay: + type: link + url: "https://www.ncbi.nlm.nih.gov/biosample/__value__" + header: "INSDC" + noInput: true + - name: culture_id + displayName: Culture ID + header: Sample details + - name: sample_received_date + ontology_id: GENEPIO:0001177 + definition: The date on which the sample was received by the laboratory. + guidance: Alternative if "sample_collection_date" is not available. Record the date the sample was received by the laboratory. + Required granularity includes year, month and day. Before sharing this data, ensure this date is not + considered identifiable information. 
If this date is considered identifiable, + it is acceptable to add "jitter" to the received date by adding or subtracting + calendar days. Do not change the received date in your original records. Alternatively, + collection_date may be used as a substitute in the data you share. + The date should be provided in ISO 8601 standard format "YYYY-MM-DD". + example: '2020-03-20' + displayName: Sample Received Date + type: date + preprocessing: + function: process_date + inputs: + date: sample_received_date + header: Sample details + - name: sample_type + displayName: Sample type + header: Sampling + - name: purpose_of_sampling + ontology_id: GENEPIO:0001198 + definition: The reason that the sample was collected. + guidance: Select a value from the pick list in the template + example: Diagnostic testing + header: Sampling + - name: presampling_activity + ontology_id: GENEPIO:0100433 + definition: The activities or variables introduced upstream of sample collection + that may affect the sample collected. + guidance: If there was presampling activity that would affect the sample prior to + collection (this is different than sample processing which happens after the sample + is collected), provide the experimental activities by selecting one or more values + from the template pick list. If the information is unknown or cannot be provided, + leave blank or provide a null value. + example: Antimicrobial pre-treatment [GENEPIO:0100537] + displayName: Presampling activity + header: Sampling + - name: anatomical_material + ontology_id: GENEPIO:0001211 + definition: A substance obtained from an anatomical part of an organism e.g. tissue, + blood. + guidance: 'Provide a descriptor if an anatomical material was sampled. Use the pick + list provided in the template. If a desired term is missing from the pick list, + use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/uberon. + If not applicable, leave blank.' 
+ example: Blood [UBERON:0000178] + displayName: Anatomical material + header: Sampling + - name: anatomical_part + ontology_id: GENEPIO:0001214 + definition: An anatomical part of an organism e.g. oropharynx. + guidance: 'Provide a descriptor if an anatomical part was sampled. Use the pick + list provided in the template. If a desired term is missing from the pick list, + use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/uberon. + If not applicable, leave blank.' + example: Nasopharynx (NP) [UBERON:0001728] + displayName: Anatomical part + header: Sampling + - name: body_product + ontology_id: GENEPIO:0001216 + definition: A substance excreted/secreted from an organism e.g. feces, urine, sweat. + guidance: 'Provide a descriptor if a body product was sampled. Use the pick list + provided in the template. If a desired term is missing from the pick list, use + this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/uberon. + If not applicable, leave blank.' + example: Feces [UBERON:0001988] + displayName: Body product + header: Sampling + - name: environmental_material + ontology_id: GENEPIO:0001223 + definition: A substance obtained from the natural or man-made environment e.g. soil, + water, sewage, door handle, bed handrail, face mask. + guidance: 'Provide a descriptor if an environmental material was sampled. Use the + pick list provided in the template. If a desired term is missing from the pick + list, use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/envo. + If not applicable, leave blank.' + example: Face mask [OBI:0002787] + displayName: Environmental material + header: Sampling + - name: environmental_site + ontology_id: GENEPIO:0001232 + definition: An environmental location may describe a site in the natural or built + environment e.g. hospital, wet market, bat cave. 
+ guidance: 'Provide a descriptor if an environmental site was sampled. Use the pick + list provided in the template. If a desired term is missing from the pick list, + use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/envo. + If not applicable, leave blank.' + example: Hospital [ENVO:00002173] + displayName: Environmental site + header: Sampling + - name: collection_device + ontology_id: GENEPIO:0001234 + definition: The instrument or container used to collect the sample e.g. swab. + guidance: 'Provide a descriptor if a collection device was used for sampling. Use + the pick list provided in the template. If a desired term is missing from the + pick list, use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/obi. + If not applicable, leave blank.' + example: Swab [GENEPIO:0100027] + displayName: Collection device + header: Sampling + - name: collection_method + ontology_id: GENEPIO:0001241 + definition: The process used to collect the sample e.g. phlebotomy, necropsy. + guidance: 'Provide a descriptor if a collection method was used for sampling. Use + the pick list provided in the template. If a desired term is missing from the + pick list, use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/obi. + If not applicable, leave blank.' + example: Bronchoalveolar lavage (BAL) [GENEPIO:0100032] + displayName: Collection method + header: Sampling + - name: food_product + ontology_id: GENEPIO:0100444 + definition: A material consumed and digested for nutritional value or enjoyment. + guidance: This field includes animal feed. If applicable, select the standardized + term and ontology ID for the anatomical material from the picklist provided. Multiple + values can be provided, separated by a semi-colon. 
+ example: Feather meal [FOODON:00003927]; Bone meal [ENVO:02000054]; Chicken breast + [FOODON:00002703] + displayName: Food Product + header: Sampling + - name: food_product_properties + ontology_id: GENEPIO:0100445 + definition: Any characteristic of the food product pertaining to its state, processing, + a label claim, or implications for consumers. + guidance: Provide any characteristics of the food product including whether it has + been cooked, processed, preserved, any known information about its state (e.g. + raw, ready-to-eat), any known information about its containment (e.g. canned), + and any information about a label claim (e.g. organic, fat-free). + example: Food (chopped) [FOODON:00002777]; Ready-to-eat (RTE) [FOODON:03316636] + displayName: Food Product Properties + header: Sampling + - name: specimen_processing + ontology_id: GENEPIO:0100435 + definition: The processing applied to samples post-collection, prior to further + testing, characterization, or isolation procedures. + guidance: Provide the sample processing information by selecting a value from the + template pick list. If the information is unknown or cannot be provided, leave + blank or provide a null value. + example: Samples pooled [OBI:0600016] + displayName: Specimen Processing + header: Specimen processing + - name: specimen_processing_details + ontology_id: GENEPIO:0100311 + definition: Detailed information regarding the processing applied to a sample during + or after receiving the sample. + guidance: Provide a free text description of any processing details applied to a + sample. + example: 25 swabs were pooled and further prepared as a single sample during library + prep. + displayName: Specimen Processing Details + header: Specimen processing + - name: experimental_specimen_role_type + ontology_id: GENEPIO:0100921 + definition: The type of role that the sample represents in the experiment. + guidance: Samples can play different types of roles in experiments. 
A sample under + study in one experiment may act as a control or be a replicate of another sample + in another experiment. This field is used to distinguish samples under study from + controls, replicates, etc. If the sample acted as an experimental control or + a replicate, select a role type from the picklist. If the sample was not a control, + leave blank or select "Not Applicable". + example: Positive experimental control [GENEPIO:0101018] + displayName: Experimental Specimen Role Type + header: Specimen processing + - name: host_age + ontology_id: GENEPIO:0001392 + definition: Age of host at the time of sampling. + guidance: If known, provide age. Age-binning is also acceptable. + example: '79' + displayName: Host Age + type: int + header: Host + - name: host_age_bin + ontology_id: GENEPIO:0001394 + definition: The age category of the host at the time of sampling. + guidance: Age bins in 10 year intervals have been provided. If a host's age cannot + be specified due to privacy concerns, an age bin can be used as an alternative. + example: 50 - 59 [GENEPIO:0100054] + displayName: Host Age Bin + header: Host + - name: host_gender + ontology_id: GENEPIO:0001395 + definition: The gender of the host at the time of sample collection. + guidance: If known, select a value from the pick list. + example: Male [NCIT:C46109] + displayName: Host Gender + header: Host + - name: host_origin_country + ontology_id: GENEPIO:0100438 + definition: The country of origin of the host. + guidance: If a sample is from a human or animal host that originated from outside + of Canada, provide the name of the country where the host originated by selecting + a value from the template pick list. If the information is unknown or cannot be + provided, leave blank or provide a null value. 
+ example: South Africa [GAZ:00001094] + displayName: Host Origin Country + header: Host + - name: host_disease + ontology_id: GENEPIO:0001391 + definition: The name of the disease experienced by the host. + guidance: "This field is only required if the Pathogen.cl package was selected. + If the host was sick, provide the name of the disease. The standardized term + can be sourced from this look-up service: https://www.ebi.ac.uk/ols/ontologies/doid + If the disease is not known, put \u201Cmissing\u201D." + example: mastitis, gastroenteritis + displayName: Host disease + header: Host + - name: signs_and_symptoms + ontology_id: GENEPIO:0001400 + definition: A perceived change in function or sensation, (loss, disturbance or appearance) + indicative of a disease, reported by a patient. + guidance: Select all of the symptoms experienced by the host from the pick list. + example: Cough [HP:0012735], Fever [HP:0001945], Rigors (fever shakes) [HP:0025145] + displayName: Signs and symptoms + header: Host + - name: host_health_state + ontology_id: GENEPIO:0001388 + definition: Health status of the host at the time of sample collection. + guidance: If known, select a value from the pick list. + example: Asymptomatic [NCIT:C3833] + displayName: Host Health State + header: Host + - name: host_health_outcome + ontology_id: GENEPIO:0001390 + definition: Disease outcome in the host. + guidance: If known, select a value from the pick list. + example: Recovered [NCIT:C49498] + displayName: Host Health Outcome + header: Host + - name: travel_history + ontology_id: GENEPIO:0001416 + definition: Travel history in last six months. + guidance: Specify the countries (and more granular locations if known) travelled + in the last six months; can include multiple travels. Separate multiple travel + events with a semicolon. Provide as free text. 
+ example: Canada, Vancouver; USA, Seattle; Italy, Milan + displayName: Travel History + header: Host + - name: exposure_event + ontology_id: GENEPIO:0001417 + definition: Event leading to exposure. + guidance: If known, select the exposure event from the pick list. + example: Mass Gathering [GENEPIO:0100237] + displayName: Exposure event + header: Host + - name: host_role + ontology_id: GENEPIO:0001419 + definition: The role of the host in relation to the exposure setting. + guidance: Select the host's personal role(s) from the pick list provided in the + template. If the desired term is missing, contact the curation team. + example: Patient [OMRSE:00000030] + displayName: Host role + header: Host + - name: exposure_setting + ontology_id: GENEPIO:0001428 + definition: The setting leading to exposure. + guidance: Select the host exposure setting(s) from the pick list provided in the + template. If a desired term is missing, contact the curation team. + example: Healthcare Setting [GENEPIO:0100201] + displayName: Exposure setting + header: Host + - name: exposure_details + ontology_id: GENEPIO:0001431 + definition: Additional host exposure information. + guidance: Free text description of the exposure. + example: 'Host role - Other: Bus Driver' + displayName: Exposure details + header: Host + - name: previous_infection_disease + definition: The name of the disease previously experienced by the host. + guidance: Provide the name(s) of the previous or ongoing disease(s). Multiple diseases + can be separated by a semi-colon. + example: COVID-19 + displayName: Previous infection (disease) + header: Host + - name: previous_infection_organism + definition: The name of the pathogen causing the disease previously experienced + by the host. + guidance: Provide the name(s) of the pathogen(s) causing the previous or ongoing + infections. Multiple pathogen names can be separated using a semi-colon. 
+ example: Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2) + displayName: Previous infection (organism) + header: Host + - name: host_vaccination_status + ontology_id: GENEPIO:0001404 + definition: The vaccination status of the host (fully vaccinated, partially vaccinated, + or not vaccinated). + guidance: Select the vaccination status of the host from the pick list. + example: Fully Vaccinated [GENEPIO:0100100] + displayName: Host Vaccination Status + header: Host + - name: purpose_of_sequencing + ontology_id: GENEPIO:0001445 + definition: The reason that the sample was sequenced. + guidance: The reason why a sample was originally collected may differ from the reason + why it was selected for sequencing. The reason a sample was sequenced may provide + information about potential biases in sequencing strategy. Provide the purpose + of sequencing from the picklist in the template. The reason for sample collection + should be indicated in the "purpose of sampling" field. + example: Baseline surveillance (random sampling) [GENEPIO:0100005] + displayName: Purpose Of Sequencing + header: Sequencing + - name: sequencing_date + ontology_id: GENEPIO:0001447 + definition: The date the sample was sequenced. + guidance: Provide the sequencing date in ISO 8601 standard format "YYYY-MM-DD". + example: '2021-04-26' + displayName: Sequencing Date + type: date + preprocessing: + function: process_date + inputs: + date: sequencing_date + header: Sequencing + - name: amplicon_pcr_primer_scheme + ontology_id: GENEPIO:0001456 + definition: The specifications of the primers (primer sequences, binding positions, + fragment size generated etc) used to generate the amplicons to be sequenced. + guidance: Provide the name and version of the primer scheme used to generate the + amplicons for sequencing. 
+ example: https://github.com/joshquick/artic-ncov2019/blob/master/primer_schemes/nCoV-2019/V3/nCoV-2019.tsv + displayName: Amplicon pcr primer scheme + header: Sequencing + - name: amplicon_size + ontology_id: GENEPIO:0001449 + definition: The length of the amplicon generated by PCR amplification. + guidance: Provide the amplicon size, including the units. + example: 300bp + displayName: Amplicon Size + header: Sequencing + - name: sequencing_instrument + ontology_id: GENEPIO:0001452 + definition: The model of the sequencing instrument used. + guidance: Select the sequencing instrument from the pick list. + example: Oxford Nanopore MinION [GENEPIO:0100142] + displayName: Sequencing Instrument + header: Sequencing + - name: sequencing_protocol + ontology_id: GENEPIO:0001454 + definition: The protocol used to generate the sequence. + guidance: 'Provide a free text description of the methods and materials used to + generate the sequence. Suggested text, fill in information where indicated.: "Viral + sequencing was performed following a tiling amplicon strategy using the primer scheme. Sequencing was performed using a sequencing instrument. + Libraries were prepared using library kit. "' + example: Genomes were generated through amplicon sequencing of 1200 bp amplicons + with Freed schema primers. Libraries were created using Illumina DNA Prep kits, + and sequence data was produced using Miseq Micro v2 (500 cycles) sequencing kits. + displayName: Sequencing protocol + header: Sequencing + - name: sequencing_assay_type + ontology_id: GENEPIO:0100997 + definition: The overarching sequencing methodology that was used to determine the + sequence of a biomaterial. + guidance: 'Example Guidance: Provide the name of the DNA or RNA sequencing technology + used in your study. If unsure refer to the protocol documentation, or provide + a null value.' 
+ example: whole genome sequencing assay [OBI:0002117] + displayName: Sequencing Assay Type + header: Sequencing + - name: sequenced_by_organization + ontology_id: GENEPIO:0100416 + definition: The name of the agency, organization or institution responsible for + sequencing the isolate's genome. + guidance: Provide the name of the agency, organization or institution that performed + the sequencing in full (avoid abbreviations). If the information is unknown or + cannot be provided, leave blank or provide a null value. + example: Public Health Agency of Canada (PHAC) [GENEPIO:0100551] + displayName: Sequenced By + header: Sequencing + - name: sequenced_by_contact_name + ontology_id: GENEPIO:0100471 + definition: The name or title of the contact responsible for follow-up regarding + the sequence. + guidance: Provide the name of an individual or their job title. As personnel turnover + may render the contact's name obsolete, it is preferable to provide a job + title for ensuring accuracy of information and institutional memory. If the information + is unknown or cannot be provided, leave blank or provide a null value. + example: Enterics Lab Manager + displayName: Sequenced By Contact Name + header: Sequencing + - name: sequenced_by_contact_email + ontology_id: GENEPIO:0100422 + definition: The email address of the contact responsible for follow-up regarding + the sequence. + guidance: Provide the email associated with the listed contact. As personnel turnover + may render an individual's email obsolete, it is preferable to provide an + address for a position or lab, to ensure accuracy of information and institutional + memory. If the information is unknown or cannot be provided, leave blank or provide + a null value. 
+ example: enterics@lab.ca + displayName: Sequenced By Contact Email + header: Sequencing + - name: raw_sequence_data_processing_method + ontology_id: GENEPIO:0001458 + definition: The method used for raw data processing such as removing barcodes, adapter + trimming, filtering etc. + guidance: Provide the name and version numbers of the software used to process the + raw data. + example: Porechop 0.2.3 + displayName: Raw sequence data processing method + header: Sequencing + - name: dehosting_method + ontology_id: GENEPIO:0001459 + definition: The method used to remove host reads from the pathogen sequence. + guidance: Provide the name and version number of the software used to remove host + reads. + example: Nanostripper 1.2.3 + displayName: Dehosting method + header: Sequencing + - name: reference_genome_accession + ontology_id: GENEPIO:0001485 + definition: A persistent, unique identifier of a genome database entry. + guidance: Provide the accession number of the reference genome used for mapping/assembly. + example: NC_045512.2 + displayName: Reference genome accession + header: Sequencing + - name: consensus_sequence_software_name + ontology_id: GENEPIO:0001463 + definition: The name of software used to generate the consensus sequence. + guidance: Provide the name of the software used to generate the consensus sequence. + example: Ivar + displayName: Consensus sequence software name + header: Sequencing + - name: consensus_sequence_software_version + ontology_id: GENEPIO:0001469 + definition: The version of the software used to generate the consensus sequence. + guidance: Provide the version of the software used to generate the consensus sequence. + example: '1.3' + displayName: Consensus sequence software version + header: Sequencing + - name: depth_of_coverage + ontology_id: GENEPIO:0001474 + definition: The average number of reads representing a given nucleotide in the reconstructed + sequence. + guidance: Provide value as a fold of coverage (as a number). 
+ example: 400 + displayName: Depth of coverage + type: int + header: Sequencing + - name: breadth_of_coverage + ontology_id: GENEPIO:0001475 + definition: The threshold used as a cut-off for the depth of coverage. + guidance: Provide the threshold fold coverage (as a number) + example: 100 + displayName: Breadth of coverage + type: int + header: Sequencing + - name: quality_control_method_name + ontology_id: GENEPIO:0100557 + definition: The name of the method used to assess whether a sequence passed a predetermined + quality control threshold. + guidance: Providing the name of the method used for quality control is very important + for interpreting the rest of the QC information. Method names can be provided + as the name of a pipeline or a link to a GitHub repository. Multiple methods should + be listed and separated by a semi-colon. Do not include QC tags in other fields + if no method name is provided. + example: ncov-tools + displayName: Quality control method name + header: Sequencing + - name: quality_control_method_version + ontology_id: GENEPIO:0100558 + definition: The version number of the method used to assess whether a sequence passed + a predetermined quality control threshold. + guidance: Methods updates can make big differences to their outputs. Provide the + version of the method used for quality control. The version can be expressed using + whatever convention the developer implements (e.g. date, semantic versioning). + If multiple methods were used, record the version numbers in the same order as + the method names. Separate the version numbers using a semi-colon. + example: "1.2.3" + displayName: Quality control method version + header: Sequencing + - name: quality_control_determination + ontology_id: GENEPIO:0100559 + definition: The determination of a quality control assessment. + guidance: Select a value from the pick list provided. 
If a desired value is missing, + submit a new term request to the PHA4GE QC Tag GitHub issue tracker using the New + Term Request form. + example: sequence failed quality control + displayName: Quality control determination + header: Sequencing + - name: quality_control_issues + ontology_id: GENEPIO:0100560 + definition: The reason contributing to, or causing, a low quality determination + in a quality control assessment. + guidance: Select a value from the pick list provided. If a desired value is missing, + submit a new term request to the PHA4GE QC Tag GitHub issue tracker using the New + Term Request form. + example: low average genome coverage + displayName: Quality control issues + header: Sequencing + - name: quality_control_details + ontology_id: GENEPIO:0100561 + definition: The details surrounding a low quality determination in a quality control + assessment. + guidance: Provide notes or details regarding QC results using free text. + example: CT value of 39. Low viral load. Low DNA concentration after amplification. 
+ displayName: Quality control details + header: Diagnostics + - name: diagnostic_measurement_method + displayName: Diagnostic measurement method + header: Diagnostics + - name: diagnostic_target_presence + displayName: Diagnostic target presence + header: Diagnostics + - name: diagnostic_target_gene_name + displayName: Gene name + header: Diagnostics + - name: diagnostic_measurement_value + displayName: Diagnostic measurement value + header: Diagnostics + - name: diagnostic_measurement_unit + displayName: Diagnostic measurement unit + header: Diagnostics + - name: ncbi_completeness + generateIndex: true + autocomplete: true + header: "INSDC" + noInput: true + - name: length + type: int + header: "Alignment states and QC metrics" + noInput: true + - name: host_name_scientific + generateIndex: true + autocomplete: true + header: "Host" + ingest: ncbi_host_name + - name: host_name_common + generateIndex: true + autocomplete: true + header: "Host" + - name: host_taxon_id + type: int + autocomplete: true + customDisplay: + type: link + url: "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=info&id=__value__" + header: "Host" + ingest: ncbi_host_tax_id + - name: is_lab_host + generateIndex: true + autocomplete: true + header: "Host" + ingest: ncbi_is_lab_host + noInput: true + - name: cell_line + generateIndex: true + autocomplete: true + header: "Host" + - name: passage_number + type: int + header: "Host" + - name: passage_method + generateIndex: true + autocomplete: true + header: "Host" + - name: ncbi_length + type: int + header: "INSDC" + noInput: true + - name: ncbi_protein_count + type: int + header: "INSDC" + hideOnSequenceDetailsPage: true + noInput: true + - name: ncbi_sourcedb + generateIndex: true + autocomplete: true + header: "INSDC" + hideOnSequenceDetailsPage: true + noInput: true + - name: ncbi_virus_name + generateIndex: true + autocomplete: true + hideOnSequenceDetailsPage: true + noInput: true + header: "INSDC" + - name: ncbi_virus_tax_id 
+ type: int + autocomplete: true + customDisplay: + type: link + url: "https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/virus?SeqType_s=Nucleotide&VirusLineage_ss=taxid:__value__" + hideOnSequenceDetailsPage: true + noInput: true + header: "INSDC" + - name: sra_run_accession + customDisplay: + type: link + url: "https://www.ncbi.nlm.nih.gov/sra/?term=__value__" + header: "INSDC" + ingest: ncbi_sra_accessions + - name: total_snps + type: int + header: "Alignment states and QC metrics" + noInput: true + preprocessing: + args: { type: int } + inputs: { input: nextclade.totalSubstitutions } + - name: total_inserted_nucs + type: int + header: "Alignment states and QC metrics" + noInput: true + preprocessing: + args: { type: int } + inputs: { input: nextclade.totalInsertions } + - name: total_deleted_nucs + type: int + header: "Alignment states and QC metrics" + noInput: true + preprocessing: + args: { type: int } + inputs: { input: nextclade.totalDeletions } + - name: total_ambiguous_nucs + type: int + header: "Alignment states and QC metrics" + noInput: true + preprocessing: + args: { type: int } + inputs: { input: "nextclade.totalNonACGTNs" } + - name: total_unknown_nucs + type: int + header: "Alignment states and QC metrics" + noInput: true + preprocessing: + args: { type: int } + inputs: { input: nextclade.totalMissing } + - name: total_frame_shifts + type: int + header: "Alignment states and QC metrics" + noInput: true + preprocessing: + args: { type: int } + inputs: { input: nextclade.totalFrameShifts } + - name: frame_shifts + header: "Alignment states and QC metrics" + noInput: true + preprocessing: + inputs: { input: nextclade.frameShifts } + - name: completeness + type: float + header: "Alignment states and QC metrics" + noInput: true + preprocessing: + args: { type: float } + inputs: { input: nextclade.coverage } + - name: total_stop_codons + type: int + header: "Alignment states and QC metrics" + noInput: true + preprocessing: + args: { type: int } + inputs: { 
input: nextclade.qc.stopCodons.totalStopCodons } + - name: stop_codons + header: "Alignment states and QC metrics" + noInput: true + preprocessing: + inputs: { input: nextclade.qc.stopCodons.stopCodons } + website: &website + tableColumns: + - sample_collection_date + - ncbi_release_date + - authors + - author_affiliations + - geo_loc_country + - geo_loc_admin_1 + - length + defaultOrderBy: sample_collection_date + defaultOrder: descending + silo: + dateToSortBy: sample_collection_date + extraInputFields: &extraInputFields + - name: submissionId + displayName: Submission ID + definition: FASTA ID + guidance: Used to match the sequence(s) to the metadata + example: GJP123 + position: first + preprocessing: + - &preprocessing + version: 2 + image: ghcr.io/loculus-project/preprocessing-nextclade + args: + - "prepro" + configFile: &preprocessingConfigFile + log_level: DEBUG + nextclade_dataset_name: nextstrain/ebola/zaire + genes: [ NP, VP35, VP40, GP, sGP, ssGP, VP30, VP24, L ] + batch_size: 100 + ingest: &ingest + image: ghcr.io/loculus-project/ingest + configFile: &ingestConfigFile + taxon_id: 186538 + referenceGenomes: + nucleotideSequences: + - name: "main" + sequence: 
CGGACACACAAAAAGAAAGAAGAATTTTTAGGATCTTTTGTGTGCGAATAACTATGAGGAAGATTAATAATTTTCCTCTCATTGAAATTTATATCGGAATTTAAATTGAAATTGTTACTGTAATCACACCTGGTTTGTTTCAGAGCCACATCACAAAGATAGAGAACAACCTAGGTCTCCGAAGGGAGCAAGGGCATCAGTGTGCTCAGTTGAAAATCCCTTGTCAACACCTAGGTCTTATCACATCACAAGTTCCACCTCAGACTCTGCAGGGTGATCCAACAACCTTAATAGAAACATTATTGTTAAAGGACAGCATTAGTTCACAGTCAAACAAGCAAGATTGAGAATTAACCTTGGTTTTGAACTTGAACACTTAGGGGATTGAAGATTCAACAACCCTAAAGCTTGGGGTAAAACATTGGAAATAGTTAAAAGACAAATTGCTCGGAATCACAAAATTCCGAGTATGGATTCTCGTCCTCAGAAAATCTGGATGGCGCCGAGTCTCACTGAATCTGACATGGATTACCACAAGATCTTGACAGCAGGTCTGTCCGTTCAACAGGGGATTGTTCGGCAAAGAGTCATCCCAGTGTATCAAGTAAACAATCTTGAAGAAATTTGCCAACTTATCATACAGGCCTTTGAAGCAGGTGTTGATTTTCAAGAGAGTGCGGACAGTTTCCTTCTCATGCTTTGTCTTCATCATGCGTACCAGGGAGATTACAAACTTTTCTTGGAAAGTGGCGCAGTCAAGTATTTGGAAGGGCACGGGTTCCGTTTTGAAGTCAAGAAGCGTGATGGAGTGAAGCGCCTTGAGGAATTGCTGCCAGCAGTATCTAGTGGAAAAAACATTAAGAGAACACTTGCTGCCATGCCGGAAGAGGAGACAACTGAAGCTAATGCCGGTCAGTTTCTCTCCTTTGCAAGTCTATTCCTTCCGAAATTGGTAGTAGGAGAAAAGGCTTGCCTTGAGAAGGTTCAAAGGCAAATTCAAGTACATGCAGAGCAAGGACTGATACAATATCCAACAGCTTGGCAATCAGTAGGACACATGATGGTGATTTTCCGTTTGATGCGAACAAATTTTCTGATCAAATTTCTCCTAATACACCAAGGGATGCACATGGTTGCCGGGCATGATGCCAACGATGCTGTGATTTCAAATTCAGTGGCTCAAGCTCGTTTTTCAGGCTTATTGATTGTCAAAACAGTACTTGATCATATCCTACAAAAGACAGAACGAGGAGTTCGTCTCCATCCTCTTGCAAGGACCGCCAAGGTAAAAAATGAGGTGAACTCCTTTAAGGCTGCACTCAGCTCCCTGGCCAAGCATGGAGAGTATGCTCCTTTCGCCCGACTTTTGAACCTTTCTGGAGTAAATAATCTTGAGCATGGTCTTTTCCCTCAACTATCGGCAATTGCACTCGGAGTCGCCACAGCACACGGGAGTACCCTCGCAGGAGTAAATGTTGGAGAACAGTATCAACAACTCAGAGAGGCTGCCACTGAGGCTGAGAAGCAACTCCAACAATATGCAGAGTCTCGCGAACTTGACCATCTTGGACTTGATGATCAGGAAAAGAAAATTCTTATGAACTTCCATCAGAAAAAGAACGAAATCAGCTTCCAGCAAACAAACGCTATGGTAACTCTAAGAAAAGAGCGCCTGGCCAAGCTGACAGAAGCTATCACTGCTGCGTCACTGCCCAAAACAAGTGGACATTACGATGATGATGACGACATTCCCTTTCCAGGACCCATCAATGATGACGACAATCCTGGCCATCAAGATGATGATCCGACTGACTCACAGGATACGACCATTCCCGATGTGGTGGTTGATCCCGATGATGGAAGCTACGGCGAATACCAGAGTTACTCGGAAAACGGCATGAATGCACCAGATGACTTGGTCCTATTCGATCTAGACGAGGACGACGAGGACACTAAGCCAGTGCCTAATAGATCGACCAAGGGTGGACAACAGAAGAACA
GTCAAAAGGGCCAGCATATAGAGGGCAGACAGACACAATCCAGGCCAATTCAAAATGTCCCAGGCCCTCACAGAACAATCCACCACGCCAGTGCGCCACTCACGGACAATGACAGAAGAAATGAACCCTCCGGCTCAACCAGCCCTCGCATGCTGACACCAATTAACGAAGAGGCAGACCCACTGGACGATGCCGACGACGAGACGTCTAGCCTTCCGCCCTTGGAGTCAGATGATGAAGAGCAGGACAGGGACGGAACTTCCAACCGCACACCCACTGTCGCCCCACCGGCTCCCGTATACAGAGATCACTCTGAAAAGAAAGAACTCCCGCAAGACGAGCAACAAGATCAGGACCACACTCAAGAGGCCAGGAACCAGGACAGTGACAACACCCAGTCAGAACACTCTTTTGAGGAGATGTATCGCCACATTCTAAGATCACAGGGGCCATTTGATGCTGTTTTGTATTATCATATGATGAAGGATGAGCCTGTAGTTTTCAGTACCAGTGATGGCAAAGAGTACACGTATCCAGACTCCCTTGAAGAGGAATATCCACCATGGCTCACTGAAAAAGAGGCTATGAATGAAGAGAATAGATTTGTTACATTGGATGGTCAACAATTTTATTGGCCGGTGATGAATCACAAGAATAAATTCATGGCAATCCTGCAACATCATCAGTGAATGAGCATGGAACAATGGGATGATTCAACCGACAAATAGCTAACATTAAGTAGTCAAGGAACGAAAACAGGAAGAATTTTTGATGTCTAAGGTGTGAATTATTATCACAATAAAAGTGATTCTTATTTTTGAATTTAAAGCTAGCTTATTATTACTAGCCGTTTTTCAAAGTTCAATTTGAGTCTTAATGCAAATAGGCGTTAAGCCACAGTTATAGCCATAATTGTAACTCAATATTCTAACTAGCGATTTATCTAAATTAAATTACATTATGCTTTTATAACTTACCTACTAGCCTGCCCAACATTTACACGATCGTTTTATAATTAAGAAAAAACTAATGATGAAGATTAAAACCTTCATCATCCTTACGTCAATTGAATTCTCTAGCACTCGAAGCTTATTGTCTTCAATGTAAAAGAAAAGCTGGTCTAACAAGATGACAACTAGAACAAAGGGCAGGGGCCATACTGCGGCCACGACTCAAAACGACAGAATGCCAGGCCCTGAGCTTTCGGGCTGGATCTCTGAGCAGCTAATGACCGGAAGAATTCCTGTAAGCGACATCTTCTGTGATATTGAGAACAATCCAGGATTATGCTACGCATCCCAAATGCAACAAACGAAGCCAAACCCGAAGACGCGCAACAGTCAAACCCAAACGGACCCAATTTGCAATCATAGTTTTGAGGAGGTAGTACAAACATTGGCTTCATTGGCTACTGTTGTGCAACAACAAACCATCGCATCAGAATCATTAGAACAACGCATTACGAGTCTTGAGAATGGTCTAAAGCCAGTTTATGATATGGCAAAAACAATCTCCTCATTGAACAGGGTTTGTGCTGAGATGGTTGCAAAATATGATCTTCTGGTGATGACAACCGGTCGGGCAACAGCAACCGCTGCGGCAACTGAGGCTTATTGGGCCGAACATGGTCAACCACCACCTGGACCATCACTTTATGAAGAAAGTGCGATTCGGGGTAAGATTGAATCTAGAGATGAGACCGTCCCTCAAAGTGTTAGGGAGGCATTCAACAATCTAAACAGTACCACTTCACTAACTGAGGAAAATTTTGGGAAACCTGACATTTCGGCAAAGGATTTGAGAAACATTATGTATGATCACTTGCCTGGTTTTGGAACTGCTTTCCACCAATTAGTACAAGTGATTTGTAAATTGGGAAAAGATAGCAACTCATTGGACATCATTCATGCTGAGTTCCAGGCCAGCCTGGCTGAAGGAGACTCTCCTCAATGTGCCCTAATTCAAATTACAAAAAGAGTTCCAATCTTCCAAGATGCTGC
TCCACCTGTCATCCACATCCGCTCTCGAGGTGACATTCCCCGAGCTTGCCAGAAAAGCTTGCGTCCAGTCCCACCATCGCCCAAGATTGATCGAGGTTGGGTATGTGTTTTTCAGCTTCAAGATGGTAAAACACTTGGACTCAAAATTTGAGCCAATCTCCCTTCCCTCCGAAAGAGGCGAATAATAGCAGAGGCTTCAACTGCTGAACTATAGGGTACGTTACATTAATGATACACTTGTGAGTATCAGCCCTGGATAATATAAGTCAATTAAACGACCAAGATAAAATTGTTCATATCTCGCTAGCAGCTTAAAATATAAATGTAATAGGAGCTATATCTCTGACAGTATTATAATCAATTGTTATTAAGTAACCCAAACCAAAAGTGATGAAGATTAAGAAAAACCTACCTCGGCTGAGAGAGTGTTTTTTCATTAACCTTCATCTTGTAAACGTTGAGCAAAATTGTTAAAAATATGAGGCGGGTTATATTGCCTACTGCTCCTCCTGAATATATGGAGGCCATATACCCTGTCAGGTCAAATTCAACAATTGCTAGAGGTGGCAACAGCAATACAGGCTTCCTGACACCGGAGTCAGTCAATGGGGACACTCCATCGAATCCACTCAGGCCAATTGCCGATGACACCATCGACCATGCCAGCCACACACCAGGCAGTGTGTCATCAGCATTCATCCTTGAAGCTATGGTGAATGTCATATCGGGCCCCAAAGTGCTAATGAAGCAAATTCCAATTTGGCTTCCTCTAGGTGTCGCTGATCAAAAGACCTACAGCTTTGACTCAACTACGGCCGCCATCATGCTTGCTTCATACACTATCACCCATTTCGGCAAGGCAACCAATCCACTTGTCAGAGTCAATCGGCTGGGTCCTGGAATCCCGGATCATCCCCTCAGGCTCCTGCGAATTGGAAACCAGGCTTTCCTCCAGGAGTTCGTTCTTCCGCCAGTCCAACTACCCCAGTATTTCACCTTTGATTTGACAGCACTCAAACTGATCACCCAACCACTGCCTGCTGCAACATGGACCGATGACACTCCAACAGGATCAAATGGAGCGTTGCGTCCAGGAATTTCATTTCATCCAAAACTTCGCCCCATTCTTTTACCCAACAAAAGTGGGAAGAAGGGGAACAGTGCCGATCTAACATCTCCGGAGAAAATCCAAGCAATAATGACTTCACTCCAGGACTTTAAGATCGTTCCAATTGATCCAACCAAAAATATCATGGGAATCGAAGTGCCAGAAACTCTGGTCCACAAGCTGACCGGTAAGAAGGTGACTTCTAAAAATGGACAACCAATCATCCCTGTTCTTTTGCCAAAGTACATTGGGTTGGACCCGGTGGCTCCAGGAGACCTCACCATGGTAATCACACAGGATTGTGACACGTGTCATTCTCCTGCAAGTCTTCCAGCTGTGATTGAGAAGTAATTGCAATAATTGACTCAGATCCAGTTTTATAGAATCTTCTCAGGGATAGTGATAACATCTATTTAGTAATCCGTCCATTAGAGGAGACACTTTTAATTGATCAATATACTAAAGGTGCTTTACACCATTGTCTTTTTTCTCTCCTAAATGTAGAACTTAACAAAAGACTCATAATATACTTGTTTTTAAAGGATTGATTGATGAAAGATCATAACTAATAACATTACAAATAATCCTACTATAATCAATACGGTGATTCAAATGTTAATCTTTCTCATTGCACATACTTTTTGCCCTTATCCTCAAATTGCCTGCATGCTTACATCTGAGGATAGCCAGTGTGACTTGGATTGGAAATGTGGAGAAAAAATCGGGACCCATTTCTAGGTTGTTCACAATCCAAGTACAGACATTGCCCTTCTAATTAAGAAAAAATCGGCGATGAAGATTAAGCCGACAGTGAGCGTAATCTTCATCTCTCTTAGATTATTTGTTTTCCAGAGTAGGGGTCGTCAGGTCCTTTTCAATCGTGTAACCAAAA
TAAACTCCACTAGAAGGATATTGTGGGGCAACAACACAATGGGCGTTACAGGAATATTGCAGTTACCTCGTGATCGATTCAAGAGGACATCATTCTTTCTTTGGGTAATTATCCTTTTCCAAAGAACATTTTCCATCCCACTTGGAGTCATCCACAATAGCACATTACAGGTTAGTGATGTCGACAAACTAGTTTGTCGTGACAAACTGTCATCCACAAATCAATTGAGATCAGTTGGACTGAATCTCGAAGGGAATGGAGTGGCAACTGACGTGCCATCTGCAACTAAAAGATGGGGCTTCAGGTCCGGTGTCCCACCAAAGGTGGTCAATTATGAAGCTGGTGAATGGGCTGAAAACTGCTACAATCTTGAAATCAAAAAACCTGACGGGAGTGAGTGTCTACCAGCAGCGCCAGACGGGATTCGGGGCTTCCCCCGGTGCCGGTATGTGCACAAAGTATCAGGAACGGGACCGTGTGCCGGAGACTTTGCCTTCCATAAAGAGGGTGCTTTCTTCCTGTATGATCGACTTGCTTCCACAGTTATCTACCGAGGAACGACTTTCGCTGAAGGTGTCGTTGCATTTCTGATACTGCCCCAAGCTAAGAAGGACTTCTTCAGCTCACACCCCTTGAGAGAGCCGGTCAATGCAACGGAGGACCCGTCTAGTGGCTACTATTCTACCACAATTAGATATCAGGCTACCGGTTTTGGAACCAATGAGACAGAGTACTTGTTCGAGGTTGACAATTTGACCTACGTCCAACTTGAATCAAGATTCACACCACAGTTTCTGCTCCAGCTGAATGAGACAATATATACAAGTGGGAAAAGGAGCAATACCACGGGAAAACTAATTTGGAAGGTCAACCCCGAAATTGATACAACAATCGGGGAGTGGGCCTTCTGGGAAACTAAAAAAACCTCACTAGAAAAATTCGCAGTGAAGAGTTGTCTTTCACAGTTGTATCAAACGGAGCCAAAAACATCAGTGGTCAGAGTCCGGCGCGAACTTCTTCCGACCCAGGGACCAACACAACAACTGAAGACCACAAAATCATGGCTTCAGAAAATTCCTCTGCAATGGTTCAAGTGCACAGTCAAGGAAGGGAAGCTGCAGTGTCGCATCTAACAACCCTTGCCACAATCTCCACGAGTCCCCAATCCCTCACAACCAAACCAGGTCCGGACAACAGCACCCATAATACACCCGTGTATAAACTTGACATCTCTGAGGCAACTCAAGTTGAACAACATCACCGCAGAACAGACAACGACAGCACAGCCTCCGACACTCCCTCTGCCACGACCGCAGCCGGACCCCCAAAAGCAGAGAACACCAACACGAGCAAGAGCACTGACTTCCTGGACCCCGCCACCACAACAAGTCCCCAAAACCACAGCGAGACCGCTGGCAACAACAACACTCATCACCAAGATACCGGAGAAGAGAGTGCCAGCAGCGGGAAGCTAGGCTTAATTACCAATACTATTGCTGGAGTCGCAGGACTGATCACAGGCGGGAGAAGAACTCGAAGAGAAGCAATTGTCAATGCTCAACCCAAATGCAACCCTAATTTACATTACTGGACTACTCAGGATGAAGGTGCTGCAATCGGACTGGCCTGGATACCATATTTCGGGCCAGCAGCCGAGGGAATTTACATAGAGGGGCTAATGCACAATCAAGATGGTTTAATCTGTGGGTTGAGACAGCTGGCCAACGAGACGACTCAAGCTCTTCAACTGTTCCTGAGAGCCACAACTGAGCTACGCACCTTTTCAATCCTCAACCGTAAGGCAATTGATTTCTTGCTGCAGCGATGGGGCGGCACATGCCACATTCTGGGACCGGACTGCTGTATCGAACCACATGATTGGACCAAGAACATAACAGACAAAATTGATCAGATTATTCATGATTTTGTTGATAAAACCCTTCCGGACCAGGGGGACAATGACAATTGGTGGACAGGATGGAGACAATGGATACCGGCAG
GTATTGGAGTTACAGGCGTTATAATTGCAGTTATCGCTTTATTCTGTATATGCAAATTTGTCTTTTAGTTTTTCTTCAGATTGCTTCATGGAAAAGCTCAGCCTCAAATCAATGAAACCAGGATTTAATTATATGGATTACTTGAATCTAAGATTACTTGACAAATGATAATATAATACACTGGAGCTTTAAACATAGCCAATGTGATTCTAACTCCTTTAAACTCACAGTTAATCATAAACAAGGTTTGACATCAATCTAGTTATCTCTTTGAGAATGATAAACTTGATGAAGATTAAGAAAAAGGTAATCTTTCGATTATCTTTAATCTTCATCCTTGATTCTACAATCATGACAGTTGTCTTTAGTGACAAGGGAAAGAAGCCTTTTTATTAAGTTGTAATAATCAGATCTGCGAACCGGTAGAGTTTAGTTGCAACCTAACACACATAAAGCATTGGTCAAAAAGTCAATAGAAATTTAAACAGTGAGTGGAGACAACTTTTAAATGGAAGCTTCATATGAGAGAGGACGCCCACGAGCTGCCAGACAGCATTCAAGGGATGGACACGACCACCATGTTCGAGCACGATCATCATCCAGAGAGAATTATCGAGGTGAGTACCGTCAATCAAGGAGCGCCTCACAAGTGCGCGTTCCTACTGTATTTCATAAGAAGAGAGTTGAACCATTAACAGTTCCTCCAGCACCTAAAGACATATGTCCGACCTTGAAAAAAGGATTTTTGTGTGACAGTAGTTTTTGCAAAAAAGATCACCAGTTGGAGAGTTTAACTGATAGGGAATTACTCCTACTAATCGCCCGTAAGACTTGTGGATCAGTAGAACAACAATTAAATATAACTGCACCCAAGGACTCGCGCTTAGCAAATCCAACGGCTGATGATTTCCAGCAAGAGGAAGGTCCAAAAATTACCTTGTTGACACTGATCAAGACGGCAGAACACTGGGCGAGACAAGACATCAGAACCATAGAGGATTCAAAATTAAGAGCATTGTTGACTCTATGTGCTGTGATGACGAGGAAATTCTCAAAATCCCAGCTGAGTCTTTTATGTGAGACACACCTAAGGCGCGAGGGGCTTGGGCAAGATCAGGCAGAACCCGTTCTCGAAGTATATCAACGATTACACAGTGATAAAGGAGGCAGTTTTGAAGCTGCACTATGGCAACAATGGGACCGACAATCCCTAATTATGTTTATCACTGCATTCTTGAATATTGCTCTCCAGTTACCGTGTGAAAGTTCTGCTGTCGTTGTTTCAGGGTTAAGAACATTGGTTCCTCAATCAGATAATGAGGAAGCTTCAACCAACCCGGGGACATGCTCATGGTCTGATGAGGGTACCCCTTAATAAGGCTGACTAAAACACTATATAACCTTCTACTTGATCACAATACTCCGTATACCTATCATCATATATTTAATCAAGACGATATCCTTTAAAACTTATTCAGTACTATAATCACTCTCGTTTCAAATTAATAAGATGTGCATGATTGCCCTAATATATGAAGAGGTATGATACAACCCTAACAGTGATCAAAGAAAATCATAATCTCGTATCGCTCGTAATATAACCTGCCAAGCATACCTCTTGCACAAAGTGATTCTTGTACACAAATAATGTTTTACTCTACAGGAGGTAGCAACGATCCATCCCATCAAAAAATAAGTATTTCATGACTTACTAATGATCTCTTAAAATATTAAGAAAAACTGACGGAACATAAATTCTTTATGCTTCAAGCTGTGGAGGAGGTGTTTGGTATTGGCTATTGTTATATTACAATCAATAACAAGCTTGTAAAAATATTGTTCTTGTTTCAAGAGGTAGATTGTGACCGGAAATGCTAAACTAATGATGAAGATTAATGCGGAGGTCTGATAAGAATAAACCTTATTATTCAGATTAGGCCCCAAGAGGCATTCTTCATCTCCTTTTAGCAAAGTACTATTTCAGGGTAGTCCAATTAGTG
GCACGTCTTTTAGCTGTATATCAGTCGCCCCTGAGATACGCCACAAAAGTGTCTCTAAGCTAAATTGGTCTGTACACATCCCATACATTGTATTAGGGGCAATAATATCTAATTGAACTTAGCCGTTTAAAATTTAGTGCATAAATCTGGGCTAACACCACCAGGTCAACTCCATTGGCTGAAAAGAAGCTTACCTACAACGAACATCACTTTGAGCGCCCTCACAATTAAAAAATAGGAACGTCGTTCCAACAATCGAGCGCAAGGTTTCAAGGTTGAACTGAGAGTGTCTAGACAACAAAATATTGATACTCCAGACACCAAGCAAGACCTGAGAAAAAACCATGGCTAAAGCTACGGGACGATACAATCTAATATCGCCCAAAAAGGACCTGGAGAAAGGGGTTGTCTTAAGCGACCTCTGTAACTTCTTAGTTAGCCAAACTATTCAGGGGTGGAAGGTTTATTGGGCTGGTATTGAGTTTGATGTGACTCACAAAGGAATGGCCCTATTGCATAGACTGAAAACTAATGACTTTGCCCCTGCATGGTCAATGACAAGGAATCTCTTTCCTCATTTATTTCAAAATCCGAATTCCACAATTGAATCACCGCTGTGGGCATTGAGAGTCATCCTTGCAGCAGGGATACAGGACCAGCTGATTGACCAGTCTTTGATTGAACCCTTAGCAGGAGCCCTTGGTCTGATCTCTGATTGGCTGCTAACAACCAACACTAACCATTTCAACATGCGAACACAACGTGTCAAGGAACAATTGAGCCTAAAAATGCTGTCGTTGATTCGATCCAATATTCTCAAGTTTATTAACAAATTGGATGCTCTACATGTCGTGAACTACAACGGATTGTTGAGCAGTATTGAAATTGGAACTCAAAATCATACAATCATCATAACTCGAACTAACATGGGTTTTCTGGTGGAGCTCCAAGAACCCGACAAATCGGCAATGAACCGCATGAAGCCTGGGCCGGCGAAATTTTCCCTCCTTCATGAGTCCACACTGAAAGCATTTACACAAGGATCCTCGACACGAATGCAAAGTTTGATTCTTGAATTTAATAGCTCTCTTGCTATCTAACTAAGGTAGAATACTTCATATTGAGCTAACTCATATATGCTGACTCAATAGTTATCTTGACATCTCTGCTTTCATAATCAGATATATAAGCATAATAAATAAATACTCATATTTCTTGATAATTTGTTTAACCACAGATAAATCCTCACTGTAAGCCAGCTTCCAAGTTGACACCCTTACAAAAACCAGGACTCAGAATCCCTCAAACAAGAGATTCCAAGACAACATCATAGAATTGCTTTATTATATGAATAAGCATTTTATCACCAGAAATCCTATATACTAAATGGTTAATTGTAACTGAACCCGCAGGTCACATGTGTTAGGTTTCACAGATTCTATATATTACTAACTCTATACTCGTAATTAACATTAGATAAGTAGATTAAGAAAAAAGCCTGAGGAAGATTAAGAAAAACTGCTTATTGGGTCTTTCCGTGTTTTAGATGAAGCAGTTGAAATTCTTCCTCTTGATATTAAATGGCTACACAACATACCCAATACCCAGACGCTAGGTTATCATCACCAATTGTATTGGACCAATGTGACCTAGTCACTAGAGCTTGCGGGTTATATTCATCATACTCCCTTAATCCGCAACTACGCAACTGTAAACTCCCGAAACATATCTACCGTTTGAAATACGATGTAACTGTTACCAAGTTCTTGAGTGATGTACCAGTGGCGACATTGCCCATAGATTTCATAGTCCCAGTTCTTCTCAAGGCACTGTCAGGCAATGGATTCTGTCCTGTTGAGCCGCGGTGCCAACAGTTCTTAGATGAAATCATTAAGTACACAATGCAAGATGCTCTCTTCTTGAAATATTATCTCAAAAATGTGGGTGCTCAAGAAGACTGTGTTGATGAACACTTTCAAGAGAAAATCTTATCTTCAATT
CAGGGCAATGAATTTTTACATCAAATGTTTTTCTGGTATGATCTGGCTATTTTAACTCGAAGGGGTAGATTAAATCGAGGAAACTCTAGATCAACATGGTTTGTTCATGATGATTTAATAGACATCTTAGGCTATGGGGACTATGTTTTTTGGAAGATCCCAATTTCAATGTTACCACTGAACACACAAGGAATCCCCCATGCTGCTATGGACTGGTATCAGGCATCAGTATTCAAAGAAGCGGTTCAAGGGCATACACACATTGTTTCTGTTTCTACTGCCGACGTCTTGATAATGTGCAAAGATTTAATTACATGTCGATTCAACACAACTCTAATCTCAAAAATAGCAGAGATTGAGGATCCAGTTTGTTCTGATTATCCCAATTTTAAGATTGTGTCTATGCTTTACCAGAGCGGAGATTACTTACTCTCCATATTAGGGTCTGATGGGTATAAAATTATTAAGTTCCTCGAACCATTGTGCTTGGCCAAAATTCAATTATGCTCAAAGTACACTGAGAGGAAGGGCCGATTCTTAACACAAATGCATTTAGCTGTAAATCACACCCTAGAAGAAATTACAGAAATGCGTGCACTAAAGCCTTCACAGGCTCAAAAGATCCGTGAATTCCATAGAACATTGATAAGGCTGGAGATGACGCCACAACAACTTTGTGAGCTATTTTCCATTCAAAAACACTGGGGGCATCCTGTGCTACATAGTGAAACAGCAATCCAAAAAGTTAAAAAACATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCCAAACATTATTTTGATAGTCAAGGATCTTGGTACAGTGTTACTTCAGATAGGAATCTAACACCGGGTCTTAATTCTTATATCAAAAGAAATCAATTCCCTCCGTTGCCAATGATTAAAGAACTACTATGGGAATTTTACCACCTTGACCACCCTCCACTTTTCTCAACCAAAATTATTAGTGACTTAAGTATTTTTATAAAAGACAGAGCTACCGCAGTAGAAAGGACATGCTGGGATGCAGTATTCGAGCCTAATGTTCTAGGATATAATCCACCTCACAAATTTAGTACTAAACGTGTACCGGAACAATTTTTAGAGCAAGAAAACTTTTCTATTGAGAATGTTCTTTCCTACGCACAAAAACTCGAGTATCTACTACCACAATATCGGAACTTTTCTTTCTCATTGAAAGAGAAAGAGTTGAATGTAGGTAGAACCTTCGGAAAATTGCCTTATCCGACTCGCAATGTTCAAACACTTTGTGAAGCTCTGTTAGCTGATGGTCTTGCTAAAGCATTTCCTAGCAATATGATGGTAGTTACGGAACGTGAGCAAAAAGAAAGCTTATTGCATCAAGCATCATGGCACCACACAAGTGATGATTTTGGTGAACATGCCACAGTTAGAGGGAGTAGCTTTGTAACTGATTTAGAGAAATACAATCTTGCATTTAGATATGAGTTTACAGCACCTTTTATAGAATATTGCAACCGTTGCTATGGTGTTAAGAATGTTTTTAATTGGATGCATTATACAATCCCACAGTGTTATATGCATGTCAGTGATTATTATAATCCACCACATAACCTCACACTGGAGAATCGAGACAACCCCCCCGAAGGGCCTAGTTCATACAGGGGTCATATGGGAGGGATTGAAGGACTGCAACAAAAACTCTGGACAAGTATTTCATGTGCTCAAATTTCTTTAGTTGAAATTAAGACTGGTTTTAAGTTACGCTCAGCTGTGATGGGTGACAATCAGTGCATTACTGTTTTATCAGTCTTCCCCTTAGAGACTGACGCAGACGAGCAGGAACAGAGCGCCGAAGACAATGCAGCGAGGGTGGCCGCCAGCCTAGCAAAAGTTACAAGTGCCTGTGGAATCTTTTTAAAACCTGATGAAACATTTGTACATTCAGGTTTTATCTATTTTGGAAAAAAACAATATTT
GAATGGGGTCCAATTGCCTCAGTCCCTTAAAACGGCTACAAGAATGGCACCATTGTCTGATGCAATTTTTGATGATCTTCAAGGGACCCTGGCTAGTATAGGCACTGCTTTTGAGCGATCCATCTCTGAGACACGACATATCTTTCCTTGCAGGATAACCGCAGCTTTCCATACGTTTTTTTCGGTGAGAATCTTGCAATATCATCATCTCGGGTTCAATAAAGGTTTTGACCTTGGACAGTTAACACTCGGCAAACCTCTGGATTTCGGAACAATATCATTGGCACTAGCGGTACCGCAGGTGCTTGGAGGGTTATCCTTCTTGAATCCTGAGAAATGTTTCTACCGGAATCTAGGAGATCCAGTTACCTCAGGCTTATTCCAGTTAAAAACTTATCTCCGAATGATTGAGATGGATGATTTATTCTTACCTTTAATTGCGAAGAACCCTGGGAACTGCACTGCCATTGACTTTGTGCTAAATCCTAGCGGATTAAATGTCCCTGGGTCGCAAGACTTAACTTCATTTCTGCGCCAGATTGTACGCAGGACCATCACCCTAAGTGCGAAAAACAAACTTATTAATACCTTATTTCATGCGTCAGCTGACTTCGAAGACGAAATGGTTTGTAAATGGCTATTATCATCAACTCCTGTTATGAGTCGTTTTGCGGCCGATATCTTTTCACGCACGCCGAGCGGGAAGCGATTGCAAATTCTAGGATACCTGGAAGGAACACGCACATTATTAGCCTCTAAGATCATCAACAATAATACAGAGACACCGGTTTTGGACAGACTGAGGAAAATAACATTGCAAAGGTGGAGCCTATGGTTTAGTTATCTTGATCATTGTGATAATATCCTGGCGGAGGCTTTAACCCAAATAACTTGCACAGTTGATTTAGCACAGATTCTGAGGGAATATTCATGGGCTCATATTTTAGAGGGAAGACCTCTTATTGGAGCCACACTCCCATGTATGATTGAGCAATTCAAAGTGTTTTGGCTGAAACCCTACGAACAATGTCCGCAGTGTTCAAATGCAAAGCAACCAGGTGGGAAACCATTCGTGTCAGTGGCAGTCAAGAAACATATTGTTAGTGCATGGCCGAACGCATCCCGAATAAGCTGGACTATCGGGGATGGAATCCCATACATTGGATCAAGGACAGAAGATAAGATAGGACAACCTGCTATTAAACCAAAATGTCCTTCCGCAGCCTTAAGAGAGGCCATTGAATTGGCGTCCCGTTTAACATGGGTAACTCAAGGCAGTTCGAACAGTGACTTGCTAATAAAACCATTTTTGGAAGCACGAGTAAATTTAAGTGTTCAAGAAATACTTCAAATGACCCCTTCACATTACTCAGGAAATATTGTTCACAGGTACAACGATCAATACAGTCCTCATTCTTTCATGGCCAATCGTATGAGTAATTCAGCAACGCGATTGATTGTTTCTACAAACACTTTAGGTGAGTTTTCAGGAGGTGGCCAGTCTGCACGCGACAGCAATATTATTTTCCAGAATGTTATAAATTATGCAGTTGCACTGTTCGATATTAAATTTAGAAACACTGAGGCTACAGATATCCAATATAATCGTGCTCACCTTCATCTAACTAAGTGTTGCACCCGGGAAGTACCAGCTCAGTATTTAACATACACATCTACATTGGATTTAGATTTAACAAGATACCGAGAAAACGAATTGATTTATGACAGTAATCCTCTAAAAGGAGGACTCAATTGCAATATCTCATTCGATAATCCATTTTTCCAAGGTAAACGGCTGAACATTATAGAAGATGATCTTATTCGACTGCCTCACTTATCTGGATGGGAGCTAGCCAAGACCATCATGCAATCAATTATTTCAGATAGCAACAATTCATCTACAGACCCAATTAGCAGTGGAGAAACAAGATCATTCACTACCCATTTCTTAACTTATCCCAAGATAGGACTTCTGTACAGTTTTGGGGCCTTTGTAAGTT
ATTATCTTGGCAATACAATTCTTCGGACTAAGAAATTAACACTTGACAATTTTTTATATTACTTAACTACTCAAATTCATAATCTACCACATCGCTCATTGCGAATACTTAAGCCAACATTCAAACATGCAAGCGTTATGTCACGGTTAATGAGTATTGATCCTCATTTTTCTATTTACATAGGCGGTGCTGCAGGTGACAGAGGACTCTCAGATGCGGCCAGGTTATTTTTGAGAACGTCCATTTCATCTTTTCTTACATTTGTAAAAGAATGGATAATTAATCGCGGAACAATTGTCCCTTTATGGATAGTATATCCGCTAGAGGGTCAAAACCCAACACCTGTGAATAATTTTCTCTATCAGATCGTAGAACTGCTGGTGCATGATTCATCAAGACAACAGGCTTTTAAAACTACCATAAGTGATCATGTACATCCTCACGACAATCTTGTTTACACATGTAAGAGTACAGCCAGCAATTTCTTCCATGCATCATTGGCGTACTGGAGGAGCAGACACAGAAACAGCAACCGAAAATACTTGGCAAGAGACTCTTCAACTGGATCAAGCACAAACAACAGTGATGGTCATATTGAGAGAAGTCAAGAACAAACCACCAGAGATCCACATGATGGCACTGAACGGAATCTAGTCCTACAAATGAGCCATGAAATAAAAAGAACGACAATTCCACAAGAAAACACGCACCAGGGTCCGTCGTTCCAGTCCTTTCTAAGTGACTCTGCTTGTGGTACAGCAAATCCAAAACTAAATTTCGATCGATCGAGACACAATGTGAAATTTCAGGATCATAACTCGGCATCCAAGAGGGAAGGTCATCAAATAATCTCACACCGTCTAGTCCTACCTTTCTTTACATTATCTCAAGGGACACGCCAATTAACGTCATCCAATGAGTCACAAACCCAAGACGAGATATCAAAGTACTTACGGCAATTGAGATCCGTCATTGATACCACAGTTTATTGTAGATTTACCGGTATAGTCTCGTCCATGCATTACAAACTTGATGAGGTCCTTTGGGAAATAGAGAGTTTCAAGTCGGCTGTGACGCTAGCAGAGGGAGAAGGTGCTGGTGCCTTACTATTGATTCAGAAATACCAAGTTAAGACCTTATTTTTCAACACGCTAGCTACTGAGTCCAGTATAGAGTCAGAAATAGTATCAGGAATGACTACTCCTAGGATGCTTCTACCTGTTATGTCAAAATTCCATAATGACCAAATTGAGATTATTCTTAACAACTCAGCAAGCCAAATAACAGACATAACAAATCCTACTTGGTTTAAAGACCAAAGAGCAAGGCTACCTAAGCAAGTCGAGGTTATAACCATGGATGCAGAGACAACAGAGAATATAAACAGATCGAAATTGTACGAAGCTGTATATAAATTGATCTTACACCATATTGATCCTAGCGTATTGAAAGCAGTGGTCCTTAAAGTCTTTCTAAGTGATACTGAGGGTATGTTATGGCTAAATGATAATTTAGCCCCGTTTTTTGCCACTGGTTATTTAATTAAGCCAATAACGTCAAGTGCTAGATCTAGTGAGTGGTATCTTTGTCTGACGAACTTCTTATCAACTACACGTAAGATGCCACACCAAAACCATCTCAGTTGTAAACAGGTAATACTTACGGCATTGCAACTGCAAATTCAACGAAGCCCATACTGGCTAAGTCATTTAACTCAGTATGCTGACTGTGAGTTACATTTAAGTTATATCCGCCTTGGTTTTCCATCATTAGAGAAAGTACTATACCACAGGTATAACCTCGTCGATTCAAAAAGAGGTCCACTAGTCTCTATCACTCAGCACTTAGCACATCTTAGAGCAGAGATTCGAGAATTAACTAATGATTATAATCAACAGCGACAAAGTCGGACTCAAACATATCACTTTATTCGTACTGCAAAAGGACGAATCACAAAACTAGTCAATGATTATTTAAAATTCTTTCTTATTGTGCAAGCA
TTAAAACATAATGGGACATGGCAAGCTGAGTTTAAGAAATTACCAGAGTTGATTAGTGTGTGCAATAGGTTCTACCATATTAGAGATTGCAATTGTGAAGAACGTTTCTTAGTTCAAACCTTATATTTACATAGAATGCAGGATTCTGAAGTTAAGCTTATCGAAAGGCTGACAGGGCTTCTGAGTTTATTTCCGGATGGTCTCTACAGGTTTGATTGAATTACCGTGCATAGTATCCTGATACTTGCAAAGGTTGGTTATTAACATACAGATTATAAAAAACTCATAAATTGCTCTCATACATCATATTGATCTAATCTCAATAAACAACTATTTAAATAACGAAAGGAGTCCCTATATTATATACTATATTTAGCCTCTCTCCCTGCGTGATAATCAAAAAATTCACAATGCAGCATGTGTGACATATTACTGCCGCAATGAATTTAACGCAACATAATAAACTCTGCACTCTTTATAATTAAGCTTTAACGAAAGGTCTGGGCTCATATTGTTATTGATATAATAATGTTGTATCAATATCCTGTCAGATGGAATAGTGTTTTGGTTGATAACACAACTTCTTAAAACAAAATTGATCTTTAAGATTAAGTTTTTTATAATTATCATTACTTTAATTTGTCGTTTTAAAAACGGTGATAGCCTTAATCTTTGTGTAAAATAAGAGATTAGGTGTAATAACCTTAACATTTTTGTCTAGTAAGCTACTATTTCATACAGAATGATAAAATTAAAAGAAAAGGCAGGACTGTAAAATCAGAAATACCTTCTTTACAATATAGCAGACTAGATAATAATCTTCGTGTTAATGATAATTAAGACATTGACCACGCTCATCAGAAGGCTCGCCAGAATAAACGTTGCAAAAAGGATTCCTGGAAAAATGGTCGCACACAAAAATTTAAAAATAAATCTATTTCTTCTTTTTTGTGTGTCCA + genes: + - name: NP + sequence: MDSRPQKIWMAPSLTESDMDYHKILTAGLSVQQGIVRQRVIPVYQVNNLEEICQLIIQAFEAGVDFQESADSFLLMLCLHHAYQGDYKLFLESGAVKYLEGHGFRFEVKKRDGVKRLEELLPAVSSGKNIKRTLAAMPEEETTEANAGQFLSFASLFLPKLVVGEKACLEKVQRQIQVHAEQGLIQYPTAWQSVGHMMVIFRLMRTNFLIKFLLIHQGMHMVAGHDANDAVISNSVAQARFSGLLIVKTVLDHILQKTERGVRLHPLARTAKVKNEVNSFKAALSSLAKHGEYAPFARLLNLSGVNNLEHGLFPQLSAIALGVATAHGSTLAGVNVGEQYQQLREAATEAEKQLQQYAESRELDHLGLDDQEKKILMNFHQKKNEISFQQTNAMVTLRKERLAKLTEAITAASLPKTSGHYDDDDDIPFPGPINDDDNPGHQDDDPTDSQDTTIPDVVVDPDDGSYGEYQSYSENGMNAPDDLVLFDLDEDDEDTKPVPNRSTKGGQQKNSQKGQHIEGRQTQSRPIQNVPGPHRTIHHASAPLTDNDRRNEPSGSTSPRMLTPINEEADPLDDADDETSSLPPLESDDEEQDRDGTSNRTPTVAPPAPVYRDHSEKKELPQDEQQDQDHTQEARNQDSDNTQSEHSFEEMYRHILRSQGPFDAVLYYHMMKDEPVVFSTSDGKEYTYPDSLEEEYPPWLTEKEAMNEENRFVTLDGQQFYWPVMNHKNKFMAILQHHQ* + - name: VP35 + sequence: 
MTTRTKGRGHTAATTQNDRMPGPELSGWISEQLMTGRIPVSDIFCDIENNPGLCYASQMQQTKPNPKTRNSQTQTDPICNHSFEEVVQTLASLATVVQQQTIASESLEQRITSLENGLKPVYDMAKTISSLNRVCAEMVAKYDLLVMTTGRATATAAATEAYWAEHGQPPPGPSLYEESAIRGKIESRDETVPQSVREAFNNLNSTTSLTEENFGKPDISAKDLRNIMYDHLPGFGTAFHQLVQVICKLGKDSNSLDIIHAEFQASLAEGDSPQCALIQITKRVPIFQDAAPPVIHIRSRGDIPRACQKSLRPVPPSPKIDRGWVCVFQLQDGKTLGLKI* + - name: VP40 + sequence: MRRVILPTAPPEYMEAIYPVRSNSTIARGGNSNTGFLTPESVNGDTPSNPLRPIADDTIDHASHTPGSVSSAFILEAMVNVISGPKVLMKQIPIWLPLGVADQKTYSFDSTTAAIMLASYTITHFGKATNPLVRVNRLGPGIPDHPLRLLRIGNQAFLQEFVLPPVQLPQYFTFDLTALKLITQPLPAATWTDDTPTGSNGALRPGISFHPKLRPILLPNKSGKKGNSADLTSPEKIQAIMTSLQDFKIVPIDPTKNIMGIEVPETLVHKLTGKKVTSKNGQPIIPVLLPKYIGLDPVAPGDLTMVITQDCDTCHSPASLPAVIEK* + - name: GP + sequence: MGVTGILQLPRDRFKRTSFFLWVIILFQRTFSIPLGVIHNSTLQVSDVDKLVCRDKLSSTNQLRSVGLNLEGNGVATDVPSATKRWGFRSGVPPKVVNYEAGEWAENCYNLEIKKPDGSECLPAAPDGIRGFPRCRYVHKVSGTGPCAGDFAFHKEGAFFLYDRLASTVIYRGTTFAEGVVAFLILPQAKKDFFSSHPLREPVNATEDPSSGYYSTTIRYQATGFGTNETEYLFEVDNLTYVQLESRFTPQFLLQLNETIYTSGKRSNTTGKLIWKVNPEIDTTIGEWAFWETKKNLTRKIRSEELSFTVVSNGAKNISGQSPARTSSDPGTNTTTEDHKIMASENSSAMVQVHSQGREAAVSHLTTLATISTSPQSLTTKPGPDNSTHNTPVYKLDISEATQVEQHHRRTDNDSTASDTPSATTAAGPPKAENTNTSKSTDFLDPATTTSPQNHSETAGNNNTHHQDTGEESASSGKLGLITNTIAGVAGLITGGRRTRREAIVNAQPKCNPNLHYWTTQDEGAAIGLAWIPYFGPAAEGIYIEGLMHNQDGLICGLRQLANETTQALQLFLRATTELRTFSILNRKAIDFLLQRWGGTCHILGPDCCIEPHDWTKNITDKIDQIIHDFVDKTLPDQGDNDNWWTGWRQWIPAGIGVTGVIIAVIALFCICKFVF* + - name: ssGP + sequence: MGVTGILQLPRDRFKRTSFFLWVIILFQRTFSIPLGVIHNSTLQVSDVDKLVCRDKLSSTNQLRSVGLNLEGNGVATDVPSATKRWGFRSGVPPKVVNYEAGEWAENCYNLEIKKPDGSECLPAAPDGIRGFPRCRYVHKVSGTGPCAGDFAFHKEGAFFLYDRLASTVIYRGTTFAEGVVAFLILPQAKKDFFSSHPLREPVNATEDPSSGYYSTTIRYQATGFGTNETEYLFEVDNLTYVQLESRFTPQFLLQLNETIYTSGKRSNTTGKLIWKVNPEIDTTIGEWAFWETKKPH* + - name: sGP + sequence: 
MGVTGILQLPRDRFKRTSFFLWVIILFQRTFSIPLGVIHNSTLQVSDVDKLVCRDKLSSTNQLRSVGLNLEGNGVATDVPSATKRWGFRSGVPPKVVNYEAGEWAENCYNLEIKKPDGSECLPAAPDGIRGFPRCRYVHKVSGTGPCAGDFAFHKEGAFFLYDRLASTVIYRGTTFAEGVVAFLILPQAKKDFFSSHPLREPVNATEDPSSGYYSTTIRYQATGFGTNETEYLFEVDNLTYVQLESRFTPQFLLQLNETIYTSGKRSNTTGKLIWKVNPEIDTTIGEWAFWETKKTSLEKFAVKSCLSQLYQTEPKTSVVRVRRELLPTQGPTQQLKTTKSWLQKIPLQWFKCTVKEGKLQCRI* + - name: VP30 + sequence: MEASYERGRPRAARQHSRDGHDHHVRARSSSRENYRGEYRQSRSASQVRVPTVFHKKRVEPLTVPPAPKDICPTLKKGFLCDSSFCKKDHQLESLTDRELLLLIARKTCGSVEQQLNITAPKDSRLANPTADDFQQEEGPKITLLTLIKTAEHWARQDIRTIEDSKLRALLTLCAVMTRKFSKSQLSLLCETHLRREGLGQDQAEPVLEVYQRLHSDKGGSFEAALWQQWDRQSLIMFITAFLNIALQLPCESSAVVVSGLRTLVPQSDNEEASTNPGTCSWSDEGTP* + - name: VP24 + sequence: MAKATGRYNLISPKKDLEKGVVLSDLCNFLVSQTIQGWKVYWAGIEFDVTHKGMALLHRLKTNDFAPAWSMTRNLFPHLFQNPNSTIESPLWALRVILAAGIQDQLIDQSLIEPLAGALGLISDWLLTTNTNHFNMRTQRVKEQLSLKMLSLIRSNILKFINKLDALHVVNYNGLLSSIEIGTQNHTIIITRTNMGFLVELQEPDKSAMNRMKPGPAKFSLLHESTLKAFTQGSSTRMQSLILEFNSSLAI* + - name: L + sequence: MATQHTQYPDARLSSPIVLDQCDLVTRACGLYSSYSLNPQLRNCKLPKHIYRLKYDVTVTKFLSDVPVATLPIDFIVPVLLKALSGNGFCPVEPRCQQFLDEIIKYTMQDALFLKYYLKNVGAQEDCVDEHFQEKILSSIQGNEFLHQMFFWYDLAILTRRGRLNRGNSRSTWFVHDDLIDILGYGDYVFWKIPISMLPLNTQGIPHAAMDWYQASVFKEAVQGHTHIVSVSTADVLIMCKDLITCRFNTTLISKIAEIEDPVCSDYPNFKIVSMLYQSGDYLLSILGSDGYKIIKFLEPLCLAKIQLCSKYTERKGRFLTQMHLAVNHTLEEITEMRALKPSQAQKIREFHRTLIRLEMTPQQLCELFSIQKHWGHPVLHSETAIQKVKKHATVLKALRPIVIFETYCVFKYSIAKHYFDSQGSWYSVTSDRNLTPGLNSYIKRNQFPPLPMIKELLWEFYHLDHPPLFSTKIISDLSIFIKDRATAVERTCWDAVFEPNVLGYNPPHKFSTKRVPEQFLEQENFSIENVLSYAQKLEYLLPQYRNFSFSLKEKELNVGRTFGKLPYPTRNVQTLCEALLADGLAKAFPSNMMVVTEREQKESLLHQASWHHTSDDFGEHATVRGSSFVTDLEKYNLAFRYEFTAPFIEYCNRCYGVKNVFNWMHYTIPQCYMHVSDYYNPPHNLTLENRDNPPEGPSSYRGHMGGIEGLQQKLWTSISCAQISLVEIKTGFKLRSAVMGDNQCITVLSVFPLETDADEQEQSAEDNAARVAASLAKVTSACGIFLKPDETFVHSGFIYFGKKQYLNGVQLPQSLKTATRMAPLSDAIFDDLQGTLASIGTAFERSISETRHIFPCRITAAFHTFFSVRILQYHHLGFNKGFDLGQLTLGKPLDFGTISLALAVPQVLGGLSFLNPEKCFYRNLGDPVTSGLFQLKTYLRMIEMDDLFLPLIAKNPGNCTAIDFVLNPSGLNVPGSQDLTSFLRQIVRRTITLSAKNKLINTLFHASADFED
EMVCKWLLSSTPVMSRFAADIFSRTPSGKRLQILGYLEGTRTLLASKIINNNTETPVLDRLRKITLQRWSLWFSYLDHCDNILAEALTQITCTVDLAQILREYSWAHILEGRPLIGATLPCMIEQFKVFWLKPYEQCPQCSNAKQPGGKPFVSVAVKKHIVSAWPNASRISWTIGDGIPYIGSRTEDKIGQPAIKPKCPSAALREAIELASRLTWVTQGSSNSDLLIKPFLEARVNLSVQEILQMTPSHYSGNIVHRYNDQYSPHSFMANRMSNSATRLIVSTNTLGEFSGGGQSARDSNIIFQNVINYAVALFDIKFRNTEATDIQYNRAHLHLTKCCTREVPAQYLTYTSTLDLDLTRYRENELIYDSNPLKGGLNCNISFDNPFFQGKRLNIIEDDLIRLPHLSGWELAKTIMQSIISDSNNSSTDPISSGETRSFTTHFLTYPKIGLLYSFGAFVSYYLGNTILRTKKLTLDNFLYYLTTQIHNLPHRSLRILKPTFKHASVMSRLMSIDPHFSIYIGGAAGDRGLSDAARLFLRTSISSFLTFVKEWIINRGTIVPLWIVYPLEGQNPTPVNNFLYQIVELLVHDSSRQQAFKTTISDHVHPHDNLVYTCKSTASNFFHASLAYWRSRHRNSNRKYLARDSSTGSSTNNSDGHIERSQEQTTRDPHDGTERNLVLQMSHEIKRTTIPQENTHQGPSFQSFLSDSACGTANPKLNFDRSRHNVKFQDHNSASKREGHQIISHRLVLPFFTLSQGTRQLTSSNESQTQDEISKYLRQLRSVIDTTVYCRFTGIVSSMHYKLDEVLWEIESFKSAVTLAEGEGAGALLLIQKYQVKTLFFNTLATESSIESEIVSGMTTPRMLLPVMSKFHNDQIEIILNNSASQITDITNPTWFKDQRARLPKQVEVITMDAETTENINRSKLYEAVYKLILHHIDPSVLKAVVLKVFLSDTEGMLWLNDNLAPFFATGYLIKPITSSARSSEWYLCLTNFLSTTRKMPHQNHLSCKQVILTALQLQIQRSPYWLSHLTQYADCELHLSYIRLGFPSLEKVLYHRYNLVDSKRGPLVSITQHLAHLRAEIRELTNDYNQQRQSRTQTYHFIRTAKGRITKLVNDYLKFFLIVQALKHNGTWQAEFKKLPELISVCNRFYHIRDCNCEERFLVQTLYLHRMQDSEVKLIERLTGLLSLFPDGLYRFD* defaultOrganisms: + ebola-zaire: + <<: *defaultOrganismConfig + preprocessing: + - <<: *preprocessing + configFile: + <<: *preprocessingConfigFile + taxon_id: 186538 + nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/ebola/data_output + mpox: + <<: *defaultOrganismConfig + schema: + <<: *schema + instanceName: "Mpox Virus" + image: "https://cdn.who.int/media/images/default-source/health-topics/monkeypox/12763.tmb-1200v.jpg?sfvrsn=cd044fbd_37" + description: "Mpox, formerly monkeypox, is a rare viral disease that occurs mainly in remote parts of Central and West Africa, near tropical rainforests. Recently global outbreaks have occurred." 
+ metadataAdd: + - name: clade + header: "Clade & Lineage" + noInput: true + generateIndex: true + autocomplete: true + initiallyVisible: true + preprocessing: + inputs: { input: nextclade.clade } + - name: outbreak + header: "Clade & Lineage" + noInput: true + generateIndex: true + autocomplete: true + preprocessing: + inputs: { input: nextclade.customNodeAttributes.outbreak } + - name: lineage + header: "Clade & Lineage" + noInput: true + generateIndex: true + autocomplete: true + initiallyVisible: true + preprocessing: + inputs: { input: nextclade.customNodeAttributes.lineage } + website: + <<: *website + tableColumns: + - sample_collection_date + - ncbi_release_date + - authors + - author_affiliations + - geo_loc_country + - length + - clade + - lineage + preprocessing: + - <<: *preprocessing + configFile: + <<: *preprocessingConfigFile + nextclade_dataset_name: nextstrain/mpox/all-clades + batch_size: 5 + genes: + - OPG001 + ingest: + <<: *ingest + configFile: + <<: *ingestConfigFile + taxon_id: 10244 + subsample_fraction: 0.1 + referenceGenomes: + nucleotideSequences: + - name: "main" + sequence: "[[URL:https://cov2tree.nyc3.cdn.digitaloceanspaces.com/mpox]]" + genes: + - name: OPG001 + sequence: MKQYIVLACMCLVAAAMPTSLQQSSSSCTEEENKHHMGIDVIIKVTKQDQTPTNDKICQSVTEVTETEDDEVSEEVVKGDPTTYYTIVGAGLNMNFGFTKCPKISSISESSDGNTVNTRLSSVSPGQGKDSPAITREEALAMIKDCEMSIDIRCSEEEKDSDIKTHPVLGSNISHKKVSYKDIIGSTIVDTKCVKNLEFSVRIGDMCEESSELEVKDGFKYVDGSASEGATDDTSLIDSTKLKACV* + west-nile: + <<: *defaultOrganismConfig + schema: + <<: *schema + instanceName: "West Nile Virus" + image: "https://upload.wikimedia.org/wikipedia/commons/thumb/1/1e/West_Nile_Virus_Image.jpg/256px-West_Nile_Virus_Image.jpg?20200815184100" + description: "West Nile Virus (WNV) is a mosquito-borne flavivirus of the family Flaviviridae, which also contains the Zika virus, dengue virus, and yellow fever virus. It is primarily transmitted by Culex mosquitoes, which acquire the virus by feeding on infected birds." 
+ metadataAdd: + - name: lineage + header: "Lineage" + noInput: true + generateIndex: true + autocomplete: true + initiallyVisible: true + preprocessing: + inputs: { input: nextclade.clade } + website: + <<: *website + tableColumns: + - sample_collection_date + - ncbi_release_date + - authors + - author_affiliations + - geo_loc_country + - geo_loc_admin_1 + - length + - lineage + preprocessing: + - <<: *preprocessing + configFile: + <<: *preprocessingConfigFile + nextclade_dataset_name: nextstrain/wnv/all-lineages + nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/wnv/data_output + genes: [ capsid, prM, env, NS1, NS2A, NS2B, NS3, NS4A, 2K, NS4B, NS5 ] + ingest: + <<: *ingest + configFile: + taxon_id: 3048448 + referenceGenomes: + nucleotideSequences: + - name: main + sequence: "AGTAGTTCGCCTGTGTGAGCTGACAAACTTAGTAGTGTTTGTGAGGATTAACAACAATTAACACAGTGCGAGCTGTTTCTTAGCACGAAGATCTCGATGTCTAAGAAACCAGGAGGGCCCGGCAAGAGCCGGGCTGTCAATATGCTAAAACGCGGAATGCCCCGCGTGTTGTCCTTGATTGGACTGAAGAGGGCTATGTTGAGCCTGATCGACGGCAAGGGGCCAATACGATTTGTGTTGGCTCTCTTGGCGTTCTTCAGGTTCACAGCAATTGCTCCGACCCGAGCAGTGCTGGATCGATGGAGAGGTGTGAACAAACAAACAGCGATGAAACACCTTCTGAGTTTTAAGAAGGAACTAGGGACCTTGACCAGTGCTATCAATCGGCGGAGCTCAAAACAAAAGAAAAGAGGAGGAAAGACCGGAATTGCAGTCATGATTGGCCTGATCGCCAGCGTAGGAGCAGTTACCCTCTCTAACTTCCAAGGGAAGGTGATGATGACGGTAAATGCTACTGACGTCACAGATGTCATCACGATTCCAACAGCTGCTGGAAAGAACCTATGCATTGTCAGAGCAATGGATGTGGGATACATGTGCGATGATACTATCACTTATGAATGCCCAGTACTGTCGGCTGGTAATGATCCAGAAGACATCGACTGTTGGTGCACAAAGTCAGCAGTCTACGTCAGGTATGGAAGATGCACCAAGACACGCCACTCAAGACGCAGTCGGAGGTCACTGACAGTGCAGACACACGGAGAAAGCACTCTAGCGAACAAGAAGGGGGCTTGGATGGACAGCACCAAGGCCACAAGGTATTTGGTAAAAACAGAATCATGGATCTTGAGGAACCCTGGATATGCCCTGGTGGCAGCCGTCATTGGTTGGATGCTTGGGAGCAACACCATGCAGAGAGTTGTGTTTGTCGTGCTATTGCTTTTGGTGGCCCCAGCTTACAGCTTCAACTGCCTTGGAATGAGCAACAGAGACTTCTTGGAAGGAGTGTCTGGAGCAACATGGGTGGATTTGGTTCTCGAAGGCGACAGCTGCGTGACTATCATGTCTAAGGACAAGCCTACCATCGATGTGAAGATGATGAATATGGAGGCGGCCAACCTGGCAGAGGTCCGCAGTTATTGCTATTTGGCTACCGTCAGCGATCTCTCCA
CCAAAGCTGCGTGCCCGACCATGGGAGAAGCTCACAATGACAAACGTGCTGACCCAGCTTTTGTGTGCAGACAAGGAGTGGTGGACAGGGGCTGGGGCAACGGCTGCGGACTATTTGGCAAAGGAAGCATTGACACATGCGCCAAATTTGCCTGCTCTACCAAGGCAATAGGAAGAACCATCTTGAAAGAGAATATCAAGTACGAAGTGGCCATTTTTGTCCATGGACCAACTACTGTGGAGTCGCACGGAAACTACTCCACACAGGTTGGAGCCACTCAGGCAGGGAGACTCAGCATCACTCCTGCGGCGCCTTCATACACACTAAAGCTTGGAGAATATGGAGAGGTGACAGTGGACTGTGAACCACGGTCAGGGATTGACACCAATGCATACTACGTGATGACTGTTGGAACAAAGACGTTCTTGGTCCATCGTGAGTGGTTCATGGACCTCAACCTCCCTTGGAGCAGTGCTGGAAGTACTGTGTGGAGGAACAGAGAGACGTTAATGGAGTTTGAGGAACCACACGCCACGAAGCAGTCTGTGATAGCATTGGGCTCACAAGAGGGAGCTCTGCATCAAGCTTTGGCTGGAGCCATTCCTGTGGAATTTTCAAGCAACACTGTCAAGTTGACGTCGGGTCATTTGAAGTGTAGAGTGAAGATGGAAAAATTGCAGTTGAAGGGAACAACCTATGGCGTCTGTTCAAAGGCTTTCAAGTTTCTTGGGACTCCCGCAGACACAGGTCACGGCACTGTGGTGTTGGAATTGCAGTACACTGGCACGGATGGACCTTGCAAAGTTCCTATCTCGTCAGTGGCTTCATTGAACGACCTAACGCCAGTGGGCAGATTGGTCACTGTCAACCCTTTTGTTTCAGTGGCCACGGCCAACGCTAAGGTCCTGATTGAATTGGAACCACCCTTTGGAGACTCATACATAGTGGTGGGCAGAGGAGAACAACAGATCAATCACCATTGGCACAAGTCTGGAAGCAGCATTGGCAAAGCCTTTACAACCACCCTCAAAGGAGCGCAGAGACTAGCCGCTCTAGGAGACACAGCTTGGGACTTTGGATCAGTTGGAGGGGTGTTCACCTCAGTTGGGAAGGCTGTCCATCAAGTGTTCGGAGGAGCATTCCGCTCACTGTTCGGAGGCATGTCCTGGATAACGCAAGGATTGCTGGGGGCTCTCCTGTTGTGGATGGGCATCAATGCTCGTGATAGGTCCATAGCTCTCACGTTTCTCGCAGTTGGAGGAGTTCTGCTCTTCCTCTCCGTGAACGTGCACGCTGACACTGGGTGTGCCATAGACATCAGCCGGCAAGAGCTGAGATGTGGAAGTGGAGTGTTCATACACAATGATGTGGAGGCTTGGATGGACCGGTACAAGTATTACCCTGAAACGCCACAAGGCCTAGCCAAGATCATTCAGAAAGCTCATAAGGAAGGAGTGTGCGGTCTACGATCAGTTTCCAGACTGGAGCATCAAATGTGGGAAGCAGTGAAGGACGAGCTGAACACTCTTTTGAAGGAGAATGGTGTGGACCTTAGTGTCGTGGTTGAGAAACAGGAGGGAATGTACAAGTCAGCACCTAAACGCCTCACCGCCACCACGGAAAAATTGGAAATTGGCTGGAAGGCCTGGGGAAAGAGTATTTTATTTGCACCAGAACTCGCCAACAACACCTTTGTGGTTGATGGTCCGGAGACCAAGGAATGTCCGACTCAGAATCGCGCTTGGAATAGCTTAGAAGTGGAGGATTTTGGATTTGGTCTCACCAGCACTCGGATGTTCCTGAAGGTCAGAGAGAGCAACACAACTGAATGTGACTCGAAGATCATTGGAACGGCTGTCAAGAACAACTTGGCGATCCACAGTGACCTGTCCTATTGGATTGAAAGCAGGCTCAATGATACGTGGAAGCTTGAAAGGGCAGTTCTGGGTGAAGTCAAATCATGTACGTGGCCTGAGACGCATACCTTGTGGGGCGATGGA
ATCCTTGAGAGTGACTTGATAATACCAGTCACACTGGCGGGACCACGAAGCAATCACAATCGGAGACCTGGGTACAAGACACAAAACCAGGGCCCATGGGACGAAGGCCGGGTAGAGATTGACTTCGATTACTGCCCAGGAACTACGGTCACCCTGAGTGAGAGCTGCGGACACCGTGGACCTGCCACTCGCACCACCACAGAGAGCGGAAAGTTGATAACAGATTGGTGCTGCAGGAGCTGCACCTTACCACCACTGCGCTACCAAACTGACAGCGGCTGTTGGTATGGTATGGAGATCAGACCACAGAGACATGATGAAAAGACCCTCGTGCAGTCACAAGTGAATGCTTATAATGCTGATATGATTGACCCTTTTCAGTTGGGCCTTCTGGTCGTGTTCTTGGCCACCCAGGAGGTCCTTCGCAAGAGGTGGACAGCCAAGATCAGCATGCCAGCTATACTGATTGCTCTGCTAGTCCTGGTGTTTGGGGGCATTACTTACACTGATGTGTTACGCTATGTCATCTTGGTGGGGGCAGCTTTCGCAGAATCTAATTCGGGAGGAGACGTGGTACACTTGGCGCTCATGGCGACCTTCAAGATACAACCAGTGTTTATGGTGGCATCGTTTCTCAAAGCGAGATGGACCAACCAGGAGAACATTTTGTTGATGTTGGCGGCTGTTTTCTTTCAAATGGCTTATCACGATGCCCGCCAAATTCTGCTCTGGGAGATCCCTGATGTGTTGAATTCACTGGCGGTAGCTTGGATGATACTGAGAGCCATAACATTCACAACGACATCAAACGTGGTTGTTCCGCTGCTAGCCCTGCTAACACCCGGGCTGAGATGCTTGAATCTGGATGTGTACAGGATACTGCTGTTGATGGTCGGAATAGGCAGCTTGATCAGGGAGAAGAGGAGTGCAGCCGCAAAAAAGAAAGGAGCAAGTCTGCTATGCTTGGCTCTAGCCTCAACAGGACTTTTCAACCCCATGATCCTTGCTGCTGGACTGATTGCATGTGATCCCAACCGTAAACGCGGATGGCCCGCAACTGAAGTGATGACAGCTGTCGGCCTAATGTTTGCCATCGTCGGAGGGCTGGCAGAGCTTGACATTGACTCCATGGCCATTCCAATGACTATCGCGGGGCTCATGTTTGCTGCTTTCGTGATTTCTGGGAAATCAACAGATATGTGGATTGAGAGAACGGCGGACATTTCCTGGGAAAGTGATGCAGAAATTACAGGCTCGAGCGAAAGAGTTGATGTGCGGCTTGATGATGATGGAAACTTCCAGCTCATGAATGATCCAGGAGCACCTTGGAAGATATGGATGCTCAGAATGGTCTGTCTCGCGATTAGTGCGTACACCCCCTGGGCAATCTTGCCCTCAGTAGTTGGATTTTGGATAACTCTCCAATACACAAAGAGAGGAGGCGTGTTGTGGGACACTCCCTCACCAAAGGAGTACAAAAAGGGGGACACGACCACCGGCGTCTACAGGATCATGACTCGTGGGCTGCTCGGCAGTTATCAAGCAGGAGCGGGCGTGATGGTTGAAGGTGTTTTCCACACCCTTTGGCATACAACAAAAGGAGCCGCTTTGATGAGCGGAGAGGGCCGCCTGGACCCATACTGGGGCAGTGTCAAGGAGGATCGACTTTGTTACGGAGGACCCTGGAAATTGCAGCACAAGTGGAACGGGCAGGATGAGGTGCAGATGATTGTGGTGGAACCTGGCAAGAACGTTAAGAACGTCCAGACGAAACCAGGGGTGTTCAAAACACCTGAAGGAGAAATCGGGGCCGTGACTTTGGACTTCCCCACTGGAACATCAGGCTCACCAATAGTGGACAAAAACGGTGATGTGATTGGGCTTTATGGCAATGGAGTCATAATGCCCAACGGCTCATACATAAGCGCGATAGTGCAGGGTGAAAGGATGGATGAGCCAATCCCAGCCGGATTCGAACCTGAGATGCTGAGGAAAAAACA
GATCACTGTACTGGATCTCCATCCCGGCGCCGGTAAAACAAGGAGGATTCTGCCACAGATCATCAAAGAGGCCATAAACAGAAGACTGAGAACAGCCGTGCTAGCGCCAACCAGGGTTGTGGCTGCTGAGATGGCTGAAGCACTGAGAGGACTGCCCATCCGGTACCAGACATCCGCAGTGCCCAGAGAACATAATGGAAATGAGATTGTTGATGTCATGTGTCATGCTACCCTCACCCACAGGCTGATGTCTCCTCACAGGGTGCCGAACTACAACCTGTTCGTGATGGATGAGGCTCATTTCACCGACCCAGCTAGCATTGCAGCAAGAGGTTACATTTCCACAAAGGTCGAGCTAGGGGAGGCGGCGGCAATATTCATGACAGCCACCCCACCAGGCACTTCAGATCCATTCCCAGAGTCCAATTCACCAATTTCCGACTTACAGACTGAGATCCCGGATCGAGCTTGGAACTCTGGATACGAATGGATCACAGAATACACCGGGAAGACGGTTTGGTTTGTGCCTAGTGTCAAGATGGGGAATGAGATTGCCCTTTGCCTACAACGTGCTGGAAAGAAAGTAGTCCAATTGAACAGAAAGTCGTACGAGACGGAGTACCCAAAATGTAAGAACGATGATTGGGACTTTGTTATCACAACAGACATATCTGAAATGGGGGCTAACTTCAAGGCGAGCAGGGTGATTGACAGCCGGAAGAGTGTGAAACCAACCATCATAACAGAAGGAGAAGGGAGAGTGATCCTGGGAGAACCATCTGCAGTGACAGCAGCTAGTGCCGCCCAGAGACGTGGACGTATCGGTAGAAATCCGTCGCAAGTTGGTGATGAGTACTGTTATGGGGGGCACACGAATGAAGACGACTCGAACTTCGCCCATTGGACTGAGGCACGAATCATGCTGGACAACATCAACATGCCAAACGGACTGATCGCTCAATTCTACCAACCAGAGCGTGAGAAGGTATATACCATGGATGGGGAATACCGGCTCAGAGGAGAAGAGAGAAAAAACTTTCTGGAACTGTTGAGGACTGCAGATCTGCCAGTTTGGCTGGCTTACAAGGTTGCAGCGGCTGGAGTGTCATACCACGACCGGAGGTGGTGCTTTGATGGTCCTAGGACAAACACAATTTTAGAAGACAACAACGAAGTGGAAGTCATCACGAAGCTTGGTGAAAGGAAGATTCTGAGGCCGCGCTGGATTGATGCCAGGGTGTACTCGGATCACCAGGCACTAAAGGCGTTCAAGGACTTCGCCTCGGGAAAACGTTCTCAGATAGGGCTCATTGAGGTTCTGGGAAAGATGCCTGAGCACTTCATGGGGAAGACATGGGAAGCACTTGACACCATGTACGTTGTGGCCACTGCAGAGAAAGGAGGAAGAGCTCACAGAATGGCCCTGGAGGAACTGCCAGATGCTCTTCAGACAATTGCCTTGATTGCCTTATTGAGTGTGATGACCATGGGAGTATTCTTCCTCCTCATGCAGCGGAAGGGCATTGGAAAGATAGGTTTGGGAGGCGCTGTCTTGGGAGTCGCGACCTTTTTCTGTTGGATGGCTGAAGTTCCAGGAACGAAGATCGCCGGAATGTTGCTGCTCTCCCTTCTCTTGATGATTGTGCTAATTCCTGAGCCAGAGAAGCAACGTTCGCAGACAGACAACCAGCTAGCCGTGTTCCTGATTTGTGTCATGACCCTTGTGAGCGCAGTGGCAGCCAACGAGATGGGTTGGCTAGATAAGACCAAGAGTGACATAAGCAGTTTGTTTGGGCAAAGAATTGAGGTCAAGGAGAATTTCAGCATGGGAGAGTTTCTTCTGGACTTGAGGCCGGCAACAGCCTGGTCACTGTACGCTGTGACAACAGCGGTCCTCACTCCACTGCTAAAGCATTTGATCACGTCAGATTACATCAACACCTCATTGACCTCAATAAACGTTCAGGCAAGTGCACTATTCACACTCGCGCGAGGCTTCC
CCTTCGTCGATGTTGGAGTGTCGGCTCTCCTGCTAGCAGCCGGATGCTGGGGACAAGTCACCCTCACCGTTACGGTAACAGCGGCAACACTCCTTTTTTGCCACTATGCCTACATGGTTCCCGGTTGGCAAGCTGAGGCAATGCGCTCAGCCCAGCGGCGGACAGCGGCCGGAATCATGAAGAACGCTGTAGTGGATGGCATCGTGGCCACGGACGTCCCAGAATTAGAGCGCACCACACCCATCATGCAGAAGAAAGTTGGACAGATCATGCTGATCTTGGTGTCTCTAGCTGCAGTAGTAGTGAACCCGTCTGTGAAGACAGTACGAGAAGCCGGAATTTTGATCACGGCCGCAGCGGTGACGCTTTGGGAGAATGGAGCAAGCTCTGTTTGGAACGCAACAACTGCCATCGGACTCTGCCACATCATGCGTGGGGGTTGGTTGTCATGTCTATCCATAACATGGACACTCATAAAGAACATGGAAAAACCAGGACTAAAAAGAGGTGGGGCAAAAGGACGCACCTTGGGAGAGGTTTGGAAAGAAAGACTCAACCAGATGACAAAAGAAGAGTTCACTAGGTACCGCAAAGAGGCCATCATCGAAGTCGATCGCTCAGCGGCAAAACACGCCAGGAAAGAAGGCAATGTCACTGGAGGGCATCCAGTCTCTAGGGGCACAGCAAAACTGAGATGGCTGGTCGAACGGAGGTTTCTCGAACCGGTCGGAAAAGTGATTGACCTTGGATGTGGAAGAGGCGGTTGGTGTTACTATATGGCAACCCAAAAAAGAGTCCAAGAAGTCAGAGGGTACACAAAGGGCGGTCCCGGACATGAAGAGCCCCAACTAGTGCAAAGTTATGGATGGAACATTGTCACCATGAAGAGTGGAGTGGATGTGTTCTACAGACCTTCTGAGTGTTGTGACACCCTCCTTTGTGACATCGGAGAGTCCTCGTCAAGTGCTGAGGTTGAAGAGCATAGGACGATTCGGGTCCTTGAAATGGTTGAGGACTGGCTGCACCGAGGGCCAAGGGAATTTTGCGTGAAGGTGCTCTGTCCCTACATGCCGAAAGTCATAGAGAAGATGGAGCTGCTCCAACGCCGGTATGGGGGGGGACTGGTCAGAAACCCACTCTCACGGAATTCCACGCACGAGATGTATTGGGTGAGTCGAGCTTCAGGCAATGTGGTACATTCAGTGAATATGACCAGCCAGGTGCTCCTAGGAAGAATGGAAAAAAGGACCTGGAAGGGACCCCAATACGAGGAAGATGTAAACTTGGGAAGTGGAACCAGGGCGGTGGGAAAACCCCTGCTCAACTCAGACACCAGTAAAATCAAGAACAGGATTGAACGACTCAGGCGTGAGTACAGTTCGACGTGGCACCACGATGAGAACCACCCATATAGAACCTGGAACTATCACGGCAGTTATGATGTGAAGCCCACAGGCTCCGCCAGTTCGCTGGTCAATGGAGTGGTCAGGCTCCTCTCAAAACCATGGGACACCATCACGAATGTTACCACCATGGCCATGACTGACACTACTCCCTTCGGGCAGCAGCGAGTGTTCAAAGAGAAGGTGGACACGAAAGCTCCTGAACCGCCAGAAGGAGTGAAGTACGTGCTCAACGAGACCACCAACTGGTTGTGGGCGTTTTTGGCCAGAGAAAAACGTCCCAGAATGTGCTCTCGAGAGGAATTCATAAGAAAGGTCAACAGCAATGCAGCTTTGGGTGCCATGTTTGAAGAGCAGAATCAATGGAGGAGCGCCAGAGAAGCAGTTGAAGATCCAAAATTTTGGGAGATGGTGGATGAGGAGCGCGAGGCACATCTGCGGGGGGAATGTCACACTTGCATTTACAACATGATGGGAAAGAGAGAGAAAAAACCCGGAGAGTTCGGAAAGGCCAAGGGAAGCAGAGCCATTTGGTTCATGTGGCTCGGAGCTCGCTTTCTGGAGTTCGAGGCTCTGGGTTTTCTCAATGAAGACCAC
TGGCTTGGAAGAAAGAACTCAGGAGGAGGTGTCGAGGGCTTGGGCCTCCAAAAACTGGGTTACATCCTGCGTGAAGTTGGCACCCGGCCTGGGGGCAAGATCTATGCTGATGACACAGCTGGCTGGGACACCCGCATCACGAGAGCTGACTTGGAAAATGAAGCTAAGGTGCTTGAGCTGCTTGATGGGGAACATCGGCGTCTTGCCAGGGCCATCATTGAGCTCACCTATCGTCACAAAGTTGTGAAAGTGATGCGCCCGGCTGCTGATGGAAGAACCGTCATGGATGTTATCTCCAGAGAAGATCAGAGGGGGAGTGGACAAGTTGTCACCTACGCCCTAAACACTTTCACCAACCTGGCCGTCCAGCTGGTGAGGATGATGGAAGGGGAAGGAGTGATTGGCCCAGATGATGTGGAGAAACTCACAAAAGGGAAAGGACCCAAAGTCAGGACCTGGCTGTTTGAGAATGGGGAAGAAAGACTCAGCCGCATGGCTGTCAGTGGAGATGACTGTGTGGTAAAGCCCCTGGACGATCGCTTTGCCACCTCGCTCCACTTCCTCAATGCTATGTCAAAGGTTCGCAAAGACATCCAAGAGTGGAAACCGTCAACTGGATGGTATGATTGGCAGCAGGTTCCATTTTGCTCAAACCATTTCACTGAATTGATCATGAAAGATGGAAGAACACTGGTGGTTCCATGCCGAGGACAGGATGAATTGGTAGGCAGAGCTCGCATATCTCCAGGGGCCGGATGGAACGTCCGCGACACTGCTTGTCTGGCTAAGTCTTATGCCCAGATGTGGCTGCTTCTGTACTTCCACAGAAGAGACCTGCGGCTCATGGCCAACGCCATTTGCTCCGCTGTCCCTGTGAATTGGGTCCCTACCGGAAGAACCACGTGGTCCATCCATGCAGGAGGAGAGTGGATGACAACAGAGGACATGTTGGAGGTCTGGAACCGTGTTTGGATAGAGGAGAATGAATGGATGGAAGACAAAACCCCAGTGGAGAAATGGAGTGACGTCCCATATTCAGGAAAACGAGAGGACATCTGGTGTGGCAGCCTGATTGGCACAAGAGCCCGAGCCACGTGGGCAGAAAACATCCAGGTGGCTATCAACCAAGTCAGAGCAATCATCGGAGATGAGAAGTATGTGGACTACATGAGTTCACTAAAGAGATATGAAGACACAACTTTGGTTGAGGACACAGTACTGTAGATATTTAATCAATTGTAAATAGACAATATAAGTATGCATAAAAGTGTAGTTTTATAGTAGTATTTAGTGGTGTTAGTGTAAATAGTTAAGAAAATTTTGAGGAGAAAGTCAGGCCGGGAAGTTCCCGCCACCGGAAGTTGAGTAGACGGTGCTGCCTGCGACTCAACCCCAGGAGGACTGGGTGAACAAAGCCGCGAAGTGATCCATGTAAGCCCTCAGAACCGTCTCGGAAGGAGGACCCCACATGTTGTAACTTCAAAGCCCAATGTCAGACCACGCTACGGCGTGCTACTCTGCGGAGAGTGCAGTCTGCGATAGTGCCCCAGGAGGACTGGGTTAACAAAGGCAAACCAACGCCCCACGCGGCCCTAGCCCCGGTAATGGTGTTAACCAGGGCGAAAGGACTAGAGGTTAGAGGAGACCCCGCGGTTTAAAGTGCACGGCCCAGCCTGGCTGAAGCTGTAGGTCAGGGGAAGGACTAGAGGTTAGTGGAGACCCCGTGCCACAAAACACCACAACAAAACAGCATATTGACACCTGGGATAGACTAGGAGATCTTCTGCTCTGCACAACCAGCCACACGGCACAGTGCGCCGACAATGGTGGCTGGTGGTGCGAGAACACAGGATCT" + genes: + - name: 2K + sequence: SQTDNQLAVFLICVMTLVSAVAA + - name: NS1 + sequence: 
DTGCAIDISRQELRCGSGVFIHNDVEAWMDRYKYYPETPQGLAKIIQKAHKEGVCGLRSVSRLEHQMWEAVKDELNTLLKENGVDLSVVVEKQEGMYKSAPKRLTATTEKLEIGWKAWGKSILFAPELANNTFVVDGPETKECPTQNRAWNSLEVEDFGFGLTSTRMFLKVRESNTTECDSKIIGTAVKNNLAIHSDLSYWIESRLNDTWKLERAVLGEVKSCTWPETHTLWGDGILESDLIIPVTLAGPRSNHNRRPGYKTQNQGPWDEGRVEIDFDYCPGTTVTLSESCGHRGPATRTTTESGKLITDWCCRSCTLPPLRYQTDSGCWYGMEIRPQRHDEKTLVQSQVNA + - name: NS2A + sequence: YNADMIDPFQLGLLVVFLATQEVLRKRWTAKISMPAILIALLVLVFGGITYTDVLRYVILVGAAFAESNSGGDVVHLALMATFKIQPVFMVASFLKARWTNQENILLMLAAVFFQMAYHDARQILLWEIPDVLNSLAVAWMILRAITFTTTSNVVVPLLALLTPGLRCLNLDVYRILLLMVGIGSLIREKRSAAAKKKGASLLCLALASTGLFNPMILAAGLIACDPNRKR + - name: NS2B + sequence: GWPATEVMTAVGLMFAIVGGLAELDIDSMAIPMTIAGLMFAAFVISGKSTDMWIERTADISWESDAEITGSSERVDVRLDDDGNFQLMNDPGAPWKIWMLRMVCLAISAYTPWAILPSVVGFWITLQYTKR + - name: NS3 + sequence: GGVLWDTPSPKEYKKGDTTTGVYRIMTRGLLGSYQAGAGVMVEGVFHTLWHTTKGAALMSGEGRLDPYWGSVKEDRLCYGGPWKLQHKWNGQDEVQMIVVEPGKNVKNVQTKPGVFKTPEGEIGAVTLDFPTGTSGSPIVDKNGDVIGLYGNGVIMPNGSYISAIVQGERMDEPIPAGFEPEMLRKKQITVLDLHPGAGKTRRILPQIIKEAINRRLRTAVLAPTRVVAAEMAEALRGLPIRYQTSAVPREHNGNEIVDVMCHATLTHRLMSPHRVPNYNLFVMDEAHFTDPASIAARGYISTKVELGEAAAIFMTATPPGTSDPFPESNSPISDLQTEIPDRAWNSGYEWITEYTGKTVWFVPSVKMGNEIALCLQRAGKKVVQLNRKSYETEYPKCKNDDWDFVITTDISEMGANFKASRVIDSRKSVKPTIITEGEGRVILGEPSAVTAASAAQRRGRIGRNPSQVGDEYCYGGHTNEDDSNFAHWTEARIMLDNINMPNGLIAQFYQPEREKVYTMDGEYRLRGEERKNFLELLRTADLPVWLAYKVAAAGVSYHDRRWCFDGPRTNTILEDNNEVEVITKLGERKILRPRWIDARVYSDHQALKAFKDFASGKR + - name: NS4A + sequence: SQIGLIEVLGKMPEHFMGKTWEALDTMYVVATAEKGGRAHRMALEELPDALQTIALIALLSVMTMGVFFLLMQRKGIGKIGLGGAVLGVATFFCWMAEVPGTKIAGMLLLSLLLMIVLIPEPEKQR + - name: NS4B + sequence: NEMGWLDKTKSDISSLFGQRIEVKENFSMGEFLLDLRPATAWSLYAVTTAVLTPLLKHLITSDYINTSLTSINVQASALFTLARGFPFVDVGVSALLLAAGCWGQVTLTVTVTAATLLFCHYAYMVPGWQAEAMRSAQRRTAAGIMKNAVVDGIVATDVPELERTTPIMQKKVGQIMLILVSLAAVVVNPSVKTVREAGILITAAAVTLWENGASSVWNATTAIGLCHIMRGGWLSCLSITWTLIKNMEKPGLKR + - name: NS5 + sequence: 
GGAKGRTLGEVWKERLNQMTKEEFTRYRKEAIIEVDRSAAKHARKEGNVTGGHPVSRGTAKLRWLVERRFLEPVGKVIDLGCGRGGWCYYMATQKRVQEVRGYTKGGPGHEEPQLVQSYGWNIVTMKSGVDVFYRPSECCDTLLCDIGESSSSAEVEEHRTIRVLEMVEDWLHRGPREFCVKVLCPYMPKVIEKMELLQRRYGGGLVRNPLSRNSTHEMYWVSRASGNVVHSVNMTSQVLLGRMEKRTWKGPQYEEDVNLGSGTRAVGKPLLNSDTSKIKNRIERLRREYSSTWHHDENHPYRTWNYHGSYDVKPTGSASSLVNGVVRLLSKPWDTITNVTTMAMTDTTPFGQQRVFKEKVDTKAPEPPEGVKYVLNETTNWLWAFLAREKRPRMCSREEFIRKVNSNAALGAMFEEQNQWRSAREAVEDPKFWEMVDEEREAHLRGECHTCIYNMMGKREKKPGEFGKAKGSRAIWFMWLGARFLEFEALGFLNEDHWLGRKNSGGGVEGLGLQKLGYILREVGTRPGGKIYADDTAGWDTRITRADLENEAKVLELLDGEHRRLARAIIELTYRHKVVKVMRPAADGRTVMDVISREDQRGSGQVVTYALNTFTNLAVQLVRMMEGEGVIGPDDVEKLTKGKGPKVRTWLFENGEERLSRMAVSGDDCVVKPLDDRFATSLHFLNAMSKVRKDIQEWKPSTGWYDWQQVPFCSNHFTELIMKDGRTLVVPCRGQDELVGRARISPGAGWNVRDTACLAKSYAQMWLLLYFHRRDLRLMANAICSAVPVNWVPTGRTTWSIHAGGEWMTTEDMLEVWNRVWIEENEWMEDKTPVEKWSDVPYSGKREDIWCGSLIGTRARATWAENIQVAINQVRAIIGDEKYVDYMSSLKRYEDTTLVEDTVL + - name: capsid + sequence: MSKKPGGPGKSRAVNMLKRGMPRVLSLIGLKRAMLSLIDGKGPIRFVLALLAFFRFTAIAPTRAVLDRWRGVNKQTAMKHLLSFKKELGTLTSAINRRSSKQKKRGGKTGIAVMIGLIASVGA + - name: env + sequence: FNCLGMSNRDFLEGVSGATWVDLVLEGDSCVTIMSKDKPTIDVKMMNMEAANLAEVRSYCYLATVSDLSTKAACPTMGEAHNDKRADPAFVCRQGVVDRGWGNGCGLFGKGSIDTCAKFACSTKAIGRTILKENIKYEVAIFVHGPTTVESHGNYSTQVGATQAGRLSITPAAPSYTLKLGEYGEVTVDCEPRSGIDTNAYYVMTVGTKTFLVHREWFMDLNLPWSSAGSTVWRNRETLMEFEEPHATKQSVIALGSQEGALHQALAGAIPVEFSSNTVKLTSGHLKCRVKMEKLQLKGTTYGVCSKAFKFLGTPADTGHGTVVLELQYTGTDGPCKVPISSVASLNDLTPVGRLVTVNPFVSVATANAKVLIELEPPFGDSYIVVGRGEQQINHHWHKSGSSIGKAFTTTLKGAQRLAALGDTAWDFGSVGGVFTSVGKAVHQVFGGAFRSLFGGMSWITQGLLGALLLWMGINARDRSIALTFLAVGGVLLFLSVNVHA + - name: prM + sequence: VTLSNFQGKVMMTVNATDVTDVITIPTAAGKNLCIVRAMDVGYMCDDTITYECPVLSAGNDPEDIDCWCTKSAVYVRYGRCTKTRHSRRSRRSLTVQTHGESTLANKKGAWMDSTKATRYLVKTESWILRNPGYALVAAVIGWMLGSNTMQRVVFVVLLLLVAPAYS dummy-organism: schema: image: "https://www.un.org/sites/un2.un.org/files/field/image/1583952355.1997.jpg" instanceName: "Test Dummy Organism" description: "This is a test organism for testing purposes." 
- inputFields: - - name: date - displayName: Date - - name: region - displayName: Region - - name: country - displayName: Country - - name: division - displayName: Division - - name: host - displayName: Host - - name: pango_lineage - displayName: Pango Lineage metadata: - name: date type: date @@ -125,1398 +1119,6 @@ defaultOrganisms: sequence: "MDPKISEMHPALRLVDPQIQLAVTRMENAVGRDQNNVGPKVYPIILRLGSPLSLNMARKTLNSLEDKAFQLTPIAVQMTKLATTEELPDEFVVVTVK*" - name: "S" sequence: "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT*" - mpox: - schema: - instanceName: "Mpox Virus" - image: "https://cdn.who.int/media/images/default-source/health-topics/monkeypox/12763.tmb-1200v.jpg?sfvrsn=cd044fbd_37" - description: "Mpox, formerly monkeypox, is a rare viral disease that occurs 
mainly in remote parts of Central and West Africa, near tropical rainforests. Recently global outbreaks have occurred." - metadata: - - name: collection_date - displayName: Collection date - type: date - required: true - initiallyVisible: true - header: Sample details - - name: ncbi_release_date - displayName: NCBI release date - type: date - header: "INSDC" - - name: country - type: string - required: true - generateIndex: true - autocomplete: true - initiallyVisible: true - header: Sample details - - name: clade - displayName: Clade - type: string - generateIndex: true - autocomplete: true - header: Clade & Lineage - - name: outbreak - displayName: Outbreak - type: string - generateIndex: true - autocomplete: true - header: Clade & Lineage - - name: lineage - displayName: Lineage - type: string - generateIndex: true - autocomplete: true - header: Clade & Lineage - - name: isolate_name - displayName: Isolate name - type: string - header: Sample details - - name: author_affiliation - displayName: Author affiliation - type: string - generateIndex: true - autocomplete: true - truncateColumnDisplayTo: 15 - header: Authors - - name: authors - displayName: Authors - type: authors - header: Authors - truncateColumnDisplayTo: 15 - - name: submitter_country - displayName: Submitter country - type: string - generateIndex: true - autocomplete: true - hideOnSequenceDetailsPage: true - - name: division - type: string - generateIndex: true - autocomplete: true - header: Sample details - - name: insdc_accession_base - type: string - header: "INSDC" - hideOnSequenceDetailsPage: true - - name: insdc_version - type: int - header: "INSDC" - hideOnSequenceDetailsPage: true - - name: insdc_accession_full - type: string - displayName: INSDC accession - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/nuccore/{{value}}" - header: "INSDC" - - name: bioprojects - type: string - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/bioproject/{{value}}" - header: 
"INSDC" - - name: biosample_accession - type: string - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/biosample/{{value}}" - header: "INSDC" - - name: ncbi_completeness - type: string - generateIndex: true - autocomplete: true - header: "Alignment states and QC metrics" - - name: ncbi_host_name - type: string - generateIndex: true - autocomplete: true - header: "Host" - - name: ncbi_host_tax_id - type: int - autocomplete: true - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=info&id={{value}}" - header: "Host" - - name: ncbi_is_lab_host - type: string - generateIndex: true - autocomplete: true - header: "Host" - - name: ncbi_length - type: int - header: "INSDC" - - name: ncbi_protein_count - type: int - header: "INSDC" - hideOnSequenceDetailsPage: true - - name: ncbi_update_date - type: date - header: "INSDC" - - name: ncbi_sourcedb - type: string - generateIndex: true - autocomplete: true - header: "INSDC" - hideOnSequenceDetailsPage: true - - name: ncbi_virus_name - type: string - generateIndex: true - autocomplete: true - hideOnSequenceDetailsPage: true - - name: ncbi_virus_tax_id - type: int - autocomplete: true - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/virus?SeqType_s=Nucleotide&VirusLineage_ss=taxid:{{value}}" - hideOnSequenceDetailsPage: true - - name: isolate_source - type: string - generateIndex: true - autocomplete: true - header: "Host" - - name: sra_accessions - type: string - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/sra/?term={{value}}" - header: "INSDC" - - name: length - type: int - autocomplete: true - - name: total_snps - type: int - header: "Alignment states and QC metrics" - - name: total_inserted_nucs - type: int - header: "Alignment states and QC metrics" - - name: total_deleted_nucs - type: int - header: "Alignment states and QC metrics" - - name: total_ambiguous_nucs - type: int - header: "Alignment states 
and QC metrics" - - name: total_unknown_nucs - type: int - header: "Alignment states and QC metrics" - - name: total_frame_shifts - type: int - header: "Alignment states and QC metrics" - - name: frame_shifts - type: string - header: "Alignment states and QC metrics" - - name: completeness - type: float - header: "Alignment states and QC metrics" - - name: total_stop_codons - type: int - header: "Alignment states and QC metrics" - - name: stop_codons - type: string - header: "Alignment states and QC metrics" - website: - tableColumns: - - collection_date - - country - - division - - authors - - author_affiliation - - ncbi_release_date - - insdc_accession_full - - length - - clade - - lineage - defaultOrderBy: collection_date - defaultOrder: descending - silo: - dateToSortBy: collection_date - inputFields: - - name: collection_date - displayName: Collection Date - - name: ncbi_release_date - displayName: NCBI Release Date - - name: country - displayName: Country - - name: isolate_name - displayName: Isolate Name - - name: author_affiliation - displayName: Author Affiliation - - name: authors - displayName: Authors - - name: submitter_country - displayName: Submitter Country - - name: division - displayName: Division - - name: insdc_accession_base - displayName: INSDC Accession Base - - name: insdc_version - displayName: INSDC Version - - name: insdc_accession_full - displayName: INSDC Accession Full - - name: bioprojects - displayName: BioProjects - - name: biosample_accession - displayName: BioSample Accession - - name: ncbi_completeness - displayName: NCBI Completeness - - name: ncbi_host_name - displayName: NCBI Host Name - - name: ncbi_host_tax_id - displayName: NCBI Host Tax ID - - name: ncbi_is_lab_host - displayName: NCBI Is Lab Host - - name: ncbi_length - displayName: NCBI Length - - name: ncbi_protein_count - displayName: NCBI Protein Count - - name: ncbi_update_date - displayName: NCBI Update Date - - name: ncbi_sourcedb - displayName: NCBI Source DB - - 
name: ncbi_virus_name - displayName: NCBI Virus Name - - name: ncbi_virus_tax_id - displayName: NCBI Virus Tax ID - - name: isolate_source - displayName: Isolate Source - - name: sra_accessions - displayName: SRA Accessions - preprocessing: - - version: 2 - image: ghcr.io/loculus-project/preprocessing-nextclade - args: - - "prepro" - configFile: - log_level: DEBUG - nextclade_dataset_name: nextstrain/mpox/all-clades - nextclade_dataset_tag: 2024-04-19--07-50-39Z - genes: - - OPG001 - batch_size: 5 - processing_spec: - total_snps: - function: identity - args: - type: int - inputs: - input: nextclade.totalSubstitutions - total_inserted_nucs: - function: identity - args: - type: int - inputs: - input: nextclade.totalInsertions - total_deleted_nucs: - function: identity - args: - type: int - inputs: - input: nextclade.totalDeletions - total_ambiguous_nucs: - function: identity - args: - type: int - inputs: - input: nextclade.totalNonACGTNs - total_unknown_nucs: - function: identity - args: - type: int - inputs: - input: nextclade.totalMissing - total_frame_shifts: - function: identity - args: - type: int - inputs: - input: nextclade.totalFrameShifts - frame_shifts: - function: identity - inputs: - input: nextclade.frameShifts - completeness: - function: identity - args: - type: float - inputs: - input: nextclade.coverage - total_stop_codons: - function: identity - args: - type: int - inputs: - input: nextclade.qc.stopCodons.totalStopCodons - stop_codons: - function: identity - inputs: - input: nextclade.qc.stopCodons.stopCodons - collection_date: - function: process_date - inputs: - date: collection_date - release_date: ncbi_release_date - required: true - ncbi_release_date: - function: parse_timestamp - inputs: - timestamp: ncbi_release_date - country: - function: identity - inputs: - input: country - required: true - author_affiliation: - function: identity - inputs: - input: author_affiliation - authors: - function: identity - inputs: - input: authors - 
isolate_name: - function: identity - inputs: - input: isolate_name - submitter_country: - function: identity - inputs: - input: submitter_country - division: - function: identity - inputs: - input: division - insdc_accession_base: - function: identity - inputs: - input: insdc_accession_base - insdc_version: - function: identity - args: - type: int - inputs: - input: insdc_version - insdc_accession_full: - function: identity - inputs: - input: insdc_accession_full - bioprojects: - function: identity - inputs: - input: bioprojects - biosample_accession: - function: identity - inputs: - input: biosample_accession - ncbi_completeness: - function: identity - inputs: - input: ncbi_completeness - ncbi_host_name: - function: identity - inputs: - input: ncbi_host_name - ncbi_host_tax_id: - function: identity - args: - type: int - inputs: - input: ncbi_host_tax_id - ncbi_is_lab_host: - function: identity - inputs: - input: ncbi_is_lab_host - ncbi_length: - function: identity - args: - type: int - inputs: - input: ncbi_length - ncbi_protein_count: - function: identity - args: - type: int - inputs: - input: ncbi_protein_count - ncbi_update_date: - function: parse_timestamp - inputs: - timestamp: ncbi_update_date - ncbi_sourcedb: - function: identity - inputs: - input: ncbi_sourcedb - ncbi_virus_name: - function: identity - inputs: - input: ncbi_virus_name - ncbi_virus_tax_id: - function: identity - args: - type: int - inputs: - input: ncbi_virus_tax_id - isolate_source: - function: identity - inputs: - input: isolate_source - sra_accessions: - function: identity - inputs: - input: sra_accessions - clade: - function: identity - inputs: - input: nextclade.clade - outbreak: - function: identity - inputs: - input: nextclade.customNodeAttributes.outbreak - lineage: - function: identity - inputs: - input: nextclade.customNodeAttributes.lineage - ingest: - image: ghcr.io/loculus-project/ingest - configFile: - taxon_id: 10244 - subsample_fraction: 0.1 - referenceGenomes: - 
nucleotideSequences: - - name: "main" - sequence: "[[URL:https://cov2tree.nyc3.cdn.digitaloceanspaces.com/mpox]]" - genes: - - name: OPG001 - sequence: MKQYIVLACMCLVAAAMPTSLQQSSSSCTEEENKHHMGIDVIIKVTKQDQTPTNDKICQSVTEVTETEDDEVSEEVVKGDPTTYYTIVGAGLNMNFGFTKCPKISSISESSDGNTVNTRLSSVSPGQGKDSPAITREEALAMIKDCEMSIDIRCSEEEKDSDIKTHPVLGSNISHKKVSYKDIIGSTIVDTKCVKNLEFSVRIGDMCEESSELEVKDGFKYVDGSASEGATDDTSLIDSTKLKACV* - ebola-zaire: - schema: - loadSequencesAutomatically: true - instanceName: "Ebola Zaire" - description: "Zaire ebolavirus is a species of the genus Ebolavirus, which is a member of the Filoviridae family. It is the most dangerous of the known Ebola viruses, and is associated with the highest case-fatality rate." - image: "https://cdn.britannica.com/01/179201-050-FED1B381/filamentous-ebolavirus-particles-scanning-electron-micrograph-cell.jpg?w=400&h=300&c=crop" - metadata: - - name: collection_date - displayName: Collection date - type: date - required: true - initiallyVisible: true - header: Sample details - - name: ncbi_release_date - displayName: NCBI release date - type: date - header: "INSDC" - - name: country - type: string - required: true - generateIndex: true - autocomplete: true - initiallyVisible: true - header: Sample details - - name: isolate_name - displayName: Isolate name - type: string - header: Sample details - - name: author_affiliation - displayName: Author affiliation - type: string - generateIndex: true - autocomplete: true - truncateColumnDisplayTo: 15 - header: Authors - - name: authors - displayName: Authors - type: authors - header: Authors - truncateColumnDisplayTo: 15 - - name: submitter_country - displayName: Submitter country - type: string - generateIndex: true - autocomplete: true - hideOnSequenceDetailsPage: true - - name: division - type: string - generateIndex: true - autocomplete: true - header: Sample details - - name: insdc_accession_base - type: string - header: "INSDC" - hideOnSequenceDetailsPage: true - - name: insdc_version - type: 
int - header: "INSDC" - hideOnSequenceDetailsPage: true - - name: insdc_accession_full - type: string - displayName: INSDC accession - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/nuccore/{{value}}" - header: "INSDC" - - name: bioprojects - type: string - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/bioproject/{{value}}" - header: "INSDC" - - name: biosample_accession - type: string - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/biosample/{{value}}" - header: "INSDC" - - name: ncbi_completeness - type: string - generateIndex: true - autocomplete: true - header: "Alignment states and QC metrics" - - name: ncbi_host_name - type: string - generateIndex: true - autocomplete: true - header: "Host" - - name: ncbi_host_tax_id - type: int - autocomplete: true - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=info&id={{value}}" - header: "Host" - - name: ncbi_is_lab_host - type: string - generateIndex: true - autocomplete: true - header: "Host" - - name: ncbi_length - type: int - header: "INSDC" - - name: ncbi_protein_count - type: int - header: "INSDC" - hideOnSequenceDetailsPage: true - - name: ncbi_update_date - type: date - header: "INSDC" - - name: ncbi_sourcedb - type: string - generateIndex: true - autocomplete: true - header: "INSDC" - hideOnSequenceDetailsPage: true - - name: ncbi_virus_name - type: string - generateIndex: true - autocomplete: true - hideOnSequenceDetailsPage: true - - name: ncbi_virus_tax_id - type: int - autocomplete: true - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/virus?SeqType_s=Nucleotide&VirusLineage_ss=taxid:{{value}}" - hideOnSequenceDetailsPage: true - - name: isolate_source - type: string - generateIndex: true - autocomplete: true - header: "Host" - - name: sra_accessions - type: string - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/sra/?term={{value}}" - header: 
"INSDC" - - name: length - type: int - autocomplete: true - - name: total_snps - type: int - header: "Alignment states and QC metrics" - - name: total_inserted_nucs - type: int - header: "Alignment states and QC metrics" - - name: total_deleted_nucs - type: int - header: "Alignment states and QC metrics" - - name: total_ambiguous_nucs - type: int - header: "Alignment states and QC metrics" - - name: total_unknown_nucs - type: int - header: "Alignment states and QC metrics" - - name: total_frame_shifts - type: int - header: "Alignment states and QC metrics" - - name: frame_shifts - type: string - header: "Alignment states and QC metrics" - - name: completeness - type: float - header: "Alignment states and QC metrics" - - name: total_stop_codons - type: int - header: "Alignment states and QC metrics" - - name: stop_codons - type: string - header: "Alignment states and QC metrics" - website: - tableColumns: - - collection_date - - country - - division - - authors - - author_affiliation - - ncbi_release_date - - insdc_accession_full - - length - - ncbi_host_name - defaultOrderBy: collection_date - defaultOrder: descending - silo: - dateToSortBy: collection_date - inputFields: - - name: collection_date - displayName: Collection Date - - name: ncbi_release_date - displayName: NCBI Release Date - - name: country - displayName: Country - - name: isolate_name - displayName: Isolate Name - - name: author_affiliation - displayName: Author Affiliation - - name: authors - displayName: Authors - - name: submitter_country - displayName: Submitter Country - - name: division - displayName: Division - - name: insdc_accession_base - displayName: INSDC Accession Base - - name: insdc_version - displayName: INSDC Version - - name: insdc_accession_full - displayName: INSDC Accession Full - - name: bioprojects - displayName: BioProjects - - name: biosample_accession - displayName: BioSample Accession - - name: ncbi_completeness - displayName: NCBI Completeness - - name: ncbi_host_name - 
displayName: NCBI Host Name - - name: ncbi_host_tax_id - displayName: NCBI Host Tax ID - - name: ncbi_is_lab_host - displayName: NCBI Is Lab Host - - name: ncbi_length - displayName: NCBI Length - - name: ncbi_protein_count - displayName: NCBI Protein Count - - name: ncbi_update_date - displayName: NCBI Update Date - - name: ncbi_sourcedb - displayName: NCBI Source DB - - name: ncbi_virus_name - displayName: NCBI Virus Name - - name: ncbi_virus_tax_id - displayName: NCBI Virus Tax ID - - name: isolate_source - displayName: Isolate Source - - name: sra_accessions - displayName: SRA Accessions - preprocessing: - - version: 2 - image: ghcr.io/loculus-project/preprocessing-nextclade - args: - - "prepro" - configFile: - log_level: DEBUG - nextclade_dataset_name: nextstrain/ebola/zaire - nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/ebola/data_output - genes: - - NP - - VP35 - - VP40 - - GP - - sGP - - ssGP - - VP30 - - VP24 - - L - batch_size: 100 - processing_spec: - total_snps: - function: identity - args: - type: int - inputs: - input: nextclade.totalSubstitutions - total_inserted_nucs: - function: identity - args: - type: int - inputs: - input: nextclade.totalInsertions - total_deleted_nucs: - function: identity - args: - type: int - inputs: - input: nextclade.totalDeletions - total_ambiguous_nucs: - function: identity - args: - type: int - inputs: - input: nextclade.totalNonACGTNs - total_unknown_nucs: - function: identity - args: - type: int - inputs: - input: nextclade.totalMissing - total_frame_shifts: - function: identity - args: - type: int - inputs: - input: nextclade.totalFrameShifts - frame_shifts: - function: identity - inputs: - input: nextclade.frameShifts - completeness: - function: identity - args: - type: float - inputs: - input: nextclade.coverage - total_stop_codons: - function: identity - args: - type: int - inputs: - input: nextclade.qc.stopCodons.totalStopCodons - stop_codons: - function: identity - inputs: 
- input: nextclade.qc.stopCodons.stopCodons - collection_date: - function: process_date - inputs: - date: collection_date - release_date: ncbi_release_date - required: true - ncbi_release_date: - function: parse_timestamp - inputs: - timestamp: ncbi_release_date - country: - function: identity - inputs: - input: country - required: true - author_affiliation: - function: identity - inputs: - input: author_affiliation - authors: - function: identity - inputs: - input: authors - isolate_name: - function: identity - inputs: - input: isolate_name - submitter_country: - function: identity - inputs: - input: submitter_country - division: - function: identity - inputs: - input: division - insdc_accession_base: - function: identity - inputs: - input: insdc_accession_base - insdc_version: - function: identity - args: - type: int - inputs: - input: insdc_version - insdc_accession_full: - function: identity - inputs: - input: insdc_accession_full - bioprojects: - function: identity - inputs: - input: bioprojects - biosample_accession: - function: identity - inputs: - input: biosample_accession - ncbi_completeness: - function: identity - inputs: - input: ncbi_completeness - ncbi_host_name: - function: identity - inputs: - input: ncbi_host_name - ncbi_host_tax_id: - function: identity - args: - type: int - inputs: - input: ncbi_host_tax_id - ncbi_is_lab_host: - function: identity - inputs: - input: ncbi_is_lab_host - ncbi_length: - function: identity - args: - type: int - inputs: - input: ncbi_length - ncbi_protein_count: - function: identity - args: - type: int - inputs: - input: ncbi_protein_count - ncbi_update_date: - function: parse_timestamp - inputs: - timestamp: ncbi_update_date - ncbi_sourcedb: - function: identity - inputs: - input: ncbi_sourcedb - ncbi_virus_name: - function: identity - inputs: - input: ncbi_virus_name - ncbi_virus_tax_id: - function: identity - args: - type: int - inputs: - input: ncbi_virus_tax_id - isolate_source: - function: identity - inputs: - 
input: isolate_source - sra_accessions: - function: identity - inputs: - input: sra_accessions - ingest: - image: ghcr.io/loculus-project/ingest - configFile: - taxon_id: 186538 - referenceGenomes: - nucleotideSequences: - - name: "main" - sequence: CGGACACACAAAAAGAAAGAAGAATTTTTAGGATCTTTTGTGTGCGAATAACTATGAGGAAGATTAATAATTTTCCTCTCATTGAAATTTATATCGGAATTTAAATTGAAATTGTTACTGTAATCACACCTGGTTTGTTTCAGAGCCACATCACAAAGATAGAGAACAACCTAGGTCTCCGAAGGGAGCAAGGGCATCAGTGTGCTCAGTTGAAAATCCCTTGTCAACACCTAGGTCTTATCACATCACAAGTTCCACCTCAGACTCTGCAGGGTGATCCAACAACCTTAATAGAAACATTATTGTTAAAGGACAGCATTAGTTCACAGTCAAACAAGCAAGATTGAGAATTAACCTTGGTTTTGAACTTGAACACTTAGGGGATTGAAGATTCAACAACCCTAAAGCTTGGGGTAAAACATTGGAAATAGTTAAAAGACAAATTGCTCGGAATCACAAAATTCCGAGTATGGATTCTCGTCCTCAGAAAATCTGGATGGCGCCGAGTCTCACTGAATCTGACATGGATTACCACAAGATCTTGACAGCAGGTCTGTCCGTTCAACAGGGGATTGTTCGGCAAAGAGTCATCCCAGTGTATCAAGTAAACAATCTTGAAGAAATTTGCCAACTTATCATACAGGCCTTTGAAGCAGGTGTTGATTTTCAAGAGAGTGCGGACAGTTTCCTTCTCATGCTTTGTCTTCATCATGCGTACCAGGGAGATTACAAACTTTTCTTGGAAAGTGGCGCAGTCAAGTATTTGGAAGGGCACGGGTTCCGTTTTGAAGTCAAGAAGCGTGATGGAGTGAAGCGCCTTGAGGAATTGCTGCCAGCAGTATCTAGTGGAAAAAACATTAAGAGAACACTTGCTGCCATGCCGGAAGAGGAGACAACTGAAGCTAATGCCGGTCAGTTTCTCTCCTTTGCAAGTCTATTCCTTCCGAAATTGGTAGTAGGAGAAAAGGCTTGCCTTGAGAAGGTTCAAAGGCAAATTCAAGTACATGCAGAGCAAGGACTGATACAATATCCAACAGCTTGGCAATCAGTAGGACACATGATGGTGATTTTCCGTTTGATGCGAACAAATTTTCTGATCAAATTTCTCCTAATACACCAAGGGATGCACATGGTTGCCGGGCATGATGCCAACGATGCTGTGATTTCAAATTCAGTGGCTCAAGCTCGTTTTTCAGGCTTATTGATTGTCAAAACAGTACTTGATCATATCCTACAAAAGACAGAACGAGGAGTTCGTCTCCATCCTCTTGCAAGGACCGCCAAGGTAAAAAATGAGGTGAACTCCTTTAAGGCTGCACTCAGCTCCCTGGCCAAGCATGGAGAGTATGCTCCTTTCGCCCGACTTTTGAACCTTTCTGGAGTAAATAATCTTGAGCATGGTCTTTTCCCTCAACTATCGGCAATTGCACTCGGAGTCGCCACAGCACACGGGAGTACCCTCGCAGGAGTAAATGTTGGAGAACAGTATCAACAACTCAGAGAGGCTGCCACTGAGGCTGAGAAGCAACTCCAACAATATGCAGAGTCTCGCGAACTTGACCATCTTGGACTTGATGATCAGGAAAAGAAAATTCTTATGAACTTCCATCAGAAAAAGAACGAAATCAGCTTCCAGCAAACAAACGCTATGGTAACTCTAAGAAAAGAGCGCCTGGCCAAGCTGACAGAAGCTATCACTGCTGCGTCACTGCCCAAAACAAGTGGACATTACGATGATGATGACGACATT
CCCTTTCCAGGACCCATCAATGATGACGACAATCCTGGCCATCAAGATGATGATCCGACTGACTCACAGGATACGACCATTCCCGATGTGGTGGTTGATCCCGATGATGGAAGCTACGGCGAATACCAGAGTTACTCGGAAAACGGCATGAATGCACCAGATGACTTGGTCCTATTCGATCTAGACGAGGACGACGAGGACACTAAGCCAGTGCCTAATAGATCGACCAAGGGTGGACAACAGAAGAACAGTCAAAAGGGCCAGCATATAGAGGGCAGACAGACACAATCCAGGCCAATTCAAAATGTCCCAGGCCCTCACAGAACAATCCACCACGCCAGTGCGCCACTCACGGACAATGACAGAAGAAATGAACCCTCCGGCTCAACCAGCCCTCGCATGCTGACACCAATTAACGAAGAGGCAGACCCACTGGACGATGCCGACGACGAGACGTCTAGCCTTCCGCCCTTGGAGTCAGATGATGAAGAGCAGGACAGGGACGGAACTTCCAACCGCACACCCACTGTCGCCCCACCGGCTCCCGTATACAGAGATCACTCTGAAAAGAAAGAACTCCCGCAAGACGAGCAACAAGATCAGGACCACACTCAAGAGGCCAGGAACCAGGACAGTGACAACACCCAGTCAGAACACTCTTTTGAGGAGATGTATCGCCACATTCTAAGATCACAGGGGCCATTTGATGCTGTTTTGTATTATCATATGATGAAGGATGAGCCTGTAGTTTTCAGTACCAGTGATGGCAAAGAGTACACGTATCCAGACTCCCTTGAAGAGGAATATCCACCATGGCTCACTGAAAAAGAGGCTATGAATGAAGAGAATAGATTTGTTACATTGGATGGTCAACAATTTTATTGGCCGGTGATGAATCACAAGAATAAATTCATGGCAATCCTGCAACATCATCAGTGAATGAGCATGGAACAATGGGATGATTCAACCGACAAATAGCTAACATTAAGTAGTCAAGGAACGAAAACAGGAAGAATTTTTGATGTCTAAGGTGTGAATTATTATCACAATAAAAGTGATTCTTATTTTTGAATTTAAAGCTAGCTTATTATTACTAGCCGTTTTTCAAAGTTCAATTTGAGTCTTAATGCAAATAGGCGTTAAGCCACAGTTATAGCCATAATTGTAACTCAATATTCTAACTAGCGATTTATCTAAATTAAATTACATTATGCTTTTATAACTTACCTACTAGCCTGCCCAACATTTACACGATCGTTTTATAATTAAGAAAAAACTAATGATGAAGATTAAAACCTTCATCATCCTTACGTCAATTGAATTCTCTAGCACTCGAAGCTTATTGTCTTCAATGTAAAAGAAAAGCTGGTCTAACAAGATGACAACTAGAACAAAGGGCAGGGGCCATACTGCGGCCACGACTCAAAACGACAGAATGCCAGGCCCTGAGCTTTCGGGCTGGATCTCTGAGCAGCTAATGACCGGAAGAATTCCTGTAAGCGACATCTTCTGTGATATTGAGAACAATCCAGGATTATGCTACGCATCCCAAATGCAACAAACGAAGCCAAACCCGAAGACGCGCAACAGTCAAACCCAAACGGACCCAATTTGCAATCATAGTTTTGAGGAGGTAGTACAAACATTGGCTTCATTGGCTACTGTTGTGCAACAACAAACCATCGCATCAGAATCATTAGAACAACGCATTACGAGTCTTGAGAATGGTCTAAAGCCAGTTTATGATATGGCAAAAACAATCTCCTCATTGAACAGGGTTTGTGCTGAGATGGTTGCAAAATATGATCTTCTGGTGATGACAACCGGTCGGGCAACAGCAACCGCTGCGGCAACTGAGGCTTATTGGGCCGAACATGGTCAACCACCACCTGGACCATCACTTTATGAAGAAAGTGCGATTCGGGGTAAGATTGAATCTAGAGATGAGACCGTCCCTCAAAGTGTTAGGGAGGCATTCAACAATCTAAACAGTACCACTT
CACTAACTGAGGAAAATTTTGGGAAACCTGACATTTCGGCAAAGGATTTGAGAAACATTATGTATGATCACTTGCCTGGTTTTGGAACTGCTTTCCACCAATTAGTACAAGTGATTTGTAAATTGGGAAAAGATAGCAACTCATTGGACATCATTCATGCTGAGTTCCAGGCCAGCCTGGCTGAAGGAGACTCTCCTCAATGTGCCCTAATTCAAATTACAAAAAGAGTTCCAATCTTCCAAGATGCTGCTCCACCTGTCATCCACATCCGCTCTCGAGGTGACATTCCCCGAGCTTGCCAGAAAAGCTTGCGTCCAGTCCCACCATCGCCCAAGATTGATCGAGGTTGGGTATGTGTTTTTCAGCTTCAAGATGGTAAAACACTTGGACTCAAAATTTGAGCCAATCTCCCTTCCCTCCGAAAGAGGCGAATAATAGCAGAGGCTTCAACTGCTGAACTATAGGGTACGTTACATTAATGATACACTTGTGAGTATCAGCCCTGGATAATATAAGTCAATTAAACGACCAAGATAAAATTGTTCATATCTCGCTAGCAGCTTAAAATATAAATGTAATAGGAGCTATATCTCTGACAGTATTATAATCAATTGTTATTAAGTAACCCAAACCAAAAGTGATGAAGATTAAGAAAAACCTACCTCGGCTGAGAGAGTGTTTTTTCATTAACCTTCATCTTGTAAACGTTGAGCAAAATTGTTAAAAATATGAGGCGGGTTATATTGCCTACTGCTCCTCCTGAATATATGGAGGCCATATACCCTGTCAGGTCAAATTCAACAATTGCTAGAGGTGGCAACAGCAATACAGGCTTCCTGACACCGGAGTCAGTCAATGGGGACACTCCATCGAATCCACTCAGGCCAATTGCCGATGACACCATCGACCATGCCAGCCACACACCAGGCAGTGTGTCATCAGCATTCATCCTTGAAGCTATGGTGAATGTCATATCGGGCCCCAAAGTGCTAATGAAGCAAATTCCAATTTGGCTTCCTCTAGGTGTCGCTGATCAAAAGACCTACAGCTTTGACTCAACTACGGCCGCCATCATGCTTGCTTCATACACTATCACCCATTTCGGCAAGGCAACCAATCCACTTGTCAGAGTCAATCGGCTGGGTCCTGGAATCCCGGATCATCCCCTCAGGCTCCTGCGAATTGGAAACCAGGCTTTCCTCCAGGAGTTCGTTCTTCCGCCAGTCCAACTACCCCAGTATTTCACCTTTGATTTGACAGCACTCAAACTGATCACCCAACCACTGCCTGCTGCAACATGGACCGATGACACTCCAACAGGATCAAATGGAGCGTTGCGTCCAGGAATTTCATTTCATCCAAAACTTCGCCCCATTCTTTTACCCAACAAAAGTGGGAAGAAGGGGAACAGTGCCGATCTAACATCTCCGGAGAAAATCCAAGCAATAATGACTTCACTCCAGGACTTTAAGATCGTTCCAATTGATCCAACCAAAAATATCATGGGAATCGAAGTGCCAGAAACTCTGGTCCACAAGCTGACCGGTAAGAAGGTGACTTCTAAAAATGGACAACCAATCATCCCTGTTCTTTTGCCAAAGTACATTGGGTTGGACCCGGTGGCTCCAGGAGACCTCACCATGGTAATCACACAGGATTGTGACACGTGTCATTCTCCTGCAAGTCTTCCAGCTGTGATTGAGAAGTAATTGCAATAATTGACTCAGATCCAGTTTTATAGAATCTTCTCAGGGATAGTGATAACATCTATTTAGTAATCCGTCCATTAGAGGAGACACTTTTAATTGATCAATATACTAAAGGTGCTTTACACCATTGTCTTTTTTCTCTCCTAAATGTAGAACTTAACAAAAGACTCATAATATACTTGTTTTTAAAGGATTGATTGATGAAAGATCATAACTAATAACATTACAAATAATCCTACTATAATCAATACGGTGATTCAAATGTTAATCTTTCTCATTGCACATACTTTT
TGCCCTTATCCTCAAATTGCCTGCATGCTTACATCTGAGGATAGCCAGTGTGACTTGGATTGGAAATGTGGAGAAAAAATCGGGACCCATTTCTAGGTTGTTCACAATCCAAGTACAGACATTGCCCTTCTAATTAAGAAAAAATCGGCGATGAAGATTAAGCCGACAGTGAGCGTAATCTTCATCTCTCTTAGATTATTTGTTTTCCAGAGTAGGGGTCGTCAGGTCCTTTTCAATCGTGTAACCAAAATAAACTCCACTAGAAGGATATTGTGGGGCAACAACACAATGGGCGTTACAGGAATATTGCAGTTACCTCGTGATCGATTCAAGAGGACATCATTCTTTCTTTGGGTAATTATCCTTTTCCAAAGAACATTTTCCATCCCACTTGGAGTCATCCACAATAGCACATTACAGGTTAGTGATGTCGACAAACTAGTTTGTCGTGACAAACTGTCATCCACAAATCAATTGAGATCAGTTGGACTGAATCTCGAAGGGAATGGAGTGGCAACTGACGTGCCATCTGCAACTAAAAGATGGGGCTTCAGGTCCGGTGTCCCACCAAAGGTGGTCAATTATGAAGCTGGTGAATGGGCTGAAAACTGCTACAATCTTGAAATCAAAAAACCTGACGGGAGTGAGTGTCTACCAGCAGCGCCAGACGGGATTCGGGGCTTCCCCCGGTGCCGGTATGTGCACAAAGTATCAGGAACGGGACCGTGTGCCGGAGACTTTGCCTTCCATAAAGAGGGTGCTTTCTTCCTGTATGATCGACTTGCTTCCACAGTTATCTACCGAGGAACGACTTTCGCTGAAGGTGTCGTTGCATTTCTGATACTGCCCCAAGCTAAGAAGGACTTCTTCAGCTCACACCCCTTGAGAGAGCCGGTCAATGCAACGGAGGACCCGTCTAGTGGCTACTATTCTACCACAATTAGATATCAGGCTACCGGTTTTGGAACCAATGAGACAGAGTACTTGTTCGAGGTTGACAATTTGACCTACGTCCAACTTGAATCAAGATTCACACCACAGTTTCTGCTCCAGCTGAATGAGACAATATATACAAGTGGGAAAAGGAGCAATACCACGGGAAAACTAATTTGGAAGGTCAACCCCGAAATTGATACAACAATCGGGGAGTGGGCCTTCTGGGAAACTAAAAAAACCTCACTAGAAAAATTCGCAGTGAAGAGTTGTCTTTCACAGTTGTATCAAACGGAGCCAAAAACATCAGTGGTCAGAGTCCGGCGCGAACTTCTTCCGACCCAGGGACCAACACAACAACTGAAGACCACAAAATCATGGCTTCAGAAAATTCCTCTGCAATGGTTCAAGTGCACAGTCAAGGAAGGGAAGCTGCAGTGTCGCATCTAACAACCCTTGCCACAATCTCCACGAGTCCCCAATCCCTCACAACCAAACCAGGTCCGGACAACAGCACCCATAATACACCCGTGTATAAACTTGACATCTCTGAGGCAACTCAAGTTGAACAACATCACCGCAGAACAGACAACGACAGCACAGCCTCCGACACTCCCTCTGCCACGACCGCAGCCGGACCCCCAAAAGCAGAGAACACCAACACGAGCAAGAGCACTGACTTCCTGGACCCCGCCACCACAACAAGTCCCCAAAACCACAGCGAGACCGCTGGCAACAACAACACTCATCACCAAGATACCGGAGAAGAGAGTGCCAGCAGCGGGAAGCTAGGCTTAATTACCAATACTATTGCTGGAGTCGCAGGACTGATCACAGGCGGGAGAAGAACTCGAAGAGAAGCAATTGTCAATGCTCAACCCAAATGCAACCCTAATTTACATTACTGGACTACTCAGGATGAAGGTGCTGCAATCGGACTGGCCTGGATACCATATTTCGGGCCAGCAGCCGAGGGAATTTACATAGAGGGGCTAATGCACAATCAAGATGGTTTAATCTGTGGGTTGAGACAGCTGGCCAACGAGACGACTCAAGCTCTTCAACTG
TTCCTGAGAGCCACAACTGAGCTACGCACCTTTTCAATCCTCAACCGTAAGGCAATTGATTTCTTGCTGCAGCGATGGGGCGGCACATGCCACATTCTGGGACCGGACTGCTGTATCGAACCACATGATTGGACCAAGAACATAACAGACAAAATTGATCAGATTATTCATGATTTTGTTGATAAAACCCTTCCGGACCAGGGGGACAATGACAATTGGTGGACAGGATGGAGACAATGGATACCGGCAGGTATTGGAGTTACAGGCGTTATAATTGCAGTTATCGCTTTATTCTGTATATGCAAATTTGTCTTTTAGTTTTTCTTCAGATTGCTTCATGGAAAAGCTCAGCCTCAAATCAATGAAACCAGGATTTAATTATATGGATTACTTGAATCTAAGATTACTTGACAAATGATAATATAATACACTGGAGCTTTAAACATAGCCAATGTGATTCTAACTCCTTTAAACTCACAGTTAATCATAAACAAGGTTTGACATCAATCTAGTTATCTCTTTGAGAATGATAAACTTGATGAAGATTAAGAAAAAGGTAATCTTTCGATTATCTTTAATCTTCATCCTTGATTCTACAATCATGACAGTTGTCTTTAGTGACAAGGGAAAGAAGCCTTTTTATTAAGTTGTAATAATCAGATCTGCGAACCGGTAGAGTTTAGTTGCAACCTAACACACATAAAGCATTGGTCAAAAAGTCAATAGAAATTTAAACAGTGAGTGGAGACAACTTTTAAATGGAAGCTTCATATGAGAGAGGACGCCCACGAGCTGCCAGACAGCATTCAAGGGATGGACACGACCACCATGTTCGAGCACGATCATCATCCAGAGAGAATTATCGAGGTGAGTACCGTCAATCAAGGAGCGCCTCACAAGTGCGCGTTCCTACTGTATTTCATAAGAAGAGAGTTGAACCATTAACAGTTCCTCCAGCACCTAAAGACATATGTCCGACCTTGAAAAAAGGATTTTTGTGTGACAGTAGTTTTTGCAAAAAAGATCACCAGTTGGAGAGTTTAACTGATAGGGAATTACTCCTACTAATCGCCCGTAAGACTTGTGGATCAGTAGAACAACAATTAAATATAACTGCACCCAAGGACTCGCGCTTAGCAAATCCAACGGCTGATGATTTCCAGCAAGAGGAAGGTCCAAAAATTACCTTGTTGACACTGATCAAGACGGCAGAACACTGGGCGAGACAAGACATCAGAACCATAGAGGATTCAAAATTAAGAGCATTGTTGACTCTATGTGCTGTGATGACGAGGAAATTCTCAAAATCCCAGCTGAGTCTTTTATGTGAGACACACCTAAGGCGCGAGGGGCTTGGGCAAGATCAGGCAGAACCCGTTCTCGAAGTATATCAACGATTACACAGTGATAAAGGAGGCAGTTTTGAAGCTGCACTATGGCAACAATGGGACCGACAATCCCTAATTATGTTTATCACTGCATTCTTGAATATTGCTCTCCAGTTACCGTGTGAAAGTTCTGCTGTCGTTGTTTCAGGGTTAAGAACATTGGTTCCTCAATCAGATAATGAGGAAGCTTCAACCAACCCGGGGACATGCTCATGGTCTGATGAGGGTACCCCTTAATAAGGCTGACTAAAACACTATATAACCTTCTACTTGATCACAATACTCCGTATACCTATCATCATATATTTAATCAAGACGATATCCTTTAAAACTTATTCAGTACTATAATCACTCTCGTTTCAAATTAATAAGATGTGCATGATTGCCCTAATATATGAAGAGGTATGATACAACCCTAACAGTGATCAAAGAAAATCATAATCTCGTATCGCTCGTAATATAACCTGCCAAGCATACCTCTTGCACAAAGTGATTCTTGTACACAAATAATGTTTTACTCTACAGGAGGTAGCAACGATCCATCCCATCAAAAAATAAGTATTTCATGACTTACTAATGATCTCTTAAAATATTAAGAAAAACTGACGGAAC
ATAAATTCTTTATGCTTCAAGCTGTGGAGGAGGTGTTTGGTATTGGCTATTGTTATATTACAATCAATAACAAGCTTGTAAAAATATTGTTCTTGTTTCAAGAGGTAGATTGTGACCGGAAATGCTAAACTAATGATGAAGATTAATGCGGAGGTCTGATAAGAATAAACCTTATTATTCAGATTAGGCCCCAAGAGGCATTCTTCATCTCCTTTTAGCAAAGTACTATTTCAGGGTAGTCCAATTAGTGGCACGTCTTTTAGCTGTATATCAGTCGCCCCTGAGATACGCCACAAAAGTGTCTCTAAGCTAAATTGGTCTGTACACATCCCATACATTGTATTAGGGGCAATAATATCTAATTGAACTTAGCCGTTTAAAATTTAGTGCATAAATCTGGGCTAACACCACCAGGTCAACTCCATTGGCTGAAAAGAAGCTTACCTACAACGAACATCACTTTGAGCGCCCTCACAATTAAAAAATAGGAACGTCGTTCCAACAATCGAGCGCAAGGTTTCAAGGTTGAACTGAGAGTGTCTAGACAACAAAATATTGATACTCCAGACACCAAGCAAGACCTGAGAAAAAACCATGGCTAAAGCTACGGGACGATACAATCTAATATCGCCCAAAAAGGACCTGGAGAAAGGGGTTGTCTTAAGCGACCTCTGTAACTTCTTAGTTAGCCAAACTATTCAGGGGTGGAAGGTTTATTGGGCTGGTATTGAGTTTGATGTGACTCACAAAGGAATGGCCCTATTGCATAGACTGAAAACTAATGACTTTGCCCCTGCATGGTCAATGACAAGGAATCTCTTTCCTCATTTATTTCAAAATCCGAATTCCACAATTGAATCACCGCTGTGGGCATTGAGAGTCATCCTTGCAGCAGGGATACAGGACCAGCTGATTGACCAGTCTTTGATTGAACCCTTAGCAGGAGCCCTTGGTCTGATCTCTGATTGGCTGCTAACAACCAACACTAACCATTTCAACATGCGAACACAACGTGTCAAGGAACAATTGAGCCTAAAAATGCTGTCGTTGATTCGATCCAATATTCTCAAGTTTATTAACAAATTGGATGCTCTACATGTCGTGAACTACAACGGATTGTTGAGCAGTATTGAAATTGGAACTCAAAATCATACAATCATCATAACTCGAACTAACATGGGTTTTCTGGTGGAGCTCCAAGAACCCGACAAATCGGCAATGAACCGCATGAAGCCTGGGCCGGCGAAATTTTCCCTCCTTCATGAGTCCACACTGAAAGCATTTACACAAGGATCCTCGACACGAATGCAAAGTTTGATTCTTGAATTTAATAGCTCTCTTGCTATCTAACTAAGGTAGAATACTTCATATTGAGCTAACTCATATATGCTGACTCAATAGTTATCTTGACATCTCTGCTTTCATAATCAGATATATAAGCATAATAAATAAATACTCATATTTCTTGATAATTTGTTTAACCACAGATAAATCCTCACTGTAAGCCAGCTTCCAAGTTGACACCCTTACAAAAACCAGGACTCAGAATCCCTCAAACAAGAGATTCCAAGACAACATCATAGAATTGCTTTATTATATGAATAAGCATTTTATCACCAGAAATCCTATATACTAAATGGTTAATTGTAACTGAACCCGCAGGTCACATGTGTTAGGTTTCACAGATTCTATATATTACTAACTCTATACTCGTAATTAACATTAGATAAGTAGATTAAGAAAAAAGCCTGAGGAAGATTAAGAAAAACTGCTTATTGGGTCTTTCCGTGTTTTAGATGAAGCAGTTGAAATTCTTCCTCTTGATATTAAATGGCTACACAACATACCCAATACCCAGACGCTAGGTTATCATCACCAATTGTATTGGACCAATGTGACCTAGTCACTAGAGCTTGCGGGTTATATTCATCATACTCCCTTAATCCGCAACTACGCAACTGTAAACTCCCGAAACATATCTACCGTTTGAAATACGATGT
AACTGTTACCAAGTTCTTGAGTGATGTACCAGTGGCGACATTGCCCATAGATTTCATAGTCCCAGTTCTTCTCAAGGCACTGTCAGGCAATGGATTCTGTCCTGTTGAGCCGCGGTGCCAACAGTTCTTAGATGAAATCATTAAGTACACAATGCAAGATGCTCTCTTCTTGAAATATTATCTCAAAAATGTGGGTGCTCAAGAAGACTGTGTTGATGAACACTTTCAAGAGAAAATCTTATCTTCAATTCAGGGCAATGAATTTTTACATCAAATGTTTTTCTGGTATGATCTGGCTATTTTAACTCGAAGGGGTAGATTAAATCGAGGAAACTCTAGATCAACATGGTTTGTTCATGATGATTTAATAGACATCTTAGGCTATGGGGACTATGTTTTTTGGAAGATCCCAATTTCAATGTTACCACTGAACACACAAGGAATCCCCCATGCTGCTATGGACTGGTATCAGGCATCAGTATTCAAAGAAGCGGTTCAAGGGCATACACACATTGTTTCTGTTTCTACTGCCGACGTCTTGATAATGTGCAAAGATTTAATTACATGTCGATTCAACACAACTCTAATCTCAAAAATAGCAGAGATTGAGGATCCAGTTTGTTCTGATTATCCCAATTTTAAGATTGTGTCTATGCTTTACCAGAGCGGAGATTACTTACTCTCCATATTAGGGTCTGATGGGTATAAAATTATTAAGTTCCTCGAACCATTGTGCTTGGCCAAAATTCAATTATGCTCAAAGTACACTGAGAGGAAGGGCCGATTCTTAACACAAATGCATTTAGCTGTAAATCACACCCTAGAAGAAATTACAGAAATGCGTGCACTAAAGCCTTCACAGGCTCAAAAGATCCGTGAATTCCATAGAACATTGATAAGGCTGGAGATGACGCCACAACAACTTTGTGAGCTATTTTCCATTCAAAAACACTGGGGGCATCCTGTGCTACATAGTGAAACAGCAATCCAAAAAGTTAAAAAACATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCCAAACATTATTTTGATAGTCAAGGATCTTGGTACAGTGTTACTTCAGATAGGAATCTAACACCGGGTCTTAATTCTTATATCAAAAGAAATCAATTCCCTCCGTTGCCAATGATTAAAGAACTACTATGGGAATTTTACCACCTTGACCACCCTCCACTTTTCTCAACCAAAATTATTAGTGACTTAAGTATTTTTATAAAAGACAGAGCTACCGCAGTAGAAAGGACATGCTGGGATGCAGTATTCGAGCCTAATGTTCTAGGATATAATCCACCTCACAAATTTAGTACTAAACGTGTACCGGAACAATTTTTAGAGCAAGAAAACTTTTCTATTGAGAATGTTCTTTCCTACGCACAAAAACTCGAGTATCTACTACCACAATATCGGAACTTTTCTTTCTCATTGAAAGAGAAAGAGTTGAATGTAGGTAGAACCTTCGGAAAATTGCCTTATCCGACTCGCAATGTTCAAACACTTTGTGAAGCTCTGTTAGCTGATGGTCTTGCTAAAGCATTTCCTAGCAATATGATGGTAGTTACGGAACGTGAGCAAAAAGAAAGCTTATTGCATCAAGCATCATGGCACCACACAAGTGATGATTTTGGTGAACATGCCACAGTTAGAGGGAGTAGCTTTGTAACTGATTTAGAGAAATACAATCTTGCATTTAGATATGAGTTTACAGCACCTTTTATAGAATATTGCAACCGTTGCTATGGTGTTAAGAATGTTTTTAATTGGATGCATTATACAATCCCACAGTGTTATATGCATGTCAGTGATTATTATAATCCACCACATAACCTCACACTGGAGAATCGAGACAACCCCCCCGAAGGGCCTAGTTCATACAGGGGTCATATGGGAGGGATTGAAGGACTGCAACAAAAACTCTGGACAAGTATTTCATGTGCTCAAA
TTTCTTTAGTTGAAATTAAGACTGGTTTTAAGTTACGCTCAGCTGTGATGGGTGACAATCAGTGCATTACTGTTTTATCAGTCTTCCCCTTAGAGACTGACGCAGACGAGCAGGAACAGAGCGCCGAAGACAATGCAGCGAGGGTGGCCGCCAGCCTAGCAAAAGTTACAAGTGCCTGTGGAATCTTTTTAAAACCTGATGAAACATTTGTACATTCAGGTTTTATCTATTTTGGAAAAAAACAATATTTGAATGGGGTCCAATTGCCTCAGTCCCTTAAAACGGCTACAAGAATGGCACCATTGTCTGATGCAATTTTTGATGATCTTCAAGGGACCCTGGCTAGTATAGGCACTGCTTTTGAGCGATCCATCTCTGAGACACGACATATCTTTCCTTGCAGGATAACCGCAGCTTTCCATACGTTTTTTTCGGTGAGAATCTTGCAATATCATCATCTCGGGTTCAATAAAGGTTTTGACCTTGGACAGTTAACACTCGGCAAACCTCTGGATTTCGGAACAATATCATTGGCACTAGCGGTACCGCAGGTGCTTGGAGGGTTATCCTTCTTGAATCCTGAGAAATGTTTCTACCGGAATCTAGGAGATCCAGTTACCTCAGGCTTATTCCAGTTAAAAACTTATCTCCGAATGATTGAGATGGATGATTTATTCTTACCTTTAATTGCGAAGAACCCTGGGAACTGCACTGCCATTGACTTTGTGCTAAATCCTAGCGGATTAAATGTCCCTGGGTCGCAAGACTTAACTTCATTTCTGCGCCAGATTGTACGCAGGACCATCACCCTAAGTGCGAAAAACAAACTTATTAATACCTTATTTCATGCGTCAGCTGACTTCGAAGACGAAATGGTTTGTAAATGGCTATTATCATCAACTCCTGTTATGAGTCGTTTTGCGGCCGATATCTTTTCACGCACGCCGAGCGGGAAGCGATTGCAAATTCTAGGATACCTGGAAGGAACACGCACATTATTAGCCTCTAAGATCATCAACAATAATACAGAGACACCGGTTTTGGACAGACTGAGGAAAATAACATTGCAAAGGTGGAGCCTATGGTTTAGTTATCTTGATCATTGTGATAATATCCTGGCGGAGGCTTTAACCCAAATAACTTGCACAGTTGATTTAGCACAGATTCTGAGGGAATATTCATGGGCTCATATTTTAGAGGGAAGACCTCTTATTGGAGCCACACTCCCATGTATGATTGAGCAATTCAAAGTGTTTTGGCTGAAACCCTACGAACAATGTCCGCAGTGTTCAAATGCAAAGCAACCAGGTGGGAAACCATTCGTGTCAGTGGCAGTCAAGAAACATATTGTTAGTGCATGGCCGAACGCATCCCGAATAAGCTGGACTATCGGGGATGGAATCCCATACATTGGATCAAGGACAGAAGATAAGATAGGACAACCTGCTATTAAACCAAAATGTCCTTCCGCAGCCTTAAGAGAGGCCATTGAATTGGCGTCCCGTTTAACATGGGTAACTCAAGGCAGTTCGAACAGTGACTTGCTAATAAAACCATTTTTGGAAGCACGAGTAAATTTAAGTGTTCAAGAAATACTTCAAATGACCCCTTCACATTACTCAGGAAATATTGTTCACAGGTACAACGATCAATACAGTCCTCATTCTTTCATGGCCAATCGTATGAGTAATTCAGCAACGCGATTGATTGTTTCTACAAACACTTTAGGTGAGTTTTCAGGAGGTGGCCAGTCTGCACGCGACAGCAATATTATTTTCCAGAATGTTATAAATTATGCAGTTGCACTGTTCGATATTAAATTTAGAAACACTGAGGCTACAGATATCCAATATAATCGTGCTCACCTTCATCTAACTAAGTGTTGCACCCGGGAAGTACCAGCTCAGTATTTAACATACACATCTACATTGGATTTAGATTTAACAAGATACCGAGAAAACGAATTGATTTATGACAGTAATCCTCTAAAAGGAGGACTC
AATTGCAATATCTCATTCGATAATCCATTTTTCCAAGGTAAACGGCTGAACATTATAGAAGATGATCTTATTCGACTGCCTCACTTATCTGGATGGGAGCTAGCCAAGACCATCATGCAATCAATTATTTCAGATAGCAACAATTCATCTACAGACCCAATTAGCAGTGGAGAAACAAGATCATTCACTACCCATTTCTTAACTTATCCCAAGATAGGACTTCTGTACAGTTTTGGGGCCTTTGTAAGTTATTATCTTGGCAATACAATTCTTCGGACTAAGAAATTAACACTTGACAATTTTTTATATTACTTAACTACTCAAATTCATAATCTACCACATCGCTCATTGCGAATACTTAAGCCAACATTCAAACATGCAAGCGTTATGTCACGGTTAATGAGTATTGATCCTCATTTTTCTATTTACATAGGCGGTGCTGCAGGTGACAGAGGACTCTCAGATGCGGCCAGGTTATTTTTGAGAACGTCCATTTCATCTTTTCTTACATTTGTAAAAGAATGGATAATTAATCGCGGAACAATTGTCCCTTTATGGATAGTATATCCGCTAGAGGGTCAAAACCCAACACCTGTGAATAATTTTCTCTATCAGATCGTAGAACTGCTGGTGCATGATTCATCAAGACAACAGGCTTTTAAAACTACCATAAGTGATCATGTACATCCTCACGACAATCTTGTTTACACATGTAAGAGTACAGCCAGCAATTTCTTCCATGCATCATTGGCGTACTGGAGGAGCAGACACAGAAACAGCAACCGAAAATACTTGGCAAGAGACTCTTCAACTGGATCAAGCACAAACAACAGTGATGGTCATATTGAGAGAAGTCAAGAACAAACCACCAGAGATCCACATGATGGCACTGAACGGAATCTAGTCCTACAAATGAGCCATGAAATAAAAAGAACGACAATTCCACAAGAAAACACGCACCAGGGTCCGTCGTTCCAGTCCTTTCTAAGTGACTCTGCTTGTGGTACAGCAAATCCAAAACTAAATTTCGATCGATCGAGACACAATGTGAAATTTCAGGATCATAACTCGGCATCCAAGAGGGAAGGTCATCAAATAATCTCACACCGTCTAGTCCTACCTTTCTTTACATTATCTCAAGGGACACGCCAATTAACGTCATCCAATGAGTCACAAACCCAAGACGAGATATCAAAGTACTTACGGCAATTGAGATCCGTCATTGATACCACAGTTTATTGTAGATTTACCGGTATAGTCTCGTCCATGCATTACAAACTTGATGAGGTCCTTTGGGAAATAGAGAGTTTCAAGTCGGCTGTGACGCTAGCAGAGGGAGAAGGTGCTGGTGCCTTACTATTGATTCAGAAATACCAAGTTAAGACCTTATTTTTCAACACGCTAGCTACTGAGTCCAGTATAGAGTCAGAAATAGTATCAGGAATGACTACTCCTAGGATGCTTCTACCTGTTATGTCAAAATTCCATAATGACCAAATTGAGATTATTCTTAACAACTCAGCAAGCCAAATAACAGACATAACAAATCCTACTTGGTTTAAAGACCAAAGAGCAAGGCTACCTAAGCAAGTCGAGGTTATAACCATGGATGCAGAGACAACAGAGAATATAAACAGATCGAAATTGTACGAAGCTGTATATAAATTGATCTTACACCATATTGATCCTAGCGTATTGAAAGCAGTGGTCCTTAAAGTCTTTCTAAGTGATACTGAGGGTATGTTATGGCTAAATGATAATTTAGCCCCGTTTTTTGCCACTGGTTATTTAATTAAGCCAATAACGTCAAGTGCTAGATCTAGTGAGTGGTATCTTTGTCTGACGAACTTCTTATCAACTACACGTAAGATGCCACACCAAAACCATCTCAGTTGTAAACAGGTAATACTTACGGCATTGCAACTGCAAATTCAACGAAGCCCATACTGGCTAAGTCATTTAACTCAGTATGCTGACTGTGAGTTACATTTAAGTTATAT
CCGCCTTGGTTTTCCATCATTAGAGAAAGTACTATACCACAGGTATAACCTCGTCGATTCAAAAAGAGGTCCACTAGTCTCTATCACTCAGCACTTAGCACATCTTAGAGCAGAGATTCGAGAATTAACTAATGATTATAATCAACAGCGACAAAGTCGGACTCAAACATATCACTTTATTCGTACTGCAAAAGGACGAATCACAAAACTAGTCAATGATTATTTAAAATTCTTTCTTATTGTGCAAGCATTAAAACATAATGGGACATGGCAAGCTGAGTTTAAGAAATTACCAGAGTTGATTAGTGTGTGCAATAGGTTCTACCATATTAGAGATTGCAATTGTGAAGAACGTTTCTTAGTTCAAACCTTATATTTACATAGAATGCAGGATTCTGAAGTTAAGCTTATCGAAAGGCTGACAGGGCTTCTGAGTTTATTTCCGGATGGTCTCTACAGGTTTGATTGAATTACCGTGCATAGTATCCTGATACTTGCAAAGGTTGGTTATTAACATACAGATTATAAAAAACTCATAAATTGCTCTCATACATCATATTGATCTAATCTCAATAAACAACTATTTAAATAACGAAAGGAGTCCCTATATTATATACTATATTTAGCCTCTCTCCCTGCGTGATAATCAAAAAATTCACAATGCAGCATGTGTGACATATTACTGCCGCAATGAATTTAACGCAACATAATAAACTCTGCACTCTTTATAATTAAGCTTTAACGAAAGGTCTGGGCTCATATTGTTATTGATATAATAATGTTGTATCAATATCCTGTCAGATGGAATAGTGTTTTGGTTGATAACACAACTTCTTAAAACAAAATTGATCTTTAAGATTAAGTTTTTTATAATTATCATTACTTTAATTTGTCGTTTTAAAAACGGTGATAGCCTTAATCTTTGTGTAAAATAAGAGATTAGGTGTAATAACCTTAACATTTTTGTCTAGTAAGCTACTATTTCATACAGAATGATAAAATTAAAAGAAAAGGCAGGACTGTAAAATCAGAAATACCTTCTTTACAATATAGCAGACTAGATAATAATCTTCGTGTTAATGATAATTAAGACATTGACCACGCTCATCAGAAGGCTCGCCAGAATAAACGTTGCAAAAAGGATTCCTGGAAAAATGGTCGCACACAAAAATTTAAAAATAAATCTATTTCTTCTTTTTTGTGTGTCCA - genes: - - name: NP - sequence: MDSRPQKIWMAPSLTESDMDYHKILTAGLSVQQGIVRQRVIPVYQVNNLEEICQLIIQAFEAGVDFQESADSFLLMLCLHHAYQGDYKLFLESGAVKYLEGHGFRFEVKKRDGVKRLEELLPAVSSGKNIKRTLAAMPEEETTEANAGQFLSFASLFLPKLVVGEKACLEKVQRQIQVHAEQGLIQYPTAWQSVGHMMVIFRLMRTNFLIKFLLIHQGMHMVAGHDANDAVISNSVAQARFSGLLIVKTVLDHILQKTERGVRLHPLARTAKVKNEVNSFKAALSSLAKHGEYAPFARLLNLSGVNNLEHGLFPQLSAIALGVATAHGSTLAGVNVGEQYQQLREAATEAEKQLQQYAESRELDHLGLDDQEKKILMNFHQKKNEISFQQTNAMVTLRKERLAKLTEAITAASLPKTSGHYDDDDDIPFPGPINDDDNPGHQDDDPTDSQDTTIPDVVVDPDDGSYGEYQSYSENGMNAPDDLVLFDLDEDDEDTKPVPNRSTKGGQQKNSQKGQHIEGRQTQSRPIQNVPGPHRTIHHASAPLTDNDRRNEPSGSTSPRMLTPINEEADPLDDADDETSSLPPLESDDEEQDRDGTSNRTPTVAPPAPVYRDHSEKKELPQDEQQDQDHTQEARNQDSDNTQSEHSFEEMYRHILRSQGPFDAVLYYHMMKDEPVVFSTSDGKEYTYPDSLEEEYPPWLTEKEAMNEENRFVTLDGQQFYWPVMNHKNKFMAILQHHQ* - - name: VP35 
- sequence: MTTRTKGRGHTAATTQNDRMPGPELSGWISEQLMTGRIPVSDIFCDIENNPGLCYASQMQQTKPNPKTRNSQTQTDPICNHSFEEVVQTLASLATVVQQQTIASESLEQRITSLENGLKPVYDMAKTISSLNRVCAEMVAKYDLLVMTTGRATATAAATEAYWAEHGQPPPGPSLYEESAIRGKIESRDETVPQSVREAFNNLNSTTSLTEENFGKPDISAKDLRNIMYDHLPGFGTAFHQLVQVICKLGKDSNSLDIIHAEFQASLAEGDSPQCALIQITKRVPIFQDAAPPVIHIRSRGDIPRACQKSLRPVPPSPKIDRGWVCVFQLQDGKTLGLKI* - - name: VP40 - sequence: MRRVILPTAPPEYMEAIYPVRSNSTIARGGNSNTGFLTPESVNGDTPSNPLRPIADDTIDHASHTPGSVSSAFILEAMVNVISGPKVLMKQIPIWLPLGVADQKTYSFDSTTAAIMLASYTITHFGKATNPLVRVNRLGPGIPDHPLRLLRIGNQAFLQEFVLPPVQLPQYFTFDLTALKLITQPLPAATWTDDTPTGSNGALRPGISFHPKLRPILLPNKSGKKGNSADLTSPEKIQAIMTSLQDFKIVPIDPTKNIMGIEVPETLVHKLTGKKVTSKNGQPIIPVLLPKYIGLDPVAPGDLTMVITQDCDTCHSPASLPAVIEK* - - name: GP - sequence: MGVTGILQLPRDRFKRTSFFLWVIILFQRTFSIPLGVIHNSTLQVSDVDKLVCRDKLSSTNQLRSVGLNLEGNGVATDVPSATKRWGFRSGVPPKVVNYEAGEWAENCYNLEIKKPDGSECLPAAPDGIRGFPRCRYVHKVSGTGPCAGDFAFHKEGAFFLYDRLASTVIYRGTTFAEGVVAFLILPQAKKDFFSSHPLREPVNATEDPSSGYYSTTIRYQATGFGTNETEYLFEVDNLTYVQLESRFTPQFLLQLNETIYTSGKRSNTTGKLIWKVNPEIDTTIGEWAFWETKKNLTRKIRSEELSFTVVSNGAKNISGQSPARTSSDPGTNTTTEDHKIMASENSSAMVQVHSQGREAAVSHLTTLATISTSPQSLTTKPGPDNSTHNTPVYKLDISEATQVEQHHRRTDNDSTASDTPSATTAAGPPKAENTNTSKSTDFLDPATTTSPQNHSETAGNNNTHHQDTGEESASSGKLGLITNTIAGVAGLITGGRRTRREAIVNAQPKCNPNLHYWTTQDEGAAIGLAWIPYFGPAAEGIYIEGLMHNQDGLICGLRQLANETTQALQLFLRATTELRTFSILNRKAIDFLLQRWGGTCHILGPDCCIEPHDWTKNITDKIDQIIHDFVDKTLPDQGDNDNWWTGWRQWIPAGIGVTGVIIAVIALFCICKFVF* - - name: ssGP - sequence: MGVTGILQLPRDRFKRTSFFLWVIILFQRTFSIPLGVIHNSTLQVSDVDKLVCRDKLSSTNQLRSVGLNLEGNGVATDVPSATKRWGFRSGVPPKVVNYEAGEWAENCYNLEIKKPDGSECLPAAPDGIRGFPRCRYVHKVSGTGPCAGDFAFHKEGAFFLYDRLASTVIYRGTTFAEGVVAFLILPQAKKDFFSSHPLREPVNATEDPSSGYYSTTIRYQATGFGTNETEYLFEVDNLTYVQLESRFTPQFLLQLNETIYTSGKRSNTTGKLIWKVNPEIDTTIGEWAFWETKKPH* - - name: sGP - sequence: 
MGVTGILQLPRDRFKRTSFFLWVIILFQRTFSIPLGVIHNSTLQVSDVDKLVCRDKLSSTNQLRSVGLNLEGNGVATDVPSATKRWGFRSGVPPKVVNYEAGEWAENCYNLEIKKPDGSECLPAAPDGIRGFPRCRYVHKVSGTGPCAGDFAFHKEGAFFLYDRLASTVIYRGTTFAEGVVAFLILPQAKKDFFSSHPLREPVNATEDPSSGYYSTTIRYQATGFGTNETEYLFEVDNLTYVQLESRFTPQFLLQLNETIYTSGKRSNTTGKLIWKVNPEIDTTIGEWAFWETKKTSLEKFAVKSCLSQLYQTEPKTSVVRVRRELLPTQGPTQQLKTTKSWLQKIPLQWFKCTVKEGKLQCRI* - - name: VP30 - sequence: MEASYERGRPRAARQHSRDGHDHHVRARSSSRENYRGEYRQSRSASQVRVPTVFHKKRVEPLTVPPAPKDICPTLKKGFLCDSSFCKKDHQLESLTDRELLLLIARKTCGSVEQQLNITAPKDSRLANPTADDFQQEEGPKITLLTLIKTAEHWARQDIRTIEDSKLRALLTLCAVMTRKFSKSQLSLLCETHLRREGLGQDQAEPVLEVYQRLHSDKGGSFEAALWQQWDRQSLIMFITAFLNIALQLPCESSAVVVSGLRTLVPQSDNEEASTNPGTCSWSDEGTP* - - name: VP24 - sequence: MAKATGRYNLISPKKDLEKGVVLSDLCNFLVSQTIQGWKVYWAGIEFDVTHKGMALLHRLKTNDFAPAWSMTRNLFPHLFQNPNSTIESPLWALRVILAAGIQDQLIDQSLIEPLAGALGLISDWLLTTNTNHFNMRTQRVKEQLSLKMLSLIRSNILKFINKLDALHVVNYNGLLSSIEIGTQNHTIIITRTNMGFLVELQEPDKSAMNRMKPGPAKFSLLHESTLKAFTQGSSTRMQSLILEFNSSLAI* - - name: L - sequence: MATQHTQYPDARLSSPIVLDQCDLVTRACGLYSSYSLNPQLRNCKLPKHIYRLKYDVTVTKFLSDVPVATLPIDFIVPVLLKALSGNGFCPVEPRCQQFLDEIIKYTMQDALFLKYYLKNVGAQEDCVDEHFQEKILSSIQGNEFLHQMFFWYDLAILTRRGRLNRGNSRSTWFVHDDLIDILGYGDYVFWKIPISMLPLNTQGIPHAAMDWYQASVFKEAVQGHTHIVSVSTADVLIMCKDLITCRFNTTLISKIAEIEDPVCSDYPNFKIVSMLYQSGDYLLSILGSDGYKIIKFLEPLCLAKIQLCSKYTERKGRFLTQMHLAVNHTLEEITEMRALKPSQAQKIREFHRTLIRLEMTPQQLCELFSIQKHWGHPVLHSETAIQKVKKHATVLKALRPIVIFETYCVFKYSIAKHYFDSQGSWYSVTSDRNLTPGLNSYIKRNQFPPLPMIKELLWEFYHLDHPPLFSTKIISDLSIFIKDRATAVERTCWDAVFEPNVLGYNPPHKFSTKRVPEQFLEQENFSIENVLSYAQKLEYLLPQYRNFSFSLKEKELNVGRTFGKLPYPTRNVQTLCEALLADGLAKAFPSNMMVVTEREQKESLLHQASWHHTSDDFGEHATVRGSSFVTDLEKYNLAFRYEFTAPFIEYCNRCYGVKNVFNWMHYTIPQCYMHVSDYYNPPHNLTLENRDNPPEGPSSYRGHMGGIEGLQQKLWTSISCAQISLVEIKTGFKLRSAVMGDNQCITVLSVFPLETDADEQEQSAEDNAARVAASLAKVTSACGIFLKPDETFVHSGFIYFGKKQYLNGVQLPQSLKTATRMAPLSDAIFDDLQGTLASIGTAFERSISETRHIFPCRITAAFHTFFSVRILQYHHLGFNKGFDLGQLTLGKPLDFGTISLALAVPQVLGGLSFLNPEKCFYRNLGDPVTSGLFQLKTYLRMIEMDDLFLPLIAKNPGNCTAIDFVLNPSGLNVPGSQDLTSFLRQIVRRTITLSAKNKLINTLFHASADFED
EMVCKWLLSSTPVMSRFAADIFSRTPSGKRLQILGYLEGTRTLLASKIINNNTETPVLDRLRKITLQRWSLWFSYLDHCDNILAEALTQITCTVDLAQILREYSWAHILEGRPLIGATLPCMIEQFKVFWLKPYEQCPQCSNAKQPGGKPFVSVAVKKHIVSAWPNASRISWTIGDGIPYIGSRTEDKIGQPAIKPKCPSAALREAIELASRLTWVTQGSSNSDLLIKPFLEARVNLSVQEILQMTPSHYSGNIVHRYNDQYSPHSFMANRMSNSATRLIVSTNTLGEFSGGGQSARDSNIIFQNVINYAVALFDIKFRNTEATDIQYNRAHLHLTKCCTREVPAQYLTYTSTLDLDLTRYRENELIYDSNPLKGGLNCNISFDNPFFQGKRLNIIEDDLIRLPHLSGWELAKTIMQSIISDSNNSSTDPISSGETRSFTTHFLTYPKIGLLYSFGAFVSYYLGNTILRTKKLTLDNFLYYLTTQIHNLPHRSLRILKPTFKHASVMSRLMSIDPHFSIYIGGAAGDRGLSDAARLFLRTSISSFLTFVKEWIINRGTIVPLWIVYPLEGQNPTPVNNFLYQIVELLVHDSSRQQAFKTTISDHVHPHDNLVYTCKSTASNFFHASLAYWRSRHRNSNRKYLARDSSTGSSTNNSDGHIERSQEQTTRDPHDGTERNLVLQMSHEIKRTTIPQENTHQGPSFQSFLSDSACGTANPKLNFDRSRHNVKFQDHNSASKREGHQIISHRLVLPFFTLSQGTRQLTSSNESQTQDEISKYLRQLRSVIDTTVYCRFTGIVSSMHYKLDEVLWEIESFKSAVTLAEGEGAGALLLIQKYQVKTLFFNTLATESSIESEIVSGMTTPRMLLPVMSKFHNDQIEIILNNSASQITDITNPTWFKDQRARLPKQVEVITMDAETTENINRSKLYEAVYKLILHHIDPSVLKAVVLKVFLSDTEGMLWLNDNLAPFFATGYLIKPITSSARSSEWYLCLTNFLSTTRKMPHQNHLSCKQVILTALQLQIQRSPYWLSHLTQYADCELHLSYIRLGFPSLEKVLYHRYNLVDSKRGPLVSITQHLAHLRAEIRELTNDYNQQRQSRTQTYHFIRTAKGRITKLVNDYLKFFLIVQALKHNGTWQAEFKKLPELISVCNRFYHIRDCNCEERFLVQTLYLHRMQDSEVKLIERLTGLLSLFPDGLYRFD* - west-nile: - schema: - instanceName: "West Nile Virus" - image: "https://upload.wikimedia.org/wikipedia/commons/thumb/1/1e/West_Nile_Virus_Image.jpg/256px-West_Nile_Virus_Image.jpg?20200815184100" - description: "West Nile Virus (WNV) is a mosquito-borne flavivirus of the family Flaviviridae, which also contains the Zika virus, dengue virus, and yellow fever virus. It is primarily transmitted by Culex mosquitoes, which acquire the virus by feeding on infected birds." 
- metadata: - - name: collection_date - displayName: Collection date - type: date - required: true - initiallyVisible: true - header: Sample details - - name: ncbi_release_date - displayName: NCBI release date - type: date - header: "INSDC" - - name: country - type: string - required: true - generateIndex: true - autocomplete: true - initiallyVisible: true - header: Sample details - - name: lineage - displayName: Lineage - type: string - generateIndex: true - autocomplete: true - initiallyVisible: true - header: Sample details - - name: isolate_name - displayName: Isolate name - type: string - header: Sample details - - name: author_affiliation - displayName: Author affiliation - type: string - generateIndex: true - autocomplete: true - truncateColumnDisplayTo: 15 - header: Authors - - name: authors - displayName: Authors - type: authors - header: Authors - truncateColumnDisplayTo: 15 - - name: submitter_country - displayName: Submitter country - type: string - generateIndex: true - autocomplete: true - hideOnSequenceDetailsPage: true - - name: division - type: string - generateIndex: true - autocomplete: true - initiallyVisible: true - header: Sample details - - name: insdc_accession_base - type: string - header: "INSDC" - hideOnSequenceDetailsPage: true - - name: insdc_version - type: int - header: "INSDC" - hideOnSequenceDetailsPage: true - - name: insdc_accession_full - type: string - displayName: INSDC accession - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/nuccore/{{value}}" - header: "INSDC" - - name: bioprojects - type: string - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/bioproject/{{value}}" - header: "INSDC" - - name: biosample_accession - type: string - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/biosample/{{value}}" - header: "INSDC" - - name: ncbi_completeness - type: string - generateIndex: true - autocomplete: true - header: "Alignment states and QC metrics" - - name: ncbi_host_name - 
type: string - generateIndex: true - autocomplete: true - initiallyVisible: true - header: "Host" - - name: ncbi_host_tax_id - type: int - autocomplete: true - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=info&id={{value}}" - header: "Host" - - name: ncbi_is_lab_host - type: string - generateIndex: true - autocomplete: true - initiallyVisible: true - header: "Host" - - name: ncbi_length - type: int - header: "INSDC" - hideOnSequenceDetailsPage: true - - name: ncbi_protein_count - type: int - header: "INSDC" - hideOnSequenceDetailsPage: true - - name: ncbi_update_date - type: date - header: "INSDC" - - name: ncbi_sourcedb - type: string - generateIndex: true - autocomplete: true - header: "INSDC" - hideOnSequenceDetailsPage: true - - name: ncbi_virus_name - type: string - generateIndex: true - autocomplete: true - hideOnSequenceDetailsPage: true - - name: ncbi_virus_tax_id - type: int - autocomplete: true - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/virus?SeqType_s=Nucleotide&VirusLineage_ss=taxid:{{value}}" - hideOnSequenceDetailsPage: true - - name: isolate_source - type: string - generateIndex: true - autocomplete: true - header: "Host" - - name: sra_accessions - type: string - customDisplay: - type: link - url: "https://www.ncbi.nlm.nih.gov/sra/?term={{value}}" - header: "INSDC" - - name: length - type: int - autocomplete: true - - name: total_snps - type: int - header: "Alignment states and QC metrics" - - name: total_inserted_nucs - type: int - header: "Alignment states and QC metrics" - - name: total_deleted_nucs - type: int - header: "Alignment states and QC metrics" - - name: total_ambiguous_nucs - type: int - header: "Alignment states and QC metrics" - - name: total_unknown_nucs - type: int - header: "Alignment states and QC metrics" - - name: total_frame_shifts - type: int - header: "Alignment states and QC metrics" - - name: frame_shifts - type: string - header: 
"Alignment states and QC metrics" - - name: completeness - type: float - header: "Alignment states and QC metrics" - - name: total_stop_codons - type: int - header: "Alignment states and QC metrics" - - name: stop_codons - type: string - header: "Alignment states and QC metrics" - website: - tableColumns: - - collection_date - - country - - division - - authors - - author_affiliation - - ncbi_release_date - - ncbi_host_name - - length - - lineage - defaultOrderBy: collection_date - defaultOrder: descending - silo: - dateToSortBy: collection_date - inputFields: - - name: collection_date - displayName: Collection Date - - name: ncbi_release_date - displayName: NCBI Release Date - - name: country - displayName: Country - - name: isolate_name - displayName: Isolate Name - - name: author_affiliation - displayName: Author Affiliation - - name: authors - displayName: Authors - - name: submitter_country - displayName: Submitter Country - - name: division - displayName: Division - - name: insdc_accession_base - displayName: INSDC Accession Base - - name: insdc_version - displayName: INSDC Version - - name: insdc_accession_full - displayName: INSDC Accession Full - - name: bioprojects - displayName: BioProjects - - name: biosample_accession - displayName: BioSample Accession - - name: ncbi_completeness - displayName: NCBI Completeness - - name: ncbi_host_name - displayName: NCBI Host Name - - name: ncbi_host_tax_id - displayName: NCBI Host Tax ID - - name: ncbi_is_lab_host - displayName: NCBI Is Lab Host - - name: ncbi_length - displayName: NCBI Length - - name: ncbi_protein_count - displayName: NCBI Protein Count - - name: ncbi_update_date - displayName: NCBI Update Date - - name: ncbi_sourcedb - displayName: NCBI Source DB - - name: ncbi_virus_name - displayName: NCBI Virus Name - - name: ncbi_virus_tax_id - displayName: NCBI Virus Tax ID - - name: isolate_source - displayName: Isolate Source - - name: sra_accessions - displayName: SRA Accessions - preprocessing: - - 
version: 2 - image: ghcr.io/loculus-project/preprocessing-nextclade - args: - - "prepro" - configFile: - log_level: DEBUG - nextclade_dataset_name: nextstrain/wnv/all-lineages - nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/wnv/data_output - genes: - - capsid - - prM - - env - - NS1 - - NS2A - - NS2B - - NS3 - - NS4A - - 2K - - NS4B - - NS5 - batch_size: 100 - processing_spec: - total_snps: - function: identity - args: - type: int - inputs: - input: nextclade.totalSubstitutions - total_inserted_nucs: - function: identity - args: - type: int - inputs: - input: nextclade.totalInsertions - total_deleted_nucs: - function: identity - args: - type: int - inputs: - input: nextclade.totalDeletions - total_ambiguous_nucs: - function: identity - args: - type: int - inputs: - input: nextclade.totalNonACGTNs - total_unknown_nucs: - function: identity - args: - type: int - inputs: - input: nextclade.totalMissing - total_frame_shifts: - function: identity - args: - type: int - inputs: - input: nextclade.totalFrameShifts - frame_shifts: - function: identity - inputs: - input: nextclade.frameShifts - completeness: - function: identity - args: - type: float - inputs: - input: nextclade.coverage - total_stop_codons: - function: identity - args: - type: int - inputs: - input: nextclade.qc.stopCodons.totalStopCodons - stop_codons: - function: identity - inputs: - input: nextclade.qc.stopCodons.stopCodons - collection_date: - function: process_date - inputs: - date: collection_date - release_date: ncbi_release_date - required: true - ncbi_release_date: - function: parse_timestamp - inputs: - timestamp: ncbi_release_date - country: - function: identity - inputs: - input: country - required: true - author_affiliation: - function: identity - inputs: - input: author_affiliation - authors: - function: identity - inputs: - input: authors - isolate_name: - function: identity - inputs: - input: isolate_name - submitter_country: - function: identity - 
inputs: - input: submitter_country - division: - function: identity - inputs: - input: division - insdc_accession_base: - function: identity - inputs: - input: insdc_accession_base - insdc_version: - function: identity - args: - type: int - inputs: - input: insdc_version - insdc_accession_full: - function: identity - inputs: - input: insdc_accession_full - bioprojects: - function: identity - inputs: - input: bioprojects - biosample_accession: - function: identity - inputs: - input: biosample_accession - ncbi_completeness: - function: identity - inputs: - input: ncbi_completeness - ncbi_host_name: - function: identity - inputs: - input: ncbi_host_name - ncbi_host_tax_id: - function: identity - args: - type: int - inputs: - input: ncbi_host_tax_id - ncbi_is_lab_host: - function: identity - inputs: - input: ncbi_is_lab_host - ncbi_length: - function: identity - args: - type: int - inputs: - input: ncbi_length - ncbi_protein_count: - function: identity - args: - type: int - inputs: - input: ncbi_protein_count - ncbi_update_date: - function: parse_timestamp - inputs: - timestamp: ncbi_update_date - ncbi_sourcedb: - function: identity - inputs: - input: ncbi_sourcedb - ncbi_virus_name: - function: identity - inputs: - input: ncbi_virus_name - ncbi_virus_tax_id: - function: identity - args: - type: int - inputs: - input: ncbi_virus_tax_id - isolate_source: - function: identity - inputs: - input: isolate_source - sra_accessions: - function: identity - inputs: - input: sra_accessions - lineage: - function: identity - inputs: - input: nextclade.clade - ingest: - image: ghcr.io/loculus-project/ingest - configFile: - taxon_id: 3048448 - referenceGenomes: - nucleotideSequences: - - name: main - sequence: 
"AGTAGTTCGCCTGTGTGAGCTGACAAACTTAGTAGTGTTTGTGAGGATTAACAACAATTAACACAGTGCGAGCTGTTTCTTAGCACGAAGATCTCGATGTCTAAGAAACCAGGAGGGCCCGGCAAGAGCCGGGCTGTCAATATGCTAAAACGCGGAATGCCCCGCGTGTTGTCCTTGATTGGACTGAAGAGGGCTATGTTGAGCCTGATCGACGGCAAGGGGCCAATACGATTTGTGTTGGCTCTCTTGGCGTTCTTCAGGTTCACAGCAATTGCTCCGACCCGAGCAGTGCTGGATCGATGGAGAGGTGTGAACAAACAAACAGCGATGAAACACCTTCTGAGTTTTAAGAAGGAACTAGGGACCTTGACCAGTGCTATCAATCGGCGGAGCTCAAAACAAAAGAAAAGAGGAGGAAAGACCGGAATTGCAGTCATGATTGGCCTGATCGCCAGCGTAGGAGCAGTTACCCTCTCTAACTTCCAAGGGAAGGTGATGATGACGGTAAATGCTACTGACGTCACAGATGTCATCACGATTCCAACAGCTGCTGGAAAGAACCTATGCATTGTCAGAGCAATGGATGTGGGATACATGTGCGATGATACTATCACTTATGAATGCCCAGTACTGTCGGCTGGTAATGATCCAGAAGACATCGACTGTTGGTGCACAAAGTCAGCAGTCTACGTCAGGTATGGAAGATGCACCAAGACACGCCACTCAAGACGCAGTCGGAGGTCACTGACAGTGCAGACACACGGAGAAAGCACTCTAGCGAACAAGAAGGGGGCTTGGATGGACAGCACCAAGGCCACAAGGTATTTGGTAAAAACAGAATCATGGATCTTGAGGAACCCTGGATATGCCCTGGTGGCAGCCGTCATTGGTTGGATGCTTGGGAGCAACACCATGCAGAGAGTTGTGTTTGTCGTGCTATTGCTTTTGGTGGCCCCAGCTTACAGCTTCAACTGCCTTGGAATGAGCAACAGAGACTTCTTGGAAGGAGTGTCTGGAGCAACATGGGTGGATTTGGTTCTCGAAGGCGACAGCTGCGTGACTATCATGTCTAAGGACAAGCCTACCATCGATGTGAAGATGATGAATATGGAGGCGGCCAACCTGGCAGAGGTCCGCAGTTATTGCTATTTGGCTACCGTCAGCGATCTCTCCACCAAAGCTGCGTGCCCGACCATGGGAGAAGCTCACAATGACAAACGTGCTGACCCAGCTTTTGTGTGCAGACAAGGAGTGGTGGACAGGGGCTGGGGCAACGGCTGCGGACTATTTGGCAAAGGAAGCATTGACACATGCGCCAAATTTGCCTGCTCTACCAAGGCAATAGGAAGAACCATCTTGAAAGAGAATATCAAGTACGAAGTGGCCATTTTTGTCCATGGACCAACTACTGTGGAGTCGCACGGAAACTACTCCACACAGGTTGGAGCCACTCAGGCAGGGAGACTCAGCATCACTCCTGCGGCGCCTTCATACACACTAAAGCTTGGAGAATATGGAGAGGTGACAGTGGACTGTGAACCACGGTCAGGGATTGACACCAATGCATACTACGTGATGACTGTTGGAACAAAGACGTTCTTGGTCCATCGTGAGTGGTTCATGGACCTCAACCTCCCTTGGAGCAGTGCTGGAAGTACTGTGTGGAGGAACAGAGAGACGTTAATGGAGTTTGAGGAACCACACGCCACGAAGCAGTCTGTGATAGCATTGGGCTCACAAGAGGGAGCTCTGCATCAAGCTTTGGCTGGAGCCATTCCTGTGGAATTTTCAAGCAACACTGTCAAGTTGACGTCGGGTCATTTGAAGTGTAGAGTGAAGATGGAAAAATTGCAGTTGAAGGGAACAACCTATGGCGTCTGTTCAAAGGCTTTCAAGTTTCTTGGGACTCCCGCAGACACAGGTCACGGCACTGTGGTGTTGGAATTGCAGTACACTGGCACGGATGGACCTTGCAAAGTTCCTATCTCGTCAGTGGCTT
CATTGAACGACCTAACGCCAGTGGGCAGATTGGTCACTGTCAACCCTTTTGTTTCAGTGGCCACGGCCAACGCTAAGGTCCTGATTGAATTGGAACCACCCTTTGGAGACTCATACATAGTGGTGGGCAGAGGAGAACAACAGATCAATCACCATTGGCACAAGTCTGGAAGCAGCATTGGCAAAGCCTTTACAACCACCCTCAAAGGAGCGCAGAGACTAGCCGCTCTAGGAGACACAGCTTGGGACTTTGGATCAGTTGGAGGGGTGTTCACCTCAGTTGGGAAGGCTGTCCATCAAGTGTTCGGAGGAGCATTCCGCTCACTGTTCGGAGGCATGTCCTGGATAACGCAAGGATTGCTGGGGGCTCTCCTGTTGTGGATGGGCATCAATGCTCGTGATAGGTCCATAGCTCTCACGTTTCTCGCAGTTGGAGGAGTTCTGCTCTTCCTCTCCGTGAACGTGCACGCTGACACTGGGTGTGCCATAGACATCAGCCGGCAAGAGCTGAGATGTGGAAGTGGAGTGTTCATACACAATGATGTGGAGGCTTGGATGGACCGGTACAAGTATTACCCTGAAACGCCACAAGGCCTAGCCAAGATCATTCAGAAAGCTCATAAGGAAGGAGTGTGCGGTCTACGATCAGTTTCCAGACTGGAGCATCAAATGTGGGAAGCAGTGAAGGACGAGCTGAACACTCTTTTGAAGGAGAATGGTGTGGACCTTAGTGTCGTGGTTGAGAAACAGGAGGGAATGTACAAGTCAGCACCTAAACGCCTCACCGCCACCACGGAAAAATTGGAAATTGGCTGGAAGGCCTGGGGAAAGAGTATTTTATTTGCACCAGAACTCGCCAACAACACCTTTGTGGTTGATGGTCCGGAGACCAAGGAATGTCCGACTCAGAATCGCGCTTGGAATAGCTTAGAAGTGGAGGATTTTGGATTTGGTCTCACCAGCACTCGGATGTTCCTGAAGGTCAGAGAGAGCAACACAACTGAATGTGACTCGAAGATCATTGGAACGGCTGTCAAGAACAACTTGGCGATCCACAGTGACCTGTCCTATTGGATTGAAAGCAGGCTCAATGATACGTGGAAGCTTGAAAGGGCAGTTCTGGGTGAAGTCAAATCATGTACGTGGCCTGAGACGCATACCTTGTGGGGCGATGGAATCCTTGAGAGTGACTTGATAATACCAGTCACACTGGCGGGACCACGAAGCAATCACAATCGGAGACCTGGGTACAAGACACAAAACCAGGGCCCATGGGACGAAGGCCGGGTAGAGATTGACTTCGATTACTGCCCAGGAACTACGGTCACCCTGAGTGAGAGCTGCGGACACCGTGGACCTGCCACTCGCACCACCACAGAGAGCGGAAAGTTGATAACAGATTGGTGCTGCAGGAGCTGCACCTTACCACCACTGCGCTACCAAACTGACAGCGGCTGTTGGTATGGTATGGAGATCAGACCACAGAGACATGATGAAAAGACCCTCGTGCAGTCACAAGTGAATGCTTATAATGCTGATATGATTGACCCTTTTCAGTTGGGCCTTCTGGTCGTGTTCTTGGCCACCCAGGAGGTCCTTCGCAAGAGGTGGACAGCCAAGATCAGCATGCCAGCTATACTGATTGCTCTGCTAGTCCTGGTGTTTGGGGGCATTACTTACACTGATGTGTTACGCTATGTCATCTTGGTGGGGGCAGCTTTCGCAGAATCTAATTCGGGAGGAGACGTGGTACACTTGGCGCTCATGGCGACCTTCAAGATACAACCAGTGTTTATGGTGGCATCGTTTCTCAAAGCGAGATGGACCAACCAGGAGAACATTTTGTTGATGTTGGCGGCTGTTTTCTTTCAAATGGCTTATCACGATGCCCGCCAAATTCTGCTCTGGGAGATCCCTGATGTGTTGAATTCACTGGCGGTAGCTTGGATGATACTGAGAGCCATAACATTCACAACGACATCAAACGTGGTTGTTCCGCTG
CTAGCCCTGCTAACACCCGGGCTGAGATGCTTGAATCTGGATGTGTACAGGATACTGCTGTTGATGGTCGGAATAGGCAGCTTGATCAGGGAGAAGAGGAGTGCAGCCGCAAAAAAGAAAGGAGCAAGTCTGCTATGCTTGGCTCTAGCCTCAACAGGACTTTTCAACCCCATGATCCTTGCTGCTGGACTGATTGCATGTGATCCCAACCGTAAACGCGGATGGCCCGCAACTGAAGTGATGACAGCTGTCGGCCTAATGTTTGCCATCGTCGGAGGGCTGGCAGAGCTTGACATTGACTCCATGGCCATTCCAATGACTATCGCGGGGCTCATGTTTGCTGCTTTCGTGATTTCTGGGAAATCAACAGATATGTGGATTGAGAGAACGGCGGACATTTCCTGGGAAAGTGATGCAGAAATTACAGGCTCGAGCGAAAGAGTTGATGTGCGGCTTGATGATGATGGAAACTTCCAGCTCATGAATGATCCAGGAGCACCTTGGAAGATATGGATGCTCAGAATGGTCTGTCTCGCGATTAGTGCGTACACCCCCTGGGCAATCTTGCCCTCAGTAGTTGGATTTTGGATAACTCTCCAATACACAAAGAGAGGAGGCGTGTTGTGGGACACTCCCTCACCAAAGGAGTACAAAAAGGGGGACACGACCACCGGCGTCTACAGGATCATGACTCGTGGGCTGCTCGGCAGTTATCAAGCAGGAGCGGGCGTGATGGTTGAAGGTGTTTTCCACACCCTTTGGCATACAACAAAAGGAGCCGCTTTGATGAGCGGAGAGGGCCGCCTGGACCCATACTGGGGCAGTGTCAAGGAGGATCGACTTTGTTACGGAGGACCCTGGAAATTGCAGCACAAGTGGAACGGGCAGGATGAGGTGCAGATGATTGTGGTGGAACCTGGCAAGAACGTTAAGAACGTCCAGACGAAACCAGGGGTGTTCAAAACACCTGAAGGAGAAATCGGGGCCGTGACTTTGGACTTCCCCACTGGAACATCAGGCTCACCAATAGTGGACAAAAACGGTGATGTGATTGGGCTTTATGGCAATGGAGTCATAATGCCCAACGGCTCATACATAAGCGCGATAGTGCAGGGTGAAAGGATGGATGAGCCAATCCCAGCCGGATTCGAACCTGAGATGCTGAGGAAAAAACAGATCACTGTACTGGATCTCCATCCCGGCGCCGGTAAAACAAGGAGGATTCTGCCACAGATCATCAAAGAGGCCATAAACAGAAGACTGAGAACAGCCGTGCTAGCGCCAACCAGGGTTGTGGCTGCTGAGATGGCTGAAGCACTGAGAGGACTGCCCATCCGGTACCAGACATCCGCAGTGCCCAGAGAACATAATGGAAATGAGATTGTTGATGTCATGTGTCATGCTACCCTCACCCACAGGCTGATGTCTCCTCACAGGGTGCCGAACTACAACCTGTTCGTGATGGATGAGGCTCATTTCACCGACCCAGCTAGCATTGCAGCAAGAGGTTACATTTCCACAAAGGTCGAGCTAGGGGAGGCGGCGGCAATATTCATGACAGCCACCCCACCAGGCACTTCAGATCCATTCCCAGAGTCCAATTCACCAATTTCCGACTTACAGACTGAGATCCCGGATCGAGCTTGGAACTCTGGATACGAATGGATCACAGAATACACCGGGAAGACGGTTTGGTTTGTGCCTAGTGTCAAGATGGGGAATGAGATTGCCCTTTGCCTACAACGTGCTGGAAAGAAAGTAGTCCAATTGAACAGAAAGTCGTACGAGACGGAGTACCCAAAATGTAAGAACGATGATTGGGACTTTGTTATCACAACAGACATATCTGAAATGGGGGCTAACTTCAAGGCGAGCAGGGTGATTGACAGCCGGAAGAGTGTGAAACCAACCATCATAACAGAAGGAGAAGGGAGAGTGATCCTGGGAGAACCATCTGCAGTGACAGCAGCTAGTGCCGCCCAGAGACGTGGACGTATCGG
TAGAAATCCGTCGCAAGTTGGTGATGAGTACTGTTATGGGGGGCACACGAATGAAGACGACTCGAACTTCGCCCATTGGACTGAGGCACGAATCATGCTGGACAACATCAACATGCCAAACGGACTGATCGCTCAATTCTACCAACCAGAGCGTGAGAAGGTATATACCATGGATGGGGAATACCGGCTCAGAGGAGAAGAGAGAAAAAACTTTCTGGAACTGTTGAGGACTGCAGATCTGCCAGTTTGGCTGGCTTACAAGGTTGCAGCGGCTGGAGTGTCATACCACGACCGGAGGTGGTGCTTTGATGGTCCTAGGACAAACACAATTTTAGAAGACAACAACGAAGTGGAAGTCATCACGAAGCTTGGTGAAAGGAAGATTCTGAGGCCGCGCTGGATTGATGCCAGGGTGTACTCGGATCACCAGGCACTAAAGGCGTTCAAGGACTTCGCCTCGGGAAAACGTTCTCAGATAGGGCTCATTGAGGTTCTGGGAAAGATGCCTGAGCACTTCATGGGGAAGACATGGGAAGCACTTGACACCATGTACGTTGTGGCCACTGCAGAGAAAGGAGGAAGAGCTCACAGAATGGCCCTGGAGGAACTGCCAGATGCTCTTCAGACAATTGCCTTGATTGCCTTATTGAGTGTGATGACCATGGGAGTATTCTTCCTCCTCATGCAGCGGAAGGGCATTGGAAAGATAGGTTTGGGAGGCGCTGTCTTGGGAGTCGCGACCTTTTTCTGTTGGATGGCTGAAGTTCCAGGAACGAAGATCGCCGGAATGTTGCTGCTCTCCCTTCTCTTGATGATTGTGCTAATTCCTGAGCCAGAGAAGCAACGTTCGCAGACAGACAACCAGCTAGCCGTGTTCCTGATTTGTGTCATGACCCTTGTGAGCGCAGTGGCAGCCAACGAGATGGGTTGGCTAGATAAGACCAAGAGTGACATAAGCAGTTTGTTTGGGCAAAGAATTGAGGTCAAGGAGAATTTCAGCATGGGAGAGTTTCTTCTGGACTTGAGGCCGGCAACAGCCTGGTCACTGTACGCTGTGACAACAGCGGTCCTCACTCCACTGCTAAAGCATTTGATCACGTCAGATTACATCAACACCTCATTGACCTCAATAAACGTTCAGGCAAGTGCACTATTCACACTCGCGCGAGGCTTCCCCTTCGTCGATGTTGGAGTGTCGGCTCTCCTGCTAGCAGCCGGATGCTGGGGACAAGTCACCCTCACCGTTACGGTAACAGCGGCAACACTCCTTTTTTGCCACTATGCCTACATGGTTCCCGGTTGGCAAGCTGAGGCAATGCGCTCAGCCCAGCGGCGGACAGCGGCCGGAATCATGAAGAACGCTGTAGTGGATGGCATCGTGGCCACGGACGTCCCAGAATTAGAGCGCACCACACCCATCATGCAGAAGAAAGTTGGACAGATCATGCTGATCTTGGTGTCTCTAGCTGCAGTAGTAGTGAACCCGTCTGTGAAGACAGTACGAGAAGCCGGAATTTTGATCACGGCCGCAGCGGTGACGCTTTGGGAGAATGGAGCAAGCTCTGTTTGGAACGCAACAACTGCCATCGGACTCTGCCACATCATGCGTGGGGGTTGGTTGTCATGTCTATCCATAACATGGACACTCATAAAGAACATGGAAAAACCAGGACTAAAAAGAGGTGGGGCAAAAGGACGCACCTTGGGAGAGGTTTGGAAAGAAAGACTCAACCAGATGACAAAAGAAGAGTTCACTAGGTACCGCAAAGAGGCCATCATCGAAGTCGATCGCTCAGCGGCAAAACACGCCAGGAAAGAAGGCAATGTCACTGGAGGGCATCCAGTCTCTAGGGGCACAGCAAAACTGAGATGGCTGGTCGAACGGAGGTTTCTCGAACCGGTCGGAAAAGTGATTGACCTTGGATGTGGAAGAGGCGGTTGGTGTTACTATATGGCAACCCAAAAAAGAGTCCAAGAAGTCAGAGGGTACACAAAGGGCG
GTCCCGGACATGAAGAGCCCCAACTAGTGCAAAGTTATGGATGGAACATTGTCACCATGAAGAGTGGAGTGGATGTGTTCTACAGACCTTCTGAGTGTTGTGACACCCTCCTTTGTGACATCGGAGAGTCCTCGTCAAGTGCTGAGGTTGAAGAGCATAGGACGATTCGGGTCCTTGAAATGGTTGAGGACTGGCTGCACCGAGGGCCAAGGGAATTTTGCGTGAAGGTGCTCTGTCCCTACATGCCGAAAGTCATAGAGAAGATGGAGCTGCTCCAACGCCGGTATGGGGGGGGACTGGTCAGAAACCCACTCTCACGGAATTCCACGCACGAGATGTATTGGGTGAGTCGAGCTTCAGGCAATGTGGTACATTCAGTGAATATGACCAGCCAGGTGCTCCTAGGAAGAATGGAAAAAAGGACCTGGAAGGGACCCCAATACGAGGAAGATGTAAACTTGGGAAGTGGAACCAGGGCGGTGGGAAAACCCCTGCTCAACTCAGACACCAGTAAAATCAAGAACAGGATTGAACGACTCAGGCGTGAGTACAGTTCGACGTGGCACCACGATGAGAACCACCCATATAGAACCTGGAACTATCACGGCAGTTATGATGTGAAGCCCACAGGCTCCGCCAGTTCGCTGGTCAATGGAGTGGTCAGGCTCCTCTCAAAACCATGGGACACCATCACGAATGTTACCACCATGGCCATGACTGACACTACTCCCTTCGGGCAGCAGCGAGTGTTCAAAGAGAAGGTGGACACGAAAGCTCCTGAACCGCCAGAAGGAGTGAAGTACGTGCTCAACGAGACCACCAACTGGTTGTGGGCGTTTTTGGCCAGAGAAAAACGTCCCAGAATGTGCTCTCGAGAGGAATTCATAAGAAAGGTCAACAGCAATGCAGCTTTGGGTGCCATGTTTGAAGAGCAGAATCAATGGAGGAGCGCCAGAGAAGCAGTTGAAGATCCAAAATTTTGGGAGATGGTGGATGAGGAGCGCGAGGCACATCTGCGGGGGGAATGTCACACTTGCATTTACAACATGATGGGAAAGAGAGAGAAAAAACCCGGAGAGTTCGGAAAGGCCAAGGGAAGCAGAGCCATTTGGTTCATGTGGCTCGGAGCTCGCTTTCTGGAGTTCGAGGCTCTGGGTTTTCTCAATGAAGACCACTGGCTTGGAAGAAAGAACTCAGGAGGAGGTGTCGAGGGCTTGGGCCTCCAAAAACTGGGTTACATCCTGCGTGAAGTTGGCACCCGGCCTGGGGGCAAGATCTATGCTGATGACACAGCTGGCTGGGACACCCGCATCACGAGAGCTGACTTGGAAAATGAAGCTAAGGTGCTTGAGCTGCTTGATGGGGAACATCGGCGTCTTGCCAGGGCCATCATTGAGCTCACCTATCGTCACAAAGTTGTGAAAGTGATGCGCCCGGCTGCTGATGGAAGAACCGTCATGGATGTTATCTCCAGAGAAGATCAGAGGGGGAGTGGACAAGTTGTCACCTACGCCCTAAACACTTTCACCAACCTGGCCGTCCAGCTGGTGAGGATGATGGAAGGGGAAGGAGTGATTGGCCCAGATGATGTGGAGAAACTCACAAAAGGGAAAGGACCCAAAGTCAGGACCTGGCTGTTTGAGAATGGGGAAGAAAGACTCAGCCGCATGGCTGTCAGTGGAGATGACTGTGTGGTAAAGCCCCTGGACGATCGCTTTGCCACCTCGCTCCACTTCCTCAATGCTATGTCAAAGGTTCGCAAAGACATCCAAGAGTGGAAACCGTCAACTGGATGGTATGATTGGCAGCAGGTTCCATTTTGCTCAAACCATTTCACTGAATTGATCATGAAAGATGGAAGAACACTGGTGGTTCCATGCCGAGGACAGGATGAATTGGTAGGCAGAGCTCGCATATCTCCAGGGGCCGGATGGAACGTCCGCGACACTGCTTGTCTGGCTAAGTCTTATGCCCAGATGTGGCTGCTTCTGTACTTCCAC
AGAAGAGACCTGCGGCTCATGGCCAACGCCATTTGCTCCGCTGTCCCTGTGAATTGGGTCCCTACCGGAAGAACCACGTGGTCCATCCATGCAGGAGGAGAGTGGATGACAACAGAGGACATGTTGGAGGTCTGGAACCGTGTTTGGATAGAGGAGAATGAATGGATGGAAGACAAAACCCCAGTGGAGAAATGGAGTGACGTCCCATATTCAGGAAAACGAGAGGACATCTGGTGTGGCAGCCTGATTGGCACAAGAGCCCGAGCCACGTGGGCAGAAAACATCCAGGTGGCTATCAACCAAGTCAGAGCAATCATCGGAGATGAGAAGTATGTGGACTACATGAGTTCACTAAAGAGATATGAAGACACAACTTTGGTTGAGGACACAGTACTGTAGATATTTAATCAATTGTAAATAGACAATATAAGTATGCATAAAAGTGTAGTTTTATAGTAGTATTTAGTGGTGTTAGTGTAAATAGTTAAGAAAATTTTGAGGAGAAAGTCAGGCCGGGAAGTTCCCGCCACCGGAAGTTGAGTAGACGGTGCTGCCTGCGACTCAACCCCAGGAGGACTGGGTGAACAAAGCCGCGAAGTGATCCATGTAAGCCCTCAGAACCGTCTCGGAAGGAGGACCCCACATGTTGTAACTTCAAAGCCCAATGTCAGACCACGCTACGGCGTGCTACTCTGCGGAGAGTGCAGTCTGCGATAGTGCCCCAGGAGGACTGGGTTAACAAAGGCAAACCAACGCCCCACGCGGCCCTAGCCCCGGTAATGGTGTTAACCAGGGCGAAAGGACTAGAGGTTAGAGGAGACCCCGCGGTTTAAAGTGCACGGCCCAGCCTGGCTGAAGCTGTAGGTCAGGGGAAGGACTAGAGGTTAGTGGAGACCCCGTGCCACAAAACACCACAACAAAACAGCATATTGACACCTGGGATAGACTAGGAGATCTTCTGCTCTGCACAACCAGCCACACGGCACAGTGCGCCGACAATGGTGGCTGGTGGTGCGAGAACACAGGATCT" - genes: - - name: 2K - sequence: SQTDNQLAVFLICVMTLVSAVAA - - name: NS1 - sequence: DTGCAIDISRQELRCGSGVFIHNDVEAWMDRYKYYPETPQGLAKIIQKAHKEGVCGLRSVSRLEHQMWEAVKDELNTLLKENGVDLSVVVEKQEGMYKSAPKRLTATTEKLEIGWKAWGKSILFAPELANNTFVVDGPETKECPTQNRAWNSLEVEDFGFGLTSTRMFLKVRESNTTECDSKIIGTAVKNNLAIHSDLSYWIESRLNDTWKLERAVLGEVKSCTWPETHTLWGDGILESDLIIPVTLAGPRSNHNRRPGYKTQNQGPWDEGRVEIDFDYCPGTTVTLSESCGHRGPATRTTTESGKLITDWCCRSCTLPPLRYQTDSGCWYGMEIRPQRHDEKTLVQSQVNA - - name: NS2A - sequence: YNADMIDPFQLGLLVVFLATQEVLRKRWTAKISMPAILIALLVLVFGGITYTDVLRYVILVGAAFAESNSGGDVVHLALMATFKIQPVFMVASFLKARWTNQENILLMLAAVFFQMAYHDARQILLWEIPDVLNSLAVAWMILRAITFTTTSNVVVPLLALLTPGLRCLNLDVYRILLLMVGIGSLIREKRSAAAKKKGASLLCLALASTGLFNPMILAAGLIACDPNRKR - - name: NS2B - sequence: GWPATEVMTAVGLMFAIVGGLAELDIDSMAIPMTIAGLMFAAFVISGKSTDMWIERTADISWESDAEITGSSERVDVRLDDDGNFQLMNDPGAPWKIWMLRMVCLAISAYTPWAILPSVVGFWITLQYTKR - - name: NS3 - sequence: 
GGVLWDTPSPKEYKKGDTTTGVYRIMTRGLLGSYQAGAGVMVEGVFHTLWHTTKGAALMSGEGRLDPYWGSVKEDRLCYGGPWKLQHKWNGQDEVQMIVVEPGKNVKNVQTKPGVFKTPEGEIGAVTLDFPTGTSGSPIVDKNGDVIGLYGNGVIMPNGSYISAIVQGERMDEPIPAGFEPEMLRKKQITVLDLHPGAGKTRRILPQIIKEAINRRLRTAVLAPTRVVAAEMAEALRGLPIRYQTSAVPREHNGNEIVDVMCHATLTHRLMSPHRVPNYNLFVMDEAHFTDPASIAARGYISTKVELGEAAAIFMTATPPGTSDPFPESNSPISDLQTEIPDRAWNSGYEWITEYTGKTVWFVPSVKMGNEIALCLQRAGKKVVQLNRKSYETEYPKCKNDDWDFVITTDISEMGANFKASRVIDSRKSVKPTIITEGEGRVILGEPSAVTAASAAQRRGRIGRNPSQVGDEYCYGGHTNEDDSNFAHWTEARIMLDNINMPNGLIAQFYQPEREKVYTMDGEYRLRGEERKNFLELLRTADLPVWLAYKVAAAGVSYHDRRWCFDGPRTNTILEDNNEVEVITKLGERKILRPRWIDARVYSDHQALKAFKDFASGKR - - name: NS4A - sequence: SQIGLIEVLGKMPEHFMGKTWEALDTMYVVATAEKGGRAHRMALEELPDALQTIALIALLSVMTMGVFFLLMQRKGIGKIGLGGAVLGVATFFCWMAEVPGTKIAGMLLLSLLLMIVLIPEPEKQR - - name: NS4B - sequence: NEMGWLDKTKSDISSLFGQRIEVKENFSMGEFLLDLRPATAWSLYAVTTAVLTPLLKHLITSDYINTSLTSINVQASALFTLARGFPFVDVGVSALLLAAGCWGQVTLTVTVTAATLLFCHYAYMVPGWQAEAMRSAQRRTAAGIMKNAVVDGIVATDVPELERTTPIMQKKVGQIMLILVSLAAVVVNPSVKTVREAGILITAAAVTLWENGASSVWNATTAIGLCHIMRGGWLSCLSITWTLIKNMEKPGLKR - - name: NS5 - sequence: GGAKGRTLGEVWKERLNQMTKEEFTRYRKEAIIEVDRSAAKHARKEGNVTGGHPVSRGTAKLRWLVERRFLEPVGKVIDLGCGRGGWCYYMATQKRVQEVRGYTKGGPGHEEPQLVQSYGWNIVTMKSGVDVFYRPSECCDTLLCDIGESSSSAEVEEHRTIRVLEMVEDWLHRGPREFCVKVLCPYMPKVIEKMELLQRRYGGGLVRNPLSRNSTHEMYWVSRASGNVVHSVNMTSQVLLGRMEKRTWKGPQYEEDVNLGSGTRAVGKPLLNSDTSKIKNRIERLRREYSSTWHHDENHPYRTWNYHGSYDVKPTGSASSLVNGVVRLLSKPWDTITNVTTMAMTDTTPFGQQRVFKEKVDTKAPEPPEGVKYVLNETTNWLWAFLAREKRPRMCSREEFIRKVNSNAALGAMFEEQNQWRSAREAVEDPKFWEMVDEEREAHLRGECHTCIYNMMGKREKKPGEFGKAKGSRAIWFMWLGARFLEFEALGFLNEDHWLGRKNSGGGVEGLGLQKLGYILREVGTRPGGKIYADDTAGWDTRITRADLENEAKVLELLDGEHRRLARAIIELTYRHKVVKVMRPAADGRTVMDVISREDQRGSGQVVTYALNTFTNLAVQLVRMMEGEGVIGPDDVEKLTKGKGPKVRTWLFENGEERLSRMAVSGDDCVVKPLDDRFATSLHFLNAMSKVRKDIQEWKPSTGWYDWQQVPFCSNHFTELIMKDGRTLVVPCRGQDELVGRARISPGAGWNVRDTACLAKSYAQMWLLLYFHRRDLRLMANAICSAVPVNWVPTGRTTWSIHAGGEWMTTEDMLEVWNRVWIEENEWMEDKTPVEKWSDVPYSGKREDIWCGSLIGTRARATWAENIQVAINQVRAIIGDEKYVDYMSSLKRYEDTTLVEDTVL - - name: 
capsid - sequence: MSKKPGGPGKSRAVNMLKRGMPRVLSLIGLKRAMLSLIDGKGPIRFVLALLAFFRFTAIAPTRAVLDRWRGVNKQTAMKHLLSFKKELGTLTSAINRRSSKQKKRGGKTGIAVMIGLIASVGA - - name: env - sequence: FNCLGMSNRDFLEGVSGATWVDLVLEGDSCVTIMSKDKPTIDVKMMNMEAANLAEVRSYCYLATVSDLSTKAACPTMGEAHNDKRADPAFVCRQGVVDRGWGNGCGLFGKGSIDTCAKFACSTKAIGRTILKENIKYEVAIFVHGPTTVESHGNYSTQVGATQAGRLSITPAAPSYTLKLGEYGEVTVDCEPRSGIDTNAYYVMTVGTKTFLVHREWFMDLNLPWSSAGSTVWRNRETLMEFEEPHATKQSVIALGSQEGALHQALAGAIPVEFSSNTVKLTSGHLKCRVKMEKLQLKGTTYGVCSKAFKFLGTPADTGHGTVVLELQYTGTDGPCKVPISSVASLNDLTPVGRLVTVNPFVSVATANAKVLIELEPPFGDSYIVVGRGEQQINHHWHKSGSSIGKAFTTTLKGAQRLAALGDTAWDFGSVGGVFTSVGKAVHQVFGGAFRSLFGGMSWITQGLLGALLLWMGINARDRSIALTFLAVGGVLLFLSVNVHA - - name: prM - sequence: VTLSNFQGKVMMTVNATDVTDVITIPTAAGKNLCIVRAMDVGYMCDDTITYECPVLSAGNDPEDIDCWCTKSAVYVRYGRCTKTRHSRRSRRSLTVQTHGESTLANKKGAWMDSTKATRYLVKTESWILRNPGYALVAAVIGWMLGSNTMQRVVFVVLLLLVAPAYS auth: smtp: host: "in-v3.mailjet.com" diff --git a/preprocessing/nextclade/src/loculus_preprocessing/prepro.py b/preprocessing/nextclade/src/loculus_preprocessing/prepro.py index b753662d71..b0be151b24 100644 --- a/preprocessing/nextclade/src/loculus_preprocessing/prepro.py +++ b/preprocessing/nextclade/src/loculus_preprocessing/prepro.py @@ -232,6 +232,8 @@ def process_single( } for output_field, spec_dict in config.processing_spec.items(): + if output_field == "length": + continue spec = ProcessingSpec( inputs=spec_dict["inputs"], function=spec_dict["function"], @@ -269,14 +271,15 @@ def process_single( ) continue if input_path not in unprocessed.inputMetadata: - warnings.append( - ProcessingAnnotation( - source=[ - AnnotationSource(name=input_path, type=AnnotationSourceType.METADATA) - ], - message=f"Metadata field '{input_path}' not found in input", - ) - ) + # Suppress warning to prevent spamming for now until we have more sophisticated solution + # warnings.append( + # ProcessingAnnotation( + # source=[ + # AnnotationSource(name=input_path, type=AnnotationSourceType.METADATA) + # ], + # message=f"Metadata field 
'{input_path}' not found in input", + # ) + # ) continue input_data[arg_name] = unprocessed.inputMetadata[input_path] processing_result = ProcessingFunctions.call_function( diff --git a/preprocessing/nextclade/src/loculus_preprocessing/processing_functions.py b/preprocessing/nextclade/src/loculus_preprocessing/processing_functions.py index 2665fb11ae..f482895f78 100644 --- a/preprocessing/nextclade/src/loculus_preprocessing/processing_functions.py +++ b/preprocessing/nextclade/src/loculus_preprocessing/processing_functions.py @@ -120,7 +120,13 @@ def check_date( def process_date( input_data: InputMetadata, output_field, args: FunctionArgs = None ) -> ProcessingResult: - """Parse date string. If it's incomplete, add 01-01, if no year, return null and error""" + """Parse date string. If it's incomplete, add 01-01, if no year, return null and error + input_data: + date: str, date string to parse + release_date: str, optional release date to compare against + args: + required: bool, if true, return error if date is missing (optional) + """ logger.debug(f"input_data: {input_data}") date_str = input_data["date"] or "" release_date_str = input_data.get("release_date", "") or "" @@ -141,17 +147,19 @@ def process_date( errors = [] if len(date_str) == 0: + if args and args.get("required"): + errors.append( + ProcessingAnnotation( + source=[ + AnnotationSource(name=output_field, type=AnnotationSourceType.METADATA) + ], + message="Collection date is required", + ) + ) return ProcessingResult( datum=None, warnings=[], - errors=[ - ProcessingAnnotation( - source=[ - AnnotationSource(name=output_field, type=AnnotationSourceType.METADATA) - ], - message="Collection date is required", - ) - ], + errors=errors, ) for format, message in formats_to_messages.items(): diff --git a/website/src/components/SequenceDetailsPage/DataTableEntryValue.tsx b/website/src/components/SequenceDetailsPage/DataTableEntryValue.tsx index a7bbf7905d..b65d1e8b4a 100644 --- 
a/website/src/components/SequenceDetailsPage/DataTableEntryValue.tsx +++ b/website/src/components/SequenceDetailsPage/DataTableEntryValue.tsx @@ -25,7 +25,7 @@ const CustomDisplayComponent: React.FC = ({ data, dataUseTermsHistory }) ))} {customDisplay?.type === 'link' && customDisplay.url !== undefined && (