Skip to content

Commit

Permalink
chore: update l2g train and predict configs (#109)
Browse files (browse the repository at this point in the history)
* chore: update l2g train and predict configs

* chore(l2g): set cross validate to false

* chore: remove `interactions` from l2g training dependencies
  • Loading branch information
ireneisdoomed authored Feb 19, 2025
1 parent e0e31ce commit b49922a
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 24 deletions.
25 changes: 12 additions & 13 deletions src/ot_orchestration/dags/config/genetics_etl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -205,34 +205,33 @@ nodes:
params:
step: locus_to_gene
step.run_mode: train
+step.session.extended_spark_conf: "{spark.kryoserializer.buffer.max:500m, spark.sql.autoBroadcastJoinThreshold:'-1'}"
step.wandb_run_name: 24.11_freeze10
step.cross_validate: false
step.hf_hub_repo_id: opentargets/locus_to_gene
step.hf_model_commit_message: 'chore: update model based on 24.11_freeze10 run'
step.model_path: '{release_dir}/locus_to_gene_model/classifier.skops'
# INPUTS
step.credible_set_path: '{release_dir}/credible_set'
step.variant_index_path: '{release_dir}/variant_index'
step.feature_matrix_path: '{release_dir}/locus_to_gene_feature_matrix'
step.gold_standard_curation_path: gs://genetics_etl_python_playground/input/l2g/gold_standard/curation.json
step.gene_interactions_path: gs://genetics_etl_python_playground/static_assets/interaction # OTP 23.12 data
step.hyperparameters.n_estimators: 100
step.hyperparameters.max_depth: 5
step.hyperparameters.loss: log_loss
+step.session.extended_spark_conf: "{spark.kryoserializer.buffer.max:500m, spark.sql.autoBroadcastJoinThreshold:'-1'}"
# OUTPUTS
step.model_path: '{release_dir}/locus_to_gene_model/classifier.skops'

- id: l2g_predict
prerequisites:
- l2g_train
params:
step: locus_to_gene
step.run_mode: predict
step.predictions_path: '{release_dir}/locus_to_gene_predictions'
step.feature_matrix_path: '{release_dir}/locus_to_gene_feature_matrix'
step.credible_set_path: '{release_dir}/credible_set'
step.download_from_hub: false
step.session.write_mode: overwrite
step.l2g_threshold: 0.05
step.model_path: gs://ot_orchestration/benchmarks/l2g/fm0/v5.1_best_cv/locus_to_gene_model/classifier.skops
step.download_from_hub: true
# INPUTS
step.hf_hub_repo_id: opentargets/locus_to_gene
step.session.write_mode: overwrite
step.feature_matrix_path: '{release_dir}/locus_to_gene_feature_matrix'
step.credible_set_path: '{release_dir}/credible_set'
# OUTPUTS
step.predictions_path: '{release_dir}/locus_to_gene_predictions'

- id: l2g_evidence
prerequisites:
Expand Down
15 changes: 5 additions & 10 deletions src/ot_orchestration/dags/config/gentropy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -202,38 +202,33 @@ steps:
cluster_name: otg-etl
params:
step: locus_to_gene
step.run_mode: train
step.session.write_mode: ignore
+step.session.extended_spark_conf: "{spark.kryoserializer.buffer.max:500m, spark.sql.autoBroadcastJoinThreshold:'-1'}"
step.run_mode: train
step.hyperparameters.n_estimators: 100
step.hyperparameters.max_depth: 5
step.hyperparameters.loss: log_loss
step.wandb_run_name: '{l2g_training}'
step.cross_validate: false
step.hf_hub_repo_id: opentargets/locus_to_gene
step.hf_model_commit_message: 'chore: update model base model for {l2g_training} run'
# INPUTS
step.credible_set_path: '{release_uri}/output/credible_set'
step.variant_index_path: '{release_uri}/output/variant'
step.feature_matrix_path: '{release_uri}/output/l2g_feature_matrix'
step.gold_standard_curation_path: '{release_uri}/inputs/l2g/gold_standard.json'
step.gene_interactions_path: '{release_uri}/output/interaction'
# OUTPUTS
step.model_path: '{release_uri}/etc/model/locus_to_gene_model/classifier.skops'

l2g_prediction:
cluster_name: otg-etl
params:
step: locus_to_gene
step.run_mode: predict
step.session.write_mode: ignore
step.session.output_partitions: 1
step.run_mode: predict
step.l2g_threshold: 0.05
step.download_from_hub: false
step.hf_hub_repo_id: opentargets/locus_to_gene
step.download_from_hub: true
# INPUTS
step.hf_hub_repo_id: opentargets/locus_to_gene
step.feature_matrix_path: '{release_uri}/output/l2g_feature_matrix'
step.credible_set_path: '{release_uri}/output/credible_set'
step.model_path: '{release_uri}/etc/model/locus_to_gene_model/classifier.skops'
# OUTPUTS
step.predictions_path: '{release_uri}/output/l2g_prediction'

Expand Down
1 change: 0 additions & 1 deletion src/ot_orchestration/dags/config/unified_pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,6 @@ steps:
- gentropy_variant
- gentropy_l2g_feature_matrix
- pis_l2g_gold_standard
- etl_interaction

gentropy_l2g_prediction:
depends_on:
Expand Down

0 comments on commit b49922a

Please sign in to comment.