# examples/config_templates/config_Train_MultiOmicsData_template.yaml
# (The path above is kept as a comment: a bare scalar ahead of the mapping
# below makes the document invalid YAML.)

# Mode
mode: "Train"

# Directories
# Cache directory where the model template files are saved.
cache_dir: '/path_to_your_cache_dir/cache'
# Output directory for the model.
model_dir: '/path_to_your_output_model_dir/'
# Log directory.
log_dir: '/path_to_your_log_dir/logs/'

#### Input data files ####
train_file: '/path_to_your_trainfile/train.h5ad'
# Optional.
val_file: '/path_to_your_valfile/val.h5ad'
train_ADTfile: '/path_to_your_trained_ADT_file/train_ADT.h5ad'
# Optional.
val_ADTfile: '/path_to_your_val_ADT_file/val_ADT.h5ad'


########################################### General setting ####################################################
savelog: 'Yes'
# Name of the target to predict.
target_feature: 'celltype.l2'
# Number of bins used to discretize gene expression.
num_bins: 10

########## Model template and context method setting #########
# Options: "llama", "gpt", "bigbird", "scgent".
model_backbone_name: 'llama'
# Options: "small", "normal", "large"; "small" is suggested for llama.
model_backbone_size: 'small'
max_length: 5120
context_method: 'random'
# Setting this to Yes triggers the multiomics method.
multiomics: 'Yes'

########################################### Other settings ####################################################
depth: 2  # suggested: 2
min_cells: 50  # suggested: 50
# Set batch_size based on GPU memory: a higher value trains faster but uses
# much more GPU memory.
batch_size: 1
# Written with an explicit mantissa dot ("1.0e-5" rather than "1e-5") so that
# YAML 1.1 loaders such as PyYAML resolve it as a float instead of the string
# "1e-5". Suggested value: 1e-5.
learning_rate: 1.0e-5
num_epochs: 30  # suggested: 30