Curriculum #38

Merged · 2 commits · Jun 11, 2024
39 changes: 27 additions & 12 deletions scripts/a2c/a2c.py
@@ -77,39 +77,53 @@ def main(cfg: "DictConfig"):
yaml.dump(cfg_dict, yaml_file, default_flow_style=False)

# Define training task and run
if cfg.get("molscore", None):
if cfg.get("molscore_task", None):

if not _has_molscore:
raise RuntimeError(
"MolScore library not found. Unable to create a scoring function. "
"To install MolScore, use: `pip install MolScore`"
) from MOLSCORE_ERR

if cfg.molscore in MolScoreBenchmark.presets:
if cfg.molscore_mode == "single":
# Save molscore output. Also redirect output to save_dir
cfg.molscore_task = shutil.copy(cfg.molscore_task, save_dir)
data = json.load(open(cfg.molscore_task, "r"))
json.dump(data, open(cfg.molscore_task, "w"), indent=4)
task = MolScore(
model_name=cfg.agent_name,
task_config=cfg.molscore_task,
budget=cfg.total_smiles,
output_dir=os.path.abspath(save_dir),
add_run_dir=False,
**cfg.get("molscore_kwargs", {}),
)
run_a2c(cfg, task)

if cfg.molscore_mode == "benchmark":
MSB = MolScoreBenchmark(
model_name=cfg.agent_name,
model_parameters=dict(cfg),
benchmark=cfg.molscore,
benchmark=cfg.molscore_task,
budget=cfg.total_smiles,
output_dir=os.path.abspath(save_dir),
add_benchmark_dir=False,
include=cfg.molscore_include,
**cfg.get("molscore_kwargs", {}),
)
for task in MSB:
run_a2c(cfg, task)
else:
# Save molscore output. Also redirect output to save_dir
cfg.molscore = shutil.copy(cfg.molscore, save_dir)
data = json.load(open(cfg.molscore, "r"))
json.dump(data, open(cfg.molscore, "w"), indent=4)
task = MolScore(

if cfg.molscore_mode == "curriculum":
task = MolScoreCurriculum(
model_name=cfg.agent_name,
task_config=cfg.molscore,
model_parameters=dict(cfg),
benchmark=cfg.molscore_task,
budget=cfg.total_smiles,
output_dir=os.path.abspath(save_dir),
add_run_dir=False,
**cfg.get("molscore_kwargs", {}),
)
run_a2c(cfg, task)

elif cfg.get("custom_task", None):
if cfg.custom_task not in custom_scoring_functions:
register_custom_scoring_function(cfg.custom_task, cfg.custom_task)
@@ -120,6 +134,7 @@ def main(cfg: "DictConfig"):
output_dir=save_dir,
)
run_a2c(cfg, task)

else:
raise ValueError("No scoring function specified.")

6 changes: 4 additions & 2 deletions scripts/a2c/config_denovo.yaml
@@ -10,8 +10,10 @@ num_envs: 16 # Number of smiles to generate in parallel
total_smiles: 10_000 # Total number of smiles to generate

# Scoring function
molscore: MolOpt
molscore_include: ["Albuterol_similarity"]
molscore_mode: benchmark # single, benchmark, or curriculum
molscore_task: MolOpt # task configuration (JSON), benchmark (preset only), or curriculum task (preset only)
molscore_kwargs:
include: ["Albuterol_similarity"]
custom_task: null # Requires molscore to be set to null

# Promptsmiles configuration
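For reference, a minimal sketch of what the new scoring-function block might look like in "single" mode, where molscore_task points at a MolScore task configuration JSON rather than a benchmark preset. The file path below is an illustrative assumption, not part of this diff:

# Scoring function (hypothetical "single" mode variant)
molscore_mode: single # single, benchmark, or curriculum
molscore_task: ./my_task_config.json # assumed path to a task configuration (JSON)
molscore_kwargs: {} # optional extra keyword arguments forwarded to MolScore
custom_task: null # must remain null when a molscore task is set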
6 changes: 4 additions & 2 deletions scripts/a2c/config_fragment.yaml
@@ -10,8 +10,10 @@ num_envs: 16 # Number of smiles to generate in parallel
total_smiles: 10_000 # Total number of smiles to generate

# Scoring function
molscore: MolOpt
molscore_include: ["Celecoxxib_rediscovery"]
molscore_mode: benchmark # single, benchmark, or curriculum
molscore_task: MolOpt # task configuration (JSON), benchmark (preset only), or curriculum task (preset only)
molscore_kwargs:
include: ["Celecoxxib_rediscovery"]
custom_task: null # Requires molscore to be set to null

# Promptsmiles configuration
6 changes: 4 additions & 2 deletions scripts/a2c/config_scaffold.yaml
@@ -10,8 +10,10 @@ num_envs: 16 # Number of smiles to generate in parallel
total_smiles: 10_000 # Total number of smiles to generate

# Scoring function
molscore: LibINVENT_Exp1
molscore_include: ["DRD2_SelRF_SubFilt_DF"]
molscore_mode: benchmark # single, benchmark, or curriculum
molscore_task: LibINVENT_Exp1 # task configuration (JSON), benchmark (preset only), or curriculum task (preset only)
molscore_kwargs:
include: ["DRD2_SelRF_SubFilt_DF"]
custom_task: null # Requires molscore to be set to null

# Promptsmiles configuration
41 changes: 28 additions & 13 deletions scripts/ahc/ahc.py
@@ -79,39 +79,53 @@ def main(cfg: "DictConfig"):
yaml.dump(cfg_dict, yaml_file, default_flow_style=False)

# Define training task and run
if cfg.get("molscore", None):
if cfg.get("molscore_task", None):

if not _has_molscore:
raise RuntimeError(
"MolScore library not found. Unable to create a scoring function. "
"To install MolScore, use: `pip install MolScore`"
) from MOLSCORE_ERR

if cfg.molscore in MolScoreBenchmark.presets:
if cfg.molscore_mode == "single":
# Save molscore output. Also redirect output to save_dir
cfg.molscore_task = shutil.copy(cfg.molscore_task, save_dir)
data = json.load(open(cfg.molscore_task, "r"))
json.dump(data, open(cfg.molscore_task, "w"), indent=4)
task = MolScore(
model_name=cfg.agent_name,
task_config=cfg.molscore_task,
budget=cfg.total_smiles,
output_dir=os.path.abspath(save_dir),
add_run_dir=False,
**cfg.get("molscore_kwargs", {}),
)
run_ahc(cfg, task)

if cfg.molscore_mode == "benchmark":
MSB = MolScoreBenchmark(
model_name=cfg.agent_name,
model_parameters=dict(cfg),
benchmark=cfg.molscore,
benchmark=cfg.molscore_task,
budget=cfg.total_smiles,
output_dir=os.path.abspath(save_dir),
add_benchmark_dir=False,
include=cfg.molscore_include,
**cfg.get("molscore_kwargs", {}),
)
for task in MSB:
run_ahc(cfg, task)
else:
# Save molscore output. Also redirect output to save_dir
cfg.molscore = shutil.copy(cfg.molscore, save_dir)
data = json.load(open(cfg.molscore, "r"))
json.dump(data, open(cfg.molscore, "w"), indent=4)
task = MolScore(
run_a2c(cfg, task)

if cfg.molscore_mode == "curriculum":
task = MolScoreCurriculum(
model_name=cfg.agent_name,
task_config=cfg.molscore,
model_parameters=dict(cfg),
benchmark=cfg.molscore_task,
budget=cfg.total_smiles,
output_dir=os.path.abspath(save_dir),
add_run_dir=False,
**cfg.get("molscore_kwargs", {}),
)
run_ahc(cfg, task)

elif cfg.get("custom_task", None):
if cfg.custom_task not in custom_scoring_functions:
register_custom_scoring_function(cfg.custom_task, cfg.custom_task)
@@ -122,6 +136,7 @@ def main(cfg: "DictConfig"):
output_dir=save_dir,
)
run_ahc(cfg, task)

else:
raise ValueError("No scoring function specified.")

6 changes: 4 additions & 2 deletions scripts/ahc/config_denovo.yaml
@@ -10,8 +10,10 @@ num_envs: 128 # Number of smiles to generate in parallel
total_smiles: 10_000 # Total number of smiles to generate

# Scoring function
molscore: MolOpt
molscore_include: ["Albuterol_similarity"]
molscore_mode: benchmark # single, benchmark, or curriculum
molscore_task: MolOpt # task configuration (JSON), benchmark (preset only), or curriculum task (preset only)
molscore_kwargs:
include: ["Albuterol_similarity"]
custom_task: null # Requires molscore to be set to null

# Promptsmiles configuration
8 changes: 5 additions & 3 deletions scripts/ahc/config_fragment.yaml
@@ -10,15 +10,17 @@ num_envs: 128 # Number of smiles to generate in parallel
total_smiles: 10_000 # Total number of smiles to generate

# Scoring function
molscore: MolOpt
molscore_include: ["Celecoxxib_rediscovery"]
molscore_mode: benchmark # single, benchmark, or curriculum
molscore_task: MolOpt # task configuration (JSON), benchmark (preset only), or curriculum task (preset only)
molscore_kwargs:
include: ["Celecoxxib_rediscovery"]
custom_task: null # Requires molscore to be set to null

# Promptsmiles configuration
promptsmiles: c1(C)ccc(*)cc1.NS(=O)(=O)(*)
promptsmiles_optimize: True
promptsmiles_shuffle: True
promptsmiles_multi: True
promptsmiles_multi: False

# Model architecture
model: gru # gru, lstm, or gpt2
6 changes: 4 additions & 2 deletions scripts/ahc/config_scaffold.yaml
@@ -10,8 +10,10 @@ num_envs: 128 # Number of smiles to generate in parallel
total_smiles: 10_000 # Total number of smiles to generate

# Scoring function
molscore: LibINVENT_Exp1
molscore_include: ["DRD2_SelRF_SubFilt_DF"]
molscore_mode: benchmark # single, benchmark, or curriculum
molscore_task: LibINVENT_Exp1 # task configuration (JSON), benchmark (preset only), or curriculum task (preset only)
molscore_kwargs:
include: ["DRD2_SelRF_SubFilt_DF"]
custom_task: null # Requires molscore to be set to null

# Promptsmiles configuration
6 changes: 4 additions & 2 deletions scripts/dpo/config_denovo.yaml
@@ -10,8 +10,10 @@ num_envs: 128 # Number of smiles to generate in parallel
total_smiles: 20_000 # Total number of smiles to generate

# Scoring function
molscore: MolOpt
molscore_include: ["Albuterol_similarity"]
molscore_mode: benchmark # single, benchmark, or curriculum
molscore_task: MolOpt # task configuration (JSON), benchmark (preset only), or curriculum task (preset only)
molscore_kwargs:
include: ["Albuterol_similarity"]
custom_task: null # Requires molscore to be set to null

# Promptsmiles configuration
6 changes: 4 additions & 2 deletions scripts/dpo/config_fragment.yaml
@@ -10,8 +10,10 @@ num_envs: 128 # Number of smiles to generate in parallel
total_smiles: 20_000 # Total number of smiles to generate

# Scoring function
molscore: MolOpt
molscore_include: ["Celecoxxib_rediscovery"]
molscore_mode: benchmark # single, benchmark, or curriculum
molscore_task: MolOpt # task configuration (JSON), benchmark (preset only), or curriculum task (preset only)
molscore_kwargs:
include: ["Celecoxxib_rediscovery"]
custom_task: null # Requires molscore to be set to null

# Promptsmiles configuration
6 changes: 4 additions & 2 deletions scripts/dpo/config_scaffold.yaml
@@ -10,8 +10,10 @@ num_envs: 128 # Number of smiles to generate in parallel
total_smiles: 20_000 # Total number of smiles to generate

# Scoring function
molscore: LibINVENT_Exp1
molscore_include: ["DRD2_SelRF_SubFilt_DF"]
molscore_mode: benchmark # single, benchmark, or curriculum
molscore_task: LibINVENT_Exp1 # task configuration (JSON), benchmark (preset only), or curriculum task (preset only)
molscore_kwargs:
include: ["DRD2_SelRF_SubFilt_DF"]
custom_task: null # Requires molscore to be set to null

# Promptsmiles configuration
39 changes: 27 additions & 12 deletions scripts/dpo/dpo.py
@@ -71,39 +71,53 @@ def main(cfg: "DictConfig"):
yaml.dump(cfg_dict, yaml_file, default_flow_style=False)

# Define training task and run
if cfg.get("molscore", None):
if cfg.get("molscore_task", None):

if not _has_molscore:
raise RuntimeError(
"MolScore library not found. Unable to create a scoring function. "
"To install MolScore, use: `pip install MolScore`"
) from MOLSCORE_ERR

if cfg.molscore in MolScoreBenchmark.presets:
if cfg.molscore_mode == "single":
# Save molscore output. Also redirect output to save_dir
cfg.molscore_task = shutil.copy(cfg.molscore_task, save_dir)
data = json.load(open(cfg.molscore_task, "r"))
json.dump(data, open(cfg.molscore_task, "w"), indent=4)
task = MolScore(
model_name=cfg.agent_name,
task_config=cfg.molscore_task,
budget=cfg.total_smiles,
output_dir=os.path.abspath(save_dir),
add_run_dir=False,
**cfg.get("molscore_kwargs", {}),
)
run_dpo(cfg, task)

if cfg.molscore_mode == "benchmark":
MSB = MolScoreBenchmark(
model_name=cfg.agent_name,
model_parameters=dict(cfg),
benchmark=cfg.molscore,
benchmark=cfg.molscore_task,
budget=cfg.total_smiles,
output_dir=os.path.abspath(save_dir),
add_benchmark_dir=False,
include=cfg.molscore_include,
**cfg.get("molscore_kwargs", {}),
)
for task in MSB:
run_dpo(cfg, task)
else:
# Save molscore output. Also redirect output to save_dir
cfg.molscore = shutil.copy(cfg.molscore, save_dir)
data = json.load(open(cfg.molscore, "r"))
json.dump(data, open(cfg.molscore, "w"), indent=4)
task = MolScore(

if cfg.molscore_mode == "curriculum":
task = MolScoreCurriculum(
model_name=cfg.agent_name,
task_config=cfg.molscore,
model_parameters=dict(cfg),
benchmark=cfg.molscore_task,
budget=cfg.total_smiles,
output_dir=os.path.abspath(save_dir),
add_run_dir=False,
**cfg.get("molscore_kwargs", {}),
)
run_dpo(cfg, task)

elif cfg.get("custom_task", None):
if cfg.custom_task not in custom_scoring_functions:
register_custom_scoring_function(cfg.custom_task, cfg.custom_task)
@@ -114,6 +128,7 @@ def main(cfg: "DictConfig"):
output_dir=save_dir,
)
run_dpo(cfg, task)

else:
raise ValueError("No scoring function specified.")

6 changes: 4 additions & 2 deletions scripts/ppo/config_denovo.yaml
@@ -10,8 +10,10 @@ num_envs: 64 # Number of smiles to generate in parallel
total_smiles: 10_000 # Total number of smiles to generate

# Scoring function
molscore: MolOpt
molscore_include: ["Albuterol_similarity"]
molscore_mode: benchmark # single, benchmark, or curriculum
molscore_task: MolOpt # task configuration (JSON), benchmark (preset only), or curriculum task (preset only)
molscore_kwargs:
include: ["Albuterol_similarity"]
custom_task: null # Requires molscore to be set to null

# Promptsmiles configuration
6 changes: 4 additions & 2 deletions scripts/ppo/config_fragment.yaml
@@ -10,8 +10,10 @@ num_envs: 64 # Number of smiles to generate in parallel
total_smiles: 10_000 # Total number of smiles to generate

# Scoring function
molscore: MolOpt
molscore_include: ["Celecoxxib_rediscovery"]
molscore_mode: benchmark # single, benchmark, or curriculum
molscore_task: MolOpt # task configuration (JSON), benchmark (preset only), or curriculum task (preset only)
molscore_kwargs:
include: ["Celecoxxib_rediscovery"]
custom_task: null # Requires molscore to be set to null

# Promptsmiles configuration
6 changes: 4 additions & 2 deletions scripts/ppo/config_scaffold.yaml
@@ -10,8 +10,10 @@ num_envs: 64 # Number of smiles to generate in parallel
total_smiles: 10_000 # Total number of smiles to generate

# Scoring function
molscore: LibINVENT_Exp1
molscore_include: ["DRD2_SelRF_SubFilt_DF"]
molscore_mode: benchmark # single, benchmark, or curriculum
molscore_task: LibINVENT_Exp1 # task configuration (JSON), benchmark (preset only), or curriculum task (preset only)
molscore_kwargs:
include: ["DRD2_SelRF_SubFilt_DF"]
custom_task: null # Requires molscore to be set to null

# Promptsmiles configuration
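By the same pattern, a curriculum run would be selected by switching molscore_mode and naming a curriculum preset; the diffs above only exercise benchmark mode, so the preset name below is a placeholder assumption:

# Scoring function (hypothetical "curriculum" mode variant)
molscore_mode: curriculum # single, benchmark, or curriculum
molscore_task: MyCurriculumPreset # placeholder: curriculum task (preset only)
molscore_kwargs: {} # optional extra keyword arguments forwarded to MolScoreCurriculum
custom_task: null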