fix flaky cb tutorial test
Summary: Reduce training time and network size so the test completes faster. The goal is only to check the feedforward method, which does not require long training (see the sketch below).

Reviewed By: rodrigodesalvobraz

Differential Revision: D65942911

fbshipit-source-id: a5978fdd2c452a6e3b7a468b91abc73238e6a021
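
For orientation, a minimal sketch of the pattern this diff applies: shrink the hidden layer and the step counts so the run only exercises the feedforward path. This is a sketch, not the test itself; the import paths and the SquareCB gamma value are assumptions, while every other identifier and argument appears in the diff below.

# Sketch only. Import paths are assumptions based on the Pearl OSS
# repository layout; constructor arguments mirror the diff in this commit.
from pearl.pearl_agent import PearlAgent
from pearl.policy_learners.contextual_bandits.neural_bandit import NeuralBandit
from pearl.policy_learners.exploration_modules.contextual_bandits.squarecb_exploration import (
    SquareCBExploration,
)
from pearl.utils.functional_utils.train_and_eval.online_learning import online_learning


def run_cb_smoke_test(env, action_representation_module) -> None:
    # A single 2-unit hidden layer and 2 training rounds suffice to verify
    # that the feedforward path runs; no convergence is expected or checked.
    agent = PearlAgent(
        policy_learner=NeuralBandit(
            feature_dim=env.observation_dim + env.unique_labels_num,
            hidden_dims=[2],  # was [64, 16]
            training_rounds=2,  # was 10
            learning_rate=0.01,
            action_representation_module=action_representation_module,
            exploration_module=SquareCBExploration(gamma=10),  # gamma is illustrative
        ),
    )
    # 10 interaction steps instead of 300: every code path is hit, quickly.
    online_learning(
        agent=agent,
        env=env,
        number_of_steps=10,  # was 300
        print_every_x_steps=10,
        record_period=10,  # was 300
        learn_after_episode=True,
    )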
Yonathan Efroni authored and facebook-github-bot committed Nov 15, 2024
1 parent 357ca0c commit 5bea7f4
Showing 1 changed file with 11 additions and 11 deletions.
test/unit/test_tutorials/test_cb_tutorial.py (22 changes: 11 additions & 11 deletions)
@@ -58,7 +58,7 @@ def test_cb_tutorials(self) -> None:
         uci_data_path = "./utils/instantiations/environments/uci_datasets"
         if not os.path.exists(uci_data_path):
             os.makedirs(uci_data_path)
-        download_uci_data(data_path=uci_data_path)
+        download_uci_data(data_path=uci_data_path)
 
         # Built CB environment using the pendigits UCI dataset
         pendigits_uci_dict = {
@@ -71,8 +71,8 @@ def test_cb_tutorials(self) -> None:
         env = SLCBEnvironment(**pendigits_uci_dict)  # pyre-ignore
 
         # experiment code
-        number_of_steps = 300
-        record_period = 300
+        number_of_steps = 10
+        record_period = 10
 
         """
         SquareCB
@@ -85,8 +85,8 @@ def test_cb_tutorials(self) -> None:
         agent = PearlAgent(
             policy_learner=NeuralBandit(
                 feature_dim=env.observation_dim + env.unique_labels_num,
-                hidden_dims=[64, 16],
-                training_rounds=10,
+                hidden_dims=[2],
+                training_rounds=2,
                 learning_rate=0.01,
                 action_representation_module=action_representation_module,
                 exploration_module=SquareCBExploration(
@@ -101,7 +101,7 @@ def test_cb_tutorials(self) -> None:
             agent=agent,
             env=env,
             number_of_steps=number_of_steps,
-            print_every_x_steps=100,
+            print_every_x_steps=10,
             record_period=record_period,
             learn_after_episode=True,
         )
@@ -114,9 +114,9 @@ def test_cb_tutorials(self) -> None:
         agent = PearlAgent(
             policy_learner=NeuralLinearBandit(
                 feature_dim=env.observation_dim + env.unique_labels_num,
-                hidden_dims=[64, 16],
+                hidden_dims=[2],
                 state_features_only=False,
-                training_rounds=10,
+                training_rounds=2,
                 learning_rate=0.01,
                 action_representation_module=action_representation_module,
                 exploration_module=UCBExploration(alpha=1.0),
@@ -143,9 +143,9 @@ def test_cb_tutorials(self) -> None:
         agent = PearlAgent(
             policy_learner=NeuralLinearBandit(
                 feature_dim=env.observation_dim + env.unique_labels_num,
-                hidden_dims=[64, 16],
+                hidden_dims=[2],
                 state_features_only=False,
-                training_rounds=10,
+                training_rounds=2,
                 learning_rate=0.01,
                 action_representation_module=action_representation_module,
                 exploration_module=ThompsonSamplingExplorationLinear(),
@@ -158,7 +158,7 @@ def test_cb_tutorials(self) -> None:
             agent=agent,
             env=env,
             number_of_steps=number_of_steps,
-            print_every_x_steps=100,
+            print_every_x_steps=10,
             record_period=record_period,
             learn_after_episode=True,
         )
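
The same shrinkage is applied to both NeuralLinearBandit agents above (the UCB and Thompson sampling variants). As a reading aid, a hedged sketch of the post-change UCB configuration, again assuming Pearl's OSS import paths; the argument values mirror the diff:

# Sketch of the post-change NeuralLinearBandit (UCB variant) configuration.
# Import paths are assumptions; argument values mirror the diff above.
from pearl.pearl_agent import PearlAgent
from pearl.policy_learners.contextual_bandits.neural_linear_bandit import (
    NeuralLinearBandit,
)
from pearl.policy_learners.exploration_modules.contextual_bandits.ucb_exploration import (
    UCBExploration,
)


def make_neural_linear_agent(env, action_representation_module) -> PearlAgent:
    return PearlAgent(
        policy_learner=NeuralLinearBandit(
            feature_dim=env.observation_dim + env.unique_labels_num,
            hidden_dims=[2],  # one tiny hidden layer keeps the forward pass cheap
            state_features_only=False,  # use action features in addition to state features
            training_rounds=2,
            learning_rate=0.01,
            action_representation_module=action_representation_module,
            exploration_module=UCBExploration(alpha=1.0),
        ),
    )

Swapping in ThompsonSamplingExplorationLinear() for the exploration module yields the third agent in the diff.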
