fix flaky cb tutorial test
Summary: Reduce training time and network size so the test completes faster. The goal is only to check the feedforward method, which does not require long training (see the sketch below).

Reviewed By: rodrigodesalvobraz

Differential Revision: D65942911

fbshipit-source-id: a5978fdd2c452a6e3b7a468b91abc73238e6a021
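
For orientation, a minimal sketch of the pattern this diff applies: shrink the hidden layer and the step counts so the run only exercises the feedforward path. This is a sketch, not the test itself; the import paths and the SquareCB gamma value are assumptions, while every other identifier and argument appears in the diff below.

# Sketch only. Import paths are assumptions based on the Pearl OSS
# repository layout; constructor arguments mirror the diff in this commit.
from pearl.pearl_agent import PearlAgent
from pearl.policy_learners.contextual_bandits.neural_bandit import NeuralBandit
from pearl.policy_learners.exploration_modules.contextual_bandits.squarecb_exploration import (
    SquareCBExploration,
)
from pearl.utils.functional_utils.train_and_eval.online_learning import online_learning


def run_cb_smoke_test(env, action_representation_module) -> None:
    # A single 2-unit hidden layer and 2 training rounds suffice to verify
    # that the feedforward path runs; no convergence is expected or checked.
    agent = PearlAgent(
        policy_learner=NeuralBandit(
            feature_dim=env.observation_dim + env.unique_labels_num,
            hidden_dims=[2],  # was [64, 16]
            training_rounds=2,  # was 10
            learning_rate=0.01,
            action_representation_module=action_representation_module,
            exploration_module=SquareCBExploration(gamma=10),  # gamma is illustrative
        ),
    )
    # 10 interaction steps instead of 300: every code path is hit, quickly.
    online_learning(
        agent=agent,
        env=env,
        number_of_steps=10,  # was 300
        print_every_x_steps=10,
        record_period=10,  # was 300
        learn_after_episode=True,
    )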
Yonathan Efroni authored and facebook-github-bot committed Nov 15, 2024
1 parent 357ca0c commit 5bea7f4
Showing 1 changed file with 11 additions and 11 deletions.
test/unit/test_tutorials/test_cb_tutorial.py (22 changes: 11 additions & 11 deletions)
@@ -58,7 +58,7 @@ def test_cb_tutorials(self) -> None:
         uci_data_path = "./utils/instantiations/environments/uci_datasets"
         if not os.path.exists(uci_data_path):
             os.makedirs(uci_data_path)
-        download_uci_data(data_path=uci_data_path)
+        download_uci_data(data_path=uci_data_path)
 
         # Built CB environment using the pendigits UCI dataset
         pendigits_uci_dict = {
@@ -71,8 +71,8 @@ def test_cb_tutorials(self) -> None:
         env = SLCBEnvironment(**pendigits_uci_dict)  # pyre-ignore
 
         # experiment code
-        number_of_steps = 300
-        record_period = 300
+        number_of_steps = 10
+        record_period = 10
 
         """
         SquareCB
@@ -85,8 +85,8 @@ def test_cb_tutorials(self) -> None:
         agent = PearlAgent(
             policy_learner=NeuralBandit(
                 feature_dim=env.observation_dim + env.unique_labels_num,
-                hidden_dims=[64, 16],
-                training_rounds=10,
+                hidden_dims=[2],
+                training_rounds=2,
                 learning_rate=0.01,
                 action_representation_module=action_representation_module,
                 exploration_module=SquareCBExploration(
@@ -101,7 +101,7 @@ def test_cb_tutorials(self) -> None:
             agent=agent,
             env=env,
             number_of_steps=number_of_steps,
-            print_every_x_steps=100,
+            print_every_x_steps=10,
             record_period=record_period,
             learn_after_episode=True,
         )
@@ -114,9 +114,9 @@ def test_cb_tutorials(self) -> None:
         agent = PearlAgent(
             policy_learner=NeuralLinearBandit(
                 feature_dim=env.observation_dim + env.unique_labels_num,
-                hidden_dims=[64, 16],
+                hidden_dims=[2],
                 state_features_only=False,
-                training_rounds=10,
+                training_rounds=2,
                 learning_rate=0.01,
                 action_representation_module=action_representation_module,
                 exploration_module=UCBExploration(alpha=1.0),
@@ -143,9 +143,9 @@ def test_cb_tutorials(self) -> None:
         agent = PearlAgent(
             policy_learner=NeuralLinearBandit(
                 feature_dim=env.observation_dim + env.unique_labels_num,
-                hidden_dims=[64, 16],
+                hidden_dims=[2],
                 state_features_only=False,
-                training_rounds=10,
+                training_rounds=2,
                 learning_rate=0.01,
                 action_representation_module=action_representation_module,
                 exploration_module=ThompsonSamplingExplorationLinear(),
@@ -158,7 +158,7 @@ def test_cb_tutorials(self) -> None:
             agent=agent,
             env=env,
             number_of_steps=number_of_steps,
-            print_every_x_steps=100,
+            print_every_x_steps=10,
             record_period=record_period,
             learn_after_episode=True,
         )
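
The same shrinkage is applied to both NeuralLinearBandit agents above (the UCB and Thompson sampling variants). As a reading aid, a hedged sketch of the post-change UCB configuration, again assuming Pearl's OSS import paths; the argument values mirror the diff:

# Sketch of the post-change NeuralLinearBandit (UCB variant) configuration.
# Import paths are assumptions; argument values mirror the diff above.
from pearl.pearl_agent import PearlAgent
from pearl.policy_learners.contextual_bandits.neural_linear_bandit import (
    NeuralLinearBandit,
)
from pearl.policy_learners.exploration_modules.contextual_bandits.ucb_exploration import (
    UCBExploration,
)


def make_neural_linear_agent(env, action_representation_module) -> PearlAgent:
    return PearlAgent(
        policy_learner=NeuralLinearBandit(
            feature_dim=env.observation_dim + env.unique_labels_num,
            hidden_dims=[2],  # one tiny hidden layer keeps the forward pass cheap
            state_features_only=False,  # use action features in addition to state features
            training_rounds=2,
            learning_rate=0.01,
            action_representation_module=action_representation_module,
            exploration_module=UCBExploration(alpha=1.0),
        ),
    )

Swapping in ThompsonSamplingExplorationLinear() for the exploration module yields the third agent in the diff.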
