diff --git a/e2e_pipeline/model.py b/e2e_pipeline/model.py
index d71a5b1..aaa6be0 100644
--- a/e2e_pipeline/model.py
+++ b/e2e_pipeline/model.py
@@ -15,7 +15,7 @@ class EntResModel(torch.nn.Module):
     def __init__(self, n_features, neumiss_depth, dropout_p, dropout_only_once, add_neumiss,
                  neumiss_deq, hidden_dim, n_hidden_layers, add_batchnorm, activation,
-                 negative_slope, hidden_config, sdp_max_iters, sdp_eps):
+                 negative_slope, hidden_config, sdp_max_iters, sdp_eps, use_rounded_loss=True):
         super().__init__()
         self.mlp_layer = MLPLayer(n_features=n_features, neumiss_depth=neumiss_depth, dropout_p=dropout_p,
                                   dropout_only_once=dropout_only_once, add_neumiss=add_neumiss, neumiss_deq=neumiss_deq,
@@ -24,24 +24,30 @@ def __init__(self, n_features, neumiss_depth, dropout_p, dropout_only_once, add_
         self.uncompress_layer = UncompressTransformLayer()
         self.sdp_layer = SDPLayer(max_iters=sdp_max_iters, eps=sdp_eps)
         self.hac_cut_layer = HACCutLayer()
+        self.use_rounded_loss = use_rounded_loss
 
     def forward(self, x, N, verbose=False):
         edge_weights = torch.squeeze(self.mlp_layer(x))
-        edge_weights_uncompressed = self.uncompress_layer(edge_weights, N)
-        output_probs = self.sdp_layer(edge_weights_uncompressed, N)
-        pred_clustering = self.hac_cut_layer(output_probs, edge_weights_uncompressed)
-
         if verbose:
             logger.info(f"Size of W = {edge_weights.size()}")
             logger.info(f"\n{edge_weights}")
+        edge_weights_uncompressed = self.uncompress_layer(edge_weights, N)
+        if verbose:
             logger.info(f"Size of W_matrix = {edge_weights_uncompressed.size()}")
             logger.info(f"\n{edge_weights_uncompressed}")
+        output_probs = self.sdp_layer(edge_weights_uncompressed, N)
+        if verbose:
             logger.info(f"Size of X = {output_probs.size()}")
             logger.info(f"\n{output_probs}")
-            logger.info(f"Size of X_r = {pred_clustering.size()}")
-            logger.info(f"\n{pred_clustering}")
+        if not self.training or self.use_rounded_loss:
+            pred_clustering = self.hac_cut_layer(output_probs, edge_weights_uncompressed)
+            if verbose:
+                logger.info(f"Size of X_r = {pred_clustering.size()}")
+                logger.info(f"\n{pred_clustering}")
+
+            return pred_clustering
 
-        return pred_clustering
+        return output_probs
diff --git a/e2e_scripts/train.py b/e2e_scripts/train.py
index 4e64845..9e2f6da 100644
--- a/e2e_scripts/train.py
+++ b/e2e_scripts/train.py
@@ -87,8 +87,8 @@ def train(hyperparams={}, verbose=False, project=None, entity=None, tags=None, g
     # Get data loaders (optionally with imputation, normalization)
     train_dataloader, val_dataloader, test_dataloader = get_dataloaders(hyp["dataset"], hyp["dataset_random_seed"],
                                                                         hyp["convert_nan"], hyp["nan_value"],
-                                                                        hyp["normalize_data"], hyp["subsample_sz"],
-                                                                        hyp["subsample_dev"], pairwise_mode,
+                                                                        hyp["normalize_data"], hyp["subsample_sz_train"],
+                                                                        hyp["subsample_sz_dev"], pairwise_mode,
                                                                         batch_size)
     n_features = train_dataloader.dataset[0][0].shape[1]
@@ -96,7 +96,8 @@
     if not pairwise_mode:
         model = EntResModel(n_features, neumiss_depth, dropout_p, dropout_only_once, add_neumiss,
                             neumiss_deq, hidden_dim, n_hidden_layers, add_batchnorm, activation,
-                            negative_slope, hidden_config, sdp_max_iters, sdp_eps)
+                            negative_slope, hidden_config, sdp_max_iters, sdp_eps,
+                            use_rounded_loss=hyp["use_rounded_loss"])
         # Define loss
         loss_fn = lambda pred, gold: torch.norm(gold - pred)
         # Define eval
diff --git a/e2e_scripts/train_utils.py b/e2e_scripts/train_utils.py
index d42656f..abb8afa 100644
--- a/e2e_scripts/train_utils.py
+++ b/e2e_scripts/train_utils.py
@@ -22,8 +22,8 @@
     # Dataset
     "dataset": "pubmed",
     "dataset_random_seed": 1,
-    "subsample_sz": -1,
-    "subsample_dev": True,
+    "subsample_sz_train": -1,
+    "subsample_sz_dev": -1,
     # Run config
     "run_random_seed": 17,
     # Data config
@@ -42,14 +42,15 @@
     "hidden_config": None,
     "activation": "leaky_relu",
     "negative_slope": 0.01,
+    "use_rounded_loss": True,
     # Solver config
     "sdp_max_iters": 50000,
-    "sdp_eps": 1e-3,
+    "sdp_eps": 1e-1,
     # Training config
     "batch_size": 10000,  # For pairwise_mode only
     "lr": 1e-4,
     "n_epochs": 5,
-    "weighted_loss": True,  # For pairwise_mode only; TODO: Implement for e2e
+    "weighted_loss": True,  # For pairwise_mode only; TODO: Think about implementing for e2e
     "use_lr_scheduler": True,
     "lr_scheduler": "plateau",  # "step"
     "lr_factor": 0.7,
@@ -58,7 +59,7 @@
     "lr_step_size": 200,
     "lr_gamma": 0.1,
     "weight_decay": 0.01,
-    "dev_opt_metric": 'b3_f1',  # e2e: {'vmeasure', 'b3_f1'}; pairwise: {'auroc', 'f1'}
+    "dev_opt_metric": 'b3_f1',  # e2e: {'b3_f1', 'vmeasure'}; pairwise: {'auroc', 'f1'}
     "overfit_batch_idx": -1
 }
@@ -70,19 +71,19 @@ def read_blockwise_features(pkl):
     return blockwise_data
 
 
-def get_dataloaders(dataset, dataset_seed, convert_nan, nan_value, normalize, subsample_sz, subsample_dev,
+def get_dataloaders(dataset, dataset_seed, convert_nan, nan_value, normalize, subsample_sz_train, subsample_sz_dev,
                     pairwise_mode, batch_size):
     train_pkl = f"{PREPROCESSED_DATA_DIR}/{dataset}/seed{dataset_seed}/train_features.pkl"
     val_pkl = f"{PREPROCESSED_DATA_DIR}/{dataset}/seed{dataset_seed}/val_features.pkl"
     test_pkl = f"{PREPROCESSED_DATA_DIR}/{dataset}/seed{dataset_seed}/test_features.pkl"
 
     train_dataset = S2BlocksDataset(read_blockwise_features(train_pkl), convert_nan=convert_nan, nan_value=nan_value,
-                                    scale=normalize, subsample_sz=subsample_sz, pairwise_mode=pairwise_mode)
+                                    scale=normalize, subsample_sz=subsample_sz_train, pairwise_mode=pairwise_mode)
     train_dataloader = DataLoader(train_dataset, shuffle=False, batch_size=batch_size)
 
     val_dataset = S2BlocksDataset(read_blockwise_features(val_pkl), convert_nan=convert_nan, nan_value=nan_value,
-                                  scale=normalize, scaler=train_dataset.scaler,
-                                  subsample_sz=subsample_sz if subsample_dev else -1, pairwise_mode=pairwise_mode)
+                                  scale=normalize, scaler=train_dataset.scaler, subsample_sz=subsample_sz_dev,
+                                  pairwise_mode=pairwise_mode)
     val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)
 
     test_dataset = S2BlocksDataset(read_blockwise_features(test_pkl), convert_nan=convert_nan, nan_value=nan_value,
diff --git a/run_sweep.sh b/run_sweep.sh
index a0ee53e..8cb378d 100644
--- a/run_sweep.sh
+++ b/run_sweep.sh
@@ -13,6 +13,6 @@ for ((i = 1; i <= ${n_seeds}; i++)); do
         --dataset_random_seed=${i} \
         --wandb_sweep_name="main_${dataset}_${i}" \
         --wandb_sweep_params="wandb_configs/sweeps/e2e_main.json" \
-        --skip_initial_eval --sdp_eps=1e-1
+        --skip_initial_eval --sdp_eps=1e-1 --silent
     echo " Logs: jobs/${JOB_NAME}.err"
 done
diff --git a/wandb_configs/sweeps/e2e_main.json b/wandb_configs/sweeps/e2e_main.json
index 9198a86..7fe1639 100644
--- a/wandb_configs/sweeps/e2e_main.json
+++ b/wandb_configs/sweeps/e2e_main.json
@@ -1,14 +1,14 @@
 {
     "n_epochs": {"value": 10},
-    "lr": {"max": 1e-1, "min": 1e-5},
+    "lr": {"max": 2e-1, "min": 1e-5},
     "weight_decay": {"values": [1e-1, 1e-2, 1e-3, 0]},
     "dev_opt_metric": {"value": "b3_f1"},
-    "neumiss_depth": {"values": [5, 10, 20]},
-    "hidden_dim": {"values": [256, 512, 1024]},
+    "neumiss_depth": {"values": [10, 20]},
+    "hidden_dim": {"values": [512, 1024]},
     "n_hidden_layers": {"values": [1, 2]},
     "dropout_p": {"values": [0, 0.1, 0.2, 0.3, 0.4, 0.5]},
-    "use_lr_scheduler": {"values": [true, false]},
     "lr_scheduler": {"values": ["plateau", "step"]},
-    "subsample_sz": {"value": 100},
+    "subsample_sz_train": {"value": 80},
+    "subsample_sz_dev": {"value": 100},
     "activation": {"values": ["leaky_relu", "relu"]}
 }
\ No newline at end of file