Merge pull request #72 from BIMSBbioinfo/71-regulate-hpo-configs-according-to-input

71 regulate hpo configs according to input
borauyar authored Apr 2, 2024
2 parents 33df51f + 913bebb commit 1197618
Showing 7 changed files with 43 additions and 29 deletions.
26 changes: 8 additions & 18 deletions flexynesis/config.py
@@ -6,48 +6,38 @@
 search_spaces = {
     'DirectPred': [
         Integer(16, 128, name='latent_dim'),
-        Integer(64, 512, name='hidden_dim'),
+        Real(0.5, 2, name='hidden_dim_factor'), # relative size of the hidden_dim w.r.t input_dim
         Real(0.0001, 0.01, prior='log-uniform', name='lr'),
-        Integer(32, 128, name='batch_size'),
+        Integer(16, 128, name='supervisor_hidden_dim'),
         Categorical(epochs, name='epochs')
     ],
     'supervised_vae': [
         Integer(16, 128, name='latent_dim'),
-        Integer(64, 512, name='hidden_dim'),
-        Integer(8, 32, name='supervisor_hidden_dim'),
+        Real(0.5, 2, name='hidden_dim_factor'), # relative size of the hidden_dim w.r.t input_dim
+        Integer(16, 128, name='supervisor_hidden_dim'),
         Real(0.0001, 0.01, prior='log-uniform', name='lr'),
-        Integer(32, 128, name='batch_size'),
         Categorical(epochs, name='epochs')
     ],
     'CrossModalPred': [
         Integer(16, 128, name='latent_dim'),
-        Integer(64, 512, name='hidden_dim'),
+        Real(0.5, 2, name='hidden_dim_factor'), # relative size of the hidden_dim w.r.t input_dim
         Integer(8, 32, name='supervisor_hidden_dim'),
         Real(0.0001, 0.01, prior='log-uniform', name='lr'),
-        Integer(32, 128, name='batch_size'),
         Categorical(epochs, name='epochs')
     ],
     'MultiTripletNetwork': [
         Integer(16, 128, name='latent_dim'),
-        Integer(64, 512, name='hidden_dim'),
+        Real(0.5, 2, name='hidden_dim_factor'), # relative size of the hidden_dim w.r.t input_dim
         Integer(8, 32, name='supervisor_hidden_dim'),
         Real(0.0001, 0.01, prior='log-uniform', name='lr'),
-        Integer(32, 128, name='batch_size'),
         Categorical(epochs, name='epochs')
     ],
-    "DirectPredCNN": [
-        Integer(16, 128, name="latent_dim"),
-        Integer(64, 512, name="hidden_dim"),
-        Real(0.0001, 0.01, prior="log-uniform", name="lr"),
-        Integer(32, 128, name="batch_size"),
-        Categorical(epochs, name="epochs")
-    ],
     "DirectPredGCNN": [
         Integer(16, 128, name="latent_dim"),
-        Integer(64, 512, name="hidden_dim"),
+        Real(0.5, 2, name='hidden_dim_factor'), # relative size of the hidden_dim w.r.t input_dim
         Real(0.0001, 0.01, prior="log-uniform", name="lr"),
-        Integer(32, 128, name="batch_size"),
         Categorical(epochs, name="epochs"),
+        Integer(16, 128, name='supervisor_hidden_dim'),
         Categorical(['relu', 'sigmoid', 'tanh', 'gelu'], name="activation")
     ]
 }
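
Instead of tuning an absolute hidden_dim, the search spaces now tune a relative hidden_dim_factor, so each model derives its hidden layer width from the feature count of the corresponding input layer. A minimal sketch of how a sampled factor resolves to concrete widths (the feature counts below are invented for illustration):

    # hypothetical illustration; the feature counts are made up
    hidden_dim_factor = 0.75  # e.g. a value sampled from Real(0.5, 2)
    input_dims = {'gex': 5000, 'cnv': 800}  # features per omics layer
    hidden_dims = {layer: int(n * hidden_dim_factor) for layer, n in input_dims.items()}
    print(hidden_dims)  # {'gex': 3750, 'cnv': 600}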
11 changes: 11 additions & 0 deletions flexynesis/main.py
@@ -86,9 +86,20 @@ def __init__(self, dataset, model_class, config_name, target_variables,
         else:
             if self.config_name in search_spaces:
                 self.space = search_spaces[self.config_name]
+                # get batch sizes (a function of dataset size)
+                self.space.append(self.get_batch_space())
             else:
                 raise ValueError(f"'{self.config_name}' not found in the default config.")
 
+    def get_batch_space(self, min_size = 16, max_size = 256):
+        m = int(np.log2(len(self.dataset) * (1 - self.val_size)))
+        st = int(np.log2(min_size))
+        end = int(np.log2(max_size))
+        if m < end:
+            end = m
+        s = Categorical([np.power(2, x) for x in range(st, end+1)], name = 'batch_size')
+        return s
+
     def objective(self, params, current_step, total_steps):
 
         # args common to all model classes
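The new get_batch_space helper replaces the fixed Integer(32, 128, name='batch_size') entries: candidate batch sizes are powers of two between min_size and max_size, capped so that no candidate exceeds the training split. A standalone sketch of the same logic (the function name, val_size value, and sample sizes here are invented for illustration):

    import numpy as np

    def batch_candidates(n_samples, val_size=0.2, min_size=16, max_size=256):
        # powers of two from min_size up to max_size, capped by the training-set size
        m = int(np.log2(n_samples * (1 - val_size)))
        st, end = int(np.log2(min_size)), int(np.log2(max_size))
        end = min(end, m)
        return [int(2 ** x) for x in range(st, end + 1)]

    print(batch_candidates(1000))  # [16, 32, 64, 128, 256] (800 training samples)
    print(batch_candidates(100))   # [16, 32, 64] (80 training samples)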
9 changes: 7 additions & 2 deletions flexynesis/models/crossmodal_pred.py
@@ -67,7 +67,10 @@ def __init__(self, config, dataset, target_variables = None, batch_variables =

         # create a list of Encoder instances for separately encoding each input omics layer
         input_dims = [len(dataset.features[self.input_layers[i]]) for i in range(len(self.input_layers))]
-        self.encoders = nn.ModuleList([Encoder(input_dims[i], [config['hidden_dim']], config['latent_dim'])
+        self.encoders = nn.ModuleList([Encoder(input_dims[i],
+                                               # define hidden_dim size as a factor of input_dim
+                                               [int(input_dims[i] * config['hidden_dim_factor'])],
+                                               config['latent_dim'])
                                        for i in range(len(self.input_layers))])
 
         # Fully connected layers for concatenated means and log_vars
@@ -76,7 +79,9 @@ def __init__(self, config, dataset, target_variables = None, batch_variables =

         # list of decoders to decode the latent layer into the target/output layers
         output_dims = [len(dataset.features[self.output_layers[i]]) for i in range(len(self.output_layers))]
-        self.decoders = nn.ModuleList([Decoder(config['latent_dim'], [config['hidden_dim']], output_dims[i])
+        self.decoders = nn.ModuleList([Decoder(config['latent_dim'],
+                                               [int(input_dims[i] * config['hidden_dim_factor'])],
+                                               output_dims[i])
                                        for i in range(len(self.output_layers))])
 
         # define supervisor heads
5 changes: 3 additions & 2 deletions flexynesis/models/direct_pred.py
@@ -50,7 +50,8 @@ def __init__(self, config, dataset, target_variables, batch_variables = None,

         self.encoders = nn.ModuleList([
             MLP(input_dim=self.input_dims[i],
-                hidden_dim=self.config['hidden_dim'],
+                # define hidden_dim size relative to the input_dim size
+                hidden_dim=int(self.input_dims[i] * self.config['hidden_dim_factor']),
                 output_dim=self.config['latent_dim']) for i in range(len(self.layers))])
 
         self.MLPs = nn.ModuleDict() # using ModuleDict to store multiple MLPs
@@ -60,7 +61,7 @@ def __init__(self, config, dataset, target_variables, batch_variables = None,
             else:
                 num_class = len(np.unique(self.ann[var]))
             self.MLPs[var] = MLP(input_dim=self.config['latent_dim'] * len(self.layers),
-                                 hidden_dim=self.config['hidden_dim'],
+                                 hidden_dim=self.config['supervisor_hidden_dim'],
                                  output_dim=num_class)
 
     def forward(self, x_list):
9 changes: 6 additions & 3 deletions flexynesis/models/direct_pred_gcnn.py
@@ -63,12 +63,15 @@ def __init__(
         self.layers = list(dataset.dat.keys())
         # NOTE: For now we use matrices, so number of node input features is 1.
         input_dims = [1 for _ in range(len(self.layers))]
-
+
+        # define this to be able to make hidden_dim as a factor of number of features in each layer
+        feature_counts = [len(dataset.features[x]) for x in self.layers]
+
         self.encoders = nn.ModuleList(
             [
                 GNNs(
                     input_dim=input_dims[i],
-                    hidden_dim=int(self.config["hidden_dim"]), # int because of pyg
+                    hidden_dim=int(self.config["hidden_dim_factor"] * feature_counts[i]),
                     output_dim=self.config["latent_dim"],
                     act = self.config['activation'],
                     conv = self.gnn_conv_type
@@ -85,7 +88,7 @@ def __init__(
                 num_class = len(np.unique(self.ann[var]))
             self.MLPs[var] = MLP(
                 input_dim=self.config["latent_dim"] * len(self.layers),
-                hidden_dim=self.config["hidden_dim"],
+                hidden_dim=self.config["supervisor_hidden_dim"],
                 output_dim=num_class,
             )
 
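In the GCNN each feature is a graph node with a single input channel (input_dim stays 1), so the hidden width is scaled by the layer's feature count rather than by input_dim. A toy illustration of that scaling (the feature counts are invented):

    # hypothetical illustration; the feature counts are made up
    hidden_dim_factor = 0.5       # e.g. a value sampled from Real(0.5, 2)
    feature_counts = [2000, 500]  # number of graph nodes per omics layer
    hidden_dims = [int(hidden_dim_factor * n) for n in feature_counts]
    print(hidden_dims)  # [1000, 250]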
9 changes: 7 additions & 2 deletions flexynesis/models/supervised_vae.py
@@ -78,12 +78,17 @@ def __init__(self, config, dataset, target_variables, batch_variables = None,
         layers = list(dataset.dat.keys())
         input_dims = [len(dataset.features[layers[i]]) for i in range(len(layers))]
         # create a list of Encoder instances for separately encoding each omics layer
-        self.encoders = nn.ModuleList([Encoder(input_dims[i], [config['hidden_dim']], config['latent_dim']) for i in range(len(layers))])
+        self.encoders = nn.ModuleList([Encoder(input_dims[i],
+                                               # define hidden_dim size as a factor of input_dim
+                                               [int(input_dims[i] * config['hidden_dim_factor'])],
+                                               config['latent_dim']) for i in range(len(layers))])
         # Fully connected layers for concatenated means and log_vars
         self.FC_mean = nn.Linear(len(layers) * config['latent_dim'], config['latent_dim'])
         self.FC_log_var = nn.Linear(len(layers) * config['latent_dim'], config['latent_dim'])
         # list of decoders to decode each omics layer separately
-        self.decoders = nn.ModuleList([Decoder(config['latent_dim'], [config['hidden_dim']], input_dims[i]) for i in range(len(layers))])
+        self.decoders = nn.ModuleList([Decoder(config['latent_dim'],
+                                               [int(input_dims[i] * config['hidden_dim_factor'])],
+                                               input_dims[i]) for i in range(len(layers))])
 
         # define supervisor heads
         # using ModuleDict to store multiple MLPs
3 changes: 1 addition & 2 deletions flexynesis/models/triplet_encoder.py
@@ -89,8 +89,7 @@ def __init__(self, config, dataset, target_variables, batch_variables = None,

         self.layers = list(dataset.dat.keys())
         input_sizes = [len(dataset.features[self.layers[i]]) for i in range(len(self.layers))]
-        hidden_sizes = [config['hidden_dim'] for x in range(len(self.layers))]
-
+        hidden_sizes = [int(self.config['hidden_dim_factor'] * input_sizes[i]) for i in range(len(self.layers))]
 
         # The first target variable is the main variable that dictates the triplets
         # it has to be a categorical variable