Sample new embedding from maximum likelihood estimate #289

Open · wants to merge 3 commits into base: master
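At the core of this PR (see flambe/field/text.py below) is a change from `torch.randn` initialization for special and unknown tokens to sampling from a Gaussian fit to the pretrained embeddings: the per-dimension mean and standard deviation of the pretrained vectors are the maximum likelihood estimates of a diagonal Gaussian. A minimal sketch of the idea, using a random matrix as a stand-in for the pretrained vectors that the real code reads from a gensim KeyedVectors model:

```python
import numpy as np
import torch

# Stand-in for the pretrained vectors; in the PR these come from
# `model[key] for key in model.vocab.keys()` (a gensim KeyedVectors).
vectors = np.random.rand(1000, 300).astype(np.float32)

# Per-dimension mean and std are the maximum likelihood estimates
# of a diagonal Gaussian fit to the embedding distribution.
mean, std = np.mean(vectors, axis=0), np.std(vectors, axis=0)

# New embeddings are drawn from that Gaussian rather than from a
# standard normal (the previous torch.randn(model.vector_size) call).
rand_emb = torch.tensor(np.random.normal(mean, std), dtype=torch.float32)
assert rand_emb.shape == (300,)
```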
4 changes: 2 additions & 2 deletions flambe/cluster/aws.py
@@ -587,7 +587,7 @@ def _generic_launch_instances(
for idx, ins in enumerate(boto_instances):
ins.wait_until_running()

logger.debug(f"Created instances running")
logger.debug("Created instances running")

ret = []
for idx, ins in enumerate(boto_instances):
@@ -833,7 +833,7 @@ def create_cloudwatch_events(self) -> None:
self._create_cloudwatch_event(f_id, mins=mins, cpu_thresh=0.5)
logger.info(cl.YE(f"{f.host} timeout of {mins} mins set"))
else:
logger.info(cl.YE(f"Factories have no timeout"))
logger.info(cl.YE("Factories have no timeout"))

orch_host = self.orchestrator.host
orch_id = self._get_instance_id_by_host(orch_host)
2 changes: 1 addition & 1 deletion flambe/cluster/cluster.py
@@ -752,7 +752,7 @@ def cluster_has_key(self) -> bool:
logger.debug(f"Key in {ins.host} differs from others")
return False # Keys mismatch

logger.debug(f"All hosts contain same key pair")
logger.debug("All hosts contain same key pair")
return True

def distribute_keys(self) -> None:
8 changes: 4 additions & 4 deletions flambe/cluster/instance/instance.py
@@ -481,7 +481,7 @@ def clean_containers(self) -> None:
If command fails

"""
-cmd = f'''
+cmd = '''
docker stop $(docker ps -a -q);
docker rm $(docker ps -a -q);
'''
@@ -905,7 +905,7 @@ def num_cpus(self) -> int:
"""Return the number of CPUs this host contains.

"""
cmd = self._run_cmd(f"python3 -c 'import multiprocessing; " +
cmd = self._run_cmd("python3 -c 'import multiprocessing; " +
"print(multiprocessing.cpu_count())'")

if cmd.success:
@@ -927,7 +927,7 @@ def num_gpus(self) -> int:
If command to get the number of GPUs fails.

"""
cmd = self._run_cmd(f"python3 -c 'import torch; print(torch.cuda.device_count())'")
cmd = self._run_cmd("python3 -c 'import torch; print(torch.cuda.device_count())'")

if cmd.success:
return int(cmd.msg)
@@ -1179,7 +1179,7 @@ def launch_flambe(self,
"""
force_params = "--force" if force else ""
cmd = (
f"tmux new-session -d -s 'flambe' " +
"tmux new-session -d -s 'flambe' " +
f"'bash -lc \"flambe {config_file} --secrets {secrets_file} " +
f"{force_params} &> output.log\"'"
)
4 changes: 2 additions & 2 deletions flambe/compile/component.py
@@ -703,7 +703,7 @@ def from_yaml(cls, constructor: Any, node: Any, factory_name: str) -> 'Link':

def convert(self) -> Callable[..., Any]:
if self.local:
-return ray.tune.function(lambda spec: eval(f'spec')) # TODO what do here
+return ray.tune.function(lambda spec: eval('spec')) # TODO what do here
return ray.tune.function(lambda spec: eval(f'spec.config.params.{self.root_schema}'))


@@ -1412,7 +1412,7 @@ def helper(obj: Any) -> Any:
for kw in newkeywords:
if isinstance(newkeywords[kw], YAML_TYPES):
msg = f"'{cls}' property '{kw}' is still yaml type {type(newkeywords[kw])}\n"
msg += f"This could be because of a typo or the class is not registered properly"
msg += "This could be because of a typo or the class is not registered properly"
warn(msg)
# Find intended constructor in case using some factory
factory_method: Callable[..., Any] = cls
4 changes: 2 additions & 2 deletions flambe/compile/downloader.py
@@ -77,7 +77,7 @@ def download_s3_file(url: str, destination: str) -> None:
s3 = boto3.client('s3')
s3.download_file(parsed_url.netloc, parsed_url.path[1:], destination)
except botocore.client.ClientError:
raise ValueError(f"Error downlaoding artifact from s3.")
raise ValueError("Error downlaoding artifact from s3.")


def http_exists(url: str) -> bool:
@@ -138,7 +138,7 @@ def download_s3_folder(url: str, destination: str) -> None:
)
except subprocess.CalledProcessError as exc:
logger.debug(exc.output)
raise ValueError(f"Error downlaoding artifacts from s3. " +
raise ValueError("Error downlaoding artifacts from s3. " +
"Check logs for more information")


6 changes: 3 additions & 3 deletions flambe/compile/extensions.py
@@ -222,9 +222,9 @@ def install_extensions(extensions: Dict[str, str],

output = output.decode("utf-8")

-for l in output.splitlines():
-    logger.debug(l)
-    r = re.search(r'Successfully uninstalled (?P<pkg_name>\D*)-(?P<version>.*)', l)
+for line in output.splitlines():
+    logger.debug(line)
+    r = re.search(r'Successfully uninstalled (?P<pkg_name>\D*)-(?P<version>.*)', line)
if r and 'pkg_name' in r.groupdict():
logger.info(cl.RE(f"WARNING: While installing {ext}, " +
f"existing {r.groupdict()['pkg_name']}-" +
8 changes: 4 additions & 4 deletions flambe/experiment/experiment.py
@@ -199,7 +199,7 @@ def run(self, force: bool = False, verbose: bool = False, debug: bool = False, *
if not self.resume and not force and os.path.exists(self.full_save_path) \
and list(get_files(self.full_save_path)):
raise error.ParsingRunnableError(
f"Results from an experiment with the same name were located in the save path " +
"Results from an experiment with the same name were located in the save path " +
f"{self.full_save_path}. To overide this results, please use '--force' " +
"To use these results and resume the experiment, pick 'resume: True' " +
"If not, just pick another save_path/name."
@@ -227,7 +227,7 @@ def run(self, force: bool = False, verbose: bool = False, debug: bool = False, *

if any(map(lambda x: isinstance(x, ClusterResource), self.resources.values())):
raise ValueError(
f"Local experiments doesn't support resources with '!cluster' tags. " +
"Local experiments doesn't support resources with '!cluster' tags. " +
"The '!cluster' tag is used for those resources that need to be handled " +
"in the cluster when running remote experiments.")

@@ -264,7 +264,7 @@ def run(self, force: bool = False, verbose: bool = False, debug: bool = False, *
ray.init(redis_address=f"{self.env.orchestrator_ip}:{const.RAY_REDIS_PORT}", **kwargs)
else:
ray.init(**kwargs)
logger.debug(f"Ray cluster up")
logger.debug("Ray cluster up")

# Initialize map from block to list of checkpoints
# This is used whe resolving links over other computable blocks
@@ -589,7 +589,7 @@ def setup(self, cluster: Cluster, extensions: Dict[str, str], force: bool, **kwa
cluster.orchestrator.launch_report_site(
f"{output_dir_remote}/state.pkl",
port=const.REPORT_SITE_PORT,
output_log=f"output.log",
output_log="output.log",
output_dir=output_dir_remote,
tensorboard_port=const.TENSORBOARD_PORT
)
2 changes: 1 addition & 1 deletion flambe/export/builder.py
@@ -168,7 +168,7 @@ def save_s3(self, force) -> None:
)
except subprocess.CalledProcessError as exc:
logger.debug(exc.output)
raise ValueError(f"Error uploading artifacts to s3. " +
raise ValueError("Error uploading artifacts to s3. " +
"Check logs for more information")
else:
logger.info(cl.BL(f"Done uploading to {self.destination}"))
2 changes: 1 addition & 1 deletion flambe/field/bow.py
@@ -73,7 +73,7 @@ def __init__(self, # nosec
self.full_vocab: Dict[str, int] = {}

if scale_factor and not normalize:
raise ValueError(f"Cannot specify scale_factor without normalizing")
raise ValueError("Cannot specify scale_factor without normalizing")

self.register_attrs('vocab', 'full_vocab')

11 changes: 9 additions & 2 deletions flambe/field/text.py
@@ -386,6 +386,11 @@ def _build_embeddings(self, model: KeyedVectors,

tokens: Iterable[str] = self.vocab.keys()

+# Compute statistics about word embedding distribution
+# This is used for sampling new random word embeddings
+vectors = np.array([model[key] for key in model.vocab.keys()])
+mean, std = np.mean(vectors, axis=0), np.std(vectors, axis=0)
+
if setup_vocab_from_embeddings:
tokens = chain(tokens, model.vocab.keys())

@@ -395,13 +400,15 @@
embedding_matrix.append(torch.tensor(model[token]))
new_vocab[token] = new_index = new_index + 1
elif token in self.specials:
-embedding_matrix.append(torch.randn(model.vector_size))
+rand_emb = torch.tensor(np.random.normal(mean, std), dtype=torch.float32)
+embedding_matrix.append(rand_emb)
new_vocab[token] = new_index = new_index + 1
else:
self.unk_numericals.add(self.vocab[token])

if initialize_unknowns:
-embedding_matrix.append(torch.randn(model.vector_size))
+rand_emb = torch.tensor(np.random.normal(mean, std), dtype=torch.float32)
+embedding_matrix.append(rand_emb)
new_vocab[token] = new_index = new_index + 1
else:
# Collapse all OOV's to the same <unk> token id
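A note on the sampling calls above (illustration, not part of the diff): with array-valued `mean` and `std`, `np.random.normal` broadcasts elementwise, so each call draws a single vector whose i-th component comes from a normal distribution with mean `mean[i]` and standard deviation `std[i]`. A toy check with hypothetical 4-dimensional values:

```python
import numpy as np

mean = np.array([0.0, 0.1, -0.2, 0.05])  # toy per-dimension means
std = np.array([1.0, 0.5, 0.7, 0.9])     # toy per-dimension stds
sample = np.random.normal(mean, std)     # one independent draw per dimension
assert sample.shape == (4,)
```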
4 changes: 2 additions & 2 deletions flambe/nn/pooling.py
@@ -216,8 +216,8 @@ def __init__(self,
dimensions = [input_size, *attention_units, attention_heads]
layers = []
# iterating over hidden layers
-for l in range(len(dimensions) - 2):
-    layers.append(nn.Linear(dimensions[l], dimensions[l + 1], bias=is_biased))
+for layer in range(len(dimensions) - 2):
+    layers.append(nn.Linear(dimensions[layer], dimensions[layer + 1], bias=is_biased))
layers.append(nn.Tanh() if hidden_activation is None else hidden_activation)
# adding output layer
layers.append(nn.Linear(dimensions[-2], dimensions[-1], bias=False))
2 changes: 1 addition & 1 deletion flambe/runner/run.py
@@ -39,7 +39,7 @@ def main(args: argparse.Namespace) -> None:
print(cl.BL(f"VERSION: {flambe.__version__}\n"))

if args.debug:
print(cl.YE(f"Debug mode activated\n"))
print(cl.YE("Debug mode activated\n"))
if args.cluster is not None:
raise ValueError('Will not run on cluster in debug mode. ' +
'Please disable debug mode or run locally.')
4 changes: 2 additions & 2 deletions flambe/sampler/base.py
@@ -100,8 +100,8 @@ def _batch_from_nested_col(col: Tuple, pad: int) -> torch.Tensor:

# Compute the max length for each level
lvl_to_lens: Dict[int, List] = defaultdict(list)
-for l in lens:
-    for lvl, lns in l.items():
+for length in lens:
+    for lvl, lns in length.items():
lvl_to_lens[lvl].extend(lns)
max_lens = odict([(lvl, max(lvl_to_lens[lvl])) for lvl in sorted(lvl_to_lens.keys())])
