Sample new embedding from maximum likelihood estimate #289

Open · wants to merge 3 commits into base: master
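At the core of this PR (see flambe/field/text.py below) is a change from `torch.randn` initialization for special and unknown tokens to sampling from a Gaussian fit to the pretrained embeddings: the per-dimension mean and standard deviation of the pretrained vectors are the maximum likelihood estimates of a diagonal Gaussian. A minimal sketch of the idea, using a random matrix as a stand-in for the pretrained vectors that the real code reads from a gensim KeyedVectors model:

```python
import numpy as np
import torch

# Stand-in for the pretrained vectors; in the PR these come from
# `model[key] for key in model.vocab.keys()` (a gensim KeyedVectors).
vectors = np.random.rand(1000, 300).astype(np.float32)

# Per-dimension mean and std are the maximum likelihood estimates
# of a diagonal Gaussian fit to the embedding distribution.
mean, std = np.mean(vectors, axis=0), np.std(vectors, axis=0)

# New embeddings are drawn from that Gaussian rather than from a
# standard normal (the previous torch.randn(model.vector_size) call).
rand_emb = torch.tensor(np.random.normal(mean, std), dtype=torch.float32)
assert rand_emb.shape == (300,)
```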
4 changes: 2 additions & 2 deletions flambe/cluster/aws.py
@@ -587,7 +587,7 @@ def _generic_launch_instances(
for idx, ins in enumerate(boto_instances):
ins.wait_until_running()

logger.debug(f"Created instances running")
logger.debug("Created instances running")

ret = []
for idx, ins in enumerate(boto_instances):
@@ -833,7 +833,7 @@ def create_cloudwatch_events(self) -> None:
self._create_cloudwatch_event(f_id, mins=mins, cpu_thresh=0.5)
logger.info(cl.YE(f"{f.host} timeout of {mins} mins set"))
else:
logger.info(cl.YE(f"Factories have no timeout"))
logger.info(cl.YE("Factories have no timeout"))

orch_host = self.orchestrator.host
orch_id = self._get_instance_id_by_host(orch_host)
2 changes: 1 addition & 1 deletion flambe/cluster/cluster.py
@@ -752,7 +752,7 @@ def cluster_has_key(self) -> bool:
logger.debug(f"Key in {ins.host} differs from others")
return False # Keys mismatch

logger.debug(f"All hosts contain same key pair")
logger.debug("All hosts contain same key pair")
return True

def distribute_keys(self) -> None:
8 changes: 4 additions & 4 deletions flambe/cluster/instance/instance.py
@@ -481,7 +481,7 @@ def clean_containers(self) -> None:
If command fails

"""
-cmd = f'''
+cmd = '''
docker stop $(docker ps -a -q);
docker rm $(docker ps -a -q);
'''
@@ -905,7 +905,7 @@ def num_cpus(self) -> int:
"""Return the number of CPUs this host contains.

"""
cmd = self._run_cmd(f"python3 -c 'import multiprocessing; " +
cmd = self._run_cmd("python3 -c 'import multiprocessing; " +
"print(multiprocessing.cpu_count())'")

if cmd.success:
@@ -927,7 +927,7 @@ def num_gpus(self) -> int:
If command to get the number of GPUs fails.

"""
cmd = self._run_cmd(f"python3 -c 'import torch; print(torch.cuda.device_count())'")
cmd = self._run_cmd("python3 -c 'import torch; print(torch.cuda.device_count())'")

if cmd.success:
return int(cmd.msg)
@@ -1179,7 +1179,7 @@ def launch_flambe(self,
"""
force_params = "--force" if force else ""
cmd = (
f"tmux new-session -d -s 'flambe' " +
"tmux new-session -d -s 'flambe' " +
f"'bash -lc \"flambe {config_file} --secrets {secrets_file} " +
f"{force_params} &> output.log\"'"
)
4 changes: 2 additions & 2 deletions flambe/compile/component.py
@@ -703,7 +703,7 @@ def from_yaml(cls, constructor: Any, node: Any, factory_name: str) -> 'Link':

def convert(self) -> Callable[..., Any]:
if self.local:
-return ray.tune.function(lambda spec: eval(f'spec')) # TODO what do here
+return ray.tune.function(lambda spec: eval('spec')) # TODO what do here
return ray.tune.function(lambda spec: eval(f'spec.config.params.{self.root_schema}'))


@@ -1412,7 +1412,7 @@ def helper(obj: Any) -> Any:
for kw in newkeywords:
if isinstance(newkeywords[kw], YAML_TYPES):
msg = f"'{cls}' property '{kw}' is still yaml type {type(newkeywords[kw])}\n"
msg += f"This could be because of a typo or the class is not registered properly"
msg += "This could be because of a typo or the class is not registered properly"
warn(msg)
# Find intended constructor in case using some factory
factory_method: Callable[..., Any] = cls
4 changes: 2 additions & 2 deletions flambe/compile/downloader.py
@@ -77,7 +77,7 @@ def download_s3_file(url: str, destination: str) -> None:
s3 = boto3.client('s3')
s3.download_file(parsed_url.netloc, parsed_url.path[1:], destination)
except botocore.client.ClientError:
raise ValueError(f"Error downlaoding artifact from s3.")
raise ValueError("Error downlaoding artifact from s3.")


def http_exists(url: str) -> bool:
@@ -138,7 +138,7 @@ def download_s3_folder(url: str, destination: str) -> None:
)
except subprocess.CalledProcessError as exc:
logger.debug(exc.output)
raise ValueError(f"Error downlaoding artifacts from s3. " +
raise ValueError("Error downlaoding artifacts from s3. " +
"Check logs for more information")


6 changes: 3 additions & 3 deletions flambe/compile/extensions.py
@@ -222,9 +222,9 @@ def install_extensions(extensions: Dict[str, str],

output = output.decode("utf-8")

-for l in output.splitlines():
-    logger.debug(l)
-    r = re.search(r'Successfully uninstalled (?P<pkg_name>\D*)-(?P<version>.*)', l)
+for line in output.splitlines():
+    logger.debug(line)
+    r = re.search(r'Successfully uninstalled (?P<pkg_name>\D*)-(?P<version>.*)', line)
if r and 'pkg_name' in r.groupdict():
logger.info(cl.RE(f"WARNING: While installing {ext}, " +
f"existing {r.groupdict()['pkg_name']}-" +
8 changes: 4 additions & 4 deletions flambe/experiment/experiment.py
@@ -199,7 +199,7 @@ def run(self, force: bool = False, verbose: bool = False, debug: bool = False, *
if not self.resume and not force and os.path.exists(self.full_save_path) \
and list(get_files(self.full_save_path)):
raise error.ParsingRunnableError(
f"Results from an experiment with the same name were located in the save path " +
"Results from an experiment with the same name were located in the save path " +
f"{self.full_save_path}. To overide this results, please use '--force' " +
"To use these results and resume the experiment, pick 'resume: True' " +
"If not, just pick another save_path/name."
@@ -227,7 +227,7 @@ def run(self, force: bool = False, verbose: bool = False, debug: bool = False, *

if any(map(lambda x: isinstance(x, ClusterResource), self.resources.values())):
raise ValueError(
f"Local experiments doesn't support resources with '!cluster' tags. " +
"Local experiments doesn't support resources with '!cluster' tags. " +
"The '!cluster' tag is used for those resources that need to be handled " +
"in the cluster when running remote experiments.")

@@ -264,7 +264,7 @@ def run(self, force: bool = False, verbose: bool = False, debug: bool = False, *
ray.init(redis_address=f"{self.env.orchestrator_ip}:{const.RAY_REDIS_PORT}", **kwargs)
else:
ray.init(**kwargs)
logger.debug(f"Ray cluster up")
logger.debug("Ray cluster up")

# Initialize map from block to list of checkpoints
# This is used whe resolving links over other computable blocks
@@ -589,7 +589,7 @@ def setup(self, cluster: Cluster, extensions: Dict[str, str], force: bool, **kwa
cluster.orchestrator.launch_report_site(
f"{output_dir_remote}/state.pkl",
port=const.REPORT_SITE_PORT,
output_log=f"output.log",
output_log="output.log",
output_dir=output_dir_remote,
tensorboard_port=const.TENSORBOARD_PORT
)
2 changes: 1 addition & 1 deletion flambe/export/builder.py
@@ -168,7 +168,7 @@ def save_s3(self, force) -> None:
)
except subprocess.CalledProcessError as exc:
logger.debug(exc.output)
raise ValueError(f"Error uploading artifacts to s3. " +
raise ValueError("Error uploading artifacts to s3. " +
"Check logs for more information")
else:
logger.info(cl.BL(f"Done uploading to {self.destination}"))
2 changes: 1 addition & 1 deletion flambe/field/bow.py
@@ -73,7 +73,7 @@ def __init__(self, # nosec
self.full_vocab: Dict[str, int] = {}

if scale_factor and not normalize:
raise ValueError(f"Cannot specify scale_factor without normalizing")
raise ValueError("Cannot specify scale_factor without normalizing")

self.register_attrs('vocab', 'full_vocab')

11 changes: 9 additions & 2 deletions flambe/field/text.py
@@ -386,6 +386,11 @@ def _build_embeddings(self, model: KeyedVectors,

tokens: Iterable[str] = self.vocab.keys()

+# Compute statistics about word embedding distribution
+# This is used for sampling new random word embeddings
+vectors = np.array([model[key] for key in model.vocab.keys()])
+mean, std = np.mean(vectors, axis=0), np.std(vectors, axis=0)
+
if setup_vocab_from_embeddings:
tokens = chain(tokens, model.vocab.keys())

@@ -395,13 +400,15 @@
embedding_matrix.append(torch.tensor(model[token]))
new_vocab[token] = new_index = new_index + 1
elif token in self.specials:
-embedding_matrix.append(torch.randn(model.vector_size))
+rand_emb = torch.tensor(np.random.normal(mean, std), dtype=torch.float32)
+embedding_matrix.append(rand_emb)
new_vocab[token] = new_index = new_index + 1
else:
self.unk_numericals.add(self.vocab[token])

if initialize_unknowns:
-embedding_matrix.append(torch.randn(model.vector_size))
+rand_emb = torch.tensor(np.random.normal(mean, std), dtype=torch.float32)
+embedding_matrix.append(rand_emb)
new_vocab[token] = new_index = new_index + 1
else:
# Collapse all OOV's to the same <unk> token id
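A note on the sampling calls above (illustration, not part of the diff): with array-valued `mean` and `std`, `np.random.normal` broadcasts elementwise, so each call draws a single vector whose i-th component comes from a normal distribution with mean `mean[i]` and standard deviation `std[i]`. A toy check with hypothetical 4-dimensional values:

```python
import numpy as np

mean = np.array([0.0, 0.1, -0.2, 0.05])  # toy per-dimension means
std = np.array([1.0, 0.5, 0.7, 0.9])     # toy per-dimension stds
sample = np.random.normal(mean, std)     # one independent draw per dimension
assert sample.shape == (4,)
```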
4 changes: 2 additions & 2 deletions flambe/nn/pooling.py
@@ -216,8 +216,8 @@ def __init__(self,
dimensions = [input_size, *attention_units, attention_heads]
layers = []
# iterating over hidden layers
-for l in range(len(dimensions) - 2):
-    layers.append(nn.Linear(dimensions[l], dimensions[l + 1], bias=is_biased))
+for layer in range(len(dimensions) - 2):
+    layers.append(nn.Linear(dimensions[layer], dimensions[layer + 1], bias=is_biased))
layers.append(nn.Tanh() if hidden_activation is None else hidden_activation)
# adding output layer
layers.append(nn.Linear(dimensions[-2], dimensions[-1], bias=False))
2 changes: 1 addition & 1 deletion flambe/runner/run.py
@@ -39,7 +39,7 @@ def main(args: argparse.Namespace) -> None:
print(cl.BL(f"VERSION: {flambe.__version__}\n"))

if args.debug:
print(cl.YE(f"Debug mode activated\n"))
print(cl.YE("Debug mode activated\n"))
if args.cluster is not None:
raise ValueError('Will not run on cluster in debug mode. ' +
'Please disable debug mode or run locally.')
4 changes: 2 additions & 2 deletions flambe/sampler/base.py
@@ -100,8 +100,8 @@ def _batch_from_nested_col(col: Tuple, pad: int) -> torch.Tensor:

# Compute the max length for each level
lvl_to_lens: Dict[int, List] = defaultdict(list)
-for l in lens:
-    for lvl, lns in l.items():
+for length in lens:
+    for lvl, lns in length.items():
lvl_to_lens[lvl].extend(lns)
max_lens = odict([(lvl, max(lvl_to_lens[lvl])) for lvl in sorted(lvl_to_lens.keys())])
