diff --git a/openwpm/browser_manager.py b/openwpm/browser_manager.py index 2d929a611..f7c81b7dc 100644 --- a/openwpm/browser_manager.py +++ b/openwpm/browser_manager.py @@ -131,6 +131,33 @@ def launch_browser_manager(self) -> bool: crash_recovery = True + # Create a unique temporary directory that we can delete + # when we shut down. Note that this doesn't force anything to + # use `tmpdir`, it just makes it available. + if self.browser_params.tmpdir is not None: + self.logger.debug( + "BROWSER %i: leftover temp directory %s? Deleting it." + % (self.browser_id, self.browser_params.tmpdir) + ) + try: + shutil.rmtree(self.browser_params.tmpdir) + except Exception: + self.logger.debug( + "BROWSER %i: error deleting %s" + % ( + self.browser_id, + self.browser_params.tmpdir, + ), + exc_info=True, + ) + self.browser_params.tmpdir = Path( + tempfile.mkdtemp(prefix="openwpm_", dir=tempfile.gettempdir()) + ) + self.logger.debug( + "BROWSER %i: Using temp dir %s" + % (self.browser_id, self.browser_params.tmpdir) + ) + + self.logger.info("BROWSER %i: Launching browser..." % self.browser_id) self.is_fresh = not crash_recovery @@ -340,6 +367,34 @@ def close_browser_manager(self, force: bool = False) -> None: if not shutdown_complete: self.kill_browser_manager() + # Delete the temporary directory used by geckodriver. 
+ if self.browser_params.tmpdir is not None: + try: + t1 = time.monotonic() + self.logger.debug( + "BROWSER %i: deleting temp dir %s" + % (self.browser_id, self.browser_params.tmpdir) + ) + shutil.rmtree(self.browser_params.tmpdir) + self.logger.debug( + "BROWSER %i: completed deleting temp dir %s in %d seconds" + % ( + self.browser_id, + self.browser_params.tmpdir, + time.monotonic() - t1, + ) + ) + self.browser_params.tmpdir = None + except Exception: + self.logger.warning( + "BROWSER %i: failed to delete temp dir %s" + % ( + self.browser_id, + self.browser_params.tmpdir, + ), + exc_info=True, + ) + def execute_command_sequence( self, # Quoting to break cyclic import, see https://stackoverflow.com/a/39757388 diff --git a/openwpm/config.py b/openwpm/config.py index 674d8dab6..e42824550 100644 --- a/openwpm/config.py +++ b/openwpm/config.py @@ -103,6 +103,7 @@ class BrowserParams(DataClassJsonMixin): default=Path(tempfile.gettempdir()), metadata=DCJConfig(encoder=path_to_str, decoder=str_to_path), ) + """ The tmp_profile_dir defaults to the OS's temporary file folder (typically /tmp) and is where the generated browser profiles and residual files are stored. @@ -139,6 +140,18 @@ class BrowserParams(DataClassJsonMixin): """ + tmpdir: Optional[Path] = field( + default=None, + metadata=DCJConfig(encoder=path_to_str, decoder=str_to_path), + ) + """ + The temporary directory used by `geckodriver`. It is created in + `launch_browser_manager` and deleted in `close_browser_manager`. We do + this because it seems that `geckodriver` doesn't clean up its temporary + files (in particular, a copy of the extension XPI file), so we need to do + so ourselves. 
+ """ + recovery_tar: Optional[Path] = None donottrack: bool = False tracking_protection: bool = False diff --git a/openwpm/deploy_browsers/deploy_firefox.py b/openwpm/deploy_browsers/deploy_firefox.py index c4797b794..b5c4a6e29 100755 --- a/openwpm/deploy_browsers/deploy_firefox.py +++ b/openwpm/deploy_browsers/deploy_firefox.py @@ -140,6 +140,14 @@ def deploy_firefox( # Launch the webdriver status_queue.put(("STATUS", "Launch Attempted", None)) + # Use browser_params.tmpdir as the temporary directory. This is so that + # geckodriver makes its copy of the extension XPI file in tmpdir, so + # we can delete it later and not have it left behind. I make a shallow + # copy of `os.environ` because I'm a little nervous about modifying the + # OpenWPM process' environment. + env = os.environ.copy() + env["TMPDIR"] = str(browser_params.tmpdir) + fo.binary_location = firefox_binary_path geckodriver_path = subprocess.check_output( "which geckodriver", encoding="utf-8", shell=True @@ -149,6 +157,7 @@ def deploy_firefox( service=Service( executable_path=geckodriver_path, log_output=open(webdriver_interceptor.fifo, "w"), + env=env, ), )