From 4435b8ea14cc753d21f0c22377140cab2156c593 Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Thu, 18 Jul 2019 17:35:02 -0700 Subject: [PATCH] Local caching CommandRunner has default-on flag (#8040) --- .travis.yml | 3 ++ build-support/travis/travis.yml.mustache | 3 ++ src/python/pants/engine/native.py | 2 +- src/python/pants/option/global_options.py | 5 +++ src/rust/engine/Cargo.lock | 1 + src/rust/engine/Cargo.toml | 1 + src/rust/engine/engine_cffi/src/lib.rs | 2 ++ .../engine/process_execution/src/cache.rs | 10 +++--- src/rust/engine/src/context.rs | 31 +++++++++++++++---- 9 files changed, 46 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5b48b9bffa2..9eac28f6051 100644 --- a/.travis.yml +++ b/.travis.yml @@ -208,6 +208,9 @@ base_osx_config: &base_osx_config brew: packages: - openssl + before_script: + - ulimit -c unlimited + - ulimit -n 8192 py36_osx_config: &py36_osx_config <<: *base_osx_config diff --git a/build-support/travis/travis.yml.mustache b/build-support/travis/travis.yml.mustache index 816167da03d..29a436497fc 100644 --- a/build-support/travis/travis.yml.mustache +++ b/build-support/travis/travis.yml.mustache @@ -197,6 +197,9 @@ base_osx_config: &base_osx_config brew: packages: - openssl + before_script: + - ulimit -c unlimited + - ulimit -n 8192 py36_osx_config: &py36_osx_config <<: *base_osx_config diff --git a/src/python/pants/engine/native.py b/src/python/pants/engine/native.py index 69fcdb63fc1..96fd3c23d06 100644 --- a/src/python/pants/engine/native.py +++ b/src/python/pants/engine/native.py @@ -842,8 +842,8 @@ def ti(type_obj): execution_options.process_execution_remote_parallelism, execution_options.process_execution_cleanup_local_dirs, execution_options.process_execution_speculation_delay, - self.context.utf8_buf(execution_options.process_execution_speculation_strategy), + execution_options.process_execution_use_local_cache, ) return self.gc(scheduler, self.lib.scheduler_destroy) diff --git 
a/src/python/pants/option/global_options.py b/src/python/pants/option/global_options.py index 6eb1550f9a3..c68ee638cb0 100644 --- a/src/python/pants/option/global_options.py +++ b/src/python/pants/option/global_options.py @@ -38,6 +38,7 @@ class ExecutionOptions(datatype([ 'process_execution_cleanup_local_dirs', 'process_execution_speculation_delay', 'process_execution_speculation_strategy', + 'process_execution_use_local_cache', 'remote_execution_process_cache_namespace', 'remote_instance_name', 'remote_ca_certs_path', @@ -65,6 +66,7 @@ def from_bootstrap_options(cls, bootstrap_options): process_execution_cleanup_local_dirs=bootstrap_options.process_execution_cleanup_local_dirs, process_execution_speculation_delay=bootstrap_options.process_execution_speculation_delay, process_execution_speculation_strategy=bootstrap_options.process_execution_speculation_strategy, + process_execution_use_local_cache=bootstrap_options.process_execution_use_local_cache, remote_execution_process_cache_namespace=bootstrap_options.remote_execution_process_cache_namespace, remote_instance_name=bootstrap_options.remote_instance_name, remote_ca_certs_path=bootstrap_options.remote_ca_certs_path, @@ -86,6 +88,7 @@ def from_bootstrap_options(cls, bootstrap_options): process_execution_cleanup_local_dirs=True, process_execution_speculation_delay=.1, process_execution_speculation_strategy='local_first', + process_execution_use_local_cache=True, remote_execution_process_cache_namespace=None, remote_instance_name=None, remote_ca_certs_path=None, @@ -435,6 +438,8 @@ def register_bootstrap_options(cls, register): 'and fall back to the local host if remote calls take longer than the speculation timeout.\n' '`none`: Do not speculate about long running processes.', advanced=True) + register('--process-execution-use-local-cache', type=bool, default=True, advanced=True, + help='Whether to keep process executions in a local cache persisted to disk.') @classmethod def register_options(cls, register): diff 
--git a/src/rust/engine/Cargo.lock b/src/rust/engine/Cargo.lock index 81085e09b59..af7d4ffbbeb 100644 --- a/src/rust/engine/Cargo.lock +++ b/src/rust/engine/Cargo.lock @@ -622,6 +622,7 @@ dependencies = [ "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.18 (registry+https://github.com/rust-lang/crates.io-index)", "rule_graph 0.0.1", + "sharded_lmdb 0.0.1", "smallvec 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", "store 0.1.0", "task_executor 0.0.1", diff --git a/src/rust/engine/Cargo.toml b/src/rust/engine/Cargo.toml index aa86feb79d2..63d24132c8c 100644 --- a/src/rust/engine/Cargo.toml +++ b/src/rust/engine/Cargo.toml @@ -94,6 +94,7 @@ process_execution = { path = "process_execution" } rand = "0.6" reqwest = { version = "0.9.10", default_features = false, features = ["rustls-tls"] } rule_graph = { path = "rule_graph" } +sharded_lmdb = { path = "sharded_lmdb" } smallvec = "0.6" store = { path = "fs/store" } tempfile = "3" diff --git a/src/rust/engine/engine_cffi/src/lib.rs b/src/rust/engine/engine_cffi/src/lib.rs index 6388684f75c..f9867769074 100644 --- a/src/rust/engine/engine_cffi/src/lib.rs +++ b/src/rust/engine/engine_cffi/src/lib.rs @@ -204,6 +204,7 @@ pub extern "C" fn scheduler_create( process_execution_cleanup_local_dirs: bool, process_execution_speculation_delay: f64, process_execution_speculation_strategy_buf: Buffer, + process_execution_use_local_cache: bool, ) -> *const Scheduler { let root_type_ids = root_type_ids.to_vec(); let ignore_patterns = ignore_patterns_buf @@ -320,6 +321,7 @@ pub extern "C" fn scheduler_create( // off nightly. 
https://github.com/rust-lang/rust/issues/54361 Duration::from_millis((process_execution_speculation_delay * 1000.0).round() as u64), process_execution_speculation_strategy, + process_execution_use_local_cache, )))) } diff --git a/src/rust/engine/process_execution/src/cache.rs b/src/rust/engine/process_execution/src/cache.rs index e45127eb42f..3a736cb8a51 100644 --- a/src/rust/engine/process_execution/src/cache.rs +++ b/src/rust/engine/process_execution/src/cache.rs @@ -11,11 +11,11 @@ use store::Store; use workunit_store::WorkUnitStore; #[derive(Clone)] -struct CommandRunner { - underlying: Arc, - process_execution_store: ShardedLmdb, - file_store: Store, - metadata: ExecuteProcessRequestMetadata, +pub struct CommandRunner { + pub underlying: Arc, + pub process_execution_store: ShardedLmdb, + pub file_store: Store, + pub metadata: ExecuteProcessRequestMetadata, } impl crate::CommandRunner for CommandRunner { diff --git a/src/rust/engine/src/context.rs b/src/rust/engine/src/context.rs index 27f27e2b973..d73dc42846d 100644 --- a/src/rust/engine/src/context.rs +++ b/src/rust/engine/src/context.rs @@ -25,6 +25,7 @@ use process_execution::{ use rand::seq::SliceRandom; use reqwest; use rule_graph::RuleGraph; +use sharded_lmdb::ShardedLmdb; use std::collections::btree_map::BTreeMap; use store::Store; @@ -75,6 +76,7 @@ impl Core { process_execution_cleanup_local_dirs: bool, process_execution_speculation_delay: Duration, process_execution_speculation_strategy: String, + process_execution_use_local_cache: bool, ) -> Core { // Randomize CAS address order to avoid thundering herds from common config. 
let mut remote_store_servers = remote_store_servers; @@ -101,7 +103,7 @@ impl Core { None }; - let local_store_dir = local_store_dir.clone(); + let local_store_dir2 = local_store_dir.clone(); let store = safe_create_dir_all_ioerror(&local_store_dir) .map_err(|e| format!("Error making directory {:?}: {:?}", local_store_dir, e)) .and_then(|()| { @@ -127,6 +129,12 @@ impl Core { }) .unwrap_or_else(|e| panic!("Could not initialize Store: {:?}", e)); + let process_execution_metadata = ExecuteProcessRequestMetadata { + instance_name: remote_instance_name.clone(), + cache_key_gen_version: remote_execution_process_cache_namespace.clone(), + platform_properties: remote_execution_extra_platform_properties.clone(), + }; + let mut command_runner: Box = Box::new(BoundedCommandRunner::new( Box::new(process_execution::local::CommandRunner::new( @@ -144,11 +152,7 @@ impl Core { // No problem unwrapping here because the global options validation // requires the remote_execution_server be present when remote_execution is set. 
&remote_execution_server.unwrap(), - ExecuteProcessRequestMetadata { - instance_name: remote_instance_name.clone(), - cache_key_gen_version: remote_execution_process_cache_namespace.clone(), - platform_properties: remote_execution_extra_platform_properties.clone(), - }, + process_execution_metadata.clone(), root_ca_certs.clone(), oauth_bearer_token.clone(), store.clone(), @@ -171,6 +175,21 @@ impl Core { }; } + if process_execution_use_local_cache { + let process_execution_store = ShardedLmdb::new( + local_store_dir2.join("processes"), + 5 * 1024 * 1024 * 1024, + executor.clone(), + ) + .unwrap_or_else(|e| panic!("Could not initialize store for process cache: {:?}", e)); + command_runner = Box::new(process_execution::cache::CommandRunner { + underlying: command_runner.into(), + process_execution_store, + file_store: store.clone(), + metadata: process_execution_metadata, + }) + } + let http_client = reqwest::r#async::Client::new(); let rule_graph = RuleGraph::new(tasks.as_map(), root_subject_types);