From f1d1f911a9811b7c8de3c25d161519ef83705b6c Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov <160598371+comandeo-mongo@users.noreply.github.com> Date: Tue, 19 Nov 2024 16:19:05 +0100 Subject: [PATCH] RUBY-3429 Retry failed KMS requests (#2907) --- .evergreen/run-tests.sh | 1 + .rubocop.yml | 3 + gemfiles/standard.rb | 2 +- lib/mongo/crypt/binding.rb | 58 ++++++++- lib/mongo/crypt/context.rb | 82 ++++++++----- lib/mongo/crypt/encryption_io.rb | 6 +- lib/mongo/crypt/handle.rb | 2 +- lib/mongo/error/kms_error.rb | 9 ++ .../kms_retry_prose_spec.rb | 112 ++++++++++++++++++ .../kms_tls_options_spec.rb | 2 +- spec/integration/search_indexes_prose_spec.rb | 2 - 11 files changed, 244 insertions(+), 35 deletions(-) create mode 100644 spec/integration/client_side_encryption/kms_retry_prose_spec.rb diff --git a/.evergreen/run-tests.sh b/.evergreen/run-tests.sh index 653ca3b480..6dfa54c343 100755 --- a/.evergreen/run-tests.sh +++ b/.evergreen/run-tests.sh @@ -219,6 +219,7 @@ if test -n "$FLE"; then python3 -u .evergreen/csfle/kms_http_server.py --ca_file .evergreen/x509gen/ca.pem --cert_file .evergreen/x509gen/server.pem --port 8002 --require_client_cert & python3 -u .evergreen/csfle/kms_kmip_server.py & python3 -u .evergreen/csfle/fake_azure.py & + python3 -u .evergreen/csfle/kms_failpoint_server.py --port 9003 & # Obtain temporary AWS credentials PYTHON=python3 . .evergreen/csfle/set-temp-creds.sh diff --git a/.rubocop.yml b/.rubocop.yml index fa7d04f8c6..63d1066dc4 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -110,3 +110,6 @@ Style/TrailingCommaInArrayLiteral: Style/TrailingCommaInHashLiteral: Enabled: false + +RSpec/ExampleLength: + Max: 10 diff --git a/gemfiles/standard.rb b/gemfiles/standard.rb index 65c628b56b..6a11bc3404 100644 --- a/gemfiles/standard.rb +++ b/gemfiles/standard.rb @@ -68,6 +68,6 @@ def standard_dependencies gem 'ruby-lsp', platforms: :mri end - gem 'libmongocrypt-helper', '~> 1.11.0' if ENV['FLE'] == 'helper' + gem 'libmongocrypt-helper', '~> 1.12.0' if ENV['FLE'] == 'helper' end # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength diff --git a/lib/mongo/crypt/binding.rb b/lib/mongo/crypt/binding.rb index 5d4b1fc830..e729802a8b 100644 --- a/lib/mongo/crypt/binding.rb +++ b/lib/mongo/crypt/binding.rb @@ -83,7 +83,7 @@ class Binding # will cause a `LoadError`. # # @api private - MIN_LIBMONGOCRYPT_VERSION = Gem::Version.new("1.7.0") + MIN_LIBMONGOCRYPT_VERSION = Gem::Version.new("1.12.0") # @!method self.mongocrypt_version(len) # @api private @@ -1113,6 +1113,62 @@ def self.check_kms_ctx_status(kms_context) end end + # @!method self.mongocrypt_kms_ctx_usleep(ctx) + # @api private + # + # Indicates how long to sleep before sending KMS request. + # + # @param [ FFI::Pointer ] ctx A pointer to a mongocrypt_ctx_t object. + # @return [ int64 ] A 64-bit encoded number of microseconds of how long to sleep. + attach_function :mongocrypt_kms_ctx_usleep, [:pointer], :int64 + + # Returns number of milliseconds to sleep before sending KMS request + # for the given KMS context. + # + # @param [ Mongo::Crypt::KmsContext ] kms_context KMS Context we are going + # to send KMS request for. + # @return [ Integer ] A 64-bit encoded number of microseconds to sleep. + def self.kms_ctx_usleep(kms_context) + mongocrypt_kms_ctx_usleep(kms_context.kms_ctx_p) + end + + # @!method self.mongocrypt_kms_ctx_fail(ctx) + # @api private + # + # Indicate a network-level failure. + # + # @param [ FFI::Pointer ] ctx A pointer to a mongocrypt_ctx_t object. + # @return [ Boolean ] whether the failed request may be retried. + attach_function :mongocrypt_kms_ctx_fail, [:pointer], :bool + + # Check whether the last failed request for the KMS context may be retried. + # + # @param [ Mongo::Crypt::KmsContext ] kms_context KMS Context + # @return [ true, false ] whether the failed request may be retried. + def self.kms_ctx_fail(kms_context) + mongocrypt_kms_ctx_fail(kms_context.kms_ctx_p) + end + + # @!method self.mongocrypt_setopt_retry_kms(crypt, enable) + # @api private + # + # Enable or disable KMS retry behavior. + # + # @param [ FFI::Pointer ] crypt A pointer to a mongocrypt_t object + # @param [ Boolean ] enable A boolean indicating whether to retry operations. + # @return [ Boolean ] indicating success. + attach_function :mongocrypt_setopt_retry_kms, [:pointer, :bool], :bool + + # Enable or disable KMS retry behavior. + # + # @param [ Mongo::Crypt::Handle ] handle + # @param [ true, false ] value whether to retry operations. + # @return [ true, fale ] true is the option was set, otherwise false. + def self.kms_ctx_setopt_retry_kms(handle, value) + mongocrypt_setopt_retry_kms(handle.ref, value) + end + + # @!method self.mongocrypt_kms_ctx_done(ctx) # @api private # diff --git a/lib/mongo/crypt/context.rb b/lib/mongo/crypt/context.rb index d8c6772999..625c89dc16 100644 --- a/lib/mongo/crypt/context.rb +++ b/lib/mongo/crypt/context.rb @@ -49,7 +49,6 @@ def initialize(mongocrypt_handle, io) Binding.mongocrypt_ctx_new(@mongocrypt_handle.ref), Binding.method(:mongocrypt_ctx_destroy) ) - @encryption_io = io @cached_azure_token = nil end @@ -90,35 +89,13 @@ def run_state_machine(timeout_holder) when :done return nil when :need_mongo_keys - filter = Binding.ctx_mongo_op(self) - - @encryption_io.find_keys(filter, timeout_ms: timeout_ms).each do |key| - mongocrypt_feed(key) if key - end - - mongocrypt_done + provide_keys(timeout_ms) when :need_mongo_collinfo - filter = Binding.ctx_mongo_op(self) - - result = @encryption_io.collection_info(@db_name, filter, timeout_ms: timeout_ms) - mongocrypt_feed(result) if result - - mongocrypt_done + provide_collection_info(timeout_ms) when :need_mongo_markings - cmd = Binding.ctx_mongo_op(self) - - result = @encryption_io.mark_command(cmd, timeout_ms: timeout_ms) - mongocrypt_feed(result) - - mongocrypt_done + provide_markings(timeout_ms) when :need_kms - while kms_context = Binding.ctx_next_kms_ctx(self) do - provider = Binding.kms_ctx_get_kms_provider(kms_context) - tls_options = @mongocrypt_handle.kms_tls_options(provider) - @encryption_io.feed_kms(kms_context, tls_options) - end - - Binding.ctx_kms_done(self) + feed_kms when :need_kms_credentials Binding.ctx_provide_kms_providers( self, @@ -134,6 +111,57 @@ def run_state_machine(timeout_holder) private + def provide_markings(timeout_ms) + cmd = Binding.ctx_mongo_op(self) + + result = @encryption_io.mark_command(cmd, timeout_ms: timeout_ms) + mongocrypt_feed(result) + + mongocrypt_done + end + + def provide_collection_info(timeout_ms) + filter = Binding.ctx_mongo_op(self) + + result = @encryption_io.collection_info(@db_name, filter, timeout_ms: timeout_ms) + mongocrypt_feed(result) if result + + mongocrypt_done + end + + def provide_keys(timeout_ms) + filter = Binding.ctx_mongo_op(self) + + @encryption_io.find_keys(filter, timeout_ms: timeout_ms).each do |key| + mongocrypt_feed(key) if key + end + + mongocrypt_done + end + + def feed_kms + while (kms_context = Binding.ctx_next_kms_ctx(self)) do + begin + delay = Binding.kms_ctx_usleep(kms_context) + sleep(delay / 1_000_000.0) unless delay.nil? + provider = Binding.kms_ctx_get_kms_provider(kms_context) + tls_options = @mongocrypt_handle.kms_tls_options(provider) + @encryption_io.feed_kms(kms_context, tls_options) + rescue Error::KmsError => e + if e.network_error? + if Binding.kms_ctx_fail(kms_context) + next + else + raise + end + else + raise + end + end + end + Binding.ctx_kms_done(self) + end + # Indicate that state machine is done feeding I/O responses back to libmongocrypt def mongocrypt_done Binding.mongocrypt_ctx_mongo_done(ctx_p) diff --git a/lib/mongo/crypt/encryption_io.rb b/lib/mongo/crypt/encryption_io.rb index 6bd5bccf4e..2417652d46 100644 --- a/lib/mongo/crypt/encryption_io.rb +++ b/lib/mongo/crypt/encryption_io.rb @@ -363,8 +363,10 @@ def with_ssl_socket(endpoint, tls_options, timeout_ms: nil) tls_options.merge(socket_options) ) yield(mongo_socket.socket) - rescue => e - raise Error::KmsError, "Error when connecting to KMS provider: #{e.class}: #{e.message}" + rescue Error::KmsError + raise + rescue StandardError => e + raise Error::KmsError.new("Error when connecting to KMS provider: #{e.class}: #{e.message}", network_error: true) ensure mongo_socket&.close end diff --git a/lib/mongo/crypt/handle.rb b/lib/mongo/crypt/handle.rb index b6cd0f7f67..cfc417d2c6 100644 --- a/lib/mongo/crypt/handle.rb +++ b/lib/mongo/crypt/handle.rb @@ -71,7 +71,7 @@ def initialize(kms_providers, kms_tls_options, options={}) Binding.mongocrypt_new, Binding.method(:mongocrypt_destroy) ) - + Binding.kms_ctx_setopt_retry_kms(self, true) @kms_providers = kms_providers @kms_tls_options = kms_tls_options diff --git a/lib/mongo/error/kms_error.rb b/lib/mongo/error/kms_error.rb index 98978d5301..77c1dfdfc4 100644 --- a/lib/mongo/error/kms_error.rb +++ b/lib/mongo/error/kms_error.rb @@ -20,6 +20,15 @@ class Error # A KMS-related error during client-side encryption. class KmsError < CryptError + def initialize(message, code: nil, network_error: nil) + @network_error = network_error + super(message, code: code) + end + end + + # @return [ true, false ] whether this error was caused by a network error. + def network_error? + @network_error == true end end end diff --git a/spec/integration/client_side_encryption/kms_retry_prose_spec.rb b/spec/integration/client_side_encryption/kms_retry_prose_spec.rb new file mode 100644 index 0000000000..29c1bac8ee --- /dev/null +++ b/spec/integration/client_side_encryption/kms_retry_prose_spec.rb @@ -0,0 +1,112 @@ +# frozen_string_literal: true + +require 'spec_helper' + +def simulate_failure(type, times = 1) + url = URI.parse("https://localhost:9003/set_failpoint/#{type}") + data = { count: times }.to_json + http = Net::HTTP.new(url.host, url.port) + http.use_ssl = true + http.verify_mode = OpenSSL::SSL::VERIFY_NONE + http.ca_file = '.evergreen/x509gen/ca.pem' + request = Net::HTTP::Post.new(url.path, { 'Content-Type' => 'application/json' }) + request.body = data + http.request(request) +end + +describe 'KMS Retry Prose Spec' do + require_libmongocrypt + require_enterprise + min_server_version '4.2' + + include_context 'define shared FLE helpers' + + let(:key_vault_client) do + ClientRegistry.instance.new_local_client(SpecConfig.instance.addresses) + end + + let(:client_encryption) do + Mongo::ClientEncryption.new( + key_vault_client, + kms_tls_options: { + aws: default_kms_tls_options_for_provider, + gcp: default_kms_tls_options_for_provider, + azure: default_kms_tls_options_for_provider, + }, + key_vault_namespace: key_vault_namespace, + # For some reason libmongocrypt ignores custom endpoints for Azure and CGP + # kms_providers: aws_kms_providers.merge(azure_kms_providers).merge(gcp_kms_providers) + kms_providers: aws_kms_providers + ) + end + + shared_examples 'kms_retry prose spec' do + it 'createDataKey and encrypt with TCP retry' do + simulate_failure('network') + data_key_id = client_encryption.create_data_key(kms_provider, master_key: master_key) + simulate_failure('network') + expect do + client_encryption.encrypt(123, key_id: data_key_id, algorithm: 'AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic') + end.not_to raise_error + end + + it 'createDataKey and encrypt with HTTP retry' do + simulate_failure('http') + data_key_id = client_encryption.create_data_key(kms_provider, master_key: master_key) + simulate_failure('http') + expect do + client_encryption.encrypt(123, key_id: data_key_id, algorithm: 'AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic') + end.not_to raise_error + end + + it 'createDataKey fails after too many retries' do + simulate_failure('network', 4) + expect do + client_encryption.create_data_key(kms_provider, master_key: master_key) + end.to raise_error(Mongo::Error::KmsError) + end + end + + context 'with AWS KMS provider' do + let(:kms_provider) { 'aws' } + + let(:master_key) do + { + region: 'foo', + key: 'bar', + endpoint: '127.0.0.1:9003', + } + end + + include_examples 'kms_retry prose spec' + end + + context 'with GCP KMS provider', skip: 'For some reason libmongocrypt ignores custom endpoints for Azure and CGP' do + let(:kms_provider) { 'gcp' } + + let(:master_key) do + { + project_id: 'foo', + location: 'bar', + key_ring: 'baz', + key_name: 'qux', + endpoint: '127.0.0.1:9003' + } + end + + include_examples 'kms_retry prose spec' + end + + context 'with Azure KMS provider', skip: 'For some reason libmongocrypt ignores custom endpoints for Azure and CGP' do + let(:kms_provider) { 'azure' } + + let(:master_key) do + { + key_vault_endpoint: '127.0.0.1:9003', + key_name: 'foo', + } + end + + include_examples 'kms_retry prose spec' + end +end diff --git a/spec/integration/client_side_encryption/kms_tls_options_spec.rb b/spec/integration/client_side_encryption/kms_tls_options_spec.rb index 0766662670..a36b1d745b 100644 --- a/spec/integration/client_side_encryption/kms_tls_options_spec.rb +++ b/spec/integration/client_side_encryption/kms_tls_options_spec.rb @@ -323,7 +323,7 @@ } ) rescue Mongo::Error::KmsError => exc - exc.message.should =~ /Error when connecting to KMS provider/ + exc.message.should =~ /Error when connecting to KMS provider|Empty KMS response/ exc.message.should =~ /libmongocrypt error code/ exc.message.should_not =~ /CryptError/ else diff --git a/spec/integration/search_indexes_prose_spec.rb b/spec/integration/search_indexes_prose_spec.rb index 6093aff9d0..1ee3dfd2d9 100644 --- a/spec/integration/search_indexes_prose_spec.rb +++ b/spec/integration/search_indexes_prose_spec.rb @@ -147,7 +147,6 @@ def filter_results(result, names) .first end - # rubocop:disable RSpec/ExampleLength it 'succeeds' do expect(create_index).to be == name helper.wait_for(name) @@ -158,7 +157,6 @@ def filter_results(result, names) expect(index['latestDefinition']).to be == new_definition end - # rubocop:enable RSpec/ExampleLength end # Case 5: dropSearchIndex suppresses namespace not found errors