diff --git a/README.md b/README.md index c3701c2..5ef73bf 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ make install-test # ClientEncryption object can connect to the mock KMS instance export SSL_CERT_FILE=$(pwd)/tests/localhost.crt make test +unset SSL_CERT_FILE ``` ## Mongoengine + Asyncio @@ -89,6 +90,7 @@ the `objects` property will give you access to asynchronous query methods: - `async_count()`: Count the number of documents asynchronously. - `async_update()`: Update documents asynchronously. - `async_insert()`: Insert documents asynchronously. +- `async_delete()`: Delete documents asynchronously. ```python async def get_first_user(): @@ -113,6 +115,9 @@ async def update_user_name(old_name, new_name): async def insert_users(user_names): users = [User(name=name) for name in user_names] await User.objects.async_insert(users) + +async def delete_users(name): + await User.objects(name=name).async_delete() async def main(): await insert_users(["Jane", "John"]) @@ -121,6 +126,7 @@ async def main(): await count_users() await update_user_name("John", "Johnny") await get_user_by_name("Johnny") + await delete_users("Johnny") if __name__ == "__main__": import asyncio @@ -141,3 +147,121 @@ We recommend using `async_to_list()` for small result sets. ## Client-side Field Level Encryption + +Mongoengine-plus introduces a new field type called `EncryptedString` that implements +Client-side Field Level Encryption ([CSFLE](https://www.mongodb.com/docs/manual/core/csfle/)). +This feature allows explicit data encryption before sending it over the network to MongoDB, +and automatic data decryption after reading from MongoDB. It supports both synchronous +and asynchronous operations. Currently, the `EncryptedString` implementation supports +the AWS KMS service as the Key Management Service (KMS) provider. 
+ +```python +from mongoengine import Document, StringField +from mongoengine_plus.types import EncryptedString +from pymongo.encryption import Algorithm + +class User(Document): + id = StringField(primary_key=True) + ssn = EncryptedString( + algorithm=Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic + ) + + +user = User(id='US1', ssn='12345') +user.save() +print(user.ssn) # Output: '12345' + +user_ = User.objects.get(id='US1') +print(user_.ssn) # Output: '12345' + +``` + +There are a few steps before you can start using `EncryptedString`. + +### 1. Create a Data Encryption Key (DEK) + +Before using `EncryptedString`, you'll need to create a Data Encryption Key (DEK) +for encrypting and decrypting your data. The DEK should follow the recommended +requirements described in the official MongoDB documentation on [Keys and Key Vaults](https://www.mongodb.com/docs/manual/core/csfle/fundamentals/keys-key-vaults/#std-label-csfle-reference-keys-key-vaults). +We've provided a helper method to create your DEK easily. + +```python +from mongoengine import connect +from mongoengine_plus.types.encrypted_string.base import create_data_key + +connect(host='mongodb://localhost:27017/db') + +create_data_key( + kms_provider=dict( + aws=dict( + accessKeyId='your-aws-key-id', + secretAccessKey='your-aws-secret-access-key' + ) + ), + key_namespace='encryption.__keyVault', + key_arn='arn:aws:kms:us-east-1:111122223333:key/your-key-id', + key_name='my_key_name', + kms_connection_url='https://kms.us-east-1.amazonaws.com', + kms_region_name='us-east-1', +) +``` + +You'll need to execute this step only once during the project setup. Ensure that your +MongoDB user has the necessary permissions for collection and index creation, and +access to the AWS KMS key. + +### 2. Configure `EncryptedString` + +Since `EncryptedString` needs to read the DEK from your MongoDB instance and access the +KMS key for encryption/decryption, you'll need to configure it as follows. 
This +configuration might be in your `__init__.py` file and should be executed once. + +```python +from mongoengine import Document, StringField +from mongoengine_plus.types import EncryptedString +from pymongo.encryption import Algorithm + + +EncryptedString.configure_aws_kms( + 'encryption.__keyVault', + 'my_key_name', + 'your-aws-key-id', + 'your-aws-secret-access-key', + 'us-east-1', +) + + +class User(Document): + id = StringField(primary_key=True) + ssn = EncryptedString( + algorithm=Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic + ) +``` + +Now you are ready to go! + +### 3. Optimize KMS requests (optional) + +There's a caveat in the `EncryptedString` implementation. Every time `EncryptedString` needs +to encrypt or decrypt data, it uses the `pymongo.encryption.ClientEncryption`, +which makes a request to the AWS KMS service endpoint. This can potentially slow down +the performance of reading and writing encrypted data to MongoDB. As a workaround, +we've created a function that patches this behavior and caches the data key. + +```python +from mongoengine_plus.types.encrypted_string import cache_kms_data_key + + +cache_kms_data_key( + 'encryption.__keyVault', + 'my_key_name', + 'your-aws-key-id', + 'your-aws-secret-access-key', + 'us-east-1', + 'https://kms.us-east-1.amazonaws.com', +) +``` + +You should execute this function once before making any database write or read operations, +perhaps in your `__init__.py` file. It will retrieve the KMS key and cache it for +subsequent requests. 
diff --git a/mongoengine_plus/types/encrypted_string/__init__.py b/mongoengine_plus/types/encrypted_string/__init__.py index d86f6ee..cc6488f 100644 --- a/mongoengine_plus/types/encrypted_string/__init__.py +++ b/mongoengine_plus/types/encrypted_string/__init__.py @@ -1,4 +1,4 @@ -__all__ = ['EncryptedString', 'patch_kms_request'] +__all__ = ['EncryptedString', 'cache_kms_data_key'] import codecs @@ -8,7 +8,7 @@ from .fields import EncryptedString -def patch_kms_request( +def cache_kms_data_key( key_namespace: str, key_name: str, aws_access_key_id: str, @@ -16,6 +16,11 @@ def patch_kms_request( aws_region_name: str, kms_endpoint_url: str, ) -> None: + """ + Retrieve the KMS key used to encrypt and decrypt data and create a cache + to optimize the usage of `EncryptedString`. You should execute this function once + before making any database write or read operations. + """ from .base import get_data_key data_key = get_data_key( @@ -45,7 +50,6 @@ def patch_kms_request( content_length = len(content) - # En las pruebas kms_response_template = ( f'HTTP/1.1 200 OK\r\n' f'Content-Type: application/x-amz-json-1.1\r\n' diff --git a/tests/types/test_encrypted_string.py b/tests/types/test_encrypted_string.py index 533e962..2436fad 100644 --- a/tests/types/test_encrypted_string.py +++ b/tests/types/test_encrypted_string.py @@ -10,7 +10,7 @@ from mongoengine_plus.models import uuid_field from mongoengine_plus.types import EncryptedString -from mongoengine_plus.types.encrypted_string import patch_kms_request +from mongoengine_plus.types.encrypted_string import cache_kms_data_key from mongoengine_plus.types.encrypted_string.base import ( create_data_key, get_data_key, @@ -131,7 +131,7 @@ def test_query_encrypted_data(user: User) -> None: @pytest.mark.usefixtures('setup_encrypted_string_data_key') -def test_patch_kms_request(kms_connection_url: str) -> None: +def test_cache_kms_request(kms_connection_url: str) -> None: original_kms_request = _EncryptionIO.kms_request import boto3 
@@ -140,7 +140,7 @@ def test_patch_kms_request(kms_connection_url: str) -> None: # certificate verification. This is a workaround and should not be done # in production environments. with patch('boto3.client', partial(boto3.client, verify=False)): - patch_kms_request( + cache_kms_data_key( EncryptedString.key_namespace, EncryptedString.key_name, 'test',