diff --git a/CHANGELOG.md b/CHANGELOG.md
index 683ab3e..edd59a0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.1.1] - 2024-05-13
+
+### Updated
+
+- Upgrade Lambda functions runtime version to Python 3.12
+- Extend the list of supported regions
+
+### Fixed
+
+- Fix Glue job failures occurring when the vault is empty
+- Fix InitiateRetrieval workflow to skip incorrect inventory entries and avoid failing the entire process
+
 ## [1.1.0] - 2024-04-04
 
 ### Added
@@ -16,7 +28,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Implement retry within MetricsProcessor Lambda when encountering TransactionConflict exception
 - Use SHA-256 to generate TransactWriteItems ClientRequestToken as a more secure alternative to MD5 hashing
 
-- Add try-except block around the archive naming logic to prevent the entire Glue job from failing due to a single/few names parsing errors.
+- Add try-except block around the archive naming logic to prevent the entire Glue job from failing due to a single/few names parsing errors
 - Enhance SSM Automation documents descriptions
 - Add user-agents to all service clients to track usage on solution service API usage dashboard
diff --git a/README.md b/README.md
index 800d5ce..8ffa8b5 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ The solution automates the optimized restore, copy, and transfer process and pro
 Copying your Amazon S3 Glacier vault contents to the S3 Glacier Deep Archive storage class combines the low cost and high durability benefits of S3 Glacier Deep Archive, with the familiar Amazon S3 user and application experience that offers simple visibility and access to data. Once your archives are stored as objects in your Amazon S3 bucket, you can add tags to your data to enable items such as attributing data costs on a granular level.
 
 _Note: The solution only copies archives from a source S3 Glacier vault to
- the destination S3 bucket, it does not delete archives in the source S3 Glacier vault. After the solution completes a successful archive copy to the destination S3 bucket, you must manually delete the archives from your S3 Glacier vault.For more information,
+ the destination S3 bucket, it does not delete archives in the source S3 Glacier vault. After the solution completes a successful archive copy to the destination S3 bucket, you must manually delete the archives from your S3 Glacier vault. For more information,
 refer to [Deleting an Archive in Amazon S3 Glacier](https://docs.aws.amazon.com/amazonglacier/latest/dev/deleting-an-archive.html) in the Amazon S3 Glacier Developer Guide._
 
 ## Table of contents
@@ -46,6 +46,8 @@ Copying your Amazon S3 Glacier vault contents to the S3 Glacier Deep Archive sto
 18. The Amazon EventBridge rules periodically initiate Step Functions Extend Download Window and Update CloudWatch Dashboard workflows.
 19. Customers monitor the transfer progress by using the Amazon CloudWatch dashboard.
 
+Refer to the [solution developer guide](./docs/DEVELOPER_GUIDE.md) for more details about the internal components, workflows, and resource dependencies involved in transferring a Glacier Vault to S3.
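+
+As noted above, the solution does not delete archives from the source vault. As a rough, hedged illustration of that manual cleanup step, the boto3 sketch below deletes a single archive from the source vault after its copy has been verified in the destination S3 bucket; the helper, vault name, and archive ID are placeholders and are not part of the solution:
+
+```python
+# Hedged sketch: delete one archive from the source S3 Glacier vault after
+# verifying its copy in the destination S3 bucket (placeholder identifiers).
+import boto3
+
+glacier = boto3.client("glacier")
+
+
+def delete_copied_archive(vault_name: str, archive_id: str) -> None:
+    # accountId="-" refers to the account that owns the credentials in use.
+    glacier.delete_archive(accountId="-", vaultName=vault_name, archiveId=archive_id)
+
+
+# Example usage with placeholder values:
+# delete_copied_archive("my-source-vault", "EXAMPLE_ARCHIVE_ID")
+```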
+
 ## Deploying the solution
 
 ### One-Click deploy From AWS CloudFormation
diff --git a/cdk.json b/cdk.json
index 9e37fbc..08a8dfa 100644
--- a/cdk.json
+++ b/cdk.json
@@ -41,7 +41,7 @@
     "SOLUTION_NAME": "Data transfer from Amazon S3 Glacier vaults to Amazon S3",
     "APP_REGISTRY_NAME": "data_retrieval_for_amazon_glacier_s3",
     "SOLUTION_ID": "SO0293",
-    "SOLUTION_VERSION": "v1.1.0",
+    "SOLUTION_VERSION": "v1.1.1",
     "APPLICATION_TYPE": "AWS-Solutions"
   }
 }
diff --git a/docs/DEVELOPER_GUIDE.md b/docs/DEVELOPER_GUIDE.md
new file mode 100644
index 0000000..cfb82eb
--- /dev/null
+++ b/docs/DEVELOPER_GUIDE.md
@@ -0,0 +1,103 @@
+# Data Transfer from Amazon S3 Glacier Vaults to Amazon S3 - Developer Guide
+## Transfer Workflow Sequence Diagram
+
+The sequence diagram below illustrates the flow of events, starting with the customer's interaction with Systems Manager by executing an automation document to launch a transfer. It shows the internal components, workflows, and resource dependencies involved in transferring a Glacier Vault to S3.
+
+
+![Data transfer from Glacier vaults to S3 - Sequence diagram](./sequence_diagram.png)
+
+## How the Solution Works
+
+The *Data Transfer from Amazon S3 Glacier Vaults to Amazon S3* Solution leverages *Systems Manager* automation documents as entry points, providing a user-friendly interface for initiating transfer workflows. After deploying the Solution, two automation documents are created to capture all necessary inputs from users: `LaunchAutomationRunbook` and `ResumeAutomationRunbook`.
+
+* `LaunchAutomationRunbook` initiates a new orchestrator workflow to transfer archives from a Glacier vault to an S3 bucket.
+* `ResumeAutomationRunbook` initiates an orchestrator workflow to resume a partially completed transfer from a Glacier vault to an S3 bucket. This can happen due to failures during the initial transfer or intentional stops by the customer.
+
+
+
+### Orchestrator workflow
+
+Once users provide the necessary inputs and execute the automation document, the main orchestrator Step Function `OrchestratorStateMachine` is triggered; a brief launch sketch follows the workflow steps below. This orchestrator Step Function consists of a series of steps designed to manage the entire transfer process:
+
+1. Metadata regarding the entire transfer is stored in `GlacierObjectRetrieval` DynamoDB table.
+2. A nested Step Function `ArchivesStatusCleanupStateMachine` is triggered to clean up any outdated archive statuses in DynamoDB resulting from partial transfers or previously failed transfers.
+3. Following this, a nested Step Function `InventoryRetrievalStateMachine` is triggered to retrieve the *Inventory* file. Refer to [Vault Inventory](https://docs.aws.amazon.com/amazonglacier/latest/dev/vault-inventory.html).
+4. Upon completing the *Inventory* file download, two *EventBridge* rules are configured to periodically trigger nested Step Functions `ExtendDownloadWindowStateMachine` and `CloudWatchDashboardUpdateStateMachine`.
+5. Subsequently, a nested Step Function `InitiateRetrievalStateMachine` is triggered, which iterates through the archive records listed in the downloaded *Inventory* file and initiates a retrieval job for each archive.
+6. Afterward, another *EventBridge* rule is configured to periodically trigger `CompletionChecker` Lambda function, which checks the completion of the `ArchiveRetrieval` workflow.
+7. Subsequently, the workflow enters an asynchronous wait, pausing until the `ArchiveRetrieval` workflow concludes before the nested `CleanupStateMachine` Step Function is triggered.
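+
+As referenced above, the sketch below shows how a transfer might be launched programmatically by executing the `LaunchAutomationRunbook` automation document with boto3. This is a minimal, hedged illustration only; the document name and parameter keys shown are placeholders, and the actual names and inputs are defined by the deployed Solution:
+
+```python
+# Hedged sketch: start the transfer automation document via Systems Manager.
+# Document and parameter names below are placeholders for this illustration.
+import boto3
+
+ssm = boto3.client("ssm")
+
+response = ssm.start_automation_execution(
+    DocumentName="LaunchAutomationRunbook-example",  # placeholder document name
+    Parameters={
+        "VaultName": ["my-source-vault"],            # placeholder inputs
+        "DestinationBucket": ["my-destination-bucket"],
+    },
+)
+print(response["AutomationExecutionId"])
+```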
+
+### Inventory Retrieval workflow
+
+`InventoryRetrievalStateMachine` Step Function’s purpose is to retrieve the *Inventory* file of a specific Vault, which is then used in subsequent steps to transfer the included archive records.
+`InventoryRetrievalStateMachine` Step Function consists of a series of steps designed to manage the retrieval and downloading of the *Inventory* file:
+
+1. `InventoryRetrievalStateMachine` Step Function has the option (choice step) to bypass the *Inventory* download step if the *Inventory* is already provided by the user.
+2. `RetrieveInventoryInitiateJob` Lambda is invoked to initiate an *Inventory* retrieval job.
+3. Subsequently, the workflow enters an asynchronous wait, pausing until the *Inventory* retrieval job is completed and ready for downloading.
+4. When Glacier sends a job completion event to `AsyncFacilitatorTopic` SNS topic, the topic delivers the message to `NotificationsQueue` SQS queue, which in turn triggers `AsyncFacilitator` Lambda function to unlock the asynchronous wait, allowing the workflow to proceed with the *Inventory* download.
+5. `InventoryChunkDownload` Lambda function is invoked within a Distributed map, with each iteration downloading a chunk of the *Inventory* file.
+6. Following the completion of downloading all chunks, the `InventoryValidation` Lambda function validates the downloaded *Inventory* file and stores it in the `InventoryBucket` S3 bucket under the *original_inventory* prefix.
+7. After that, a Glue job is generated based on a predefined Glue workflow graph definition.
+8. The Glue job filters out archives greater than 5 TB, then sorts the remaining archives by creation date, performs description parsing to extract archive names, handles duplicates, and partitions the *Inventory* into predefined partitions stored in the `InventoryBucket` S3 bucket under the *sorted_inventory* prefix.
+9. After that, `SendAnonymizedStats` Lambda function is invoked to send anonymized operational metrics.
+
+
+### Initiate Archives Retrieval workflow
+
+`InitiateRetrievalStateMachine` Step Function’s purpose is to initiate retrieval jobs for all the archives listed in the *Inventory* file. A Distributed map with a locked concurrency is used to iterate over the ordered inventory stored in S3. `InitiateArchiveRetrieval` Lambda function is invoked to process batches of 100 archives. `InitiateRetrievalStateMachine` processes a portion of the archives included in the *Inventory* file at a time to control the request rate. It then calculates a timeout value based on the size of the processed portion and the time taken to initiate retrieval jobs. This ensures that the total number of initiated archive jobs within a 24-hour period remains below Glacier's daily quota.
+
+1. For each partition, DynamoDB partition metadata is reset; this metadata is used to capture the total number of requested archive jobs within the partition and the time taken for job initiation.
+2. Following this, two nested Distributed maps are utilized: the outer one iterates through S3 CSV partition files located in the `InventoryBucket` S3 bucket under the *sorted_inventory* prefix, while the inner map iterates through individual archive records within each file.
+3. To ensure archives are requested in order, concurrency for Distributed maps is set to 1.
+4. `InitiateArchiveRetrieval` Lambda function is invoked within the inner Distributed map, which is configured with a batch size of 100, enabling it to concurrently initiate 100 archive jobs and achieve nearly 100 transactions per second (TPS).
+5. Upon completing the requests for all archives within a partition, `CalculateTimeout` Lambda is invoked to calculate the wait time before initiating requests for the next partition.
+6. The timeout value is calculated based on partition size, Glacier daily quota, and the time elapsed to initiate requests.
+
+### Archive Retrieval workflow
+
+Archive retrieval follows an event-driven architecture:
+
+1. When a job is ready, Glacier sends a notification to the `AsyncFacilitatorTopic` SNS topic. This SNS topic is set up to deliver messages to `NotificationsQueue` SQS queue, indicating the completion of the job.
+2. `NotificationsQueue` SQS queue triggers `NotificationsProcessor` Lambda function, responsible for processing notifications and initiating the initial steps for archive retrieval. This includes starting a multipart upload and calculating the number and sizes of chunks.
+3. Then, `NotificationsProcessor` Lambda function places messages for chunk retrieval in `ChunksRetrievalQueue` SQS queue for further processing.
+4. `ChunksRetrievalQueue` SQS queue triggers `ChunkRetrieval` Lambda function to retrieve an archive chunk.
+    1. First, `ChunkRetrieval` Lambda function downloads the chunk from Glacier.
+    2. Then, it uploads a multipart upload part to the S3 destination bucket.
+    3. After a new chunk is downloaded, its metadata is stored in `GlacierObjectRetrieval` DynamoDB table.
+    4. `ChunkRetrieval` Lambda function verifies whether all chunks for a particular archive have been processed. If so, it inserts an event into `ValidationQueue` SQS queue to trigger `ArchiveValidation` Lambda function.
+5. `ArchiveValidation` Lambda function conducts hash validation and integrity checks before closing the S3 multipart upload.
+6. `MetricTable` DynamoDB stream invokes `MetricsProcessor` Lambda function to update transfer process metrics in `MetricTable` DynamoDB table.
+
+
+### Archives Status Cleanup workflow
+
+`ArchivesStatusCleanupStateMachine` Step Function’s purpose is to update any outdated archive statuses in DynamoDB to a terminated status, especially when resuming a partial transfer or a previously failed transfer attempt. `ArchivesStatusCleanupStateMachine` Step Function is triggered prior to initiating a transfer workflow.
+
+### Download Window extension workflow
+
+`ExtendDownloadWindowStateMachine` Step Function’s purpose is to account for the 24-hour download window for each archive. At predefined intervals, staged archives are checked to see how much time is left in their download window. If the remaining time is less than five hours, another retrieval job is initiated (there is no additional cost to re-stage already staged archives).
+
+`ExtendDownloadWindowStateMachine` Step Function consists of a series of steps designed to manage the extension process for the download window of staged archives nearing expiration:
+
+1. First, `ArchivesNeedingWindowExtension` Lambda function is invoked to query DynamoDB, capturing all archives already staged but set to expire within the next 5 hours.
+2. Then, this Lambda function generates a JSON file in S3 containing the archives needing a download window extension, and writes this file to the inventory bucket.
+3. Then, a Distributed map iterates over the generated JSON file, and batch processes these archives.
+4. 
Within this Distributed map, `ExtendDownloadInitiateRetrieval` Lambda function is invoked to batch initiate requests for the archives which are included in the generated JSON file + +### CloudWatch Dashboard Update workflow + +`CloudWatchDashboardUpdateStateMachine` Step Function’s purpose is to update the Solution’s custom CloudWatch dashboard `CloudWatchDashboard` every 5 minutes, refreshing the dashboard with the recently collected metrics. + +`CloudWatchDashboardUpdateStateMachine` Step Function consists of a series of steps, designed to refresh the dashboard with the recently collected metrics: + +1. Begin by querying `MetricTable` DynamoDB table +2. Subsequently, call *PutMetricData* API to update CloudWatch dashboard + +### Completion Checker workflow + +`CompletionChecker` Lambda function’s purpose is to periodically verify the completion status of the `ArchiveRetrieval` workflow. It accomplishes this by checking the `MetricTable` DynamoDB table to compare the count of downloaded archives with the total number of archives originally in the Vault. When the counts are equal, it concludes the asynchronous wait and proceeds to the `CleanupStateMachine` Step Function. + +### Cleanup workflow + +`CleanupStateMachine` Step Function’s purpose is to perform post-transfer cleanup tasks. This includes removing all incomplete multipart uploads and stopping all *EventBridge* rules to prevent periodic workflows from being triggered after the transfer’s termination. diff --git a/docs/sequence_diagram.png b/docs/sequence_diagram.png new file mode 100644 index 0000000..4483a22 Binary files /dev/null and b/docs/sequence_diagram.png differ diff --git a/pyproject.toml b/pyproject.toml index d0bd32a..29952f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ package-dir = {"" = "source"} [project] name = "solution" -version = "1.1.0" +version = "1.1.1" description = "Data transfer from Amazon S3 Glacier vaults to Amazon S3" readme = "README.md" requires-python = ">=3.10" @@ -22,15 +22,16 @@ classifiers = [ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Typing :: Typed", ] dependencies = [ - "boto3", - "aws-cdk-lib==2.91.0", - "aws-cdk.aws-glue-alpha", - "aws-cdk.aws-servicecatalogappregistry-alpha", + "boto3==1.34.91", + "aws-cdk-lib==2.139.0", + "aws-cdk.aws-glue-alpha==2.139.0a0", + "aws-cdk.aws-servicecatalogappregistry-alpha==2.139.0a0", "constructs>=10.0.0,<11.0.0", - "cdk-nag", + "cdk-nag==2.28.12", ] [project.scripts] @@ -39,28 +40,28 @@ mock-glacier = "solution.application.mocking.mock_glacier_generator:write_mock_g [project.optional-dependencies] dev = [ - "tox", + "tox==4.14.2", "black==23.12.1", "pytest==7.4.3", - "pytest-cov", - "cdk-nag", + "pytest-cov==5.0.0", + "cdk-nag==2.28.12", "mypy==1.7.1", - "moto==4.1.10", - "aws-lambda-powertools[aws-sdk]", - "boto3-stubs-lite[essential]", - "boto3-stubs-lite[cloudformation]", - "boto3-stubs-lite[dynamodb]", - "boto3-stubs-lite[sqs]", - "boto3-stubs-lite[sns]", - "boto3-stubs-lite[s3]", - "boto3-stubs-lite[iam]", - "boto3-stubs-lite[stepfunctions]", - "boto3-stubs-lite[glacier]", - "boto3-stubs-lite[events]", - "pyspark", - "boto3-stubs-lite[ssm]", - "boto3-stubs[logs]", - "types-pyyaml" + "moto==4.1.15", + "pyspark==3.5.1", + "types-pyyaml==6.0.12.20240311", + "aws-lambda-powertools[aws-sdk]==2.37.0", + "boto3-stubs-lite[essential]==1.34.91", + "boto3-stubs-lite[cloudformation]==1.34.91", + 
"boto3-stubs-lite[dynamodb]==1.34.91", + "boto3-stubs-lite[sqs]==1.34.91", + "boto3-stubs-lite[sns]==1.34.91", + "boto3-stubs-lite[s3]==1.34.91", + "boto3-stubs-lite[iam]==1.34.91", + "boto3-stubs-lite[stepfunctions]==1.34.91", + "boto3-stubs-lite[glacier]==1.34.91", + "boto3-stubs-lite[events]==1.34.91", + "boto3-stubs-lite[ssm]==1.34.91", + "boto3-stubs[logs]==1.34.91" ] [tool.isort] @@ -68,4 +69,4 @@ profile = "black" known_first_party = "solution" [tool.bandit] -exclude_dirs = ["tests"] +exclude_dirs = ["tests"] \ No newline at end of file diff --git a/solution-manifest.yaml b/solution-manifest.yaml new file mode 100644 index 0000000..6e86858 --- /dev/null +++ b/solution-manifest.yaml @@ -0,0 +1,9 @@ +--- +id: SO0293 # Solution Id +name: data-transfer-from-amazon-s3-glacier-vaults-to-amazon-s3 # trademarked name +version: v1.1.1 # current version of the solution. Used to verify template headers +cloudformation_templates: # This list should match with AWS CloudFormation templates section of IG + - template: data-transfer-from-amazon-s3-glacier-vaults-to-amazon-s3.template + main_template: true +build_environment: + build_image: 'aws/codebuild/standard:7.0' # Options include: 'aws/codebuild/standard:5.0','aws/codebuild/standard:6.0','aws/codebuild/standard:7.0','aws/codebuild/amazonlinux2-x86_64-standard:4.0','aws/codebuild/amazonlinux2-x86_64-standard:5.0' diff --git a/source/solution/application/archive_retrieval/initiator.py b/source/solution/application/archive_retrieval/initiator.py index 2839d25..b068114 100644 --- a/source/solution/application/archive_retrieval/initiator.py +++ b/source/solution/application/archive_retrieval/initiator.py @@ -134,6 +134,9 @@ def extend_request( glacier_client, vault_name, sns_topic, archive_id, tier, account_id ) + if not job_id: + return + ddb_client.update_item( TableName=os.environ[OutputKeys.GLACIER_RETRIEVAL_TABLE_NAME], Key=GlacierTransferMetadataRead( @@ -154,7 +157,7 @@ def glacier_initiate_job( archive_id: str, tier: str, account_id: str, -) -> str: +) -> str | None: job_parameters: JobParametersTypeDef = { "Type": GlacierJobType.ARCHIVE_RETRIEVAL, "SNSTopic": sns_topic, @@ -168,7 +171,10 @@ def glacier_initiate_job( jobParameters=job_parameters, ) except Exception as e: - logger.error(f"An error occurred while initiating job: {e}") + logger.error( + f"An error occurred while initiating job for {job_parameters}. 
Error: {e}" + ) + return None return initiate_job_response["jobId"] @@ -197,6 +203,9 @@ def initiate_request( glacier_client, vault_name, sns_topic, archive_id, tier, account_id ) + if not job_id: + return + archive_metadata = GlacierTransferMetadata( workflow_run=workflow_run, glacier_object_id=archive_id, diff --git a/source/solution/application/handlers.py b/source/solution/application/handlers.py index 0567cc2..29876de 100644 --- a/source/solution/application/handlers.py +++ b/source/solution/application/handlers.py @@ -267,7 +267,7 @@ def archive_naming_overrides( # This is necessary for cases when the user does not provide a naming override file create_header_file(event["WorkflowRun"]) - if event.get("NameOverridePresignedURL") is not None: + if event.get("NameOverridePresignedURL") not in (None, ""): upload_provided_file(event["WorkflowRun"], event["NameOverridePresignedURL"]) else: logger.info("No name override file is provided.") diff --git a/source/solution/application/mocking/mock_glacier_apis.py b/source/solution/application/mocking/mock_glacier_apis.py index 2b8a104..5a50868 100644 --- a/source/solution/application/mocking/mock_glacier_apis.py +++ b/source/solution/application/mocking/mock_glacier_apis.py @@ -64,6 +64,7 @@ def initiate_job( if archive_id is None else GlacierJobType.ARCHIVE_RETRIEVAL, "archive_id": jobParameters.get("ArchiveId"), + "aws_partition": "test", } client.invoke( FunctionName=os.environ["MOCK_NOTIFY_SNS_LAMBDA_ARN"], diff --git a/source/solution/application/mocking/mock_glacier_data.py b/source/solution/application/mocking/mock_glacier_data.py index 333e00d..d7ee190 100644 --- a/source/solution/application/mocking/mock_glacier_data.py +++ b/source/solution/application/mocking/mock_glacier_data.py @@ -108,6 +108,27 @@ }, }, }, + "test_empty_vault": { + "initiate-job": { + "inventory-retrieval": { + "ResponseMetadata": {"HTTPStatusCode": 202, "RetryAttempts": 0}, + "location": "//vaults/test_empty_vault/jobs/IEQH524YNG5BY1A2ROGUBBB8AYN1B7O259OWOO3SB09GLSHV616MTS56ZC4PZ0LX9XF26GK7ZX5B4CTZKK6OAM89OZ6W", + "jobId": "IEQH524YNG5BY1A2ROGUBBB8AYN1B7O259OWOO3SB09GLSHV616MTS56ZC4PZ0LX9XF26GK7ZX5B4CTZKK6OAM89OZ6W", + } + }, + "inventory-metadata": {"size": 63, "chunkSize": 1073741824}, + "get-job-output": { + "IEQH524YNG5BY1A2ROGUBBB8AYN1B7O259OWOO3SB09GLSHV616MTS56ZC4PZ0LX9XF26GK7ZX5B4CTZKK6OAM89OZ6W": { + "bytes=0-62": { + "ResponseMetadata": {"HTTPStatusCode": 200, "RetryAttempts": 0}, + "status": 200, + "contentType": "application/json", + "body": "ArchiveId,ArchiveDescription,CreationDate,Size,SHA256TreeHash\r\n", + "checksum": "3ef5b49203b917288f1f9495c729076bbc46a3eada2c8d4ad6f584c9f27ebac5", + } + } + }, + }, "test_vault_chunk_generation_vault": { "initiate-job": { "inventory-retrieval": { diff --git a/source/solution/application/mocking/mock_glacier_generator.py b/source/solution/application/mocking/mock_glacier_generator.py index d092f20..b3b09ff 100644 --- a/source/solution/application/mocking/mock_glacier_generator.py +++ b/source/solution/application/mocking/mock_glacier_generator.py @@ -130,6 +130,17 @@ def _vault_generator( return vault.mock_data() +@mock_vault +def test_empty_vault(vault: MockGlacierVault) -> Dict[str, Any]: + inventory_job_id = vault.initiate_job({"Type": GlacierJobType.INVENTORY_RETRIEVAL}) + chunk_size = calculate_chunk_size(vault.inventory_size) + vault.set_inventory_metadata(inventory_job_id, chunk_size) + chunk_array = generate_chunk_array(vault.inventory_size, chunk_size, False) + for chunk in chunk_array: + 
vault.get_job_output(inventory_job_id, range=f"bytes={chunk}") + return vault.mock_data() + + @mock_vault def test_vault_chunk_generation_vault(vault: MockGlacierVault) -> Dict[str, Any]: return _vault_generator(vault, 45000, False) diff --git a/source/solution/application/mocking/notify_sns.py b/source/solution/application/mocking/notify_sns.py index 62fafdc..4d344b0 100644 --- a/source/solution/application/mocking/notify_sns.py +++ b/source/solution/application/mocking/notify_sns.py @@ -32,6 +32,7 @@ def notify_sns_job_completion( archive_id: str, ) -> None: client: SNSClient = boto3.client("sns", config=__boto_config__) + aws_partition = client.meta.partition mock_client = MockGlacierAPIs() if retrieval_type == GlacierJobType.INVENTORY_RETRIEVAL: message = { @@ -48,7 +49,7 @@ def notify_sns_job_completion( "SNSTopic": sns_topic, "StatusCode": "Succeeded", "StatusMessage": "Succeeded", - "VaultARN": f"arn:aws:glacier:{os.environ['AWS_REGION']}:{account_id}:vaults/{vault_name}", + "VaultARN": f"arn:{aws_partition}:glacier:{os.environ['AWS_REGION']}:{account_id}:vaults/{vault_name}", } else: inventory_body = generate_inventory_for_archive_retrieval( @@ -81,7 +82,7 @@ def notify_sns_job_completion( "StatusCode": "Succeeded", "StatusMessage": "Succeeded", "Tier": "Bulk", - "VaultARN": f"arn:aws:glacier:{os.environ['AWS_REGION']}:{account_id}:vaults/{vault_name}", + "VaultARN": f"arn:{aws_partition}:glacier:{os.environ['AWS_REGION']}:{account_id}:vaults/{vault_name}", } time.sleep(NOTIFICATION_DELAY_IN_SEC) diff --git a/source/solution/application/model/workflow_metadata_model.py b/source/solution/application/model/workflow_metadata_model.py index bc588b1..6fed1e2 100644 --- a/source/solution/application/model/workflow_metadata_model.py +++ b/source/solution/application/model/workflow_metadata_model.py @@ -23,7 +23,10 @@ class WorkflowMetadataRecord(Model): ) storage_class: str = Model.field(["storage_class", "S"]) retrieval_tier: str = Model.field(["retrieval_tier", "S"]) - + provided_inventory: str = Model.field(["provided_inventory", "S"]) + transfer_type: str = Model.field(["transfer_type", "S"]) + naming_override_file: str = Model.field(["naming_override_file", "S"]) + cross_region_transfer: str = Model.field(["cross_region_transfer", "S"]) sk: ClassVar[str] = "meta" _pk: str = Model.view(["pk", "S"], "workflow_run") diff --git a/source/solution/application/operational_metrics/anonymized_stats.py b/source/solution/application/operational_metrics/anonymized_stats.py index 4961ae2..a488dae 100644 --- a/source/solution/application/operational_metrics/anonymized_stats.py +++ b/source/solution/application/operational_metrics/anonymized_stats.py @@ -51,6 +51,12 @@ def send_job_stats(stats_type: str, workflow_run: str) -> None: "RetrievalTier": workflow_metadata_record.retrieval_tier, "StartTime": workflow_metadata_record.start_time, "DailyQuota": workflow_metadata_record.daily_quota, + "ProvidedInventory": workflow_metadata_record.provided_inventory, + "TransferType": workflow_metadata_record.transfer_type, + "CrossRegionTransfer": workflow_metadata_record.cross_region_transfer, + "NamingOverrideFile": "NO" + if workflow_metadata_record.naming_override_file == "" + else "YES", "VaultSize": metric_record.size_total, "ArchiveCount": metric_record.count_total, }, @@ -93,6 +99,8 @@ def send_stats(anonymized_data: Dict[str, Any]) -> None: ) return + logger.debug(f"Stats anonymized data: {anonymized_data}") + request = json.dumps(anonymized_data).encode("utf-8") headers = {"Content-Type": 
"application/json", "Content-Length": str(len(request))} diff --git a/source/solution/infrastructure/glue_helper/glue_sfn_update.py b/source/solution/infrastructure/glue_helper/glue_sfn_update.py index 1f05ac8..49f1d95 100644 --- a/source/solution/infrastructure/glue_helper/glue_sfn_update.py +++ b/source/solution/infrastructure/glue_helper/glue_sfn_update.py @@ -73,7 +73,9 @@ StructField("ArchiveDescription", StringType(), True), StructField("CreationDate", StringType(), True), StructField("Size", StringType(), True), - StructField("SHA256TreeHash", StringType(), True) + StructField("SHA256TreeHash", StringType(), True), + StructField("Filename", StringType(), True), + StructField("PartitionId", StringType(), True) ]) df = spark.createDataFrame([], schema) dyf = DynamicFrame.fromDF(df, glueContext, "skipped_archives") @@ -245,66 +247,48 @@ def autogenerate_etl_script(self, id: Optional[str] = None) -> tasks.CallAwsServ }, "node-5": { "CustomCode": { - "Name": ARCHIVE_NAMING_CODE_NAME, + "Name": "ValidateInput<=5TB", "Inputs": ["node-3"], - "ClassName": ARCHIVE_NAMING_CODE_NAME, - "Code": self.custom_code, + "ClassName": "validate_source_table", + "Code": self.validate_source_input_custom_code, } }, "node-6": { "CustomCode": { - "Name": "validate_source_input", + "Name": "ValidateInput>5TB", "Inputs": ["node-4"], "ClassName": "validate_source_table", "Code": self.validate_source_input_custom_code, } }, "node-7": { - "S3CsvSource": { - "Name": "S3 bucket - Naming Overrides", - "Paths.$": f"States.Array(States.Format('s3://{self.s3_bucket_name}/{{}}/naming_overrides/', $.workflow_run))", - "QuoteChar": "quote", - "Separator": "comma", - "Recurse": True, - "WithHeader": True, - "Escaper": "", - "OutputSchemas": [ - {"Columns": self.archive_naming_override_columns} - ], - }, - }, - "node-8": { "SelectFromCollection": { - "Name": "SelectFromCollection", + "Name": "SelectFromCollection<=5TB", "Inputs": ["node-5"], "Index": 0, } }, - "node-9": { + "node-8": { "SelectFromCollection": { - "Name": "SelectFromCollection", + "Name": "SelectFromCollection>5TB", "Inputs": ["node-6"], "Index": 0, } }, - "node-10": { - "SparkSQL": { - "Name": "SQL sorting", - "Inputs": ["node-7", "node-8"], - "SqlQuery": SORTING_SQL_QUERY, - "SqlAliases": [ - {"From": "node-7", "Alias": "namingOverrides"}, - {"From": "node-8", "Alias": "myDataSource"}, - ], - "OutputSchemas": [{"Columns": self.csv_file_columns_output}], + "node-9": { + "CustomCode": { + "Name": ARCHIVE_NAMING_CODE_NAME, + "Inputs": ["node-7"], + "ClassName": ARCHIVE_NAMING_CODE_NAME, + "Code": self.custom_code, } }, - "node-11": { + "node-10": { "SparkSQL": { "Name": "SQL Metric", - "Inputs": ["node-3"], + "Inputs": ["node-7"], "SqlQuery": METRIC_COLLECTION_SQL_QUERY, - "SqlAliases": [{"From": "node-3", "Alias": "myDataSource"}], + "SqlAliases": [{"From": "node-7", "Alias": "myDataSource"}], "OutputSchemas": [ { "Columns": [ @@ -315,22 +299,12 @@ def autogenerate_etl_script(self, id: Optional[str] = None) -> tasks.CallAwsServ ], } }, - "node-12": { - "S3DirectTarget": { - "Name": "S3 bucket - Not Migrated", - "Inputs": ["node-9"], - "Compression": "none", - "Format": "csv", - "SchemaChangePolicy": {"EnableUpdateCatalog": False}, - "Path.$": f"States.Format('s3://{self.s3_bucket_name}/{{}}/not_migrated/', $.workflow_run)", - } - }, - "node-13": { + "node-11": { "SparkSQL": { "Name": "SQL Skipped Metric", - "Inputs": ["node-9"], + "Inputs": ["node-8"], "SqlQuery": METRIC_COLLECTION_SQL_QUERY, - "SqlAliases": [{"From": "node-9", "Alias": "myDataSource"}], + 
"SqlAliases": [{"From": "node-8", "Alias": "myDataSource"}], "OutputSchemas": [ { "Columns": [ @@ -341,31 +315,74 @@ def autogenerate_etl_script(self, id: Optional[str] = None) -> tasks.CallAwsServ ], } }, - "node-14": { + "node-12": { "S3DirectTarget": { - "Name": "S3 bucket - Sorted Inventory", - "Inputs": ["node-10"], - "PartitionKeys.$": "States.Array(States.Array('PartitionId'))", + "Name": "S3 bucket - Not Migrated", + "Inputs": ["node-8"], "Compression": "none", "Format": "csv", "SchemaChangePolicy": {"EnableUpdateCatalog": False}, - "Path.$": f"States.Format('s3://{self.s3_bucket_name}/{{}}/sorted_inventory/', $.workflow_run)", + "Path.$": f"States.Format('s3://{self.s3_bucket_name}/{{}}/not_migrated/', $.workflow_run)", + } + }, + "node-13": { + "SelectFromCollection": { + "Name": "SelectFromCollection", + "Inputs": ["node-9"], + "Index": 0, + } + }, + "node-14": { + "CustomCode": { + "Name": METRIC_COLLECTION_CUSTOM_CODE_NAME, + "Inputs": ["node-10", "node-11"], + "ClassName": METRIC_COLLECTION_CUSTOM_CODE_NAME, + "Code": self.metric_collection_code, } }, "node-15": { + "S3CsvSource": { + "Name": "S3 bucket - Naming Overrides", + "Paths.$": f"States.Array(States.Format('s3://{self.s3_bucket_name}/{{}}/naming_overrides/', $.workflow_run))", + "QuoteChar": "quote", + "Separator": "comma", + "Recurse": True, + "WithHeader": True, + "Escaper": "", + "OutputSchemas": [ + {"Columns": self.archive_naming_override_columns} + ], + }, + }, + "node-16": { + "SparkSQL": { + "Name": "SQL sorting", + "Inputs": ["node-15", "node-13"], + "SqlQuery": SORTING_SQL_QUERY, + "SqlAliases": [ + {"From": "node-15", "Alias": "namingOverrides"}, + {"From": "node-13", "Alias": "myDataSource"}, + ], + "OutputSchemas": [{"Columns": self.csv_file_columns_output}], + } + }, + "node-17": { "CustomCode": { "Name": VALIDATION_CUSTOM_CODE_NAME, - "Inputs": ["node-3", "node-10"], + "Inputs": ["node-7", "node-16"], "ClassName": VALIDATION_CUSTOM_CODE_NAME, "Code": VALIDATION_CODE, } }, - "node-16": { - "CustomCode": { - "Name": METRIC_COLLECTION_CUSTOM_CODE_NAME, - "Inputs": ["node-11", "node-13"], - "ClassName": METRIC_COLLECTION_CUSTOM_CODE_NAME, - "Code": self.metric_collection_code, + "node-18": { + "S3DirectTarget": { + "Name": "S3 bucket - Sorted Inventory", + "Inputs": ["node-16"], + "PartitionKeys.$": "States.Array(States.Array('PartitionId'))", + "Compression": "none", + "Format": "csv", + "SchemaChangePolicy": {"EnableUpdateCatalog": False}, + "Path.$": f"States.Format('s3://{self.s3_bucket_name}/{{}}/sorted_inventory/', $.workflow_run)", } }, } diff --git a/source/solution/infrastructure/glue_helper/scripts/metric_collection_script.py b/source/solution/infrastructure/glue_helper/scripts/metric_collection_script.py index ab19910..3825bfa 100644 --- a/source/solution/infrastructure/glue_helper/scripts/metric_collection_script.py +++ b/source/solution/infrastructure/glue_helper/scripts/metric_collection_script.py @@ -8,7 +8,7 @@ import boto3 from botocore.config import Config -__boto_config__ = Config(user_agent_extra="AwsSolution/SO0293/v1.1.0") +__boto_config__ = Config(user_agent_extra="AwsSolution/SO0293/v1.1.1") def update_metric_table( diff --git a/source/solution/infrastructure/helpers/nested_distributed_map.py b/source/solution/infrastructure/helpers/nested_distributed_map.py index dac9305..de4c48c 100644 --- a/source/solution/infrastructure/helpers/nested_distributed_map.py +++ b/source/solution/infrastructure/helpers/nested_distributed_map.py @@ -33,7 +33,7 @@ def __init__( max_items_per_batch: 
Optional[int] = None, ) -> None: inner_item_reader_config = ItemReaderConfig( - item_reader_resource="arn:aws:states:::s3:getObject", + item_reader_resource=f"arn:aws:states:::s3:getObject", reader_config={ "InputType": "CSV", "CSVHeaderLocation": "FIRST_ROW", @@ -42,7 +42,7 @@ def __init__( ) inner_result_config = ResultConfig( result_writer={ - "Resource": "arn:aws:states:::s3:putObject", + "Resource": f"arn:aws:states:::s3:putObject", "Parameters": { "Bucket": inventory_bucket.bucket_name, "Prefix.$": f"States.Format('{{}}/{nested_distributed_map_id}InnerDistributedMapOutput', $.workflow_run)", @@ -68,7 +68,7 @@ def __init__( ) item_reader_config = ItemReaderConfig( - item_reader_resource="arn:aws:states:::s3:listObjectsV2", + item_reader_resource=f"arn:aws:states:::s3:listObjectsV2", item_reader_parameters={ "Bucket": inventory_bucket.bucket_name, "Prefix.$": "$.prefix", @@ -76,7 +76,7 @@ def __init__( ) result_config = ResultConfig( result_writer={ - "Resource": "arn:aws:states:::s3:putObject", + "Resource": f"arn:aws:states:::s3:putObject", "Parameters": { "Bucket": inventory_bucket.bucket_name, "Prefix.$": f"States.Format('{{}}/{nested_distributed_map_id}DistributedMapOutput', $.workflow_run)", @@ -123,7 +123,7 @@ def configure_step_function( effect=iam.Effect.ALLOW, actions=["states:DescribeExecution", "states:StopExecution"], resources=[ - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:{state_machine.state_machine_name}/*" + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:{state_machine.state_machine_name}/*" ], ), ], @@ -173,7 +173,7 @@ def configure_step_function( "id": "AwsSolutions-IAM5", "reason": "IAM policy needed to run a Distributed Map state. https://docs.aws.amazon.com/step-functions/latest/dg/iam-policies-eg-dist-map.html", "appliesTo": [ - f"Resource::arn:aws:states:::execution:<{state_machine_logical_id}.Name>/*" + f"Resource::arn::states:::execution:<{state_machine_logical_id}.Name>/*" ], } ], diff --git a/source/solution/infrastructure/helpers/solutions_function.py b/source/solution/infrastructure/helpers/solutions_function.py index 085c90e..25eabc0 100644 --- a/source/solution/infrastructure/helpers/solutions_function.py +++ b/source/solution/infrastructure/helpers/solutions_function.py @@ -14,7 +14,7 @@ from solution.application.util.exceptions import ResourceNotFound -DEFAULT_RUNTIME = Runtime.PYTHON_3_11 +DEFAULT_RUNTIME = Runtime.PYTHON_3_12 class SolutionsPythonFunction(Function): @@ -32,7 +32,7 @@ def __init__( if not kwargs.get("role"): kwargs["role"] = self._create_role() - # set runtime to Python 3.11 unless a runtime is passed + # set runtime to Python 3.12 unless a runtime is passed if not kwargs.get("runtime"): kwargs["runtime"] = DEFAULT_RUNTIME diff --git a/source/solution/infrastructure/output_keys.py b/source/solution/infrastructure/output_keys.py index 6bcbfa7..4b8cbc4 100644 --- a/source/solution/infrastructure/output_keys.py +++ b/source/solution/infrastructure/output_keys.py @@ -29,7 +29,6 @@ class OutputKeys: ASYNC_FACILITATOR_LAMBDA_NAME = "AsyncFacilitatorLambdaName" INITIATE_RETRIEVAL_STATE_MACHINE_ARN = "InitiateRetrievalStateMachineArn" RETRIEVE_ARCHIVE_STATE_MACHINE_ARN = "RetrieveArchiveStateMachineArn" - INITIATE_METADATA_STATE_MACHINE_ARN = "InitiateMetadataStateMachineArn" GLACIER_RETRIEVAL_TABLE_NAME = "GlacierRetrievalTableName" INVENTORY_VALIDATION_LAMBDA_ARN = "InventoryValidationLambdaArn" CLEANUP_STATE_MACHINE_ARN = "CleanupStateMachineArn" diff --git 
a/source/solution/infrastructure/ssm_automation_docs/orchestration_automation_document.py b/source/solution/infrastructure/ssm_automation_docs/orchestration_automation_document.py index 9f4fc14..68bc978 100644 --- a/source/solution/infrastructure/ssm_automation_docs/orchestration_automation_document.py +++ b/source/solution/infrastructure/ssm_automation_docs/orchestration_automation_document.py @@ -41,7 +41,7 @@ def __init__( "region": region, "sns_topic_arn": topic_arn, "state_machine_arn": state_machine_arn, - "allow_cross_region_data_transfer": allow_cross_region_data_transfer, + "allow_cross_region_data_transfer": str(allow_cross_region_data_transfer), } @property diff --git a/source/solution/infrastructure/ssm_automation_docs/scripts/orchestration_doc_script.py b/source/solution/infrastructure/ssm_automation_docs/scripts/orchestration_doc_script.py index ac6c442..a5eb3f9 100644 --- a/source/solution/infrastructure/ssm_automation_docs/scripts/orchestration_doc_script.py +++ b/source/solution/infrastructure/ssm_automation_docs/scripts/orchestration_doc_script.py @@ -10,7 +10,7 @@ import boto3 from botocore.config import Config -__boto_config__ = Config(user_agent_extra="AwsSolution/SO0293/v1.1.0") +__boto_config__ = Config(user_agent_extra="AwsSolution/SO0293/v1.1.1") s3_storage_class_mapping: Dict[str, str] = { "S3 Glacier Deep Archive": "DEEP_ARCHIVE", @@ -57,9 +57,13 @@ def script_handler(events, _context): # type: ignore "tier": "Bulk", "workflow_run": workflow_run, "migration_type": migration_type, + "cross_region_transfer": str( + events["allow_cross_region_data_transfer"] + and events["acknowledge_cross_region"] == "YES" + ), "name_override_presigned_url": events["name_override_presigned_url"] if events["name_override_presigned_url"] - else None, + else "", "vault_name": vault_name, } sfn_client.start_execution( diff --git a/source/solution/infrastructure/stack.py b/source/solution/infrastructure/stack.py index 9272675..8d829de 100644 --- a/source/solution/infrastructure/stack.py +++ b/source/solution/infrastructure/stack.py @@ -560,14 +560,12 @@ def __init__( "states:SendTaskFailure", ], resources=[ - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:InventoryRetrievalStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:InventoryRetrievalStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:InitiateMetadataStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:InitiateMetadataStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:RetrieveArchiveStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:RetrieveArchiveStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:OrchestratorStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:OrchestratorStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:InventoryRetrievalStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:InventoryRetrievalStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:RetrieveArchiveStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:RetrieveArchiveStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:OrchestratorStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:OrchestratorStateMachine*", ], ), ) @@ -600,16 +598,14 @@ def __init__( "id": 
"AwsSolutions-IAM5", "reason": "Lambda permission needed to SendTaskSuccess and SendTaskFailure.", "appliesTo": [ - "Resource::arn:aws:states:::stateMachine:InventoryRetrievalStateMachine*", - "Resource::arn:aws:states:::execution:InventoryRetrievalStateMachine*", - "Resource::arn:aws:states:::stateMachine:InitiateMetadataStateMachine*", - "Resource::arn:aws:states:::execution:InitiateMetadataStateMachine*", - "Resource::arn:aws:states:::stateMachine:RetrieveArchiveStateMachine*", - "Resource::arn:aws:states:::execution:RetrieveArchiveStateMachine*", - "Resource::arn:aws:states:::stateMachine:ExtendDownloadWindowStateMachine*", - "Resource::arn:aws:states:::execution:ExtendDownloadWindowStateMachine*", - "Resource::arn:aws:states:::stateMachine:OrchestratorStateMachine*", - "Resource::arn:aws:states:::execution:OrchestratorStateMachine*", + "Resource::arn::states:::stateMachine:InventoryRetrievalStateMachine*", + "Resource::arn::states:::execution:InventoryRetrievalStateMachine*", + "Resource::arn::states:::stateMachine:RetrieveArchiveStateMachine*", + "Resource::arn::states:::execution:RetrieveArchiveStateMachine*", + "Resource::arn::states:::stateMachine:ExtendDownloadWindowStateMachine*", + "Resource::arn::states:::execution:ExtendDownloadWindowStateMachine*", + "Resource::arn::states:::stateMachine:OrchestratorStateMachine*", + "Resource::arn::states:::execution:OrchestratorStateMachine*", ], }, ], diff --git a/source/solution/infrastructure/workflows/archives_status_cleanup.py b/source/solution/infrastructure/workflows/archives_status_cleanup.py index cfab37c..a1088c7 100644 --- a/source/solution/infrastructure/workflows/archives_status_cleanup.py +++ b/source/solution/infrastructure/workflows/archives_status_cleanup.py @@ -100,7 +100,7 @@ def __init__(self, stack_info: StackInfo): ) item_reader_config = ItemReaderConfig( - item_reader_resource="arn:aws:states:::s3:getObject", + item_reader_resource=f"arn:aws:states:::s3:getObject", reader_config={"InputType": "JSON"}, item_reader_parameters={ "Bucket": stack_info.buckets.inventory_bucket.bucket_name, @@ -110,7 +110,7 @@ def __init__(self, stack_info: StackInfo): result_config = ResultConfig( result_writer={ - "Resource": "arn:aws:states:::s3:putObject", + "Resource": f"arn:aws:states:::s3:putObject", "Parameters": { "Bucket": stack_info.buckets.inventory_bucket.bucket_name, "Prefix.$": f"States.Format('{{}}/ArchivesStatusCleanupDistributedMapOutput', $.workflow_run)", @@ -176,7 +176,7 @@ def __init__(self, stack_info: StackInfo): effect=iam.Effect.ALLOW, actions=["states:DescribeExecution", "states:StopExecution"], resources=[ - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:{stack_info.state_machines.cleanup_archives_status_state_machine.state_machine_name}/*" + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:{stack_info.state_machines.cleanup_archives_status_state_machine.state_machine_name}/*" ], ), ], @@ -251,7 +251,7 @@ def __init__(self, stack_info: StackInfo): "id": "AwsSolutions-IAM5", "reason": "IAM policy needed to run a Distributed Map state. 
https://docs.aws.amazon.com/step-functions/latest/dg/iam-policies-eg-dist-map.html", "appliesTo": [ - f"Resource::arn:aws:states:::execution:<{cleanup_archives_status_state_machine_logical_id}.Name>/*" + f"Resource::arn::states:::execution:<{cleanup_archives_status_state_machine_logical_id}.Name>/*" ], }, ], diff --git a/source/solution/infrastructure/workflows/cleanup.py b/source/solution/infrastructure/workflows/cleanup.py index a1e1c37..1980516 100644 --- a/source/solution/infrastructure/workflows/cleanup.py +++ b/source/solution/infrastructure/workflows/cleanup.py @@ -116,7 +116,7 @@ def __init__(self, stack_info: StackInfo): remove_cloudwatch_dashboard_update_target = { "Type": "Task", - "Resource": "arn:aws:states:::aws-sdk:eventbridge:removeTargets", + "Resource": f"arn:{Aws.PARTITION}:states:::aws-sdk:eventbridge:removeTargets", "Parameters": { "Rule": stack_info.eventbridge_rules.cloudwatch_dashboard_update_trigger.rule_name, "Ids.$": "States.Array($.workflow_run)", @@ -136,7 +136,7 @@ def __init__(self, stack_info: StackInfo): cloudwatch_dashboard_update_state_json = { "Type": "Task", - "Resource": "arn:aws:states:::states:startExecution.sync:2", + "Resource": f"arn:{Aws.PARTITION}:states:::states:startExecution.sync:2", "Parameters": { "StateMachineArn": stack_info.state_machines.cloudwatch_dashboard_update_state_machine.state_machine_arn, "Input.$": "$$.Execution.Input", @@ -157,7 +157,7 @@ def __init__(self, stack_info: StackInfo): remove_extend_download_window_target = { "Type": "Task", - "Resource": "arn:aws:states:::aws-sdk:eventbridge:removeTargets", + "Resource": f"arn:{Aws.PARTITION}:states:::aws-sdk:eventbridge:removeTargets", "Parameters": { "Rule": stack_info.eventbridge_rules.extend_download_window_trigger.rule_name, "Ids.$": "States.Array($.workflow_run)", @@ -227,7 +227,7 @@ def __init__(self, stack_info: StackInfo): remove_completion_checker_target = { "Type": "Task", - "Resource": "arn:aws:states:::aws-sdk:eventbridge:removeTargets", + "Resource": f"arn:{Aws.PARTITION}:states:::aws-sdk:eventbridge:removeTargets", "Parameters": { "Rule": stack_info.eventbridge_rules.completion_checker_trigger.rule_name, "Ids.$": "States.Array($.workflow_run)", @@ -379,7 +379,7 @@ def __init__(self, stack_info: StackInfo): effect=iam.Effect.ALLOW, actions=["states:DescribeExecution", "states:StopExecution"], resources=[ - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:{stack_info.state_machines.cloudwatch_dashboard_update_state_machine.state_machine_name}/*" + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:{stack_info.state_machines.cloudwatch_dashboard_update_state_machine.state_machine_name}/*" ], ), ], @@ -402,7 +402,7 @@ def __init__(self, stack_info: StackInfo): "events:DescribeRule", ], resources=[ - f"arn:aws:events:{Aws.REGION}:{Aws.ACCOUNT_ID}:rule/StepFunctionsGetEventsForStepFunctionsExecutionRule" + f"arn:{Aws.PARTITION}:events:{Aws.REGION}:{Aws.ACCOUNT_ID}:rule/StepFunctionsGetEventsForStepFunctionsExecutionRule" ], ), ], @@ -455,7 +455,7 @@ def __init__(self, stack_info: StackInfo): "id": "AwsSolutions-IAM5", "reason": "It's necessary to have wildcard permissions to make orchestrator workflow to StartExecution for the nested workflows", "appliesTo": [ - f"Resource::arn:aws:states:::execution:<{cloudwatch_dashboard_update_logical_id}.Name>/*" + f"Resource::arn::states:::execution:<{cloudwatch_dashboard_update_logical_id}.Name>/*" ], }, ], diff --git a/source/solution/infrastructure/workflows/cloudwatch_dashboard_update.py 
b/source/solution/infrastructure/workflows/cloudwatch_dashboard_update.py index 0c05a5b..a130309 100644 --- a/source/solution/infrastructure/workflows/cloudwatch_dashboard_update.py +++ b/source/solution/infrastructure/workflows/cloudwatch_dashboard_update.py @@ -163,5 +163,7 @@ def put_metrics_to_cloudwatch(self) -> tasks.CallAwsService: service="cloudwatch", action="putMetricData", parameters={"MetricData": metric_data, "Namespace": METRICS_NAMESPACE}, - iam_resources=["arn:aws:states:::aws-sdk:cloudwatch:putMetricData"], + iam_resources=[ + f"arn:{Aws.PARTITION}:states:::aws-sdk:cloudwatch:putMetricData" + ], ) diff --git a/source/solution/infrastructure/workflows/extend_download_window.py b/source/solution/infrastructure/workflows/extend_download_window.py index 5abfa69..40cde8d 100644 --- a/source/solution/infrastructure/workflows/extend_download_window.py +++ b/source/solution/infrastructure/workflows/extend_download_window.py @@ -142,7 +142,7 @@ def __init__(self, stack_info: StackInfo): "glacier:InitiateJob", ], resources=[ - f"arn:aws:glacier:{Aws.REGION}:{Aws.ACCOUNT_ID}:vaults/*" + f"arn:{Aws.PARTITION}:glacier:{Aws.REGION}:{Aws.ACCOUNT_ID}:vaults/*" ], ), ], @@ -165,7 +165,7 @@ def __init__(self, stack_info: StackInfo): stack_info.default_retry.apply_to_steps([extend_download_window_initiate_job]) item_reader_config = ItemReaderConfig( - item_reader_resource="arn:aws:states:::s3:getObject", + item_reader_resource=f"arn:aws:states:::s3:getObject", reader_config={"InputType": "JSON"}, item_reader_parameters={ "Bucket": stack_info.buckets.inventory_bucket.bucket_name, @@ -175,7 +175,7 @@ def __init__(self, stack_info: StackInfo): result_config = ResultConfig( result_writer={ - "Resource": "arn:aws:states:::s3:putObject", + "Resource": f"arn:aws:states:::s3:putObject", "Parameters": { "Bucket": stack_info.buckets.inventory_bucket.bucket_name, "Prefix.$": f"States.Format('{{}}/ExtendDownloadWindowDistributedMapOutput', $.workflow_run)", @@ -244,7 +244,7 @@ def __init__(self, stack_info: StackInfo): effect=iam.Effect.ALLOW, actions=["states:DescribeExecution", "states:StopExecution"], resources=[ - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:{stack_info.state_machines.extend_download_window_state_machine.state_machine_name}/*" + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:{stack_info.state_machines.extend_download_window_state_machine.state_machine_name}/*" ], ), ], @@ -277,7 +277,7 @@ def __init__(self, stack_info: StackInfo): "id": "AwsSolutions-IAM5", "reason": "It's necessary to have wildcard permissions for archive retrieval initiate job, since the vault name is an input that is not known in advance", "appliesTo": [ - "Resource::arn:aws:glacier:::vaults/*" + "Resource::arn::glacier:::vaults/*" ], }, ], @@ -341,7 +341,7 @@ def __init__(self, stack_info: StackInfo): "id": "AwsSolutions-IAM5", "reason": "IAM policy needed to run a Distributed Map state. 
https://docs.aws.amazon.com/step-functions/latest/dg/iam-policies-eg-dist-map.html", "appliesTo": [ - f"Resource::arn:aws:states:::execution:<{extend_download_window_state_machine_logical_id}.Name>/*" + f"Resource::arn::states:::execution:<{extend_download_window_state_machine_logical_id}.Name>/*" ], }, ], diff --git a/source/solution/infrastructure/workflows/get_inventory.py b/source/solution/infrastructure/workflows/get_inventory.py index cedd8c5..b4f5f67 100644 --- a/source/solution/infrastructure/workflows/get_inventory.py +++ b/source/solution/infrastructure/workflows/get_inventory.py @@ -112,7 +112,7 @@ def __init__(self, stack_info: StackInfo): "glacier:InitiateJob", ], resources=[ - f"arn:aws:glacier:{Aws.REGION}:{Aws.ACCOUNT_ID}:vaults/*" + f"arn:{Aws.PARTITION}:glacier:{Aws.REGION}:{Aws.ACCOUNT_ID}:vaults/*" ], ), ], @@ -172,7 +172,7 @@ def __init__(self, stack_info: StackInfo): "logs:CreateLogStream", "logs:PutLogEvents", ], - resources=["arn:aws:logs:*:*:*:/aws-glue/*"], + resources=[f"arn:{Aws.PARTITION}:logs:*:*:*:/aws-glue/*"], ), ], ) @@ -211,7 +211,7 @@ def __init__(self, stack_info: StackInfo): effect=iam.Effect.ALLOW, actions=["glue:UpdateJob", "glue:StartJobRun"], resources=[ - f"arn:aws:glue:{Aws.REGION}:{Aws.ACCOUNT_ID}:job/{glue_job.job_name}" + f"arn:{Aws.PARTITION}:glue:{Aws.REGION}:{Aws.ACCOUNT_ID}:job/{glue_job.job_name}" ], ), ], @@ -234,7 +234,7 @@ def __init__(self, stack_info: StackInfo): }, }, "ResultPath": "$.async_ddb_put_result", - "Resource": "arn:aws:states:::aws-sdk:dynamodb:putItem.waitForTaskToken", + "Resource": f"arn:{Aws.PARTITION}:states:::aws-sdk:dynamodb:putItem.waitForTaskToken", "TimeoutSeconds": 6 * 60 * 60, # 6 Hours "Retry": stack_info.default_retry.custom_state_params(), } @@ -366,7 +366,7 @@ def __init__(self, stack_info: StackInfo): effect=iam.Effect.ALLOW, actions=["glacier:GetJobOutput"], resources=[ - f"arn:aws:glacier:{Aws.REGION}:{Aws.ACCOUNT_ID}:vaults/*", + f"arn:{Aws.PARTITION}:glacier:{Aws.REGION}:{Aws.ACCOUNT_ID}:vaults/*", ], ), ], @@ -528,7 +528,7 @@ def __init__(self, stack_info: StackInfo): "ACCOUNT_ID": Aws.ACCOUNT_ID, "REGION": Aws.REGION, "VERSION": stack_info.scope.node.try_get_context("SOLUTION_VERSION") - or "v1.1.0", + or "v1.1.1", "SOLUTION_ID": stack_info.scope.node.try_get_context("SOLUTION_ID") or "SO0293", "SEND_ANONYMIZED_STATISTICS": "Yes", @@ -697,7 +697,7 @@ def __init__(self, stack_info: StackInfo): effect=iam.Effect.ALLOW, actions=["states:DescribeExecution", "states:StopExecution"], resources=[ - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:{stack_info.state_machines.inventory_retrieval_state_machine.state_machine_name}/*" + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:{stack_info.state_machines.inventory_retrieval_state_machine.state_machine_name}/*" ], ), ], @@ -738,7 +738,7 @@ def __init__(self, stack_info: StackInfo): "id": "AwsSolutions-IAM5", "reason": "It's necessary to have wildcard permissions for inventory retrieval initiate job, since the vault name is an input that is not known in advance", "appliesTo": [ - "Resource::arn:aws:glacier:::vaults/*" + "Resource::arn::glacier:::vaults/*" ], }, ], @@ -800,7 +800,7 @@ def __init__(self, stack_info: StackInfo): "id": "AwsSolutions-IAM5", "reason": "It's necessary to have wildcard permissions for inventory retrieval initiate job, since the vault name is an input that is not known in advance", "appliesTo": [ - "Resource::arn:aws:glacier:::vaults/*", + "Resource::arn::glacier:::vaults/*", ], } ], @@ -1001,7 +1001,7 @@ 
def __init__(self, stack_info: StackInfo): "id": "AwsSolutions-IAM5", "reason": "IAM policy needed to run a Distributed Map state. https://docs.aws.amazon.com/step-functions/latest/dg/iam-policies-eg-dist-map.html", "appliesTo": [ - f"Resource::arn:aws:states:::execution:<{inventory_retrieval_state_machine_logical_id}.Name>/*" + f"Resource::arn::states:::execution:<{inventory_retrieval_state_machine_logical_id}.Name>/*" ], } ], @@ -1075,7 +1075,9 @@ def __init__(self, stack_info: StackInfo): { "id": "AwsSolutions-IAM5", "reason": "IAM policy needed to allow Glue jobs to write logs to Amazon CloudWatch", - "appliesTo": ["Resource::arn:aws:logs:*:*:*:/aws-glue/*"], + "appliesTo": [ + "Resource::arn::logs:*:*:*:/aws-glue/*" + ], } ], ) diff --git a/source/solution/infrastructure/workflows/initiate_retrieval.py b/source/solution/infrastructure/workflows/initiate_retrieval.py index 29f4707..60a249a 100644 --- a/source/solution/infrastructure/workflows/initiate_retrieval.py +++ b/source/solution/infrastructure/workflows/initiate_retrieval.py @@ -82,7 +82,7 @@ def __init__(self, stack_info: StackInfo): "glacier:InitiateJob", ], resources=[ - f"arn:aws:glacier:{Aws.REGION}:{Aws.ACCOUNT_ID}:vaults/*" + f"arn:{Aws.PARTITION}:glacier:{Aws.REGION}:{Aws.ACCOUNT_ID}:vaults/*" ], ), ], @@ -328,7 +328,7 @@ def __init__(self, stack_info: StackInfo): "id": "AwsSolutions-IAM5", "reason": "It's necessary to have wildcard permissions for archive retrieval initiate job, since the vault name is an input that is not known in advance", "appliesTo": [ - "Resource::arn:aws:glacier:::vaults/*" + "Resource::arn::glacier:::vaults/*" ], }, ], diff --git a/source/solution/infrastructure/workflows/orchestrator.py b/source/solution/infrastructure/workflows/orchestrator.py index 3c8ea5c..bb80883 100644 --- a/source/solution/infrastructure/workflows/orchestrator.py +++ b/source/solution/infrastructure/workflows/orchestrator.py @@ -58,7 +58,7 @@ def __init__(self, stack_info: StackInfo): cleanup_workflow_state_json = { "Type": "Task", - "Resource": "arn:aws:states:::states:startExecution.sync:2", + "Resource": f"arn:{Aws.PARTITION}:states:::states:startExecution.sync:2", "Parameters": { "Input.$": "$$.Execution.Input", "StateMachineArn": stack_info.state_machines.cleanup_state_machine.state_machine_arn, @@ -97,13 +97,25 @@ def __init__(self, stack_info: StackInfo): "retrieval_tier": tasks.DynamoAttributeValue.from_string( sfn.JsonPath.string_at("$.tier") ), + "provided_inventory": tasks.DynamoAttributeValue.from_string( + sfn.JsonPath.string_at("$.provided_inventory") + ), + "transfer_type": tasks.DynamoAttributeValue.from_string( + sfn.JsonPath.string_at("$.migration_type") + ), + "naming_override_file": tasks.DynamoAttributeValue.from_string( + sfn.JsonPath.string_at("$.name_override_presigned_url") + ), + "cross_region_transfer": tasks.DynamoAttributeValue.from_string( + sfn.JsonPath.string_at("$.cross_region_transfer") + ), }, result_path="$.put_workflow_input_into_ddb", ) status_cleanup_workflow_state_json = { "Type": "Task", - "Resource": "arn:aws:states:::states:startExecution.sync:2", + "Resource": f"arn:{Aws.PARTITION}:states:::states:startExecution.sync:2", "Parameters": { "Input.$": "$$.Execution.Input", "StateMachineArn": stack_info.state_machines.cleanup_archives_status_state_machine.state_machine_arn, @@ -121,7 +133,7 @@ def __init__(self, stack_info: StackInfo): inventory_retrieval_workflow_state_json = { "Type": "Task", - "Resource": "arn:aws:states:::states:startExecution.sync:2", + "Resource": 
f"arn:{Aws.PARTITION}:states:::states:startExecution.sync:2", "Parameters": { "StateMachineArn": stack_info.state_machines.inventory_retrieval_state_machine.state_machine_arn, "Input.$": "$$.Execution.Input", @@ -139,7 +151,7 @@ def __init__(self, stack_info: StackInfo): initiate_retrieval_workflow_state_json = { "Type": "Task", - "Resource": "arn:aws:states:::states:startExecution.sync:2", + "Resource": f"arn:{Aws.PARTITION}:states:::states:startExecution.sync:2", "Parameters": { "Input.$": "$$.Execution.Input", "StateMachineArn": stack_info.state_machines.initiate_retrieval_state_machine.state_machine_arn, @@ -172,7 +184,7 @@ def __init__(self, stack_info: StackInfo): }, }, "ResultPath": "$.async_ddb_put_result", - "Resource": "arn:aws:states:::aws-sdk:dynamodb:putItem.waitForTaskToken", + "Resource": f"arn:{Aws.PARTITION}:states:::aws-sdk:dynamodb:putItem.waitForTaskToken", "TimeoutSeconds": 48 * 60 * 60, "Retry": [ { @@ -227,7 +239,7 @@ def __init__(self, stack_info: StackInfo): } ], }, - "Resource": "arn:aws:states:::aws-sdk:eventbridge:putTargets", + "Resource": f"arn:{Aws.PARTITION}:states:::aws-sdk:eventbridge:putTargets", "ResultPath": "$.put_target_result", "Catch": cleanup_catch.custom_state_params(), } @@ -251,7 +263,7 @@ def __init__(self, stack_info: StackInfo): } ], }, - "Resource": "arn:aws:states:::aws-sdk:eventbridge:putTargets", + "Resource": f"arn:{Aws.PARTITION}:states:::aws-sdk:eventbridge:putTargets", "ResultPath": "$.post_metrics_sfn_target_result", "Catch": cleanup_catch.custom_state_params(), } @@ -274,7 +286,7 @@ def __init__(self, stack_info: StackInfo): } ], }, - "Resource": "arn:aws:states:::aws-sdk:eventbridge:putTargets", + "Resource": f"arn:{Aws.PARTITION}:states:::aws-sdk:eventbridge:putTargets", "ResultPath": "$.put_completion_checker_target_result", "Catch": cleanup_catch.custom_state_params(), } @@ -303,7 +315,7 @@ def __init__(self, stack_info: StackInfo): "events:DescribeRule", ], resources=[ - f"arn:aws:events:{Aws.REGION}:{Aws.ACCOUNT_ID}:rule/StepFunctionsGetEventsForStepFunctionsExecutionRule", + f"arn:{Aws.PARTITION}:events:{Aws.REGION}:{Aws.ACCOUNT_ID}:rule/StepFunctionsGetEventsForStepFunctionsExecutionRule", stack_info.eventbridge_rules.extend_download_window_trigger.rule_arn, stack_info.eventbridge_rules.cloudwatch_dashboard_update_trigger.rule_arn, stack_info.eventbridge_rules.completion_checker_trigger.rule_arn, @@ -375,7 +387,7 @@ def __init__(self, stack_info: StackInfo): effect=iam.Effect.ALLOW, actions=["states:DescribeExecution", "states:StopExecution"], resources=[ - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:{state_machine.state_machine_name}/*" + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:{state_machine.state_machine_name}/*" for state_machine in state_machines_list ], ), @@ -407,7 +419,7 @@ def __init__(self, stack_info: StackInfo): "id": "AwsSolutions-IAM5", "reason": "It's necessary to have wildcard permissions to make orchestrator workflow to StartExecution for the nested workflows", "appliesTo": [ - f"Resource::arn:aws:states:<AWS::Region>:<AWS::AccountId>:execution:<{state_machine_logical_id}.Name>/*" + f"Resource::arn:<AWS::Partition>:states:<AWS::Region>:<AWS::AccountId>:execution:<{state_machine_logical_id}.Name>/*" for state_machine_logical_id in state_machines_logical_ids_list ], }, diff --git a/source/solution/infrastructure/workflows/retrieve_archive.py b/source/solution/infrastructure/workflows/retrieve_archive.py index f99810b..ed59234 100644 --- a/source/solution/infrastructure/workflows/retrieve_archive.py +++
b/source/solution/infrastructure/workflows/retrieve_archive.py @@ -232,7 +232,7 @@ def __init__(self, stack_info: StackInfo): [ { "id": "AwsSolutions-L1", - "reason": "Python 3.11 is the latest version, but there is a bug in cdk-nag due to sorting", + "reason": "Python 3.12 is the latest version, but there is a bug in cdk-nag due to sorting", } ], ) diff --git a/source/solution/mocking/mock_glacier_stack.py b/source/solution/mocking/mock_glacier_stack.py index d321b31..50b876b 100644 --- a/source/solution/mocking/mock_glacier_stack.py +++ b/source/solution/mocking/mock_glacier_stack.py @@ -18,7 +18,7 @@ def __init__(self, scope: Construct, construct_id: str) -> None: self, "MockNotifySns", handler="solution.application.mocking.handlers.mock_notify_sns_handler", - runtime=lambda_.Runtime.PYTHON_3_10, + runtime=lambda_.Runtime.PYTHON_3_12, code=lambda_.Code.from_asset("source", exclude=["tests/**"]), description="Lambda to mock notifying SNS job completion.", timeout=Duration.seconds(20), diff --git a/source/solution/pipeline/stack.py b/source/solution/pipeline/stack.py index 03b6366..b6c18ec 100644 --- a/source/solution/pipeline/stack.py +++ b/source/solution/pipeline/stack.py @@ -185,8 +185,8 @@ def get_integration_test_step(self) -> pipelines.CodeBuildStep: effect=iam.Effect.ALLOW, actions=["cloudformation:DescribeStacks"], resources=[ - f"arn:aws:cloudformation:{Aws.REGION}:{Aws.ACCOUNT_ID}:stack/{iam_stack_name}*", - f"arn:aws:cloudformation:{Aws.REGION}:{Aws.ACCOUNT_ID}:stack/{mock_sns_stack_name}*", + f"arn:{Aws.PARTITION}:cloudformation:{Aws.REGION}:{Aws.ACCOUNT_ID}:stack/{iam_stack_name}*", + f"arn:{Aws.PARTITION}:cloudformation:{Aws.REGION}:{Aws.ACCOUNT_ID}:stack/{mock_sns_stack_name}*", ], ), iam.PolicyStatement( @@ -201,7 +201,7 @@ def get_integration_test_step(self) -> pipelines.CodeBuildStep: "dynamodb:UpdateItem", ], resources=[ - f"arn:aws:dynamodb:{Aws.REGION}:{Aws.ACCOUNT_ID}:table/{iam_stack_name}*" + f"arn:{Aws.PARTITION}:dynamodb:{Aws.REGION}:{Aws.ACCOUNT_ID}:table/{iam_stack_name}*" ], ), iam.PolicyStatement( @@ -213,7 +213,7 @@ def get_integration_test_step(self) -> pipelines.CodeBuildStep: "sns:RemovePermission", ], resources=[ - f"arn:aws:sns:{Aws.REGION}:{Aws.ACCOUNT_ID}:{iam_stack_name}*" + f"arn:{Aws.PARTITION}:sns:{Aws.REGION}:{Aws.ACCOUNT_ID}:{iam_stack_name}*" ], ), iam.PolicyStatement( @@ -225,15 +225,15 @@ def get_integration_test_step(self) -> pipelines.CodeBuildStep: "s3:List*", ], resources=[ - f"arn:aws:s3:::{self.output_bucket_name}*", - "arn:aws:s3:::*-inventorybucket*", + f"arn:{Aws.PARTITION}:s3:::{self.output_bucket_name}*", + f"arn:{Aws.PARTITION}:s3:::*-inventorybucket*", ], ), iam.PolicyStatement( effect=iam.Effect.ALLOW, actions=["events:ListTargetsByRule", "events:removeTargets"], resources=[ - f"arn:aws:events:{Aws.REGION}:{Aws.ACCOUNT_ID}:rule/*", + f"arn:{Aws.PARTITION}:events:{Aws.REGION}:{Aws.ACCOUNT_ID}:rule/*", ], ), iam.PolicyStatement( @@ -246,29 +246,26 @@ def get_integration_test_step(self) -> pipelines.CodeBuildStep: "states:StopExecution", ], resources=[ - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:InventoryRetrievalStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:InventoryRetrievalStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:InitiateRetrievalStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:InitiateRetrievalStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:InitiateMetadataStateMachine*", - 
f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:InitiateMetadataStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:mapRun:InitiateMetadataStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:RetrieveArchiveStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:RetrieveArchiveStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:OrchestratorStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:OrchestratorStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:mapRun:RetrieveArchiveStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:mapRun:InitiateRetrievalStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:ExtendDownloadWindowStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:ExtendDownloadWindowStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:mapRun:ExtendDownloadWindowStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:CloudWatchDashboardUpdateStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:CloudWatchDashboardUpdateStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:CleanupStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:CleanupStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:ArchivesStatusCleanupStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:ArchivesStatusCleanupStateMachine*", - f"arn:aws:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:mapRun:ArchivesStatusCleanupStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:InventoryRetrievalStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:InventoryRetrievalStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:InitiateRetrievalStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:InitiateRetrievalStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:RetrieveArchiveStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:RetrieveArchiveStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:OrchestratorStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:OrchestratorStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:mapRun:RetrieveArchiveStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:mapRun:InitiateRetrievalStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:ExtendDownloadWindowStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:ExtendDownloadWindowStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:mapRun:ExtendDownloadWindowStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:CloudWatchDashboardUpdateStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:CloudWatchDashboardUpdateStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:CleanupStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:CleanupStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:stateMachine:ArchivesStatusCleanupStateMachine*", + 
f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:execution:ArchivesStatusCleanupStateMachine*", + f"arn:{Aws.PARTITION}:states:{Aws.REGION}:{Aws.ACCOUNT_ID}:mapRun:ArchivesStatusCleanupStateMachine*", ], ), iam.PolicyStatement( @@ -280,7 +277,7 @@ def get_integration_test_step(self) -> pipelines.CodeBuildStep: "lambda:GetFunctionConfiguration", ], resources=[ - f"arn:aws:lambda:{Aws.REGION}:{Aws.ACCOUNT_ID}:function:*" + f"arn:{Aws.PARTITION}:lambda:{Aws.REGION}:{Aws.ACCOUNT_ID}:function:*" ], ), iam.PolicyStatement( @@ -290,8 +287,8 @@ def get_integration_test_step(self) -> pipelines.CodeBuildStep: "iam:DeleteRolePolicy", ], resources=[ - f"arn:aws:iam::{Aws.ACCOUNT_ID}:role/{mock_sns_stack_name}*", - f"arn:aws:iam::{Aws.ACCOUNT_ID}:role/{iam_stack_name}*", + f"arn:{Aws.PARTITION}:iam::{Aws.ACCOUNT_ID}:role/{mock_sns_stack_name}*", + f"arn:{Aws.PARTITION}:iam::{Aws.ACCOUNT_ID}:role/{iam_stack_name}*", ], ), iam.PolicyStatement( @@ -300,7 +297,7 @@ def get_integration_test_step(self) -> pipelines.CodeBuildStep: "ssm:StartAutomationExecution", ], resources=[ - f"arn:aws:ssm:{Aws.REGION}:{Aws.ACCOUNT_ID}:automation-definition/*" + f"arn:{Aws.PARTITION}:ssm:{Aws.REGION}:{Aws.ACCOUNT_ID}:automation-definition/*" ], ), iam.PolicyStatement( @@ -309,7 +306,7 @@ def get_integration_test_step(self) -> pipelines.CodeBuildStep: "ssm:GetAutomationExecution", ], resources=[ - f"arn:aws:ssm:{Aws.REGION}:{Aws.ACCOUNT_ID}:automation-execution/*" + f"arn:{Aws.PARTITION}:ssm:{Aws.REGION}:{Aws.ACCOUNT_ID}:automation-execution/*" ], ), iam.PolicyStatement( @@ -322,7 +319,7 @@ def get_integration_test_step(self) -> pipelines.CodeBuildStep: "logs:GetQueryResults", ], resources=[ - f"arn:aws:logs:{Aws.REGION}:{Aws.ACCOUNT_ID}:log-group:*", + f"arn:{Aws.PARTITION}:logs:{Aws.REGION}:{Aws.ACCOUNT_ID}:log-group:*", ], ), ], diff --git a/source/tests/integration/infrastructure/test_inventory_retrieval_step_function.py b/source/tests/integration/infrastructure/test_inventory_retrieval_step_function.py index 7c792f1..149b8d7 100644 --- a/source/tests/integration/infrastructure/test_inventory_retrieval_step_function.py +++ b/source/tests/integration/infrastructure/test_inventory_retrieval_step_function.py @@ -32,8 +32,10 @@ S3Client = object VAULT_NAME = "test_vault_chunk_generation_vault" +EMPTY_VAULT_NAME = "test_empty_vault" WORKFLOW_RUN = "workflow_run_inventory_retrieval" WORKFLOW_RUN_RESUME = "workflow_run_inventory_retrieval_resume" +WORKFLOW_EMPTY_VAULT = "workflow_empty_vault" @pytest.fixture(autouse=True, scope="module") @@ -48,6 +50,7 @@ def set_up() -> Any: ddb_util.delete_all_table_items(os.environ[OutputKeys.METRIC_TABLE_NAME], "pk") s3_util.delete_all_inventory_files_from_s3(prefix=WORKFLOW_RUN) s3_util.delete_all_inventory_files_from_s3(prefix=WORKFLOW_RUN_RESUME) + s3_util.delete_all_inventory_files_from_s3(prefix=WORKFLOW_EMPTY_VAULT) @pytest.fixture(scope="module") @@ -364,3 +367,27 @@ def test_state_machine_resuming_workflow( assert metric_item["Item"]["size_requested"]["N"] == "1000" assert metric_item["Item"]["count_staged"]["N"] == "50" assert metric_item["Item"]["size_staged"]["N"] == "1000" + + +def test_empty_vault( + default_input: str, + sfn_client: SFNClient, + ddb_client: DynamoDBClient, + s3_client: S3Client, +) -> None: + input_json = json.loads(default_input) + input_json["vault_name"] = EMPTY_VAULT_NAME + input_json["workflow_run"] = WORKFLOW_EMPTY_VAULT + input = json.dumps(input_json) + response = sfn_client.start_execution( + 
stateMachineArn=os.environ[OutputKeys.INVENTORY_RETRIEVAL_STATE_MACHINE_ARN], + input=input, + ) + sfn_util.wait_till_state_machine_finish(response["executionArn"], timeout=420) + metric_item = ddb_client.get_item( + TableName=os.environ[OutputKeys.METRIC_TABLE_NAME], + Key={"pk": {"S": WORKFLOW_EMPTY_VAULT}}, + ) + + assert metric_item["Item"]["count_requested"]["N"] == "0" + assert metric_item["Item"]["size_requested"]["N"] == "0" diff --git a/source/tests/integration/infrastructure/test_orchestrator_step_function.py b/source/tests/integration/infrastructure/test_orchestrator_step_function.py index da348b0..be7ba6b 100644 --- a/source/tests/integration/infrastructure/test_orchestrator_step_function.py +++ b/source/tests/integration/infrastructure/test_orchestrator_step_function.py @@ -36,10 +36,11 @@ def default_input() -> str: workflow_run=WORKFLOW_RUN, chunk_size=2**20, archive_chunk_size=2**20, - name_override_presigned_url=None, + name_override_presigned_url="", migration_type="LAUNCH", tier="Bulk", s3_storage_class="STANDARD", + cross_region_transfer="True", ) ) diff --git a/source/tests/unit/application/archive_retrieval/test_initiator.py b/source/tests/unit/application/archive_retrieval/test_initiator.py index e6e4290..38e8c61 100644 --- a/source/tests/unit/application/archive_retrieval/test_initiator.py +++ b/source/tests/unit/application/archive_retrieval/test_initiator.py @@ -4,16 +4,18 @@ """ import os from typing import TYPE_CHECKING, Any, Callable, Iterator, List, Tuple -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest from botocore.config import Config +from botocore.exceptions import ClientError from mypy_boto3_dynamodb import DynamoDBClient from mypy_boto3_glacier.client import GlacierClient from solution.application import __boto_config__ from solution.application.archive_retrieval.initiator import ( extend_retrieval, + glacier_initiate_job, initiate_request, initiate_retrieval, ) @@ -187,6 +189,36 @@ def test_initiate_request_already_downloaded( glacier_client.describe_job(vaultName=VAULT_NAME, jobId=job_id) +def test_glacier_initiate_job(glacier_client: GlacierClient) -> None: + glacier_client.create_vault(vaultName=VAULT_NAME) + job_id = glacier_initiate_job( + glacier_client, + VAULT_NAME, + "test_sns_topic", + "test_archive_id", + "test_tier", + "test_account_id", + ) + assert job_id is not None + + +@patch("boto3.client") +def test_glacier_initiate_job_failed(boto3_client_mock: MagicMock) -> None: + boto3_client_mock.initiate_job.side_effect = ClientError( + error_response={"Error": {"Code": "ResourceNotFoundException"}}, + operation_name="InitiateJob", + ) + job_id = glacier_initiate_job( + boto3_client_mock, + VAULT_NAME, + "test_sns_topic", + "test_archive_id", + "test_tier", + "test_account_id", + ) + assert job_id == None + + def generate_initiate_archive_retrieval_items( count: int, ) -> List[events.InitiateArchiveRetrievalItem]: diff --git a/source/tests/unit/application/operational_metrics/test_anonymized_stats.py b/source/tests/unit/application/operational_metrics/test_anonymized_stats.py index 9b23d55..53b64d9 100644 --- a/source/tests/unit/application/operational_metrics/test_anonymized_stats.py +++ b/source/tests/unit/application/operational_metrics/test_anonymized_stats.py @@ -42,6 +42,10 @@ def mock_anonymized_data() -> Dict[str, Any]: "StorageClass": "Glacier", "RetrievalTier": "Bulk", "DailyQuota": 10000, + "ProvidedInventory": "NO", + "TransferType": "LAUNCH", + "CrossRegionTransfer": "False", + 
"NamingOverrideFile": "NO", "VaultSize": 100, "ArchiveCount": 10, }, @@ -117,6 +121,12 @@ def test_send_job_stats( daily_quota=anonymized_submission_data["Data"]["DailyQuota"], storage_class=anonymized_submission_data["Data"]["StorageClass"], retrieval_tier=anonymized_submission_data["Data"]["RetrievalTier"], + provided_inventory=anonymized_submission_data["Data"]["ProvidedInventory"], + transfer_type=anonymized_submission_data["Data"]["TransferType"], + naming_override_file="", + cross_region_transfer=anonymized_submission_data["Data"][ + "CrossRegionTransfer" + ], ).marshal(), ) diff --git a/source/tests/unit/conftest.py b/source/tests/unit/conftest.py index 449676a..c8af842 100644 --- a/source/tests/unit/conftest.py +++ b/source/tests/unit/conftest.py @@ -49,7 +49,7 @@ def aws_credentials() -> None: @pytest.fixture(scope="module") def solution_user_agent() -> str: - return "AwsSolution/SO0293/v1.1.0" + return "AwsSolution/SO0293/v1.1.1" @pytest.fixture(scope="module") diff --git a/source/tests/unit/infrastructure/glue_helper/test_glue_sfn_update.py b/source/tests/unit/infrastructure/glue_helper/test_glue_sfn_update.py index ec90139..98c71e2 100644 --- a/source/tests/unit/infrastructure/glue_helper/test_glue_sfn_update.py +++ b/source/tests/unit/infrastructure/glue_helper/test_glue_sfn_update.py @@ -136,70 +136,48 @@ def test_autogenerate_etl_script(glue_sfn_update: GlueSfnUpdate) -> None: }, "node-5": { "CustomCode": { - "Name": "ArchiveNaming", + "Name": "ValidateInput<=5TB", "Inputs": ["node-3"], - "ClassName": "ArchiveNaming", - "Code": glue_sfn_update.custom_code, + "ClassName": "validate_source_table", + "Code": glue_sfn_update.validate_source_input_custom_code, } }, "node-6": { "CustomCode": { - "Name": "validate_source_input", + "Name": "ValidateInput>5TB", "Inputs": ["node-4"], "ClassName": "validate_source_table", "Code": glue_sfn_update.validate_source_input_custom_code, } }, "node-7": { - "S3CsvSource": { - "Name": "S3 bucket - Naming Overrides", - "Paths.$": "States.Array(States.Format('s3://test-bucket/{}/naming_overrides/', $.workflow_run))", - "QuoteChar": "quote", - "Separator": "comma", - "Recurse": True, - "WithHeader": True, - "Escaper": "", - "OutputSchemas": [ - { - "Columns": glue_sfn_update.archive_naming_override_columns, - } - ], - } - }, - "node-8": { "SelectFromCollection": { - "Name": "SelectFromCollection", + "Name": "SelectFromCollection<=5TB", "Inputs": ["node-5"], "Index": 0, } }, - "node-9": { + "node-8": { "SelectFromCollection": { - "Name": "SelectFromCollection", + "Name": "SelectFromCollection>5TB", "Inputs": ["node-6"], "Index": 0, } }, - "node-10": { - "SparkSQL": { - "Name": "SQL sorting", - "Inputs": ["node-7", "node-8"], - "SqlQuery": SORTING_SQL_QUERY, - "SqlAliases": [ - {"From": "node-7", "Alias": "namingOverrides"}, - {"From": "node-8", "Alias": "myDataSource"}, - ], - "OutputSchemas": [ - {"Columns": glue_sfn_update.csv_file_columns_output} - ], + "node-9": { + "CustomCode": { + "Name": "ArchiveNaming", + "Inputs": ["node-7"], + "ClassName": "ArchiveNaming", + "Code": glue_sfn_update.custom_code, } }, - "node-11": { + "node-10": { "SparkSQL": { "Name": "SQL Metric", - "Inputs": ["node-3"], + "Inputs": ["node-7"], "SqlQuery": METRIC_COLLECTION_SQL_QUERY, - "SqlAliases": [{"From": "node-3", "Alias": "myDataSource"}], + "SqlAliases": [{"From": "node-7", "Alias": "myDataSource"}], "OutputSchemas": [ { "Columns": [ @@ -213,22 +191,12 @@ def test_autogenerate_etl_script(glue_sfn_update: GlueSfnUpdate) -> None: ], } }, - "node-12": { - 
"S3DirectTarget": { - "Name": "S3 bucket - Not Migrated", - "Inputs": ["node-9"], - "Compression": "none", - "Format": "csv", - "SchemaChangePolicy": {"EnableUpdateCatalog": False}, - "Path.$": "States.Format('s3://test-bucket/{}/not_migrated/', $.workflow_run)", - } - }, - "node-13": { + "node-11": { "SparkSQL": { "Name": "SQL Skipped Metric", - "Inputs": ["node-9"], + "Inputs": ["node-8"], "SqlQuery": METRIC_COLLECTION_SQL_QUERY, - "SqlAliases": [{"From": "node-9", "Alias": "myDataSource"}], + "SqlAliases": [{"From": "node-8", "Alias": "myDataSource"}], "OutputSchemas": [ { "Columns": [ @@ -242,31 +210,78 @@ def test_autogenerate_etl_script(glue_sfn_update: GlueSfnUpdate) -> None: ], } }, - "node-14": { + "node-12": { "S3DirectTarget": { - "Name": "S3 bucket - Sorted Inventory", - "Inputs": ["node-10"], - "PartitionKeys.$": "States.Array(States.Array('PartitionId'))", + "Name": "S3 bucket - Not Migrated", + "Inputs": ["node-8"], "Compression": "none", "Format": "csv", "SchemaChangePolicy": {"EnableUpdateCatalog": False}, - "Path.$": "States.Format('s3://test-bucket/{}/sorted_inventory/', $.workflow_run)", + "Path.$": "States.Format('s3://test-bucket/{}/not_migrated/', $.workflow_run)", + } + }, + "node-13": { + "SelectFromCollection": { + "Name": "SelectFromCollection", + "Inputs": ["node-9"], + "Index": 0, + } + }, + "node-14": { + "CustomCode": { + "Name": "MetricCollection", + "Inputs": ["node-10", "node-11"], + "ClassName": "MetricCollection", + "Code": glue_sfn_update.metric_collection_code, } }, "node-15": { + "S3CsvSource": { + "Name": "S3 bucket - Naming Overrides", + "Paths.$": "States.Array(States.Format('s3://test-bucket/{}/naming_overrides/', $.workflow_run))", + "QuoteChar": "quote", + "Separator": "comma", + "Recurse": True, + "WithHeader": True, + "Escaper": "", + "OutputSchemas": [ + { + "Columns": glue_sfn_update.archive_naming_override_columns, + } + ], + }, + }, + "node-16": { + "SparkSQL": { + "Name": "SQL sorting", + "Inputs": ["node-15", "node-13"], + "SqlQuery": SORTING_SQL_QUERY, + "SqlAliases": [ + {"From": "node-15", "Alias": "namingOverrides"}, + {"From": "node-13", "Alias": "myDataSource"}, + ], + "OutputSchemas": [ + {"Columns": glue_sfn_update.csv_file_columns_output} + ], + } + }, + "node-17": { "CustomCode": { "Name": "Validation", - "Inputs": ["node-3", "node-10"], + "Inputs": ["node-7", "node-16"], "ClassName": "Validation", "Code": "\nnode_inputs = list(dfc.values())\nassert node_inputs[0].toDF().count() == node_inputs[1].toDF().count()\n", } }, - "node-16": { - "CustomCode": { - "Name": "MetricCollection", - "Inputs": ["node-11", "node-13"], - "ClassName": "MetricCollection", - "Code": glue_sfn_update.metric_collection_code, + "node-18": { + "S3DirectTarget": { + "Name": "S3 bucket - Sorted Inventory", + "Inputs": ["node-16"], + "PartitionKeys.$": "States.Array(States.Array('PartitionId'))", + "Compression": "none", + "Format": "csv", + "SchemaChangePolicy": {"EnableUpdateCatalog": False}, + "Path.$": "States.Format('s3://test-bucket/{}/sorted_inventory/', $.workflow_run)", } }, }, diff --git a/source/tests/unit/infrastructure/ssm_automation_docs/scripts/test_orchestration_doc_script.py b/source/tests/unit/infrastructure/ssm_automation_docs/scripts/test_orchestration_doc_script.py index 9917b0c..dc8274f 100644 --- a/source/tests/unit/infrastructure/ssm_automation_docs/scripts/test_orchestration_doc_script.py +++ b/source/tests/unit/infrastructure/ssm_automation_docs/scripts/test_orchestration_doc_script.py @@ -44,23 +44,24 @@ def 
test_script_handler_launch_automation() -> None: "tier": "Bulk", "workflow_run": "workflow_1630251600_123456", "migration_type": "LAUNCH", + "cross_region_transfer": "True", + "allow_cross_region_data_transfer": True, + "acknowledge_cross_region": "YES", "name_override_presigned_url": None, "vault_name": "test_vault", - "acknowledge_cross_region": "YES", "bucket_name": "test_bucket_name", "region": "us-east-1", - "allow_cross_region_data_transfer": True, } script_handler(events, None) # type: ignore - events.pop("acknowledge_cross_region") events.pop("bucket_name") events.pop("region") events.pop("allow_cross_region_data_transfer") + events.pop("acknowledge_cross_region") events["s3_storage_class"] = "GLACIER_IR" - + events["name_override_presigned_url"] = "" # assert that the start_execution method was called with the correct arguments mock_sf.start_execution.assert_called_once_with( stateMachineArn=events.pop("state_machine_arn"), input=json.dumps(events) @@ -119,21 +120,24 @@ def test_script_handler_resume_automation( "tier": "Bulk", "workflow_run": "workflow_1630251600_123456", "migration_type": "RESUME", + "cross_region_transfer": "True", + "allow_cross_region_data_transfer": True, + "acknowledge_cross_region": "YES", "table_name": glacier_retrieval_table_mock[1], "name_override_presigned_url": None, - "acknowledge_cross_region": "YES", "bucket_name": "test_bucket_name", "region": "us-east-1", - "allow_cross_region_data_transfer": True, } script_handler(events, None) # type: ignore + events.pop("table_name") - events.pop("acknowledge_cross_region") events.pop("bucket_name") events.pop("region") events.pop("allow_cross_region_data_transfer") + events.pop("acknowledge_cross_region") events["s3_storage_class"] = "GLACIER_IR" + events["name_override_presigned_url"] = "" events["vault_name"] = "test_vault_name" # assert that the start_execution method was called with the correct arguments mock_sf.start_execution.assert_called_once_with( diff --git a/source/tests/unit/infrastructure/test_stack.py b/source/tests/unit/infrastructure/test_stack.py index f692823..00db5ae 100644 --- a/source/tests/unit/infrastructure/test_stack.py +++ b/source/tests/unit/infrastructure/test_stack.py @@ -231,7 +231,7 @@ def test_chunk_retrieval_lambda_created( props={ "Properties": { "Handler": "solution.application.handlers.archive_retrieval", - "Runtime": "python3.11", + "Runtime": "python3.12", "MemorySize": 1536, "Timeout": 900, }, @@ -257,7 +257,7 @@ def test_archive_validation_lambda_created( props={ "Properties": { "Handler": "solution.application.handlers.archive_validation", - "Runtime": "python3.11", + "Runtime": "python3.12", "MemorySize": 256, "Timeout": 900, }, @@ -283,7 +283,7 @@ def test_inventory_chunk_determination_created( props={ "Properties": { "Handler": "solution.application.handlers.inventory_chunking", - "Runtime": "python3.11", + "Runtime": "python3.12", }, }, ) @@ -314,7 +314,7 @@ def test_facilitator_lambda_created( }, "Handler": "solution.application.handlers.async_facilitator", "MemorySize": 256, - "Runtime": "python3.11", + "Runtime": "python3.12", }, ) @@ -400,19 +400,9 @@ def test_facilitator_default_policy( "Fn::Join": [ "", [ - "arn:aws:states:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":execution:InitiateMetadataStateMachine*", - ], - ] - }, - { - "Fn::Join": [ - "", - [ - "arn:aws:states:", + "arn:", + {"Ref": "AWS::Partition"}, + ":states:", {"Ref": "AWS::Region"}, ":", {"Ref": "AWS::AccountId"}, @@ -424,7 +414,9 @@ def test_facilitator_default_policy( 
"Fn::Join": [ "", [ - "arn:aws:states:", + "arn:", + {"Ref": "AWS::Partition"}, + ":states:", {"Ref": "AWS::Region"}, ":", {"Ref": "AWS::AccountId"}, @@ -436,7 +428,9 @@ def test_facilitator_default_policy( "Fn::Join": [ "", [ - "arn:aws:states:", + "arn:", + {"Ref": "AWS::Partition"}, + ":states:", {"Ref": "AWS::Region"}, ":", {"Ref": "AWS::AccountId"}, @@ -448,19 +442,9 @@ def test_facilitator_default_policy( "Fn::Join": [ "", [ - "arn:aws:states:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":stateMachine:InitiateMetadataStateMachine*", - ], - ] - }, - { - "Fn::Join": [ - "", - [ - "arn:aws:states:", + "arn:", + {"Ref": "AWS::Partition"}, + ":states:", {"Ref": "AWS::Region"}, ":", {"Ref": "AWS::AccountId"}, @@ -472,7 +456,9 @@ def test_facilitator_default_policy( "Fn::Join": [ "", [ - "arn:aws:states:", + "arn:", + {"Ref": "AWS::Partition"}, + ":states:", {"Ref": "AWS::Region"}, ":", {"Ref": "AWS::AccountId"}, @@ -484,7 +470,9 @@ def test_facilitator_default_policy( "Fn::Join": [ "", [ - "arn:aws:states:", + "arn:", + {"Ref": "AWS::Partition"}, + ":states:", {"Ref": "AWS::Region"}, ":", {"Ref": "AWS::AccountId"}, @@ -683,7 +671,9 @@ def test_glue_job_policy(stack: SolutionStack, template: assertions.Template) -> "Fn::Join": [ "", [ - "arn:aws:glue:", + "arn:", + {"Ref": "AWS::Partition"}, + ":glue:", {"Ref": "AWS::Region"}, ":", {"Ref": "AWS::AccountId"}, @@ -724,7 +714,16 @@ def test_glue_job_logging_policy( "logs:PutLogEvents", ], "Effect": "Allow", - "Resource": "arn:aws:logs:*:*:*:/aws-glue/*", + "Resource": { + "Fn::Join": [ + "", + [ + "arn:", + {"Ref": "AWS::Partition"}, + ":logs:*:*:*:/aws-glue/*", + ], + ] + }, }, ] }, diff --git a/tox.ini b/tox.ini index 0fabd6b..bbc4288 100644 --- a/tox.ini +++ b/tox.ini @@ -5,7 +5,7 @@ env_list = lint type - py3{10,11} + py3{10,11,12} minversion = 4.0.13 [testenv:lint] @@ -15,7 +15,7 @@ deps = isort==5.13.2 black==23.12.1 pytest==7.4.3 - moto==4.1.10 + moto==4.1.15 commands = isort --check source black --check source @@ -29,11 +29,11 @@ wheel_build_env = .pkg extras = dev deps = - mypy + mypy==1.7.1 commands = mypy --strict source -[testenv:py3{10,11}] +[testenv:py3{10,11,12}] description = run the tests with pytest package = wheel wheel_build_env = .pkg