diff --git a/cloud_governance/main/main_oerations/main_operations.py b/cloud_governance/main/main_oerations/main_operations.py index 1a43891bb..0f8c0a348 100644 --- a/cloud_governance/main/main_oerations/main_operations.py +++ b/cloud_governance/main/main_oerations/main_operations.py @@ -39,7 +39,8 @@ def run(self): for policy_type, policies in policies_list.items(): # @Todo support for all the aws policies, currently supports ec2_run as urgent requirement if self._policy in policies and self._policy in ["instance_run", "unattached_volume", "cluster_run", - "ip_unattached", "unused_nat_gateway", "instance_idle"]: + "ip_unattached", "unused_nat_gateway", "instance_idle", + "zombie_snapshots"]: source = policy_type if Utils.equal_ignore_case(policy_type, self._public_cloud_name): source = '' diff --git a/cloud_governance/policy/aws/zombie_snapshots.py b/cloud_governance/policy/aws/zombie_snapshots.py index 58b158734..087d8c512 100644 --- a/cloud_governance/policy/aws/zombie_snapshots.py +++ b/cloud_governance/policy/aws/zombie_snapshots.py @@ -1,15 +1,18 @@ -from cloud_governance.policy.policy_operations.aws.zombie_non_cluster.run_zombie_non_cluster_policies import NonClusterZombiePolicy +from cloud_governance.policy.helpers.aws.aws_policy_operations import AWSPolicyOperations -class ZombieSnapshots(NonClusterZombiePolicy): +class ZombieSnapshots(AWSPolicyOperations): """ This class sends an alert mail for zombie snapshots ( AMI abandoned ) to the user after 4 days and delete after 7 days. """ + RESOURCE_ACTION = 'Delete' + def __init__(self): super().__init__() + self.__image_ids = self._get_ami_ids() - def _get_image_ids_from_description(self, snapshot_description: str): + def __get_image_ids_from_description(self, snapshot_description: str): """ This method gets image Ids from snapshot description Two cases: @@ -23,43 +26,52 @@ def _get_image_ids_from_description(self, snapshot_description: str): image_ids.append(f'ami-{image.split(" ")[0]}') return image_ids + def __is_zombie_snapshot(self, snapshot_description: str): + """ + This method returns bool on verifying snapshots as zombie or not + :param snapshot_description: + :return: + """ + zombie_snapshot = True + if snapshot_description: + snapshot_images = self.__get_image_ids_from_description(snapshot_description) + for snapshot_image in snapshot_images: + if snapshot_image in self.__image_ids: + return False + return zombie_snapshot + def run(self): """ - This method returns all the zombie snapshots, delete if dry_run no + This method returns all the zombie snapshots and delete after x days @return: """ snapshots = self._ec2_operations.get_snapshots() zombie_snapshots = [] - image_ids = self._get_ami_ids() for snapshot in snapshots: - if not self._check_cluster_tag(tags=snapshot.get('Tags')): - if snapshot.get('Description'): - snapshot_images = self._get_image_ids_from_description(snapshot.get('Description')) - tags = snapshot.get('Tags') - found = False - for snapshot_image in snapshot_images: - if snapshot_image in image_ids: - found = True - snapshot_id = snapshot.get('SnapshotId') - if not found: - unused_days = self._get_resource_last_used_days(tags=tags) - zombie_snapshot = self._check_resource_and_delete(resource_name='Snapshot', - resource_id='SnapshotId', - resource_type='CreateSnapshot', - resource=snapshot, - empty_days=unused_days, - days_to_delete_resource=self.DAYS_TO_DELETE_RESOURCE, - tags=tags) - if zombie_snapshot: - zombie_snapshots.append({'ResourceId': snapshot.get('SnapshotId'), - 'Name': self._get_tag_name_from_tags(tags=tags), - 'User': self._get_tag_name_from_tags(tags=tags, tag_name='User'), - 'Size': f'{str(snapshot.get("VolumeSize"))}Gb', - 'Skip': self._get_policy_value(tags=snapshot.get('Tags')), - 'Days': str(unused_days) - }) - else: - unused_days = 0 - self._update_resource_tags(resource_id=snapshot_id, tags=tags, left_out_days=unused_days, - resource_left_out=not found) + tags = snapshot.get('Tags', []) + resource_id = snapshot.get('SnapshotId') + cleanup_result = False + cluster_tag = self._get_cluster_tag(tags=tags) + cleanup_days = 0 + if not cluster_tag and self.__is_zombie_snapshot(snapshot.get('Description')): + cleanup_days = self.get_clean_up_days_count(tags=tags) + cleanup_result = self.verify_and_delete_resource(resource_id=resource_id, tags=tags, + clean_up_days=cleanup_days) + unit_price = 0 + resource_data = self._get_es_schema(resource_id=resource_id, + user=self.get_tag_name_from_tags(tags=tags, tag_name='User'), + skip_policy=self.get_skip_policy_value(tags=tags), + cleanup_days=cleanup_days, dry_run=self._dry_run, + name=self.get_tag_name_from_tags(tags=tags, tag_name='Name'), + region=self._region, + cleanup_result=str(cleanup_result), + resource_action=self.RESOURCE_ACTION, + cloud_name=self._cloud_name, + resource_type='Snapshot', + volume_size=f"{snapshot.get('VolumeSize')} GB", + unit_price=unit_price, resource_state='Backup' if not cleanup_result else "Deleted" + ) + zombie_snapshots.append(resource_data) + if not cleanup_result: + self.update_resource_day_count_tag(resource_id=resource_id, cleanup_days=cleanup_days, tags=tags) return zombie_snapshots diff --git a/cloud_governance/policy/helpers/aws/aws_policy_operations.py b/cloud_governance/policy/helpers/aws/aws_policy_operations.py index 4c67f81c3..0526b911a 100644 --- a/cloud_governance/policy/helpers/aws/aws_policy_operations.py +++ b/cloud_governance/policy/helpers/aws/aws_policy_operations.py @@ -263,3 +263,14 @@ def get_network_out_kib_metric(self, resource_id: str, days: int = INSTANCE_IDLE average_network_out_bytes = self.__get_aggregation_metrics_value(metrics.get('MetricDataResults', []), aggregation='average') return round(average_network_out_bytes / TOTAL_BYTES_IN_KIB, DEFAULT_ROUND_DIGITS) + + def _get_ami_ids(self): + """ + This method returns all image ids + @return: + """ + images = self._ec2_operations.get_images() + image_ids = [] + for image in images: + image_ids.append(image.get('ImageId')) + return image_ids diff --git a/tests/unittest/cloud_governance/policy/aws/test_zombie_snapshots.py b/tests/unittest/cloud_governance/policy/aws/test_zombie_snapshots.py new file mode 100644 index 000000000..f87a1ef36 --- /dev/null +++ b/tests/unittest/cloud_governance/policy/aws/test_zombie_snapshots.py @@ -0,0 +1,221 @@ +import os +from datetime import datetime + +import boto3 +from moto import mock_ec2 + +from cloud_governance.common.clouds.aws.utils.common_methods import get_tag_value_from_tags +from cloud_governance.main.environment_variables import environment_variables +from cloud_governance.policy.aws.zombie_snapshots import ZombieSnapshots +from tests.unittest.configs import DRY_RUN_YES, AWS_DEFAULT_REGION, INSTANCE_TYPE_T2_MICRO, DEFAULT_AMI_ID, \ + TEST_USER_NAME, DRY_RUN_NO + +os.environ['AWS_DEFAULT_REGION'] = 'us-east-2' +os.environ['dry_run'] = 'no' + + +@mock_ec2 +def test_zombie_snapshots(): + """ + This method tests lists of the ami related snapshots + @return: + """ + environment_variables.environment_variables_dict['dry_run'] = DRY_RUN_YES + environment_variables.environment_variables_dict['AWS_DEFAULT_REGION'] = AWS_DEFAULT_REGION + environment_variables.environment_variables_dict['policy'] = 'zombie_snapshots' + tags = [{'Key': 'User', 'Value': TEST_USER_NAME}] + ec2_client = boto3.client('ec2', region_name=AWS_DEFAULT_REGION) + + # delete default snapshots and images + snapshots = ec2_client.describe_snapshots(OwnerIds=['self'])['Snapshots'] + images = ec2_client.describe_images()['Images'] + for image in images: + ec2_client.deregister_image(ImageId=image.get('ImageId')) + for snapshot in snapshots: + ec2_client.delete_snapshot(SnapshotId=snapshot.get('SnapshotId')) + + # create infra + instance_id = ec2_client.run_instances(ImageId=DEFAULT_AMI_ID, InstanceType=INSTANCE_TYPE_T2_MICRO, + MaxCount=1, MinCount=1, + TagSpecifications=[{'ResourceType': 'instance', 'Tags': tags}] + )['Instances'][0]['InstanceId'] + image_id = ec2_client.create_image(InstanceId=instance_id, Name=TEST_USER_NAME, + TagSpecifications=[{'ResourceType': 'image', 'Tags': tags}]).get('ImageId') + snapshot_id = (ec2_client.describe_images(ImageIds=[image_id])['Images'][0].get('BlockDeviceMappings')[0] + .get('Ebs').get('SnapshotId')) + ec2_client.create_tags(Resources=[snapshot_id], Tags=tags) + ec2_client.deregister_image(ImageId=image_id) + ec2_client.terminate_instances(InstanceIds=[instance_id]) + + # run zombie_snapshots + zombie_snapshots = ZombieSnapshots() + response = zombie_snapshots.run() + assert len(ec2_client.describe_snapshots(OwnerIds=['self'])['Snapshots']) == 1 + assert len(response) == 1 + assert response[0]['CleanUpDays'] == 0 + assert get_tag_value_from_tags(tags=ec2_client.describe_snapshots(OwnerIds=['self'], + SnapshotIds=[snapshot_id])['Snapshots'][0]['Tags'], + tag_name='DaysCount') + + +@mock_ec2 +def test_zombie_snapshots_delete(): + """ + This method tests delete of the ami related snapshots + @return: + """ + environment_variables.environment_variables_dict['dry_run'] = DRY_RUN_NO + environment_variables.environment_variables_dict['AWS_DEFAULT_REGION'] = AWS_DEFAULT_REGION + environment_variables.environment_variables_dict['policy'] = 'zombie_snapshots' + tags = [{'Key': 'User', 'Value': TEST_USER_NAME}, + {'Key': 'DaysCount', 'Value': f'{datetime.utcnow().date()}@7'}] + ec2_client = boto3.client('ec2', region_name=AWS_DEFAULT_REGION) + + # delete default snapshots and images + snapshots = ec2_client.describe_snapshots(OwnerIds=['self'])['Snapshots'] + images = ec2_client.describe_images()['Images'] + for image in images: + ec2_client.deregister_image(ImageId=image.get('ImageId')) + for snapshot in snapshots: + ec2_client.delete_snapshot(SnapshotId=snapshot.get('SnapshotId')) + + # create infra + instance_id = ec2_client.run_instances(ImageId=DEFAULT_AMI_ID, InstanceType=INSTANCE_TYPE_T2_MICRO, + MaxCount=1, MinCount=1, + TagSpecifications=[{'ResourceType': 'instance', 'Tags': tags}] + )['Instances'][0]['InstanceId'] + image_id = ec2_client.create_image(InstanceId=instance_id, Name=TEST_USER_NAME, + TagSpecifications=[{'ResourceType': 'image', 'Tags': tags}]).get('ImageId') + snapshot_id = (ec2_client.describe_images(ImageIds=[image_id])['Images'][0].get('BlockDeviceMappings')[0] + .get('Ebs').get('SnapshotId')) + ec2_client.create_tags(Resources=[snapshot_id], Tags=tags) + ec2_client.deregister_image(ImageId=image_id) + ec2_client.terminate_instances(InstanceIds=[instance_id]) + + # run zombie_snapshots + zombie_snapshots = ZombieSnapshots() + response = zombie_snapshots.run() + assert len(ec2_client.describe_snapshots(OwnerIds=['self'])['Snapshots']) == 0 + assert len(response) == 1 + + +@mock_ec2 +def test_zombie_snapshots_skip(): + """ + This method tests skip delete of the ami related snapshots + @return: + """ + environment_variables.environment_variables_dict['dry_run'] = DRY_RUN_NO + environment_variables.environment_variables_dict['AWS_DEFAULT_REGION'] = AWS_DEFAULT_REGION + environment_variables.environment_variables_dict['policy'] = 'zombie_snapshots' + tags = [{'Key': 'User', 'Value': TEST_USER_NAME}, {'Key': 'policy', 'Value': 'not-delete'}, + {'Key': 'DaysCount', 'Value': f'{datetime.utcnow().date()}@7'}] + ec2_client = boto3.client('ec2', region_name=AWS_DEFAULT_REGION) + + # delete default snapshots and images + snapshots = ec2_client.describe_snapshots(OwnerIds=['self'])['Snapshots'] + images = ec2_client.describe_images()['Images'] + for image in images: + ec2_client.deregister_image(ImageId=image.get('ImageId')) + for snapshot in snapshots: + ec2_client.delete_snapshot(SnapshotId=snapshot.get('SnapshotId')) + + # create infra + instance_id = ec2_client.run_instances(ImageId=DEFAULT_AMI_ID, InstanceType=INSTANCE_TYPE_T2_MICRO, + MaxCount=1, MinCount=1, + TagSpecifications=[{'ResourceType': 'instance', 'Tags': tags}] + )['Instances'][0]['InstanceId'] + image_id = ec2_client.create_image(InstanceId=instance_id, Name=TEST_USER_NAME, + TagSpecifications=[{'ResourceType': 'image', 'Tags': tags}]).get('ImageId') + snapshot_id = (ec2_client.describe_images(ImageIds=[image_id])['Images'][0].get('BlockDeviceMappings')[0] + .get('Ebs').get('SnapshotId')) + ec2_client.create_tags(Resources=[snapshot_id], Tags=tags) + ec2_client.deregister_image(ImageId=image_id) + ec2_client.terminate_instances(InstanceIds=[instance_id]) + + # run zombie_snapshots + zombie_snapshots = ZombieSnapshots() + response = zombie_snapshots.run() + assert len(ec2_client.describe_snapshots(OwnerIds=['self'])['Snapshots']) == 1 + assert len(response) == 1 + + +@mock_ec2 +def test_zombie_snapshots_contains_cluster_tag(): + """ + This method tests snapshot having the live cluster + @return: + """ + environment_variables.environment_variables_dict['dry_run'] = DRY_RUN_NO + environment_variables.environment_variables_dict['AWS_DEFAULT_REGION'] = AWS_DEFAULT_REGION + environment_variables.environment_variables_dict['policy'] = 'zombie_snapshots' + tags = [{'Key': 'User', 'Value': TEST_USER_NAME}, {'Key': 'policy', 'Value': 'not-delete'}, + {'Key': 'DaysCount', 'Value': f'{datetime.utcnow().date()}@7'}, + {'Key': 'kubernetes.io/cluster/test-zombie-cluster', 'Value': f'owned'}] + ec2_client = boto3.client('ec2', region_name=AWS_DEFAULT_REGION) + + # delete default snapshots and images + snapshots = ec2_client.describe_snapshots(OwnerIds=['self'])['Snapshots'] + images = ec2_client.describe_images()['Images'] + for image in images: + ec2_client.deregister_image(ImageId=image.get('ImageId')) + for snapshot in snapshots: + ec2_client.delete_snapshot(SnapshotId=snapshot.get('SnapshotId')) + + # create infra + instance_id = ec2_client.run_instances(ImageId=DEFAULT_AMI_ID, InstanceType=INSTANCE_TYPE_T2_MICRO, + MaxCount=1, MinCount=1, + TagSpecifications=[{'ResourceType': 'instance', 'Tags': tags}] + )['Instances'][0]['InstanceId'] + image_id = ec2_client.create_image(InstanceId=instance_id, Name=TEST_USER_NAME, + TagSpecifications=[{'ResourceType': 'image', 'Tags': tags}]).get('ImageId') + snapshot_id = (ec2_client.describe_images(ImageIds=[image_id])['Images'][0].get('BlockDeviceMappings')[0] + .get('Ebs').get('SnapshotId')) + ec2_client.create_tags(Resources=[snapshot_id], Tags=tags) + ec2_client.deregister_image(ImageId=image_id) + + # run zombie_snapshots + zombie_snapshots = ZombieSnapshots() + response = zombie_snapshots.run() + assert len(ec2_client.describe_snapshots(OwnerIds=['self'])['Snapshots']) == 1 + assert len(response) == 0 + + +@mock_ec2 +def test_zombie_snapshots_no_zombies(): + """ + This method tests snapshot having the active AMI + @return: + """ + environment_variables.environment_variables_dict['dry_run'] = DRY_RUN_NO + environment_variables.environment_variables_dict['AWS_DEFAULT_REGION'] = AWS_DEFAULT_REGION + environment_variables.environment_variables_dict['policy'] = 'zombie_snapshots' + tags = [{'Key': 'User', 'Value': TEST_USER_NAME}, {'Key': 'policy', 'Value': 'not-delete'}, + {'Key': 'DaysCount', 'Value': f'{datetime.utcnow().date()}@7'}, + {'Key': 'kubernetes.io/cluster/test-zombie-cluster', 'Value': f'owned'}] + ec2_client = boto3.client('ec2', region_name=AWS_DEFAULT_REGION) + + # delete default snapshots and images + snapshots = ec2_client.describe_snapshots(OwnerIds=['self'])['Snapshots'] + images = ec2_client.describe_images()['Images'] + for image in images: + ec2_client.deregister_image(ImageId=image.get('ImageId')) + for snapshot in snapshots: + ec2_client.delete_snapshot(SnapshotId=snapshot.get('SnapshotId')) + + # create infra + instance_id = ec2_client.run_instances(ImageId=DEFAULT_AMI_ID, InstanceType=INSTANCE_TYPE_T2_MICRO, + MaxCount=1, MinCount=1, + TagSpecifications=[{'ResourceType': 'instance', 'Tags': tags}] + )['Instances'][0]['InstanceId'] + image_id = ec2_client.create_image(InstanceId=instance_id, Name=TEST_USER_NAME, + TagSpecifications=[{'ResourceType': 'image', 'Tags': tags}]).get('ImageId') + snapshot_id = (ec2_client.describe_images(ImageIds=[image_id])['Images'][0].get('BlockDeviceMappings')[0] + .get('Ebs').get('SnapshotId')) + ec2_client.create_tags(Resources=[snapshot_id], Tags=tags) + + # run zombie_snapshots + zombie_snapshots = ZombieSnapshots() + response = zombie_snapshots.run() + assert len(ec2_client.describe_snapshots(OwnerIds=['self'])['Snapshots']) == 1 + assert len(response) == 0