From 4fc3d05779b79b387b44e54055745887b5817788 Mon Sep 17 00:00:00 2001 From: Thirumalesh Aaraveti Date: Mon, 18 Nov 2024 18:05:06 +0530 Subject: [PATCH] Fixed the bugs in CRO --- .../clouds/aws/ec2/aws_monitor_tickets.py | 73 +++++++++++++--- .../clouds/aws/ec2/aws_tagging_operations.py | 14 ++- .../common/abstract_monitor_tickets.py | 85 +++++++++++++++---- .../common/run_cro.py | 8 +- .../resource_tag_api_operations.py | 13 +++ 5 files changed, 158 insertions(+), 35 deletions(-) diff --git a/cloud_governance/cloud_resource_orchestration/clouds/aws/ec2/aws_monitor_tickets.py b/cloud_governance/cloud_resource_orchestration/clouds/aws/ec2/aws_monitor_tickets.py index 1ea3a26c..de600be6 100644 --- a/cloud_governance/cloud_resource_orchestration/clouds/aws/ec2/aws_monitor_tickets.py +++ b/cloud_governance/cloud_resource_orchestration/clouds/aws/ec2/aws_monitor_tickets.py @@ -85,13 +85,20 @@ def __send_ticket_status_alerts(self, tickets: dict, ticket_status: str): if not self.__es_operations.verify_elastic_index_doc_id(index=self.es_cro_index, doc_id=ticket_id): if ticket_status == self.REFINEMENT: ticket_status = 'manager-approved' - source = {'cloud_name': description.get('CloudName'), 'account_name': description.get('AccountName').replace('OPENSHIFT-', ''), + source = {'cloud_name': description.get('CloudName'), + 'account_name': description.get('AccountName').replace('OPENSHIFT-', ''), 'region_name': description.get('Region'), 'user': '', - 'user_cro': description.get('EmailAddress').split('@')[0], 'user_cost': 0, 'ticket_id': ticket_id, 'ticket_id_state': ticket_status.lower(), - 'estimated_cost': description.get('CostEstimation'), 'instances_count': 0, 'monitored_days': 0, - 'ticket_opened_date': description.get('TicketOpenedDate').date(), 'duration': description.get('Days'), 'approved_manager': '', - 'user_manager': '', 'project': description.get('Project'), 'owner': f'{description.get("FirstName")} {description.get("LastName")}'.upper(), 'total_spots': 0, - 'total_ondemand': 0, 'AllocatedBudget': [], 'instances_list': [], 'instance_types_list': []} + 'user_cro': description.get('EmailAddress').split('@')[0], 'user_cost': 0, + 'ticket_id': ticket_id, 'ticket_id_state': ticket_status.lower(), + 'estimated_cost': description.get('CostEstimation'), 'instances_count': 0, + 'monitored_days': 0, + 'ticket_opened_date': description.get('TicketOpenedDate').date(), + 'duration': description.get('Days'), 'approved_manager': '', + 'user_manager': '', 'project': description.get('Project'), + 'owner': f'{description.get("FirstName")} {description.get("LastName")}'.upper(), + 'total_spots': 0, + 'total_ondemand': 0, 'AllocatedBudget': [], 'instances_list': [], + 'instance_types_list': []} self.__es_operations.upload_to_elasticsearch(index=self.es_cro_index, data=source, id=ticket_id) current_date = datetime.now().date() ticket_opened_date = description.get('TicketOpenedDate').date() @@ -112,22 +119,30 @@ def __send_ticket_status_alerts(self, tickets: dict, ticket_status: str): elif ticket_opened_days >= self.__manager_escalation_days + 2: to = self.__global_admin_name extra_message = f"Missing manager approval.
The user {user} is waiting for approval for last {ticket_opened_days} days.
Please review the below details and approve/reject" - subject, body = self.__mail_message.cro_request_for_manager_approval(manager=to, request_user=user, cloud_name=self.__cloud_name, ticket_id=ticket_id, description=description, extra_message=extra_message) + subject, body = self.__mail_message.cro_request_for_manager_approval(manager=to, + request_user=user, + cloud_name=self.__cloud_name, + ticket_id=ticket_id, + description=description, + extra_message=extra_message) else: # alert user if doesn't add tag name user_tickets.setdefault(user, []).append(f"{ticket_id} : {description.get('Project')}") if user_tickets: for user, ticket_ids in user_tickets.items(): - active_instances = self.__ec2_operations.get_active_instances(ignore_tag='TicketId', tag_value=user, tag_name='User') + active_instances = self.__ec2_operations.get_active_instances(ignore_tag='TicketId', tag_value=user, + tag_name='User') if active_instances: for region, instances_list in active_instances.items(): active_instances_ids = {region: [instance.get('InstanceId') for instance in instances_list]} to = user cc = self.__default_admins - subject, body = self.__mail_message.cro_send_user_alert_to_add_tags(user=user, ticket_ids=ticket_ids) + subject, body = self.__mail_message.cro_send_user_alert_to_add_tags(user=user, + ticket_ids=ticket_ids) with tempfile.NamedTemporaryFile(mode='w', suffix='.json') as filename: filename.write(json.dumps(active_instances_ids)) filename.flush() - self.__postfix.send_email_postfix(to=to, cc=cc, subject=subject, content=body, mime_type='html', filename=filename.name) + self.__postfix.send_email_postfix(to=to, cc=cc, subject=subject, content=body, + mime_type='html', filename=filename.name) @logger_time_stamp def __track_tickets(self): @@ -136,7 +151,8 @@ def __track_tickets(self): :return: """ self.__send_ticket_status_alerts(ticket_status=self.NEW, tickets=self.get_tickets(ticket_status=self.NEW)) - self.__send_ticket_status_alerts(ticket_status=self.REFINEMENT, tickets=self.get_tickets(ticket_status=self.REFINEMENT)) + self.__send_ticket_status_alerts(ticket_status=self.REFINEMENT, + tickets=self.get_tickets(ticket_status=self.REFINEMENT)) def update_budget_tag_to_resources(self, region_name: str, ticket_id: str, updated_budget: int): """ @@ -187,7 +203,8 @@ def update_duration_tag_to_resources(self, region_name: str, ticket_id: str, upd previous_duration = get_tag_value_by_name(tags=resource.get('Tags'), tag_name=tag_to_be_updated) updated_duration += int(float(previous_duration)) update_tags_dict = {tag_to_be_updated: str(updated_duration)} - tagging_operations.tag_resources_list(resources_list=resource_arn_list, update_tags_dict=update_tags_dict) + tagging_operations.tag_resources_list(resources_list=resource_arn_list, + update_tags_dict=update_tags_dict) else: logger.info('No AWS resources to update the costs') except Exception as err: @@ -244,6 +261,38 @@ def __prepare_athena_query_for_cluster_cost(self, names: list): """ return query + def notify_ticket_closed(self, ticket_id: str, region_name: str = ''): + """ + This method deletes the tag of the resources + :param region_name: + :param ticket_id: + :return: + """ + self.delete_tags_to_resource(ticket_id, region_name) + + def delete_tags_to_resource(self, ticket_id: str, region_name: str): + """ + This method deletes the TicketId tag of the resources + :param region_name: + :param ticket_id: + :return: + """ + try: + tag_to_be_deleted = ['Duration', 'TicketId', 'EstimatedCost', 'ApprovedManager'] + tagging_operations = AWSTaggingOperations(region_name=region_name) + resources_list_to_update = tagging_operations.get_resources_list(tag_name='TicketId', tag_value=ticket_id) + if resources_list_to_update: + resource_arn_list = [] + for resource in resources_list_to_update: + resource_arn_list.append(resource.get('ResourceARN')) + tagging_operations.untag_resources_list(resources_list=resource_arn_list, + delete_tag_keys_list=tag_to_be_deleted) + else: + logger.info('No AWS resources tags were deleted') + except Exception as err: + logger.error(err) + raise err + @logger_time_stamp def run(self): """ diff --git a/cloud_governance/cloud_resource_orchestration/clouds/aws/ec2/aws_tagging_operations.py b/cloud_governance/cloud_resource_orchestration/clouds/aws/ec2/aws_tagging_operations.py index f13368bb..f42ecdb4 100644 --- a/cloud_governance/cloud_resource_orchestration/clouds/aws/ec2/aws_tagging_operations.py +++ b/cloud_governance/cloud_resource_orchestration/clouds/aws/ec2/aws_tagging_operations.py @@ -1,5 +1,3 @@ -from abc import ABC - import typeguard from cloud_governance.cloud_resource_orchestration.clouds.common.abstract_tagging_operations import \ @@ -10,7 +8,7 @@ class AWSTaggingOperations(AbstractTaggingOperations): """ - This class is performs the tagging operations on AWS + This class is performing the tagging operations on AWS """ def __init__(self, region_name: str): @@ -28,6 +26,16 @@ def tag_resources_list(self, resources_list: list, update_tags_dict: dict): self.__resource_tag_api_operations.tag_resources(resource_arn_list=resources_list, update_tags_dict=update_tags_dict) + def untag_resources_list(self, resources_list: list, delete_tag_keys_list: list): + """ + This method untags the resources + :param resources_list: + :param delete_tag_keys_list: + :return: + """ + self.__resource_tag_api_operations.untag_resources(resource_arn_list=resources_list, + delete_tags_keys=delete_tag_keys_list) + @typeguard.typechecked @logger_time_stamp def get_resources_list(self, tag_name: str, tag_value: str = ''): diff --git a/cloud_governance/cloud_resource_orchestration/common/abstract_monitor_tickets.py b/cloud_governance/cloud_resource_orchestration/common/abstract_monitor_tickets.py index b51fa108..9d9ded26 100644 --- a/cloud_governance/cloud_resource_orchestration/common/abstract_monitor_tickets.py +++ b/cloud_governance/cloud_resource_orchestration/common/abstract_monitor_tickets.py @@ -1,12 +1,12 @@ from abc import abstractmethod, ABC -from datetime import datetime +from datetime import datetime, timezone, timedelta import typeguard from cloud_governance.cloud_resource_orchestration.utils.common_operations import string_equal_ignore_case from cloud_governance.cloud_resource_orchestration.utils.elastic_search_queries import ElasticSearchQueries from cloud_governance.cloud_resource_orchestration.utils.constant_variables import FIRST_CRO_ALERT, SECOND_CRO_ALERT, \ - CLOSE_JIRA_TICKET, JIRA_ISSUE_NEW_STATE, DATE_FORMAT + CLOSE_JIRA_TICKET, DATE_FORMAT from cloud_governance.common.elasticsearch.elasticsearch_operations import ElasticSearchOperations from cloud_governance.common.jira.jira_operations import JiraOperations from cloud_governance.common.logger.init_logger import logger @@ -20,6 +20,7 @@ class AbstractMonitorTickets(ABC): """ This Abstract class perform the operations for monitoring tickets """ + CLOUD_GOVERNANCE_ES_MAIL_INDEX = 'cloud-governance-mail-messages' def __init__(self): super().__init__() @@ -52,7 +53,8 @@ def _get_all_in_progress_tickets(self, account_name: str = '', cloud_name: str = in_progress_tickets_query = self.__elasticsearch_queries.get_all_in_progress_tickets( match_conditions=match_conditions, fields=fields) in_progress_tickets_list = self.__es_operations.fetch_data_by_es_query(query=in_progress_tickets_query, - es_index=self.__es_index_cro, filter_path='hits.hits._source') + es_index=self.__es_index_cro, + filter_path='hits.hits._source') return in_progress_tickets_list @abstractmethod @@ -96,7 +98,8 @@ def extend_tickets_budget(self, ticket_id: str, region_name: str, current_budget :return: """ ticket_extended = False - sub_ticket_ids = self.__jira_operations.get_budget_extend_tickets(ticket_id=ticket_id, ticket_state='inprogress') + sub_ticket_ids = self.__jira_operations.get_budget_extend_tickets(ticket_id=ticket_id, + ticket_state='inprogress') if sub_ticket_ids: total_budget_to_extend = self.__jira_operations.get_total_extend_budget(sub_ticket_ids=sub_ticket_ids) if string_equal_ignore_case(self.__cloud_name, 'AWS'): @@ -142,19 +145,21 @@ def extend_ticket_duration(self, ticket_id: str, region_name: str, current_durat @typeguard.typechecked @logger_time_stamp - def __close_and_update_ticket_data_in_es(self, ticket_id: str): + def __close_and_update_ticket_data_in_es(self, ticket_id: str, region_name: str = ''): """ This method close the ticket and update in ElasticSearch :return: """ - data = {'timestamp': datetime.utcnow(), 'ticket_id_state': 'closed'} + data = {'timestamp': datetime.now(timezone.utc), 'ticket_id_state': 'closed'} + self.notify_ticket_closed(ticket_id=ticket_id, region_name=region_name) + self.__jira_operations.move_issue_state(ticket_id, state='CLOSED') if self.__es_operations.check_elastic_search_connection(): self.__es_operations.update_elasticsearch_index(index=self.__es_index_cro, id=ticket_id, metadata=data) - self.__jira_operations.move_issue_state(ticket_id, state='CLOSED') @typeguard.typechecked @logger_time_stamp - def _monitor_ticket_duration(self, ticket_id: str, region_name: str, duration: int, completed_duration: int, **kwargs): + def _monitor_ticket_duration(self, ticket_id: str, region_name: str, duration: int, completed_duration: int, + **kwargs): """ This method monitors the ticket duration :param ticket_id: @@ -171,20 +176,56 @@ def _monitor_ticket_duration(self, ticket_id: str, region_name: str, duration: i current_duration=duration) if not ticket_extended: if remaining_duration == FIRST_CRO_ALERT: - subject, body = self.__mail_message.cro_monitor_alert_message(user=user, days=FIRST_CRO_ALERT, ticket_id=ticket_id) + subject, body = self.__mail_message.cro_monitor_alert_message(user=user, days=FIRST_CRO_ALERT, + ticket_id=ticket_id) message_type = 'first_duration_alert' elif remaining_duration == SECOND_CRO_ALERT: - subject, body = self.__mail_message.cro_monitor_alert_message(user=user, days=SECOND_CRO_ALERT, ticket_id=ticket_id) + subject, body = self.__mail_message.cro_monitor_alert_message(user=user, days=SECOND_CRO_ALERT, + ticket_id=ticket_id) message_type = 'second_duration_alert' else: if remaining_duration <= CLOSE_JIRA_TICKET: - self.__close_and_update_ticket_data_in_es(ticket_id=ticket_id) + self.__close_and_update_ticket_data_in_es(ticket_id=ticket_id, region_name=region_name) subject, body = self.__mail_message.cro_send_closed_alert(user, ticket_id) message_type = 'ticket_closed_alert' if subject and body: self.__postfix.send_email_postfix(to=user, cc=cc, subject=subject, content=body, mime_type='html', message_type=message_type) + def get_budget_exceed_alert_times(self, user: str): + """ + This method returns the number of times alerts send to an user + :return: + """ + current_date = datetime.now(timezone.utc).date() + start_date = current_date - timedelta(days=10) + query = { + "query": { + "bool": { + "must": [ + {"term": {"To.keyword": user}}, + {"term": {"MessageType.keyword": "budget_exceed_alert"}}, + { + "range": { + "timestamp": { + "gte": start_date, + "lte": current_date, + "format": "yyyy-MM-dd" + } + } + } + ] + } + }, + "size": 10, + "sort": {"timestamp": "desc"} + } + response = self.__es_operations.fetch_data_by_es_query(query=query, + es_index=self.CLOUD_GOVERNANCE_ES_MAIL_INDEX, + search_size=10, + limit_to_size=True) + return len(response) + @typeguard.typechecked @logger_time_stamp def _monitor_ticket_budget(self, ticket_id: str, region_name: str, budget: int, used_budget: int, **kwargs): @@ -200,7 +241,8 @@ def _monitor_ticket_budget(self, ticket_id: str, region_name: str, budget: int, remaining_budget = budget - used_budget threshold_budget = budget - (budget * (self.__ticket_over_usage_limit / 100)) subject = body = None - if threshold_budget >= remaining_budget > 0: + alerted_times = self.get_budget_exceed_alert_times(user=user) + if threshold_budget >= remaining_budget > 0 and alerted_times < 3: ticket_extended = self.extend_tickets_budget(ticket_id=ticket_id, region_name=region_name, current_budget=budget) if not ticket_extended: @@ -208,14 +250,14 @@ def _monitor_ticket_budget(self, ticket_id: str, region_name: str, budget: int, ticket_id=ticket_id, used_budget=used_budget, remain_budget=remaining_budget) - elif remaining_budget <= 0: + elif remaining_budget <= 0 and alerted_times == 2: ticket_extended = self.extend_tickets_budget(ticket_id=ticket_id, region_name=region_name, current_budget=budget) if not ticket_extended: subject, body = self.__mail_message.cro_monitor_budget_remain_high_alert(user=user, budget=budget, - ticket_id=ticket_id, - used_budget=used_budget, - remain_budget=remaining_budget) + ticket_id=ticket_id, + used_budget=used_budget, + remain_budget=remaining_budget) if subject and body: self.__postfix.send_email_postfix(to=user, cc=cc, subject=subject, content=body, mime_type='html', message_type='budget_exceed_alert') @@ -238,7 +280,7 @@ def _monitor_in_progress_tickets(self): duration = int(source_data.get('duration', 0)) used_budget = int(source_data.get('actual_cost', 0)) ticket_start_date = datetime.strptime(source_data.get('ticket_opened_date'), DATE_FORMAT).date() - completed_duration = (datetime.utcnow().date() - ticket_start_date).days + completed_duration = (datetime.now(timezone.utc).date() - ticket_start_date).days self._monitor_ticket_budget(ticket_id=ticket_id, region_name=region_name, budget=budget, used_budget=used_budget, user_cro=source_data.get('user_cro'), @@ -249,6 +291,15 @@ def _monitor_in_progress_tickets(self): approved_manager=source_data.get('approved_manager') ) + def notify_ticket_closed(self, ticket_id: str, region_name: str = ''): + """ + This method notify ticket closed alert + :param region_name: + :param ticket_id: + :return: + """ + pass + def monitor_tickets(self): """ This method monitor all tickets by status diff --git a/cloud_governance/cloud_resource_orchestration/common/run_cro.py b/cloud_governance/cloud_resource_orchestration/common/run_cro.py index 1eb8a603..26209347 100644 --- a/cloud_governance/cloud_resource_orchestration/common/run_cro.py +++ b/cloud_governance/cloud_resource_orchestration/common/run_cro.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone from cloud_governance.cloud_resource_orchestration.common.cro_object import CroObject from cloud_governance.cloud_resource_orchestration.utils.common_operations import string_equal_ignore_case @@ -11,7 +11,7 @@ class RunCRO: """This class monitors cro activities""" - PERSISTENT_RUN_DOC_ID = f'cro_run_persistence-{datetime.utcnow().date()}' + PERSISTENT_RUN_DOC_ID = f'cro_run_persistence-{datetime.now(timezone.utc).date()}' PERSISTENT_RUN_INDEX = 'cloud_resource_orchestration_persistence_run' def __init__(self): @@ -37,7 +37,9 @@ def save_current_timestamp(self): f'last_run_{self.__account.lower()}': datetime.utcnow()}, id=self.PERSISTENT_RUN_DOC_ID) else: self.__es_operations.update_elasticsearch_index(index=self.PERSISTENT_RUN_INDEX, - metadata={f'last_run_{self.__account.lower()}': datetime.utcnow()}, + metadata={ + f'last_run_{self.__account.lower()}': + datetime.now(timezone.utc)}, id=self.PERSISTENT_RUN_DOC_ID) @logger_time_stamp diff --git a/cloud_governance/common/clouds/aws/resource_tagging_api/resource_tag_api_operations.py b/cloud_governance/common/clouds/aws/resource_tagging_api/resource_tag_api_operations.py index ad4bffb9..86a38d68 100644 --- a/cloud_governance/common/clouds/aws/resource_tagging_api/resource_tag_api_operations.py +++ b/cloud_governance/common/clouds/aws/resource_tagging_api/resource_tag_api_operations.py @@ -46,3 +46,16 @@ def tag_resources(self, resource_arn_list: list, update_tags_dict: dict): except Exception as err: logger.error(err) raise err + + def untag_resources(self, resource_arn_list: list, delete_tags_keys: list): + """ + This method untags list to the given resource arn's + :param resource_arn_list: + :param delete_tags_keys: + :return: + """ + try: + self.__client.untag_resources(ResourceARNList=resource_arn_list, TagKeys=delete_tags_keys) + except Exception as err: + logger.error(err) + raise err