From 61075c496ba064beb6666f2fa3fe64ecc7c80cd5 Mon Sep 17 00:00:00 2001 From: "Gergely Daroczi (@daroczig)" Date: Fri, 29 Dec 2023 23:28:57 +0100 Subject: [PATCH] optional persistent caching of all AWS queries --- README.md | 9 +++++++ pyproject.toml | 1 + sc_crawler/vendors/aws.py | 54 +++++++++++++++++++++++++++------------ 3 files changed, 47 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 65c92a1b..af662a93 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,19 @@ Examples: ```py from sc_crawler.vendors import aws + +# enable persistent caching of AWS queries +from cachier import set_default_params +set_default_params(caching_enabled=True) + +# fetch data aws.get_all() # slow to query all instance types in all regions + +# look around aws.datacenters aws.zones +# pretty printed objects from rich import print as pp pp(aws) pp(aws._datacenters[1]._zones) diff --git a/pyproject.toml b/pyproject.toml index cfb6689f..e51466e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,7 @@ name = "sc-crawler" version = "0.0.1" requires-python = ">= 3.7" dependencies = [ + "cachier", "pydantic", "pydantic_extra_types", "pycountry", diff --git a/sc_crawler/vendors/aws.py b/sc_crawler/vendors/aws.py index 9fe745c0..5ce19cc3 100644 --- a/sc_crawler/vendors/aws.py +++ b/sc_crawler/vendors/aws.py @@ -1,5 +1,6 @@ import boto3 -from functools import cache +from cachier import cachier, set_default_params +from datetime import timedelta from itertools import chain import logging import re @@ -9,6 +10,39 @@ logger = logging.getLogger(__name__) +# disable caching by default +set_default_params(caching_enabled=False) + +# ############################################################################## +# AWS cached helpers + + +@cachier(stale_after=timedelta(days=3)) +def describe_instance_types(region): + ec2 = boto3.client("ec2", region_name=region) + return ec2.describe_instance_types().get("InstanceTypes") + + +@cachier(stale_after=timedelta(days=3)) +def describe_regions(): + ec2 = boto3.client("ec2") + return ec2.describe_regions().get("Regions", []) + + +@cachier(stale_after=timedelta(days=3)) +def describe_availability_zones(region): + ec2 = boto3.client("ec2", region_name=region) + zones = ec2.describe_availability_zones( + Filters=[ + {"Name": "zone-type", "Values": ["availability-zone"]}, + ], + AllAvailabilityZones=True, + ).get("AvailabilityZones") + return zones + + +# ############################################################################## + def get_datacenters(vendor, *args, **kwargs): """List all available AWS datacenters. @@ -272,8 +306,7 @@ def get_datacenters(vendor, *args, **kwargs): # look for undocumented (new) datacenters in AWS supported_regions = [d.identifier for d in datacenters] - ec2 = boto3.client("ec2") - regions = ec2.describe_regions().get("Regions", []) + regions = describe_regions() for region in regions: region_name = region.get("RegionName") if "gov" in region_name: @@ -293,14 +326,7 @@ def get_datacenters(vendor, *args, **kwargs): # add zones for datacenter in datacenters: - # need to create a new clien in each AWS region - ec2 = boto3.client("ec2", region_name=datacenter.identifier) - zones = ec2.describe_availability_zones( - Filters=[ - {"Name": "zone-type", "Values": ["availability-zone"]}, - ], - AllAvailabilityZones=True, - ).get("AvailabilityZones") + zones = describe_availability_zones(datacenter.identifier) datacenter._zones = { zone.get("ZoneId"): Zone( identifier=zone.get("ZoneId"), @@ -313,12 +339,6 @@ def get_datacenters(vendor, *args, **kwargs): return datacenters -@cache -def describe_instance_types(region): - ec2 = boto3.client("ec2", region_name=region) - return ec2.describe_instance_types().get("InstanceTypes") - - instance_families = { "c": "Compute optimized", "d": "Dense storage",