From 8a2efa9d86b72a2a733db1b9d01a045785dbff4a Mon Sep 17 00:00:00 2001 From: "Gergely Daroczi (@daroczig)" Date: Fri, 23 Feb 2024 10:13:41 +0100 Subject: [PATCH] get prefix -> inventory to reduce chance of conflict with other helpers --- README.md | 33 ------------------- src/sc_crawler/cli.py | 20 ++++++------ src/sc_crawler/schemas.py | 54 ++++++++++++++++---------------- src/sc_crawler/vendors/README.md | 42 ++++++++++++------------- src/sc_crawler/vendors/aws.py | 18 +++++------ src/sc_crawler/vendors/gcp.py | 18 +++++------ 6 files changed, 76 insertions(+), 109 deletions(-) diff --git a/README.md b/README.md index 7839b514..803679d0 100644 --- a/README.md +++ b/README.md @@ -92,36 +92,3 @@ server = session.exec(select(Server).where(Server.id == 'trn1.32xlarge')).one() pp(server) pp(server.vendor) ``` - -Lower level access examples: - -```py -from sc_crawler.vendors import aws - -# enable persistent caching of AWS queries -from cachier import set_default_params -set_default_params(caching_enabled=True) - -# fetch data -aws.get_all() # slow to query all instance types in all regions - -# look around -aws.datacenters -aws.zones - -# pretty printed objects -from rich import print as pp -pp(aws) -pp(aws.datacenters) -pp(aws.servers[0]) -``` - -Debug raw AWS responses: - -```py -products = aws._methods.get_products() -pp(products[1]["product"]) - -instance_types = aws._methods.describe_instance_types(region="us-west-2") -pp(instance_types[1]) -``` diff --git a/src/sc_crawler/cli.py b/src/sc_crawler/cli.py index fa1c2237..3c685f46 100644 --- a/src/sc_crawler/cli.py +++ b/src/sc_crawler/cli.py @@ -37,7 +37,7 @@ log_levels = list(logging._nameToLevel.keys()) LogLevels = Enum("LOGLEVELS", {k: k for k in log_levels}) -supported_tables = [m[4:] for m in dir(Vendor) if m.startswith("get_")] +supported_tables = [m[10:] for m in dir(Vendor) if m.startswith("inventory_")] Tables = Enum("TABLES", {k: k for k in supported_tables}) @@ -147,23 +147,23 @@ def custom_serializer(x): vendor = session.merge(vendor) vendor.set_session(session) if Tables.compliance_frameworks in update_table: - vendor.get_compliance_frameworks() + vendor.inventory_compliance_frameworks() if Tables.datacenters in update_table: - vendor.get_datacenters() + vendor.inventory_datacenters() if Tables.zones in update_table: - vendor.get_zones() + vendor.inventory_zones() if Tables.servers in update_table: - vendor.get_servers() + vendor.inventory_servers() if Tables.server_prices in update_table: - vendor.get_server_prices() + vendor.inventory_server_prices() if Tables.server_prices_spot in update_table: - vendor.get_server_prices_spot() + vendor.inventory_server_prices_spot() if Tables.storage_prices in update_table: - vendor.get_storage_prices() + vendor.inventory_storage_prices() if Tables.traffic_prices in update_table: - vendor.get_traffic_prices() + vendor.inventory_traffic_prices() if Tables.ipv4_prices in update_table: - vendor.get_ipv4_prices() + vendor.inventory_ipv4_prices() session.merge(vendor) session.commit() diff --git a/src/sc_crawler/schemas.py b/src/sc_crawler/schemas.py index d3ca8fe4..7452e3aa 100644 --- a/src/sc_crawler/schemas.py +++ b/src/sc_crawler/schemas.py @@ -245,15 +245,15 @@ def __init__(self, **kwargs): # make sure methods are provided methods = self._get_methods().__dir__() for method in [ - "get_compliance_frameworks", - "get_datacenters", - "get_zones", - "get_servers", - "get_server_prices", - "get_server_prices_spot", - "get_storage_prices", - "get_traffic_prices", - "get_ipv4_prices", + "inventory_compliance_frameworks", + "inventory_datacenters", + "inventory_zones", + "inventory_servers", + "inventory_server_prices", + "inventory_server_prices_spot", + "inventory_storage_prices", + "inventory_traffic_prices", + "inventory_ipv4_prices", ]: if method not in methods: raise NotImplementedError( @@ -307,59 +307,59 @@ def set_table_rows_inactive(self, model: str, *args) -> None: self._session.execute(query.values(status=Status.INACTIVE)) @log_start_end - def get_compliance_frameworks(self): + def inventory_compliance_frameworks(self): """Get the vendor's all compliance frameworks.""" self.set_table_rows_inactive(VendorComplianceLink) - self._get_methods().get_compliance_frameworks(self) + self._get_methods().inventory_compliance_frameworks(self) @log_start_end - def get_datacenters(self): + def inventory_datacenters(self): """Get the vendor's all datacenters.""" self.set_table_rows_inactive(Datacenter) - self._get_methods().get_datacenters(self) + self._get_methods().inventory_datacenters(self) @log_start_end - def get_zones(self): + def inventory_zones(self): """Get all the zones in the vendor's datacenters.""" self.set_table_rows_inactive(Zone) - self._get_methods().get_zones(self) + self._get_methods().inventory_zones(self) @log_start_end - def get_servers(self): + def inventory_servers(self): """Get the vendor's all server types.""" self.set_table_rows_inactive(Server) - self._get_methods().get_servers(self) + self._get_methods().inventory_servers(self) @log_start_end - def get_server_prices(self): + def inventory_server_prices(self): """Get the current standard/ondemand/reserved prices of all server types.""" self.set_table_rows_inactive( ServerPrice, ServerPrice.allocation != Allocation.SPOT ) - self._get_methods().get_server_prices(self) + self._get_methods().inventory_server_prices(self) @log_start_end - def get_server_prices_spot(self): + def inventory_server_prices_spot(self): """Get the current spot prices of all server types.""" self.set_table_rows_inactive( ServerPrice, ServerPrice.allocation == Allocation.SPOT ) - self._get_methods().get_server_prices_spot(self) + self._get_methods().inventory_server_prices_spot(self) @log_start_end - def get_storage_prices(self): + def inventory_storage_prices(self): self.set_table_rows_inactive(StoragePrice) - self._get_methods().get_storage_prices(self) + self._get_methods().inventory_storage_prices(self) @log_start_end - def get_traffic_prices(self): + def inventory_traffic_prices(self): self.set_table_rows_inactive(TrafficPrice) - self._get_methods().get_traffic_prices(self) + self._get_methods().inventory_traffic_prices(self) @log_start_end - def get_ipv4_prices(self): + def inventory_ipv4_prices(self): self.set_table_rows_inactive(Ipv4Price) - self._get_methods().get_ipv4_prices(self) + self._get_methods().inventory_ipv4_prices(self) class Datacenter(ScModel, table=True): diff --git a/src/sc_crawler/vendors/README.md b/src/sc_crawler/vendors/README.md index 4ad53809..5dc2f518 100644 --- a/src/sc_crawler/vendors/README.md +++ b/src/sc_crawler/vendors/README.md @@ -5,23 +5,23 @@ For example, `aws.py` provides functions to be used by its `Vendor` instance, ca Each file should provide the below functions: -- `get_compliance_frameworks`: Define `VendorComplianceLink` instances to describe which frameworks the vendor complies with. Optionally include references in the `comment` field. To avoid duplicating `ComplianceFramework` instances, easiest is to use the `compliance_framework_id` field instead of the `compliance_framework` relationship. -- `get_datacenters`: Define `Datacenter` instances with location, energy source etc for each region/datacenter the vendor has. -- `get_zones`: Define a `Zone` instance for each availability zone of the vendor in each datacenter. -- `get_servers`: Define `Server` instances for the vendor's server/instance types. -- `get_server_prices`: Define the `ServerPrice` instances for the standard/ondemand (or optionally also for the reserved) pricing of the instance types per datacenter and zone. -- `get_server_prices_spot`: Similar to the above, define `ServerPrice` instances but the `allocation` field set to `Allocation.SPOT`. Very likely to see different spot prices per datacenter/zone. -- `get_storage_prices`: Define `StoragePrice` instances to describe the available storage options that can be attached to the servers. -- `get_traffic_prices`: Define `TrafficPrice` instances to describe the pricing of ingress/egress traffic. -- `get_ipv4_prices`: Define `Ipv4Price` instances on the price of an IPv4 address. +- `inventory_compliance_frameworks`: Define `VendorComplianceLink` instances to describe which frameworks the vendor complies with. Optionally include references in the `comment` field. To avoid duplicating `ComplianceFramework` instances, easiest is to use the `compliance_framework_id` field instead of the `compliance_framework` relationship. +- `inventory_datacenters`: Define `Datacenter` instances with location, energy source etc for each region/datacenter the vendor has. +- `inventory_zones`: Define a `Zone` instance for each availability zone of the vendor in each datacenter. +- `inventory_servers`: Define `Server` instances for the vendor's server/instance types. +- `inventory_server_prices`: Define the `ServerPrice` instances for the standard/ondemand (or optionally also for the reserved) pricing of the instance types per datacenter and zone. +- `inventory_server_prices_spot`: Similar to the above, define `ServerPrice` instances but the `allocation` field set to `Allocation.SPOT`. Very likely to see different spot prices per datacenter/zone. +- `inventory_storage_prices`: Define `StoragePrice` instances to describe the available storage options that can be attached to the servers. +- `inventory_traffic_prices`: Define `TrafficPrice` instances to describe the pricing of ingress/egress traffic. +- `inventory_ipv4_prices`: Define `Ipv4Price` instances on the price of an IPv4 address. Each function will be picked up as the related `Vendor` instance's instance methods, so each function should take a single argument, that is the `Vendor` instance. No need to return the objects -- it's enough to define the above-mentioned instances. -If a helper is not needed (e.g. another helper already provides its output, or there are no spot prices), it is still required, but can return early, e.g. if `Zone` objects were populated by `get_datacenters` already, do something like: +If a helper is not needed (e.g. another helper already provides its output, or there are no spot prices), it is still required, but can return early, e.g. if `Zone` objects were populated by `inventory_datacenters` already, do something like: ```python -def get_zones(self): - """Zones were already provided in get_datacenters.""" +def inventory_zones(self): + """Zones were already provided in inventory_datacenters.""" pass ``` @@ -43,38 +43,38 @@ from ..schemas import ( ) -def get_compliance_frameworks(vendor): +def inventory_compliance_frameworks(vendor): pass -def get_datacenters(vendor): +def inventory_datacenters(vendor): pass -def get_zones(vendor): +def inventory_zones(vendor): pass -def get_servers(vendor): +def inventory_servers(vendor): pass -def get_server_prices(vendor): +def inventory_server_prices(vendor): pass -def get_server_prices_spot(vendor): +def inventory_server_prices_spot(vendor): pass -def get_storage_prices(vendor): +def inventory_storage_prices(vendor): pass -def get_traffic_prices(vendor): +def inventory_traffic_prices(vendor): pass -def get_ipv4_prices(vendor): +def inventory_ipv4_prices(vendor): pass ``` diff --git a/src/sc_crawler/vendors/aws.py b/src/sc_crawler/vendors/aws.py index a74d674a..3c84f599 100644 --- a/src/sc_crawler/vendors/aws.py +++ b/src/sc_crawler/vendors/aws.py @@ -353,7 +353,7 @@ def _make_price_from_product(product, vendor): # Public methods to fetch data -def get_compliance_frameworks(vendor): +def inventory_compliance_frameworks(vendor): for compliance_framework in ["hipaa", "soc2t2"]: VendorComplianceLink( vendor=vendor, @@ -361,7 +361,7 @@ def get_compliance_frameworks(vendor): ) -def get_datacenters(vendor): +def inventory_datacenters(vendor): """List all available AWS datacenters. Some data sources are not available from APIs, and were collected manually: @@ -679,7 +679,7 @@ def get_datacenters(vendor): datacenter.vendor.merge_dependent(datacenter) -def get_zones(vendor): +def inventory_zones(vendor): """List all available AWS availability zones.""" for datacenter in vendor.datacenters: if datacenter.status == "active": @@ -692,7 +692,7 @@ def get_zones(vendor): ) -def get_servers(vendor): +def inventory_servers(vendor): # TODO drop this in favor of pricing.get_products, as it has info e.g. on instanceFamily # although other fields are messier (e.g. extract memory from string) for datacenter in vendor.datacenters: @@ -700,7 +700,7 @@ def get_servers(vendor): _list_instance_types_of_region(datacenter.id, vendor) -def get_server_prices(vendor): +def inventory_server_prices(vendor): products = _boto_get_products( service_code="AmazonEC2", filters={ @@ -721,19 +721,19 @@ def get_server_prices(vendor): _make_price_from_product(product, vendor) -def get_server_prices_spot(vendor): +def inventory_server_prices_spot(vendor): pass -def get_storage_prices(vendor): +def inventory_storage_prices(vendor): pass -def get_traffic_prices(vendor): +def inventory_traffic_prices(vendor): pass -def get_ipv4_prices(vendor): +def inventory_ipv4_prices(vendor): products = _boto_get_products( service_code="AmazonVPC", filters={ diff --git a/src/sc_crawler/vendors/gcp.py b/src/sc_crawler/vendors/gcp.py index 2341b4fd..34a5afb5 100644 --- a/src/sc_crawler/vendors/gcp.py +++ b/src/sc_crawler/vendors/gcp.py @@ -1,34 +1,34 @@ -def get_compliance_frameworks(vendor): +def inventory_compliance_frameworks(vendor): pass -def get_datacenters(vendor): +def inventory_datacenters(vendor): pass -def get_zones(vendor): +def inventory_zones(vendor): pass -def get_servers(vendor): +def inventory_servers(vendor): pass -def get_server_prices(vendor): +def inventory_server_prices(vendor): pass -def get_server_prices_spot(vendor): +def inventory_server_prices_spot(vendor): pass -def get_storage_prices(vendor): +def inventory_storage_prices(vendor): pass -def get_traffic_prices(vendor): +def inventory_traffic_prices(vendor): pass -def get_ipv4_prices(vendor): +def inventory_ipv4_prices(vendor): pass