Skip to content

Commit

Permalink
get prefix -> inventory to reduce chance of conflict with other helpers
Browse files Browse the repository at this point in the history
  • Loading branch information
daroczig committed Feb 23, 2024
1 parent 9a58792 commit 8a2efa9
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 109 deletions.
33 changes: 0 additions & 33 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,36 +92,3 @@ server = session.exec(select(Server).where(Server.id == 'trn1.32xlarge')).one()
pp(server)
pp(server.vendor)
```

Lower level access examples:

```py
from sc_crawler.vendors import aws

# enable persistent caching of AWS queries
from cachier import set_default_params
set_default_params(caching_enabled=True)

# fetch data
aws.get_all() # slow to query all instance types in all regions

# look around
aws.datacenters
aws.zones

# pretty printed objects
from rich import print as pp
pp(aws)
pp(aws.datacenters)
pp(aws.servers[0])
```

Debug raw AWS responses:

```py
products = aws._methods.get_products()
pp(products[1]["product"])

instance_types = aws._methods.describe_instance_types(region="us-west-2")
pp(instance_types[1])
```
20 changes: 10 additions & 10 deletions src/sc_crawler/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
log_levels = list(logging._nameToLevel.keys())
LogLevels = Enum("LOGLEVELS", {k: k for k in log_levels})

supported_tables = [m[4:] for m in dir(Vendor) if m.startswith("get_")]
supported_tables = [m[10:] for m in dir(Vendor) if m.startswith("inventory_")]
Tables = Enum("TABLES", {k: k for k in supported_tables})


Expand Down Expand Up @@ -147,23 +147,23 @@ def custom_serializer(x):
vendor = session.merge(vendor)
vendor.set_session(session)
if Tables.compliance_frameworks in update_table:
vendor.get_compliance_frameworks()
vendor.inventory_compliance_frameworks()
if Tables.datacenters in update_table:
vendor.get_datacenters()
vendor.inventory_datacenters()
if Tables.zones in update_table:
vendor.get_zones()
vendor.inventory_zones()
if Tables.servers in update_table:
vendor.get_servers()
vendor.inventory_servers()
if Tables.server_prices in update_table:
vendor.get_server_prices()
vendor.inventory_server_prices()
if Tables.server_prices_spot in update_table:
vendor.get_server_prices_spot()
vendor.inventory_server_prices_spot()
if Tables.storage_prices in update_table:
vendor.get_storage_prices()
vendor.inventory_storage_prices()
if Tables.traffic_prices in update_table:
vendor.get_traffic_prices()
vendor.inventory_traffic_prices()
if Tables.ipv4_prices in update_table:
vendor.get_ipv4_prices()
vendor.inventory_ipv4_prices()
session.merge(vendor)
session.commit()

Expand Down
54 changes: 27 additions & 27 deletions src/sc_crawler/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,15 +245,15 @@ def __init__(self, **kwargs):
# make sure methods are provided
methods = self._get_methods().__dir__()
for method in [
"get_compliance_frameworks",
"get_datacenters",
"get_zones",
"get_servers",
"get_server_prices",
"get_server_prices_spot",
"get_storage_prices",
"get_traffic_prices",
"get_ipv4_prices",
"inventory_compliance_frameworks",
"inventory_datacenters",
"inventory_zones",
"inventory_servers",
"inventory_server_prices",
"inventory_server_prices_spot",
"inventory_storage_prices",
"inventory_traffic_prices",
"inventory_ipv4_prices",
]:
if method not in methods:
raise NotImplementedError(
Expand Down Expand Up @@ -307,59 +307,59 @@ def set_table_rows_inactive(self, model: str, *args) -> None:
self._session.execute(query.values(status=Status.INACTIVE))

@log_start_end
def get_compliance_frameworks(self):
def inventory_compliance_frameworks(self):
"""Get the vendor's all compliance frameworks."""
self.set_table_rows_inactive(VendorComplianceLink)
self._get_methods().get_compliance_frameworks(self)
self._get_methods().inventory_compliance_frameworks(self)

@log_start_end
def get_datacenters(self):
def inventory_datacenters(self):
"""Get the vendor's all datacenters."""
self.set_table_rows_inactive(Datacenter)
self._get_methods().get_datacenters(self)
self._get_methods().inventory_datacenters(self)

@log_start_end
def get_zones(self):
def inventory_zones(self):
"""Get all the zones in the vendor's datacenters."""
self.set_table_rows_inactive(Zone)
self._get_methods().get_zones(self)
self._get_methods().inventory_zones(self)

@log_start_end
def get_servers(self):
def inventory_servers(self):
"""Get the vendor's all server types."""
self.set_table_rows_inactive(Server)
self._get_methods().get_servers(self)
self._get_methods().inventory_servers(self)

@log_start_end
def get_server_prices(self):
def inventory_server_prices(self):
"""Get the current standard/ondemand/reserved prices of all server types."""
self.set_table_rows_inactive(
ServerPrice, ServerPrice.allocation != Allocation.SPOT
)
self._get_methods().get_server_prices(self)
self._get_methods().inventory_server_prices(self)

@log_start_end
def get_server_prices_spot(self):
def inventory_server_prices_spot(self):
"""Get the current spot prices of all server types."""
self.set_table_rows_inactive(
ServerPrice, ServerPrice.allocation == Allocation.SPOT
)
self._get_methods().get_server_prices_spot(self)
self._get_methods().inventory_server_prices_spot(self)

@log_start_end
def get_storage_prices(self):
def inventory_storage_prices(self):
self.set_table_rows_inactive(StoragePrice)
self._get_methods().get_storage_prices(self)
self._get_methods().inventory_storage_prices(self)

@log_start_end
def get_traffic_prices(self):
def inventory_traffic_prices(self):
self.set_table_rows_inactive(TrafficPrice)
self._get_methods().get_traffic_prices(self)
self._get_methods().inventory_traffic_prices(self)

@log_start_end
def get_ipv4_prices(self):
def inventory_ipv4_prices(self):
self.set_table_rows_inactive(Ipv4Price)
self._get_methods().get_ipv4_prices(self)
self._get_methods().inventory_ipv4_prices(self)


class Datacenter(ScModel, table=True):
Expand Down
42 changes: 21 additions & 21 deletions src/sc_crawler/vendors/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,23 @@ For example, `aws.py` provides functions to be used by its `Vendor` instance, ca

Each file should provide the below functions:

- `get_compliance_frameworks`: Define `VendorComplianceLink` instances to describe which frameworks the vendor complies with. Optionally include references in the `comment` field. To avoid duplicating `ComplianceFramework` instances, easiest is to use the `compliance_framework_id` field instead of the `compliance_framework` relationship.
- `get_datacenters`: Define `Datacenter` instances with location, energy source etc for each region/datacenter the vendor has.
- `get_zones`: Define a `Zone` instance for each availability zone of the vendor in each datacenter.
- `get_servers`: Define `Server` instances for the vendor's server/instance types.
- `get_server_prices`: Define the `ServerPrice` instances for the standard/ondemand (or optionally also for the reserved) pricing of the instance types per datacenter and zone.
- `get_server_prices_spot`: Similar to the above, define `ServerPrice` instances but the `allocation` field set to `Allocation.SPOT`. Very likely to see different spot prices per datacenter/zone.
- `get_storage_prices`: Define `StoragePrice` instances to describe the available storage options that can be attached to the servers.
- `get_traffic_prices`: Define `TrafficPrice` instances to describe the pricing of ingress/egress traffic.
- `get_ipv4_prices`: Define `Ipv4Price` instances on the price of an IPv4 address.
- `inventory_compliance_frameworks`: Define `VendorComplianceLink` instances to describe which frameworks the vendor complies with. Optionally include references in the `comment` field. To avoid duplicating `ComplianceFramework` instances, the easiest approach is to use the `compliance_framework_id` field instead of the `compliance_framework` relationship.
- `inventory_datacenters`: Define `Datacenter` instances with location, energy source, etc. for each region/datacenter the vendor has.
- `inventory_zones`: Define a `Zone` instance for each availability zone of the vendor in each datacenter.
- `inventory_servers`: Define `Server` instances for the vendor's server/instance types.
- `inventory_server_prices`: Define the `ServerPrice` instances for the standard/ondemand (or optionally also for the reserved) pricing of the instance types per datacenter and zone.
- `inventory_server_prices_spot`: Similar to the above, define `ServerPrice` instances, but with the `allocation` field set to `Allocation.SPOT`. Spot prices are very likely to differ per datacenter/zone.
- `inventory_storage_prices`: Define `StoragePrice` instances to describe the available storage options that can be attached to the servers.
- `inventory_traffic_prices`: Define `TrafficPrice` instances to describe the pricing of ingress/egress traffic.
- `inventory_ipv4_prices`: Define `Ipv4Price` instances describing the price of an IPv4 address.

Each function will be picked up as an instance method of the related `Vendor` instance, so each function should take a single argument: the `Vendor` instance. There is no need to return the objects -- it's enough to define the above-mentioned instances.

If a helper is not needed (e.g. another helper already provides its output, or there are no spot prices), it is still required, but can return early, e.g. if `Zone` objects were populated by `get_datacenters` already, do something like:
If a helper has nothing to do (e.g. another helper already provides its output, or there are no spot prices), it must still be defined, but it can return early. For example, if `Zone` objects were already populated by `inventory_datacenters`, do something like:

```python
def get_zones(self):
"""Zones were already provided in get_datacenters."""
def inventory_zones(self):
"""Zones were already provided in inventory_datacenters."""
pass
```

Expand All @@ -43,38 +43,38 @@ from ..schemas import (
)


def get_compliance_frameworks(vendor):
def inventory_compliance_frameworks(vendor):
pass


def get_datacenters(vendor):
def inventory_datacenters(vendor):
pass


def get_zones(vendor):
def inventory_zones(vendor):
pass


def get_servers(vendor):
def inventory_servers(vendor):
pass


def get_server_prices(vendor):
def inventory_server_prices(vendor):
pass


def get_server_prices_spot(vendor):
def inventory_server_prices_spot(vendor):
pass


def get_storage_prices(vendor):
def inventory_storage_prices(vendor):
pass


def get_traffic_prices(vendor):
def inventory_traffic_prices(vendor):
pass


def get_ipv4_prices(vendor):
def inventory_ipv4_prices(vendor):
pass
```
18 changes: 9 additions & 9 deletions src/sc_crawler/vendors/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,15 +353,15 @@ def _make_price_from_product(product, vendor):
# Public methods to fetch data


def get_compliance_frameworks(vendor):
def inventory_compliance_frameworks(vendor):
for compliance_framework in ["hipaa", "soc2t2"]:
VendorComplianceLink(
vendor=vendor,
compliance_framework_id=compliance_framework,
)


def get_datacenters(vendor):
def inventory_datacenters(vendor):
"""List all available AWS datacenters.
Some data sources are not available from APIs, and were collected manually:
Expand Down Expand Up @@ -679,7 +679,7 @@ def get_datacenters(vendor):
datacenter.vendor.merge_dependent(datacenter)


def get_zones(vendor):
def inventory_zones(vendor):
"""List all available AWS availability zones."""
for datacenter in vendor.datacenters:
if datacenter.status == "active":
Expand All @@ -692,15 +692,15 @@ def get_zones(vendor):
)


def get_servers(vendor):
def inventory_servers(vendor):
# TODO drop this in favor of pricing.get_products, as it has info e.g. on instanceFamily
# although other fields are messier (e.g. extract memory from string)
for datacenter in vendor.datacenters:
if datacenter.status == "active":
_list_instance_types_of_region(datacenter.id, vendor)


def get_server_prices(vendor):
def inventory_server_prices(vendor):
products = _boto_get_products(
service_code="AmazonEC2",
filters={
Expand All @@ -721,19 +721,19 @@ def get_server_prices(vendor):
_make_price_from_product(product, vendor)


def get_server_prices_spot(vendor):
def inventory_server_prices_spot(vendor):
pass


def get_storage_prices(vendor):
def inventory_storage_prices(vendor):
pass


def get_traffic_prices(vendor):
def inventory_traffic_prices(vendor):
pass


def get_ipv4_prices(vendor):
def inventory_ipv4_prices(vendor):
products = _boto_get_products(
service_code="AmazonVPC",
filters={
Expand Down
18 changes: 9 additions & 9 deletions src/sc_crawler/vendors/gcp.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,34 @@
def get_compliance_frameworks(vendor):
def inventory_compliance_frameworks(vendor):
pass


def get_datacenters(vendor):
def inventory_datacenters(vendor):
pass


def get_zones(vendor):
def inventory_zones(vendor):
pass


def get_servers(vendor):
def inventory_servers(vendor):
pass


def get_server_prices(vendor):
def inventory_server_prices(vendor):
pass


def get_server_prices_spot(vendor):
def inventory_server_prices_spot(vendor):
pass


def get_storage_prices(vendor):
def inventory_storage_prices(vendor):
pass


def get_traffic_prices(vendor):
def inventory_traffic_prices(vendor):
pass


def get_ipv4_prices(vendor):
def inventory_ipv4_prices(vendor):
pass

0 comments on commit 8a2efa9

Please sign in to comment.