Skip to content

Commit

Permalink
Merge pull request #9 from SpareCores/DEV-6
Browse files Browse the repository at this point in the history
DEV-6 missing AWS resources
  • Loading branch information
daroczig authored Mar 8, 2024
2 parents 8da47b4 + fc5dc9b commit 673a884
Show file tree
Hide file tree
Showing 6 changed files with 389 additions and 81 deletions.
18 changes: 11 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,25 +70,29 @@ Note that you need specific IAM permissions to be able to run the Crawler at the

</details>


Fetch and standardize datacenter, zone, product, etc. data into a single SQLite file:

```shell
rm sc_crawler.db; sc-crawler pull --cache --log-level DEBUG --include-vendor aws
sc-crawler pull --cache --include-vendor aws
```

Such an up-to-date SQLite database is managed by the Spare Cores team in the
[SC Data](https://github.com/SpareCores/sc-data) repository, or you can also
find it at https://sc-data-public-40e9d310.s3.amazonaws.com/sc-data-all.db.bz2

## Other WIP methods

Read from DB:
Read from previously pulled DB:

```py
from sc_crawler.database import engine
from sc_crawler.schemas import Server
from sqlmodel import Session, select
session = Session(engine)
session.exec(select(Server).where(Server.id == 'trn1.32xlarge')).one()
from sqlmodel import create_engine, Session, select

engine = create_engine("sqlite:///sc_crawler.db")
session = Session(engine)
server = session.exec(select(Server).where(Server.id == 'trn1.32xlarge')).one()

from rich import print as pp
pp(server)
pp(server.vendor)
```
2 changes: 2 additions & 0 deletions src/sc_crawler/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ def custom_serializer(x):
vendor.inventory_server_prices()
if Tables.server_prices_spot in update_table:
vendor.inventory_server_prices_spot()
if Tables.storages in update_table:
vendor.inventory_storages()
if Tables.storage_prices in update_table:
vendor.inventory_storage_prices()
if Tables.traffic_prices in update_table:
Expand Down
46 changes: 20 additions & 26 deletions src/sc_crawler/schemas.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Schemas for vendors, datacenters, zones, and other resources."""


import logging
from datetime import datetime
from enum import Enum
Expand Down Expand Up @@ -111,8 +110,9 @@ def __init__(self, *args, **kwargs):
"""
super().__init__(*args, **kwargs)
if hasattr(self, "vendor"):
if self.vendor.session:
self.vendor.merge_dependent(self)
if self.vendor:
if self.vendor.session:
self.vendor.merge_dependent(self)


class Json(BaseModel):
Expand Down Expand Up @@ -175,6 +175,7 @@ class PriceUnit(str, Enum):
HOUR = "hour"
GIB = "GiB"
GB = "GB"
GB_MONTH = "GB/month"


class PriceTier(Json):
Expand Down Expand Up @@ -394,7 +395,6 @@ class Vendor(HasName, HasIdPK, table=True):
datacenters: List["Datacenter"] = Relationship(back_populates="vendor")
zones: List["Zone"] = Relationship(back_populates="vendor")
storages: List["Storage"] = Relationship(back_populates="vendor")
traffics: List["Traffic"] = Relationship(back_populates="vendor")
servers: List["Server"] = Relationship(back_populates="vendor")
server_prices: List["ServerPrice"] = Relationship(back_populates="vendor")
traffic_prices: List["TrafficPrice"] = Relationship(back_populates="vendor")
Expand Down Expand Up @@ -554,6 +554,11 @@ def inventory_server_prices_spot(self):
)
self._get_methods().inventory_server_prices_spot(self)

@log_start_end
def inventory_storages(self):
    """Refresh the Storage records of this vendor.

    Calls ``set_table_rows_inactive(Storage)`` first, then hands over to
    the ``inventory_storages`` callable of the object returned by
    ``_get_methods()``, passing this vendor instance.
    """
    self.set_table_rows_inactive(Storage)
    vendor_methods = self._get_methods()
    vendor_methods.inventory_storages(self)

@log_start_end
def inventory_storage_prices(self):
self.set_table_rows_inactive(StoragePrice)
Expand Down Expand Up @@ -637,7 +642,6 @@ class Zone(HasStatus, HasName, HasDatacenterPK, HasVendorPK, HasIdPK, table=True
class Storage(HasDescription, HasName, HasVendorPK, HasIdPK, table=True):
"""Flexible storage options that can be attached to a Server."""

size: int = Field(default=0, description="Size (GiB) of the overall storage.")
storage_type: StorageType = Field(
description="High-level category of the main storage."
)
Expand All @@ -662,23 +666,6 @@ class Storage(HasDescription, HasName, HasVendorPK, HasIdPK, table=True):
prices: List["StoragePrice"] = Relationship(back_populates="storage")


# TODO this table might not be needed?
# might be better add the "direction" column directly to the TrafficPrice table
class Traffic(HasDescription, HasName, HasVendorPK, HasIdPK, table=True):
    """Extra traffic options tied to a Server."""

    # Required column: no default is given, so a direction must be supplied.
    direction: TrafficDirection = Field(
        description="Direction of the traffic: inbound or outbound."
    )
    # New records start out as ACTIVE unless a status is passed explicitly.
    status: Status = Field(
        description="Status of the resource (active or inactive).",
        default=Status.ACTIVE,
    )

    # Two-way relationships: paired with the `traffics` attribute on Vendor
    # and the `traffic` attribute on TrafficPrice.
    vendor: Vendor = Relationship(back_populates="traffics")
    prices: List["TrafficPrice"] = Relationship(back_populates="traffic")


class Server(ScModel, table=True):
"""Server types."""

Expand Down Expand Up @@ -808,6 +795,7 @@ class ServerPriceExtraFields(ScModel):
allocation: Allocation = Field(
default=Allocation.ONDEMAND,
description="Allocation method, e.g. on-demand or spot.",
primary_key=True,
)


Expand Down Expand Up @@ -843,16 +831,22 @@ class StoragePrice(StoragePriceBase, table=True):
storage: Storage = Relationship(back_populates="prices")


class TrafficPriceBase(HasPriceFields, HasTraffic, HasDatacenterPK, HasVendorPK):
pass
# Shared columns for traffic prices: on top of the fields inherited from
# HasDatacenterPK and HasVendorPK, the traffic direction is part of the
# primary key.
class TrafficPriceBase(HasDatacenterPK, HasVendorPK):
    # Required, and flagged as a primary key column.
    direction: TrafficDirection = Field(
        primary_key=True,
        description="Direction of the traffic: inbound or outbound.",
    )
    # Defaults to ACTIVE when no status is provided.
    status: Status = Field(
        description="Status of the resource (active or inactive).",
        default=Status.ACTIVE,
    )


class TrafficPrice(TrafficPriceBase, table=True):
class TrafficPrice(HasPriceFields, TrafficPriceBase, table=True):
"""Extra Traffic prices in each Datacenter."""

vendor: Vendor = Relationship(back_populates="traffic_prices")
datacenter: Datacenter = Relationship(back_populates="traffic_prices")
traffic: Traffic = Relationship(back_populates="prices")


class Ipv4PriceBase(HasPriceFields, HasDatacenterPK, HasVendorPK):
Expand Down
13 changes: 13 additions & 0 deletions src/sc_crawler/str.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,16 @@ def plural(text):
if search("[aeiou]y$", text):
return sub("y$", "ies", text)
return text + "s"


def extract_last_number(s: str) -> "float | None":
    """Extract the last number from a string.

    A number is a run of digits with an optional fractional part (or a
    bare fraction such as ``.5``). Dots that are not part of a number,
    e.g. a sentence-ending period, are ignored rather than being passed
    on to ``float()``.

    Args:
        s: Text to scan. Non-string values are converted with ``str()``.

    Returns:
        The last number found as a float, or None if the text holds no
        digits.

    Examples:
        >>> extract_last_number("foo42")
        42.0
        >>> extract_last_number("foo24.42bar")
        24.42
        >>> extract_last_number("foo12.5.")
        12.5
        >>> extract_last_number("foo.") is None
        True
    """
    # The capture group can never be a lone "." or carry a trailing dot,
    # so the float() conversion below cannot raise ValueError.
    match = search(r"(\d+(?:\.\d+)?|\.\d+)[^0-9]*$", str(s))
    return float(match.group(1)) if match else None
Loading

0 comments on commit 673a884

Please sign in to comment.