diff --git a/deployer/commands/transform/cost_table.py b/deployer/commands/transform/cost_table.py index b4e4d00678..36fe96929f 100644 --- a/deployer/commands/transform/cost_table.py +++ b/deployer/commands/transform/cost_table.py @@ -44,21 +44,24 @@ def aws( """ # Read the CSV file into a pandas dataframe. Skip the first row as this # contains numerical project IDs - the project names begin on the second row. + df = pd.read_csv( + input_path, + skiprows=1, + ) + # We conditionally rename the column names. If '($)' is present in the column # name, we assume this is a linked account name: we strip off any # leading/trailing whitespace and convert to lower case so we have just the # account names and allow for AWS permitting whitespace in them. # Otherwise (i.e. not an account name), we also replace any whitespace with # underscores for easier data cleaning in pandas. - df = pd.read_csv( - input_path, - skiprows=1, - ).rename( + df.rename( columns=lambda col: ( col.lower().strip("($)").strip() if "($)" in col else col.strip().lower().replace(" ", "_") - ) + ), + inplace=True, ) # Ensure values of the linked_account_name column are lower case and any @@ -86,12 +89,20 @@ def aws( # Sort the account names in alphabetical order df.sort_index(inplace=True) + # Transform months from 2024-01-01 to 2024-01 + df.rename( + columns=lambda col: ( + re.match("([0-9]*-[0-9]*)-[0-9]*", col).groups()[0] + if re.match("[0-9]*-[0-9]*-[0-9]*", col) + else col + ), + inplace=True, + ) + if output_path is None: - # Find all the column names that match the regex expression `[0-9]*-[0-9]*-[0-9]*` + # Find all the column names that match the regex expression `[0-9]*-[0-9]*` months = [ - col - for col in df.columns - if re.match("[0-9]*-[0-9]*-[0-9]*", col) is not None + col for col in df.columns if re.match("[0-9]*-[0-9]*", col) is not None ] # Construct output filename diff --git a/docs/howto/bill.md b/docs/howto/bill.md index a4fd13bbb6..b2af900d04 100644 --- a/docs/howto/bill.md +++ 
b/docs/howto/bill.md @@ -74,8 +74,8 @@ AWS management account. If a future cluster deviates from this, you can tell by ``` This will output a new CSV file to your local filesystem called `AWS_{START_MONTH}_{END_MONTH}.csv`. -2. Upload this CSV file to the [cloud costs folder] -3. Ping the folks in the `#billing` slack channel to let them know the info for dedicated clusters is now available and provide a link to the file you have just uploaded +1. Upload this CSV file to the [cloud costs folder] +1. Ping the folks in the `#billing` slack channel to let them know the info for dedicated clusters is now available and provide a link to the file you have just uploaded [direct link]: https://us-east-1.console.aws.amazon.com/costmanagement/home?region=us-east-1#/cost-explorer?reportId=d826a775-e0d6-4e85-a181-7f87a8deb162&reportName=Monthly%20costs%20by%20linked%20account&isDefault=true&chartStyle=GROUP&historicalRelativeRange=LAST_6_MONTHS&futureRelativeRange=CUSTOM&granularity=Monthly&groupBy=%5B%22LinkedAccount%22%5D&filter=%5B%5D&costAggregate=unBlendedCost&showOnlyUntagged=false&showOnlyUncategorized=false&useNormalizedUnits=false @@ -90,7 +90,7 @@ Currently this is the recommended way of retrieving the costs from GCP. 1. Under time range on the right sidebar, select 'Invoice Month' 1. Select the time range you are interested in. Note that this has to be at least two months right now, or the next step does not work 1. Under 'Group by', select 'Month -> Project'. -1. Under the chart, click the 'Download CSV' button. This downloads a CSV that you can use to later populate the columns in the costs spreadsheet +1. Under the chart, click the 'Download CSV' button. ```{figure} ../images/gcp-billing-ui.png GCP billing UI ``` @@ -101,8 +101,8 @@ Currently this is the recommended way of retrieving the costs from GCP. ``` This will output a new CSV file to your local filesystem called `GCP_{START_MONTH}_{END_MONTH}.csv`. -2. Upload this CSV file to the [cloud costs folder] -3. 
Ping the folks in the `#billing` slack channel to let them know the info for dedicated clusters is now available and provide a link to the file just uploaded +1. Upload this CSV file to the [cloud costs folder] +1. Ping the folks in the `#billing` slack channel to let them know the info for dedicated clusters is now available and provide a link to the file just uploaded [2i2c billing account]: https://console.cloud.google.com/billing/0157F7-E3EA8C-25AC3C/reports;timeRange=CUSTOM_RANGE;from=2024-01-01;to=2024-01-31;dateType=INVOICE_DATE;invoiceCorrections=TAX,BILLING_MODIFICATION?organizationId=184174754493&project=two-eye-two-see