Initial move from internal tool to CZ repo
1 parent e9e70ae · commit b27e09c
Showing 5 changed files with 399 additions and 0 deletions.
@@ -0,0 +1,10 @@
FROM public.ecr.aws/lambda/python:3.9

COPY requirements.txt .
RUN pip3 install -r requirements.txt --target ${LAMBDA_TASK_ROOT}

COPY lambda.py ${LAMBDA_TASK_ROOT}
COPY anycostoci.py ${LAMBDA_TASK_ROOT}

CMD ["lambda.anycost"]
@@ -0,0 +1,210 @@
import calendar
import json
import re
import oci
import datetime
from datetime import datetime
from datetime import timedelta
from datetime import date
import dateutil
from dateutil import relativedelta
import sys
import os
import gzip
import pandas

def __create_or_verify_bdid_folder(start_date: datetime, output_dir: str, drop_id: str):
    # Snap to the first of the month that start_date is in
    first_of_the_month = start_date.replace(day=1)
    # Calculate the last date of that month
    last_date_of_the_month = calendar.monthrange(start_date.year, start_date.month)[1]
    # Make a datetime for that last date.
    last_of_the_month = start_date.replace(day=last_date_of_the_month)
    # Add 1 day to get the first of the next month
    first_of_next_month = last_of_the_month + timedelta(days=1)

    # Build billing data ID - "YYYYMMDD-YYYYMMDD" of 1st of data month to 1st of following month
    bdid_segment_1 = first_of_the_month.date().strftime("%Y%m%d")
    bdid_segment_2 = first_of_next_month.strftime("%Y%m%d")

    billing_data_id = bdid_segment_1 + "-" + bdid_segment_2
    bdid_path = os.path.join(output_dir, billing_data_id)
    drop_path = os.path.join(bdid_path, drop_id)

    if not os.path.exists(drop_path):
        print(f"Creating directory {drop_path}")
        os.makedirs(drop_path, exist_ok=True)

    return drop_path


def __months_lookback(lookback_months: int) -> tuple:
    """How many previous months to look back and stop. Returns a tuple of (<date>, <date>)."""

    start_date = datetime.utcnow().date() - dateutil.relativedelta.relativedelta(months=lookback_months)
    start_date = start_date.replace(day=1)

    # end_date = datetime.utcnow().date() - dateutil.relativedelta.relativedelta(months=1)
    last_day_of_the_month = calendar.monthrange(start_date.year, start_date.month)[1]
    end_date = start_date.replace(day=last_day_of_the_month)

    print(f"Eval dates: {start_date} to {end_date}")
    return start_date, end_date


# 0 lookback starts from first of current month to today
# 1 lookback starts from 1st of previous month to last day of previous month
# 2 lookback starts from 1st of next-previous month to last day of that month
def download_oci_cost_files(lookback_months: int, oci_config = {}, output_dir = '/tmp/') -> list:
    """Download OCI cost reports between start_date and end_date. Returns a list of downloaded filenames on success."""
    oci.config.validate_config(oci_config)

    object_storage = oci.object_storage.ObjectStorageClient(oci_config)

    start_date, end_date = __months_lookback(lookback_months)
    # Extend the end date since the last day of the month reports in the
    # following month.
    report_end_date = end_date + timedelta(days=3)

    # TODO: the point of pagination is to make repeated calls. Push this paginating into the fetch/comparison loop function
    report_bucket_objects = oci.pagination.list_call_get_all_results(
        object_storage.list_objects,
        'bling',
        oci_config['tenancy'],
        fields="name,timeCreated,size",
        prefix="reports/cost-csv"
    )

    downloaded_reports = []
    for o in report_bucket_objects.data.objects:
        # print(f"Report Created: {o.time_created.date()} Earliest date: {datetime.strptime('2022-01-01', '%Y-%m-%d').date()}")
        if (o.time_created.date() >= start_date and
                o.time_created.date() <= report_end_date):
            this_report = object_storage.get_object('bling', oci_config['tenancy'], o.name)
            filename_path = os.path.join(output_dir, o.time_created.strftime("%Y%m%d%H%M%S%Z") + ".csv.gz")
            with open(filename_path, "wb") as f:
                for chunk in this_report.data.raw.stream(1024 * 1024, decode_content=False):
                    f.write(chunk)
            downloaded_reports.append(filename_path)
            print(f"File {filename_path} Downloaded - created {o.time_created}")

    return downloaded_reports

def build_anycost_drop_from_oci_files(lookback_months: int,
                                      oci_cost_files_dir = '/tmp/',
                                      output_dir = '/tmp/anycost_drop/') -> set:
    """Take a directory of gzipped OCI cost reports and build an AnyCost drop out of them.
    Evaluates the files in oci_cost_files_dir to see their begin/end dates.
    Creates a CBF-drop-formatted directory and file structure in output_dir.
    Creates a CBF manifest.json pointing to the new files.
    Returns a set of paths to created billing data ID folders under output_dir.
    """
    # CBF drop folder structure is like:
    # output_dir/<billing_data_id>/<drop_id>/<data_file>
    # Ex:
    # output_dir/20220101-20220201/20220128000000Z/data_file[0...N].csv.gz
    # output_dir/20220101-20220201/manifest.json

    start_date, end_date = __months_lookback(lookback_months)

    drop_id = datetime.utcnow().strftime("%Y%m%d%H%M%S%Z")
    drop_paths = set()

    for root, dirs, cost_files in os.walk(oci_cost_files_dir):
        # It would be swell if this yielded the files in order.
        # The filenames are ordered numbers and we could display progress.
        for cost_file in cost_files:
            if not re.match(r".*\.csv\.gz$", cost_file):
                continue

            with gzip.open(os.path.join(root, cost_file), 'rb') as f:
                print(f"Processing file {root}/{cost_file}...")
                try:
                    oci_cost = pandas.read_csv(f)
                except pandas.errors.EmptyDataError:
                    print(f"No rows read from file {root}/{cost_file}")
                    continue  # nothing to convert; move on to the next file

                # Start building the CBF formatted frame
                cbf_frame = pandas.DataFrame([])

                cbf_frame.insert(0, 'lineitem/id', oci_cost.loc[:, 'lineItem/referenceNo'])
                # AFAICT all cost types in OCI are 'Usage', with the possible
                # exception of 'Adjustment's for rows with isCorrection=True.
                # Depending on how corrections are handled we may not need
                # to show that.
                cbf_frame.insert(1, 'lineitem/type', 'Usage')
                cbf_frame.insert(2, 'lineitem/description', oci_cost.loc[:, 'product/Description'])
                cbf_frame.insert(3, 'time/usage_start', oci_cost.loc[:, 'lineItem/intervalUsageStart'])
                cbf_frame.insert(4, 'time/usage_end', oci_cost.loc[:, 'lineItem/intervalUsageEnd'])
                cbf_frame.insert(5, 'resource/id', oci_cost.loc[:, 'product/resourceId'])
                cbf_frame.insert(6, 'resource/service', oci_cost.loc[:, 'product/service'])
                cbf_frame.insert(7, 'resource/account', oci_cost.loc[:, 'lineItem/tenantId'])
                cbf_frame.insert(8, 'resource/region', oci_cost.loc[:, 'product/region'])
                cbf_frame.insert(9, 'action/account', oci_cost.loc[:, 'lineItem/tenantId'])
                cbf_frame.insert(10, 'usage/amount', oci_cost.loc[:, 'usage/billedQuantity'])
                cbf_frame.insert(11, 'cost/cost', oci_cost.loc[:, 'cost/myCost'])

                # Tags
                for c in oci_cost.columns:
                    match = re.match(r'^tags\/(?P<tag_key>.*)', c)
                    if match:
                        oci_tag_key = match.group('tag_key')
                        oci_tag_key_cleaned = re.sub(r"[^a-zA-Z0-9\_\.\:\+\@\=\-\/]+", '', oci_tag_key)

                        if len(oci_tag_key) != len(oci_tag_key_cleaned):
                            print("Warning: Some characters were stripped from OCI tag column.")
                            print(f"Column '{oci_tag_key}' contained invalid characters.")

                        tag_column = "resource/tag:" + oci_tag_key_cleaned
                        cbf_frame.insert(len(cbf_frame.columns), tag_column, oci_cost.loc[:, c])

                # This section prunes the CBF frames to contain only rows with
                # usage_start dates within the BDID boundary.

                # Format the usage timestamps so we can parse them
                cbf_frame['time/usage_start'] = pandas.to_datetime(cbf_frame['time/usage_start'], cache=True)
                cbf_frame['time/usage_end'] = pandas.to_datetime(cbf_frame['time/usage_end'], cache=True)

                # Create new date-only timestamp columns so we can look at those for pruning
                # note the .dt property refers to the datetime object inside the column
                cbf_frame['time/usage_start_date'] = cbf_frame['time/usage_start'].dt.date
                cbf_frame['time/usage_end_date'] = cbf_frame['time/usage_end'].dt.date

                # CBF treats all usage with start dates within the BDID window
                # as valid for that window. So we look at the start date of
                # every row to see whether it belongs within the window.
                if start_date:  # conditional here since @start_date is inside the string
                    cbf_frame.query('`time/usage_start_date` >= @start_date', inplace=True)

                if end_date:
                    cbf_frame.query('`time/usage_start_date` <= @end_date', inplace=True)

                # Finally, let's drop the _date columns since they don't belong
                # in the output.
                cbf_frame.drop(columns=['time/usage_start_date', 'time/usage_end_date'], inplace=True)

                # Dump to disk, assuming we have any rows left
                if len(cbf_frame) > 0:
                    drop_path = __create_or_verify_bdid_folder(cbf_frame.head(1).iat[0, 3], output_dir, drop_id)
                    cbf_file_path = os.path.join(output_dir, drop_path, os.path.basename(cost_file))
                    print(f"Writing CBF file to {cbf_file_path}")
                    cbf_frame.to_csv(cbf_file_path, index=False)
                    drop_paths.add(drop_path)
                else:
                    print(f"No rows remaining after date window prune in file {cost_file}")

    # Emit manifest pointing at our current drop.
    # There should only be 1, despite the for statement.
    for d in drop_paths:
        manifest = {
            "version": "1.0.0",
            "current_drop_id": drop_id
        }

        with open(os.path.join(os.path.dirname(d), "manifest.json"), 'w') as f:
            json.dump(manifest, f)

    return drop_paths
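
As a quick orientation, here is a minimal sketch of exercising this module directly from Python, assuming a standard ~/.oci/config profile and writable /tmp paths; the CLI wrapper in the next file does essentially the same thing.

import os
import oci
import anycostoci

# Load the default OCI profile from ~/.oci/config; any valid OCI config dict works here.
oci_config = oci.config.from_file()

# Hypothetical local working directories for this example.
cost_dir = "/tmp/oci_cost_files/"
drop_dir = "/tmp/anycost_drop/"
os.makedirs(cost_dir, exist_ok=True)
os.makedirs(drop_dir, exist_ok=True)

# Pull last month's cost reports, then reshape them into a CBF drop.
anycostoci.download_oci_cost_files(1, oci_config=oci_config, output_dir=cost_dir)
print(anycostoci.build_anycost_drop_from_oci_files(1, oci_cost_files_dir=cost_dir, output_dir=drop_dir))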
@@ -0,0 +1,76 @@
#!/usr/bin/env python3.9

import argparse
import json
import oci
import boto3
import sys
import os
import gzip
from datetime import datetime
from datetime import date
from datetime import timedelta
import calendar
import pandas
import tempfile
import anycostoci

parser = argparse.ArgumentParser()

# AnyCost side configuration, for writing to s3
parser.add_argument("-aki", "--access-key-id")
parser.add_argument("-sak", "--secret-access-key")
parser.add_argument("-b", "--anycost-s3-bucket")

# OCI side configuration, where is my config file
# Config should contain user OCID, tenancy OCID, and keyfile location
parser.add_argument("-o", "--oci-config-file")

# Temp dir stores the OCI cost files, output is the actual drop.
parser.add_argument("-t", "--temp-dir", default="/tmp/")
parser.add_argument("-d", "--output-dir", default="/tmp/anycost_drop")

# Months of history to evaluate and store in AnyCost drop
parser.add_argument("-m", "--lookback-months", default=1, type=int)

args = parser.parse_args()

print(f"Args given: {args}")

# temp_dir = tempfile.TemporaryDirectory(dir=args.temp_dir)
temp_dir = '/tmp/'

# Filesystem sanity check
try:
    oci_write_dir = os.path.join(temp_dir, "oci_cost_files")
    os.makedirs(oci_write_dir, exist_ok=True)
    anycost_drop_dir = args.output_dir
    os.makedirs(anycost_drop_dir)
except FileExistsError as fee:
    print(f"Path exists: {anycost_drop_dir}")
    exit(1)

oci_file = ""
if args.oci_config_file is None:
    oci_file = oci.config.DEFAULT_LOCATION
else:
    oci_file = args.oci_config_file

# Hydrate OCI config and download --lookback-months worth of cost files

oci_config = oci.config.from_file(oci_file, oci.config.DEFAULT_PROFILE)
print(f"OCI Config: {oci_config}")

downloaded_reports = anycostoci.download_oci_cost_files(
    args.lookback_months,
    oci_config=oci_config,
    output_dir=oci_write_dir)

output_paths = anycostoci.build_anycost_drop_from_oci_files(
    args.lookback_months,
    oci_cost_files_dir=oci_write_dir,
    output_dir=anycost_drop_dir
)

print("Created drops in:")
print(output_paths)
@@ -0,0 +1,98 @@
import anycostoci
import sys
import os
import datetime
import boto3
import s3fs
import oci
import tempfile
from datetime import timedelta
from datetime import datetime

# Call with event data to specify lookback:
# curl -d '{"lookback_months": 1 }' for example.

def __load_oci_config(params_path: str) -> dict:
    client = boto3.client('ssm')
    config = {}

    config['user'] = client.get_parameter(
        Name=params_path + "oci-user")['Parameter']['Value']
    config['key_content'] = client.get_parameter(
        Name=params_path + "oci-key-content",
        WithDecryption=True)['Parameter']['Value']
    config['fingerprint'] = client.get_parameter(
        Name=params_path + "oci-key-fingerprint")['Parameter']['Value']
    config['tenancy'] = client.get_parameter(
        Name=params_path + "oci-tenancy")['Parameter']['Value']
    config['region'] = client.get_parameter(
        Name=params_path + "oci-region")['Parameter']['Value']
    oci.config.validate_config(config)
    return config

def anycost(event, context):

    # hydrate the OCI configuration for downloading
    params_path = os.environ.get('SSM_PARAMETER_STORE_FOLDER_PATH')
    oci_config = __load_oci_config(params_path)

    # hydrate the S3 config
    ssm = boto3.client('ssm')
    cbf_s3_bucket = ssm.get_parameter(Name=params_path + 's3-bucket')['Parameter']['Value']
    cbf_s3_prefix = ssm.get_parameter(Name=params_path + 's3-bucket-prefix')['Parameter']['Value']

    # Check event arguments
    lookback_months = 1
    if 'lookback_months' in event:
        lookback_months = event['lookback_months']
    print(f"Looking back {lookback_months} months")

    # temp dir management for Lambda temp storage
    temp_dir = tempfile.TemporaryDirectory(dir="/tmp/")
    try:
        oci_write_dir = os.path.join(temp_dir.name, "oci_cost_files")
        os.makedirs(oci_write_dir, exist_ok=True)
        anycost_drop_dir = os.path.join(temp_dir.name, "anycost_drop")
        os.makedirs(anycost_drop_dir, exist_ok=True)

        anycostoci.download_oci_cost_files(
            lookback_months=lookback_months,
            oci_config=oci_config,
            output_dir=oci_write_dir
        )

        output_drops = anycostoci.build_anycost_drop_from_oci_files(
            lookback_months=lookback_months,
            oci_cost_files_dir=oci_write_dir,
            output_dir=anycost_drop_dir,
        )

        # output_drops:
        # {'/tmp/tmp425_p9ui/anycost_drop/20230101-20230201/20230130225601'}
        print(output_drops)

        # walk the output_drops and plop them all in the target s3 bucket
        s3 = s3fs.S3FileSystem()
        for drop in output_drops:
            drop_id = os.path.basename(drop)
            drop_bdid_dir = os.path.dirname(drop)
            drop_bdid = os.path.basename(drop_bdid_dir)

            # should be /bucket/prefix/20230101-20230201/
            s3_drop_path = os.path.join(cbf_s3_bucket, cbf_s3_prefix, drop_bdid, drop_id + "/")

            # when a new month started, this didn't create the drop_id prefix; it just put the files directly in the bdid prefix
            print(f"Putting dir: {drop} to S3 path {s3_drop_path}")
            s3.put(drop, s3_drop_path, recursive=True)

            # manifest.json
            # should be /tmp/tmp425_p9ui/anycost_drop/20230101-20230201/manifest.json
            manifest_tmp_path = os.path.join(drop_bdid_dir, "manifest.json")
            manifest_s3_path = os.path.join(cbf_s3_bucket, cbf_s3_prefix, drop_bdid, 'manifest.json')
            print(f"Putting {manifest_tmp_path} to S3 path {manifest_s3_path}")
            s3.put_file(manifest_tmp_path, manifest_s3_path)

    # cleanup for next invocation
    finally:
        temp_dir.cleanup()
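
Once packaged into the container image above and deployed, the handler can be driven with an event that sets the lookback window, as the comment at the top of this file suggests. A minimal sketch using boto3; the function name here is a placeholder, not something defined in this commit:

import json
import boto3

# Invoke the deployed Lambda with a one-month lookback.
response = boto3.client("lambda").invoke(
    FunctionName="anycost-oci-adaptor",  # placeholder; substitute the real function name
    Payload=json.dumps({"lookback_months": 1}),
)
print(response["StatusCode"])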