Skip to content

Commit

Permalink
feat: change the time diff option to join two tables internally
Browse files Browse the repository at this point in the history
Signed-off-by: seolmin <[email protected]>
  • Loading branch information
stat-kwon committed Dec 11, 2024
1 parent 1536e83 commit f9e7cbe
Showing 1 changed file with 41 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ def __init__(

self.data_unit = options.get("data_unit")
self.timediff = options.get("timediff")
if self.timediff:
self.timediff_data_name = self.timediff["data_name"]

self.group_by = options.get("group_by")
self.filter = options.get("filter")
self.filter_or = options.get("filter_or")
Expand Down Expand Up @@ -94,14 +97,14 @@ def load(
try:
start, end = self._get_time_from_granularity(granularity, start, end)

if self.timediff:
start, end = self._change_query_time(granularity, start, end)

if self.source_type == "COST":
self._analyze_cost(granularity, start, end, vars)
elif self.source_type == "ASSET":
self._analyze_asset(granularity, start, end, vars)

if self.timediff:
self.df = self._apply_timediff(granularity, start, end, vars)

self.state = "AVAILABLE"
self.error_message = None

Expand Down Expand Up @@ -177,13 +180,44 @@ def _analyze_cost(

self.df = pd.DataFrame(results)

def _change_datetime_format(self, results: list) -> list:
def _apply_timediff(
self, granularity: str, start: str, end: str, vars: dict
) -> pd.DataFrame:
origin_df = self.df.copy()
self.data_name = self.timediff.get("data_name")

start, end = self._change_query_time(granularity, start, end)

if self.source_type == "COST":
self._analyze_cost(granularity, start, end, vars)
elif self.source_type == "ASSET":
self._analyze_asset(granularity, start, end, vars)

self.df["Date"] = self.df["Date"].apply(
lambda x: self._change_date_by_timediff(x)
)

origin_label_keys = [
column for column in origin_df.columns if column != self.data_name
]
diff_label_keys = [
column for column in self.df.columns if column != self.timediff_data_name
]
join_keys = list(set(origin_label_keys) & set(diff_label_keys))

merged_df = pd.merge(origin_df, self.df, on=join_keys, how="left")

fill_na = {key: 0 for key in [self.data_name, self.timediff_data_name]}
fill_na.update({key: "" for key in join_keys})
merged_df = merged_df.fillna(value=fill_na)

return merged_df

@staticmethod
def _change_datetime_format(results: list) -> list:
changed_results = []
for result in results:
if date := result.get("date"):
if self.timediff:
date = self._change_date_by_timediff(date)

result["Date"] = date
del result["date"]
changed_results.append(result)
Expand Down

0 comments on commit f9e7cbe

Please sign in to comment.