Skip to content

Commit

Permalink
Assume upgrades are strings
Browse files Browse the repository at this point in the history
  • Loading branch information
rajeee committed May 8, 2024
1 parent 80047fc commit 85e99c3
Show file tree
Hide file tree
Showing 11 changed files with 406 additions and 139 deletions.
2 changes: 1 addition & 1 deletion buildstock_query/aggregate_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def aggregate_annual(self, *,
enduse_selection = [safunc.sum(enduse * total_weight).label(self._bsq._simple_label(enduse.name))
for enduse in enduse_cols]
if params.get_quartiles:
enduse_selection += [sa.func.approx_percentile(enduse, [0, 0.02, 0.25, 0.5, 0.75, 0.98, 1]).label(
enduse_selection += [sa.func.approx_percentile(enduse, [0, 0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98, 1]).label(
f"{self._bsq._simple_label(enduse.name)}__quartiles") for enduse in enduse_cols]

if params.get_nonzero_count:
Expand Down
11 changes: 7 additions & 4 deletions buildstock_query/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,14 +399,17 @@ def get_upgrades_csv(self, *, upgrade_id: Union[str, int] = '0',
logger.info("Making results_csv query for upgrade ...")
return self.execute(query).set_index(self.bs_bldgid_column.name)

def _download_upgrades_csv(self, upgrade_id: int) -> str:
def _download_upgrades_csv(self, upgrade_id: Union[int, str]) -> str:
""" Downloads the upgrades csv from s3 and returns the path to the downloaded file.
"""
if self.up_table is None:
raise ValueError("This run has no upgrades")

available_upgrades = list(self.get_available_upgrades())
available_upgrades.remove('0')
if isinstance(upgrade_id, int):
upgrade_id = f"{upgrade_id:02}"

if str(upgrade_id) not in available_upgrades:
raise ValueError(f"Upgrade {upgrade_id} not found")

Expand All @@ -428,8 +431,8 @@ def _download_upgrades_csv(self, upgrade_id: int) -> str:
raise ValueError(f"Results parquet not found in s3 at {upgrades_path}")
# out of the contents find the key with name matching the pattern results_up{upgrade_id}.parquet
matching_files = [path['Key'] for path in s3_data['Contents']
if f"up{upgrade_id:02}.parquet" in path['Key'] or
f"upgrade{upgrade_id:02}.parquet" in path['Key']]
if f"up{upgrade_id}.parquet" in path['Key'] or
f"upgrade{upgrade_id}.parquet" in path['Key']]
if len(matching_files) > 1:
raise ValueError(f"Multiple results parquet found in s3 at {upgrades_path} for upgrade {upgrade_id}."
f"These files matched: {matching_files}")
Expand All @@ -440,7 +443,7 @@ def _download_upgrades_csv(self, upgrade_id: int) -> str:
self._aws_s3.download_file(bucket, matching_files[0], local_copy_path)
return local_copy_path

def get_upgrades_csv_full(self, upgrade_id: int) -> pd.DataFrame:
def get_upgrades_csv_full(self, upgrade_id: Union[int, str]) -> pd.DataFrame:
""" Returns the full results csv table for upgrades. This is the same as get_upgrades_csv without any
restrictions. It uses the stored parquet files in s3 to download the results which is faster than querying
athena.
Expand Down
32 changes: 16 additions & 16 deletions buildstock_query/report_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def _get_bs_success_report(self, get_query_only: bool = False):
return self._bsq._compile(bs_query)
df = self._bsq.execute(bs_query)
df = self._rename_completed_status_column(df)
df.insert(0, 'upgrade', 0)
df.insert(0, 'upgrade', '0')
return self._process_report(df)

@typing.overload
Expand Down Expand Up @@ -93,7 +93,7 @@ def _get_change_report(self, get_query_only: bool = False):
for chng_type, query in zip(chng_types, queries):
df = self._bsq.execute(query)
df.rename(columns={"change": chng_type}, inplace=True)
df['upgrade'] = df['upgrade'].map(int)
# df['upgrade'] = df['upgrade'].map(int)
df = df.set_index('upgrade').sort_index()
change_df = change_df.join(df, how='outer') if len(change_df) > 0 else df
change_df = change_df.fillna(0)
Expand All @@ -103,7 +103,7 @@ def _get_change_report(self, get_query_only: bool = False):
return change_df

@validate_arguments(config=dict(arbitrary_types_allowed=True, smart_union=True))
def print_change_details(self, upgrade_id: int, yml_file: str, opt_sat_path: str,
def print_change_details(self, upgrade_id: Union[int, str], yml_file: str, opt_sat_path: str,
change_type: Literal["no-chng", "bad-chng", "ok-chng", "true-bad-chng",
"true-ok-chng", "null", "any"] = 'no-chng'):
ua = self._bsq.get_upgrades_analyzer(yaml_file=yml_file,
Expand All @@ -113,21 +113,21 @@ def print_change_details(self, upgrade_id: int, yml_file: str, opt_sat_path: str
ua.print_unique_characteristic(upgrade_id, change_type, good_bids, bad_bids)

@typing.overload
def _get_upgrade_buildings(self, *, upgrade_id: int, trim_missing_bs: bool = True,
def _get_upgrade_buildings(self, *, upgrade_id: Union[int, str], trim_missing_bs: bool = True,
get_query_only: Literal[False] = False) -> list[int]:
...

@typing.overload
def _get_upgrade_buildings(self, *, upgrade_id: int, get_query_only: Literal[True],
def _get_upgrade_buildings(self, *, upgrade_id: Union[int, str], get_query_only: Literal[True],
trim_missing_bs: bool = True) -> str:
...

@typing.overload
def _get_upgrade_buildings(self, *, upgrade_id: int, get_query_only: bool,
def _get_upgrade_buildings(self, *, upgrade_id: Union[int, str], get_query_only: bool,
trim_missing_bs: bool = True) -> Union[list[int], str]:
...

def _get_upgrade_buildings(self, *, upgrade_id: int, trim_missing_bs: bool = True, get_query_only: bool = False):
def _get_upgrade_buildings(self, *, upgrade_id: Union[int, str], trim_missing_bs: bool = True, get_query_only: bool = False):
if self._bsq.up_table is None:
raise ValueError("No upgrade table is available .")
up_query = sa.select([self._bsq.up_bldgid_column])
Expand Down Expand Up @@ -191,28 +191,28 @@ def _get_change_conditions(self, change_type: str):
return conditions

@typing.overload
def get_buildings_by_change(self, *, upgrade_id: int, get_query_only: Literal[True],
def get_buildings_by_change(self, *, upgrade_id: Union[int, str], get_query_only: Literal[True],
change_type: Literal["no-chng", "bad-chng", "ok-chng", "true-bad-chng",
"true-ok-chng", "null", "any"] = 'no-chng'
) -> str:
...

@typing.overload
def get_buildings_by_change(self, *, upgrade_id: int, get_query_only: Literal[False] = False,
def get_buildings_by_change(self, *, upgrade_id: Union[int, str], get_query_only: Literal[False] = False,
change_type: Literal["no-chng", "bad-chng", "ok-chng", "true-bad-chng",
"true-ok-chng", "null", "any"] = 'no-chng'
) -> list[int]:
...

@typing.overload
def get_buildings_by_change(self, *, upgrade_id: int, get_query_only: bool,
def get_buildings_by_change(self, *, upgrade_id: Union[int, str], get_query_only: bool,
change_type: Literal["no-chng", "bad-chng", "ok-chng", "true-bad-chng",
"true-ok-chng", "null", "any"] = 'no-chng'
) -> Union[list[int], str]:
...

@validate_arguments(config=dict(arbitrary_types_allowed=True, smart_union=True))
def get_buildings_by_change(self, *, upgrade_id: int,
def get_buildings_by_change(self, *, upgrade_id: Union[int, str],
change_type: Literal["no-chng", "bad-chng", "ok-chng", "true-bad-chng",
"true-ok-chng", "null", "any"] = 'no-chng',
get_query_only: bool = False):
Expand Down Expand Up @@ -276,7 +276,7 @@ def _get_up_success_report(self, *, trim_missing_bs: bool = True, get_query_only
return self._process_report(df)

def _process_report(self, df: DataFrame):
df['upgrade'] = df['upgrade'].map(int)
# df['upgrade'] = df['upgrade'].map(int)
pf = df.pivot(index=['upgrade'], columns=['completed_status'],
values=['count'])
pf.columns = [c[1] for c in pf.columns]
Expand Down Expand Up @@ -317,7 +317,7 @@ def _get_full_options_report(self, trim_missing_bs: bool = True, get_query_only:
df = self._bsq.execute(query)
simple_names = [f"option{i+1}" for i in range(len(opt_name_cols))]
df.columns = ['upgrade'] + simple_names + ['success', "applied_buildings"]
df['upgrade'] = df['upgrade'].map(int)
# df['upgrade'] = df['upgrade'].map(int)
df['applied_buildings'] = df['applied_buildings'].map(lambda x: literal_eval(x))
applied_rows = df[simple_names].any(axis=1) # select only rows with at least one option applied
return df[applied_rows]
Expand Down Expand Up @@ -570,7 +570,7 @@ def _get_ts_report(self, get_query_only: bool = False):
if get_query_only:
return self._bsq._compile(ts_query)
df = self._bsq.execute(ts_query)
df['upgrade'] = df['upgrade'].map(int)
# df['upgrade'] = df['upgrade'].map(int)
df = df.set_index('upgrade')
df = df.rename(columns={'count': 'success'})
return df
Expand Down Expand Up @@ -660,7 +660,7 @@ def get_applied_options(self, upgrade_id: Union[str, int], bldg_ids: list[int],
Returns:
list[set|dict]: List of options (along with baseline chars, if include_base_opt is true)
"""
up_csv = self._bsq.get_upgrades_csv_full(upgrade_id=int(upgrade_id))
up_csv = self._bsq.get_upgrades_csv_full(upgrade_id=upgrade_id)
rel_up_csv = up_csv.loc[bldg_ids]
upgrade_cols = [key for key in up_csv.columns
if key.startswith("upgrade_costs.option_") and key.endswith("_name")]
Expand Down Expand Up @@ -702,7 +702,7 @@ def get_enduses_buildings_map_by_change(self, upgrade_id: Union[str, int],
Returns:
dict[str, pd.Index]: Dict mapping enduses that had a given change and building ids showing that change.
"""
up_csv = self._bsq.get_upgrades_csv_full(upgrade_id=int(upgrade_id))
up_csv = self._bsq.get_upgrades_csv_full(upgrade_id=upgrade_id)
bs_csv = self._bsq.get_results_csv_full()
if bldg_list:
up_csv = up_csv.loc[bldg_list]
Expand Down
2 changes: 1 addition & 1 deletion buildstock_query/tools/upgrades_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ def get_report(self, upgrade_num: Optional[int] = None) -> pd.DataFrame:
"""

self._logic_cache = {}
if upgrade_num not in self.upgrade_names:
if upgrade_num is not None and upgrade_num not in self.upgrade_names:
raise ValueError(f"Invalid upgrade {upgrade_num}. Valid upgrade_num = {self.upgrade_names.keys()}.")

record_dfs = []
Expand Down
Loading

0 comments on commit 85e99c3

Please sign in to comment.