Commit

469 update retrofit dataset (#470)
* reorganize retrofit

* update retrofit plans

* add retrofit data and logic

* rewrite the default fragility mapping entry key part

* update retrofit test

* it's working but too slow

* properly tested

* use exec instead of eval

* rewrite retrofit part

* eval works but it's not updating the inventory

* reorganize the code to have the update inventory in the very beginning of the analysis as a separate part

* fix flood retrofit

* fix bug and add proper timer for retrofit

* use optional

* speed up the update process if no retrofit just skip

* inner join

* changelog

* use temp folder

* Fix FutureWarning in dataprocessutil.py

---------

Co-authored-by: Ya-Lan Yang <[email protected]>
longshuicy and ylyangtw authored Feb 21, 2024
1 parent 58649ce commit 40d8e8b
Showing 15 changed files with 45,769 additions and 51 deletions.
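The "add retrofit data and logic", "inner join", and "use temp folder" commits above amount to one preprocessing step: merge the retrofit strategy CSV into the building inventory and write the updated inventory into a temporary folder before damage is computed. The body of DatasetUtil.construct_updated_inventories is not part of this diff, so the sketch below is a hypothetical stand-in; the function name, the exact join semantics, and the column handling are assumptions.

```python
import os
import tempfile

import geopandas as gpd
import pandas as pd


def update_inventory_sketch(bldg_shp: str, retrofit_csv: str) -> str:
    """Hypothetical stand-in for DatasetUtil.construct_updated_inventories."""
    retrofit = pd.read_csv(retrofit_csv)
    if retrofit.empty:
        # "speed up the update process if no retrofit just skip"
        return bldg_shp
    gdf = gpd.read_file(bldg_shp)
    # The "inner join" commit suggests retrofit rows are matched to buildings
    # by guid; how unmatched buildings are carried through is not shown here.
    merged = gdf.merge(retrofit, on="guid", how="inner")
    tmpdir = tempfile.mkdtemp()  # "use temp folder"
    out = os.path.join(tmpdir, "buildings_updated.shp")
    merged.to_file(out)
    return out
```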
1 change: 1 addition & 0 deletions .gitignore
@@ -51,6 +51,7 @@ original/
*.json
!tests/data/*/*.json
!tests/data/*.json
!tests/data/retrofit/*.csv

# Seaside ipopt constants
pyincore/analyses/seasidecge/solverconstants/ipopt_cons.py
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -13,6 +13,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
### Fixed
- Refactoring tornadoepndamage for hazardDatasets [#495](https://github.com/IN-CORE/pyincore/issues/495)

## [Unreleased]

### Changed
- Retrofitted Building Damage [#469](https://github.com/IN-CORE/pyincore/issues/469)


## [1.16.0] - 2024-02-07

### Added
29 changes: 19 additions & 10 deletions pyincore/analyses/buildingdamage/buildingdamage.py
@@ -12,6 +12,7 @@
FragilityService, AnalysisUtil, GeoUtil
from pyincore.analyses.buildingdamage.buildingutil import BuildingUtil
from pyincore.models.dfr3curve import DFR3Curve
from pyincore.utils.datasetutil import DatasetUtil


class BuildingDamage(BaseAnalysis):
@@ -31,15 +32,22 @@ def __init__(self, incore_client):

def run(self):
"""Executes building damage analysis."""

# Building dataset
bldg_set = self.get_input_dataset("buildings").get_inventory_reader()
bldg_dataset = self.get_input_dataset("buildings")

# building retrofit strategy
retrofit_strategy_dataset = self.get_input_dataset("retrofit_strategy")
if retrofit_strategy_dataset is not None:
retrofit_strategy = list(retrofit_strategy_dataset.get_csv_reader())
else:
retrofit_strategy = None

# mapping
dfr3_mapping_set = self.get_input_dataset("dfr3_mapping_set")

# Update the building inventory dataset if applicable
bldg_dataset, tmpdirname, _ = DatasetUtil.construct_updated_inventories(bldg_dataset,
add_info_dataset=retrofit_strategy_dataset,
mapping=dfr3_mapping_set)

bldg_set = bldg_dataset.get_inventory_reader()

# Accommodating to multi-hazard
hazards = [] # hazard objects
@@ -85,7 +93,6 @@ def run(self):
(ds_results, damage_results) = self.building_damage_concurrent_future(self.building_damage_analysis_bulk_input,
num_workers,
inventory_args,
repeat(retrofit_strategy),
repeat(hazards),
repeat(hazard_types),
repeat(hazard_dataset_ids))
@@ -95,6 +102,10 @@ def building_damage_concurrent_future(self, function_name, parallelism, *args):
damage_results,
name=self.get_parameter("result_name") + "_additional_info")

# clean up temp folder if applicable
if tmpdirname is not None:
bldg_dataset.delete_temp_folder()

return True

def building_damage_concurrent_future(self, function_name, parallelism, *args):
@@ -118,13 +129,11 @@ def building_damage_concurrent_future(self, function_name, parallelism, *args):

return output_ds, output_dmg

def building_damage_analysis_bulk_input(self, buildings, retrofit_strategy, hazards, hazard_types,
hazard_dataset_ids):
def building_damage_analysis_bulk_input(self, buildings, hazards, hazard_types, hazard_dataset_ids):
"""Run analysis for multiple buildings.
Args:
buildings (list): Multiple buildings from input inventory set.
retrofit_strategy (list): building guid and its retrofit level 0, 1, 2, etc. This is Optional
hazards (list): List of hazard objects.
hazard_types (list): List of Hazard type, either earthquake, tornado, or tsunami.
hazard_dataset_ids (list): List of id of the hazard exposure.
@@ -136,7 +145,7 @@ def building_damage_analysis_bulk_input(self, buildings, retrofit_strategy, haza

fragility_key = self.get_parameter("fragility_key")
fragility_sets = self.fragilitysvc.match_inventory(self.get_input_dataset("dfr3_mapping_set"), buildings,
fragility_key, retrofit_strategy)
fragility_key)
use_liquefaction = False
liquefaction_resp = None
# Get geology dataset id containing liquefaction susceptibility
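From the caller's side, the retrofit strategy now rides along as an ordinary input dataset, and run() performs the inventory rewrite and temp-folder cleanup internally. A usage sketch under assumptions: the dataset and hazard IDs are placeholders, and "incore:retrofitStrategy" is an assumed data type string not confirmed by this diff; the "retrofit_strategy" input name and the skip-when-absent behavior do come from run() above.

```python
from pyincore import IncoreClient, Dataset, FragilityService, MappingSet
from pyincore.analyses.buildingdamage import BuildingDamage

client = IncoreClient()

bldg_dmg = BuildingDamage(client)
bldg_dmg.load_remote_input_dataset("buildings", "5a284f0bc7d30d13bc081a28")  # placeholder ID

# DFR3 mapping; match_inventory can now fall back to the mapping's default entry key
fragility_service = FragilityService(client)
mapping_set = MappingSet(fragility_service.get_mapping("5b47b350337d4a3629076f2c"))  # placeholder ID
bldg_dmg.set_input_dataset("dfr3_mapping_set", mapping_set)

# Optional retrofit strategy; when omitted, the inventory update is skipped entirely
retrofit_strategy = Dataset.from_file("retrofit_strategy.csv", data_type="incore:retrofitStrategy")
bldg_dmg.set_input_dataset("retrofit_strategy", retrofit_strategy)

bldg_dmg.set_parameter("result_name", "bldg_dmg_retrofit")
bldg_dmg.set_parameter("hazard_type", "earthquake")
bldg_dmg.set_parameter("hazard_id", "5b902cb273c3371e1236b36b")  # placeholder ID
bldg_dmg.set_parameter("num_cpu", 4)
bldg_dmg.run()
```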
19 changes: 18 additions & 1 deletion pyincore/dataset.py
@@ -11,12 +11,13 @@
import os

import fiona
import numpy
import pandas as pd
import geopandas as gpd
import rasterio
import warnings
from pyincore import DataService
from pathlib import Path
import shutil

warnings.filterwarnings("ignore", "", UserWarning)

@@ -33,6 +34,8 @@ def __init__(self, metadata):
self.metadata = metadata

# For convenience instead of having to dig through the metadata for these
self.title = metadata["title"] if "title" in metadata else None
self.description = metadata["description"] if "description" in metadata else None
self.data_type = metadata["dataType"]
self.format = metadata["format"]
self.id = metadata["id"]
@@ -329,6 +332,20 @@ def get_dataframe_from_shapefile(self):

return gdf

def delete_temp_file(self):
"""Delete temporary folder.
"""
if os.path.exists(self.local_file_path):
os.remove(self.local_file_path)

def delete_temp_folder(self):
"""Delete temporary folder.
"""
path = Path(self.local_file_path)
absolute_path = path.parent.absolute()
if os.path.isdir(absolute_path):
shutil.rmtree(absolute_path)

def close(self):
for key in self.readers:
self.readers[key].close()
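delete_temp_folder removes the parent directory of the dataset's local file rather than a single file because the rewritten inventory lives in its own temporary folder, and a shapefile spans several sibling files (.shp, .shx, .dbf, ...). A minimal, self-contained sketch of the same cleanup logic:

```python
import os
import shutil
import tempfile
from pathlib import Path

# Stand-in for a dataset whose local_file_path points into a temp folder.
tmpdir = tempfile.mkdtemp()
local_file_path = os.path.join(tmpdir, "buildings_updated.shp")
Path(local_file_path).touch()

# Same logic as Dataset.delete_temp_folder(): remove the whole parent folder.
absolute_path = Path(local_file_path).parent.absolute()
if os.path.isdir(absolute_path):
    shutil.rmtree(absolute_path)

print(os.path.exists(tmpdir))  # False
```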
68 changes: 44 additions & 24 deletions pyincore/dfr3service.py
@@ -7,7 +7,7 @@

import re
from urllib.parse import urljoin
from typing import Dict
from typing import Dict, Optional

import pyincore.globals as pyglobals
from pyincore.decorators import forbid_offline
@@ -56,7 +56,7 @@ def __init__(self):


class MappingResponse(object):
def __init__(self, sets: Dict[str, any]=dict(), mapping: Dict[str, str]=dict()):
def __init__(self, sets: Dict[str, any] = dict(), mapping: Dict[str, str] = dict()):
self.sets = sets
self.mapping = mapping

@@ -175,23 +175,31 @@ def create_dfr3_set(self, dfr3_set: dict, timeout=(30, 600), **kwargs):
r = self.client.post(url, json=dfr3_set, timeout=timeout, **kwargs)
return return_http_response(r).json()

def match_inventory(self, mapping: MappingSet, inventories: list, entry_key: str, add_info: list = None):
def match_inventory(self, mapping: MappingSet, inventories: list, entry_key: Optional[str] = None):
"""This method is intended to replace the match_inventory method in the future. The functionality is same as
match_inventory but instead of dfr3_sets in plain json, dfr3 curves will be represented in
FragilityCurveSet Object.
Args:
mapping (obj): MappingSet Object that has the rules and entries.
inventories (list): A list of inventories. Each item is a casted fiona object
entry_key (str): keys such as PGA, pgd, and etc.
add_info (None, dict): additional information that used to match rules, e.g. retrofit strategy per building.
inventories (list): A list of inventories. Each item is a fiona object
entry_key (None, str): Mapping Entry Key e.g. Non-retrofit Fragility ID Code, retrofit_method_1, etc.
Returns:
dict: A dictionary of {"inventory id": FragilityCurveSet object}.
"""

dfr3_sets = {}

# find default mapping entry key if not provided
if entry_key is None:
for m in mapping.mappingEntryKeys:
if "defaultKey" in m and m["defaultKey"] is True:
entry_key = m["name"]
break
if entry_key is None:
raise ValueError("Entry key not provided and no default entry key found in the mapping!")

# loop through inventory to match the rules
matched_curve_ids = []
for inventory in inventories:
@@ -202,24 +210,22 @@ def match_inventory(self, mapping: MappingSet, inventories: list, entry_key: str
inventory["properties"]["efacility"] is None:
inventory["properties"]["efacility"] = ""

# if additional information presented, merge inventory properties with that additional information
if add_info is not None:
for add_info_row in add_info:
if inventory["properties"].get("guid") is not None and \
add_info_row.get("guid") is not None and \
inventory["properties"].get("guid") == add_info_row.get("guid"):
inventory["properties"].update(add_info_row)
break # assume no duplicated guid
# if a retrofit key exists, use it; otherwise use the default key
retrofit_entry_key = inventory["properties"]["retrofit_k"] if "retrofit_k" in \
inventory["properties"] else None

for m in mapping.mappings:
# for old format rule matching [[]]
# [[ and ] or [ and ]]
if isinstance(m.rules, list):
if self._property_match_legacy(rules=m.rules, properties=inventory["properties"]):
curve = m.entry[entry_key]
if retrofit_entry_key is not None and retrofit_entry_key in m.entry.keys():
curve = m.entry[retrofit_entry_key]
else:
curve = m.entry[entry_key]
dfr3_sets[inventory['id']] = curve

# if it's string:id; then need to fetch it from remote and cast to fragility3curve object
# if it's string:id; then need to fetch it from remote and cast to dfr3curve object
if isinstance(curve, str) and curve not in matched_curve_ids:
matched_curve_ids.append(curve)

@@ -230,10 +236,13 @@ def match_inventory(self, mapping: MappingSet, inventories: list, entry_key: str
# {"AND": [xx, "OR": [yy, yy], "AND": {"OR":["zz", "zz"]]}
elif isinstance(m.rules, dict):
if self._property_match(rules=m.rules, properties=inventory["properties"]):
curve = m.entry[entry_key]
if retrofit_entry_key is not None and retrofit_entry_key in m.entry.keys():
curve = m.entry[retrofit_entry_key]
else:
curve = m.entry[entry_key]
dfr3_sets[inventory['id']] = curve

# if it's string:id; then need to fetch it from remote and cast to fragility3curve object
# if it's string:id; then need to fetch it from remote and cast to dfr3 curve object
if isinstance(curve, str) and curve not in matched_curve_ids:
matched_curve_ids.append(curve)

@@ -255,21 +264,30 @@ def match_inventory(self, mapping: MappingSet, inventories: list, entry_key: str

return dfr3_sets

def match_list_of_dicts(self, mapping: MappingSet, inventories: list, entry_key: str):
def match_list_of_dicts(self, mapping: MappingSet, inventories: list, entry_key: Optional[str] = None):
"""This method is same as match_inventory, except it takes a simple list of dictionaries that contains the items
to be mapped in the rules. The match_inventory method takes a list of fiona objects
Args:
mapping (obj): MappingSet Object that has the rules and entries.
inventories (list): A list of inventories. Each item of the list is a simple dictionary
entry_key (str): keys such as PGA, pgd, and etc.
entry_key (None, str): Mapping Entry Key e.g. Non-retrofit Fragility ID Code, retrofit_method_1, etc.
Returns:
dict: A dictionary of {"inventory id": FragilityCurveSet object}.
"""
dfr3_sets = {}

# find default mapping entry key if not provided
if entry_key is None:
for m in mapping.mappingEntryKeys:
if "defaultKey" in m and m["defaultKey"] is True:
entry_key = m["name"]
break
if entry_key is None:
raise ValueError("Entry key not provided and no default entry key found in the mapping!")

# loop through inventory to match the rules
matched_curve_ids = []
for inventory in inventories:
@@ -299,7 +317,7 @@ def match_list_of_dicts(self, mapping: MappingSet, inventories: list, entry_key:

# use the first match
break

batch_dfr3_sets = self.batch_get_dfr3_set(matched_curve_ids)

# replace the curve id in dfr3_sets to the dfr3 curve
@@ -446,7 +464,8 @@ def extract_inventory_class_legacy(rules):
"""This method will extract the inventory class name from a mapping rule. E.g. PWT2/PPP1
Args:
rules (list): The outer list is applying "OR" rule and the inner list is applying an "AND" rule. e.g. list(["java.lang.String utilfcltyc EQUALS 'PWT2'"],["java.lang.String utilfcltyc EQUALS 'PPP1'"])
rules (list): The outer list is applying "OR" rule and the inner list is applying an "AND" rule.
e.g. list(["java.lang.String utilfcltyc EQUALS 'PWT2'"],["java.lang.String utilfcltyc EQUALS 'PPP1'"])
Returns:
inventory_class (str): extracted inventory class name. "/" stands for or and "+" stands for and
@@ -471,7 +490,8 @@ def extract_inventory_class(rules):
"""This method will extract the inventory class name from a mapping rule. E.g. PWT2/PPP1
Args:
rules (dict): e.g. { "AND": ["java.lang.String utilfcltyc EQUALS 'PWT2'", "java.lang.String utilfcltyc EQUALS 'PPP1'"]}
rules (dict): e.g. { "AND": ["java.lang.String utilfcltyc EQUALS 'PWT2'",
"java.lang.String utilfcltyc EQUALS 'PPP1'"]}
Returns:
inventory_class (str): extracted inventory class name. "/" stands for or and "+" stands for and
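Both match_inventory and match_list_of_dicts now share the same fallback: when no entry key is passed, use the mapping entry key flagged with "defaultKey": true. A standalone sketch of that lookup, with a hypothetical mappingEntryKeys payload:

```python
from typing import Optional


def resolve_entry_key(mapping_entry_keys: list, entry_key: Optional[str] = None) -> str:
    """Mirror the default-key fallback added in dfr3service.py above."""
    if entry_key is None:
        for m in mapping_entry_keys:
            if "defaultKey" in m and m["defaultKey"] is True:
                entry_key = m["name"]
                break
    if entry_key is None:
        raise ValueError("Entry key not provided and no default entry key found in the mapping!")
    return entry_key


# Hypothetical mappingEntryKeys payload from a DFR3 mapping definition.
keys = [{"name": "retrofit_method_1"},
        {"name": "Non-Retrofit Fragility ID Code", "defaultKey": True}]
print(resolve_entry_key(keys))                       # Non-Retrofit Fragility ID Code
print(resolve_entry_key(keys, "retrofit_method_1"))  # an explicit key wins
```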
18 changes: 10 additions & 8 deletions pyincore/models/mappingset.py
@@ -18,15 +18,17 @@ class MappingSet:
"""

def __init__(self, metadata):
self.id = metadata["id"]
self.name = metadata["name"]
self.hazard_type = metadata["hazardType"]
self.inventory_type = metadata['inventoryType']

if 'dataType' in metadata:
self.data_type = metadata["dataType"]
self.id = metadata["id"] if "id" in metadata else ""
self.name = metadata["name"] if "name" in metadata else ""
self.hazard_type = metadata["hazardType"] if "hazardType" in metadata else ""
self.inventory_type = metadata['inventoryType'] if "inventoryType" in metadata else ""
if "mappingEntryKeys" in metadata and metadata["mappingEntryKeys"] is not None:
self.mappingEntryKeys = metadata["mappingEntryKeys"]
else:
self.data_type = "incore:dfr3MappingSet"
self.mappingEntryKeys = []

self.data_type = metadata["dataType"] if "dataType" in metadata else "incore:dfr3MappingSet"

mappings = []
for m in metadata['mappings']:
@@ -54,7 +56,7 @@ def from_json_str(cls, json_str):
return cls(json.loads(json_str))

@classmethod
def from_json_file(cls, file_path, data_type):
def from_json_file(cls, file_path, data_type="incore:dfr3MappingSet"):
"""Get dfr3 mapping from the file.
Args:
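With every metadata field now optional and data_type defaulting to "incore:dfr3MappingSet", a local mapping JSON can be loaded without extra arguments. A small sketch, assuming a file at the hypothetical path "local_mapping.json" that contains at least a "mappings" list:

```python
from pyincore import MappingSet

mapping_set = MappingSet.from_json_file("local_mapping.json")
print(mapping_set.data_type)         # "incore:dfr3MappingSet" unless the JSON says otherwise
print(mapping_set.mappingEntryKeys)  # [] when the JSON defines no mappingEntryKeys
```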
2 changes: 1 addition & 1 deletion pyincore/utils/dataprocessutil.py
@@ -442,7 +442,7 @@ def get_max_damage_state(dmg_result):

guids = dmg_result[["guid"]]
max_val = dmg_result[dmg_states].max(axis=1)
max_key = dmg_result[dmg_states].idxmax(axis=1)
max_key = dmg_result[dmg_states].dropna(how='all').idxmax(axis=1)
dmg_concat = pd.concat([guids, max_val, max_key], axis=1)
dmg_concat.rename(columns={0: "max_prob", 1: "max_state"}, inplace=True)

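The dropna(how='all') guard addresses a pandas FutureWarning: DataFrame.idxmax over rows that are entirely NaN is deprecated and will raise in a future pandas release. A self-contained repro sketch with made-up damage-state columns:

```python
import numpy as np
import pandas as pd

dmg_states = ["DS_0", "DS_1", "DS_2", "DS_3"]
dmg_result = pd.DataFrame({
    "guid": ["a", "b", "c"],
    "DS_0": [0.1, np.nan, 0.70],
    "DS_1": [0.5, np.nan, 0.20],
    "DS_2": [0.3, np.nan, 0.05],
    "DS_3": [0.1, np.nan, 0.05],
})

# Row "b" is entirely NaN: plain idxmax warns (and will eventually raise),
# so all-NaN rows are dropped before taking the per-row argmax.
max_key = dmg_result[dmg_states].dropna(how="all").idxmax(axis=1)
print(max_key)  # 0 -> DS_1, 2 -> DS_0
```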