From 5e8d8fd0be588d45dfacb9038abef4ae4e7b25c1 Mon Sep 17 00:00:00 2001
From: Scott Lindauer <Scott.Lindauer@sas.com>
Date: Tue, 26 Sep 2023 09:03:36 -0400
Subject: [PATCH] Deprecate older functions to generate dmcas_fitstat/ROC/Lift
 json files

---
 src/sasctl/pzmm/write_json_files.py | 441 ----------------------------
 1 file changed, 441 deletions(-)

diff --git a/src/sasctl/pzmm/write_json_files.py b/src/sasctl/pzmm/write_json_files.py
index d088aaab..1f3282d1 100644
--- a/src/sasctl/pzmm/write_json_files.py
+++ b/src/sasctl/pzmm/write_json_files.py
@@ -1466,447 +1466,6 @@ def apply_dataframe_to_json(
                 )
         return json_dict
 
-    # noinspection PyCallingNonCallable, PyNestedDecorators
-    @deprecated(
-        "Please use the calculate_model_statistics method instead.",
-        version="1.9",
-        removed_in="1.10",
-    )
-    @classmethod
-    def calculateFitStat(
-        cls, validateData=None, trainData=None, testData=None, jPath=Path.cwd()
-    ):
-        """
-        Calculates fit statistics from user data and predictions and then writes to a
-        JSON file for importing into the common model repository.
-
-        Note that if no data set is provided (validate, train, or test),
-        this function raises an error and does not create a JSON file.
-
-        Datasets can be provided in the following forms:
-        * pandas dataframe; the actual and predicted values are their own columns
-        * numpy array; the actual and predicted values are their own columns or rows and
-        ordered such that the actual values come first and the predicted second
-        * list; the actual and predicted values are their own indexed entry
-
-        This function outputs a JSON file named "dmcas_fitstat.json".
-
-        Parameters
-        ----------
-        validateData : pandas dataframe, numpy array, or list, optional
-            Dataframe, array, or list of the validation data set, including both
-            the actual and predicted values. The default value is None.
-        trainData : pandas dataframe, numpy array, or list, optional
-            Dataframe, array, or list of the train data set, including both
-            the actual and predicted values. The default value is None.
-        testData : pandas dataframe, numpy array, or list, optional
-            Dataframe, array, or list of the test data set, including both
-            the actual and predicted values. The default value is None.
-        jPath : string, optional
-            Location for the output JSON file. The default value is the current
-            working directory.
-        """
-        # If numpy inputs are supplied, then assume numpy is installed
-        try:
-            import numpy as np
-        except ImportError:
-            np = None
-
-        try:
-            from sklearn import metrics
-        except ImportError:
-            raise RuntimeError(
-                "The 'scikit-learn' package is required to use the calculateFitStat "
-                "function. "
-            )
-
-        nullJSONPath = (
-            Path(__file__).resolve().parent / "template_files/dmcas_fitstat.json"
-        )
-        nullJSONDict = cls.read_json_file(nullJSONPath)
-
-        dataSets = [[[None], [None]], [[None], [None]], [[None], [None]]]
-
-        dataPartitionExists = []
-        for i, data in enumerate([validateData, trainData, testData]):
-            if data is not None:
-                dataPartitionExists.append(i)
-                if type(data) is pd.core.frame.DataFrame:
-                    dataSets[i] = data.transpose().values.tolist()
-                elif type(data) is list:
-                    dataSets[i] = data
-                elif type(data) is np.ndarray:
-                    dataSets[i] = data.tolist()
-
-        if len(dataPartitionExists) == 0:
-            raise ValueError(
-                "No data was provided. Please provide the actual and predicted values "
-                "for at least one of the partitions (VALIDATE, TRAIN, or TEST)."
-            )
-
-        for j in dataPartitionExists:
-            fitStats = nullJSONDict["data"][j]["dataMap"]
-
-            fitStats["_PartInd_"] = j
-
-            # If the data provided is Predicted | Actual instead of Actual |
-            # Predicted, catch the error and flip the columns
-            try:
-                fpr, tpr, _ = metrics.roc_curve(dataSets[j][0], dataSets[j][1])
-            except ValueError:
-                tempSet = dataSets[j]
-                dataSets[j][0] = tempSet[1]
-                dataSets[j][1] = tempSet[0]
-                fpr, tpr, _ = metrics.roc_curve(dataSets[j][0], dataSets[j][1])
-
-            RASE = math.sqrt(metrics.mean_squared_error(dataSets[j][0], dataSets[j][1]))
-            fitStats["_RASE_"] = RASE
-
-            NObs = len(dataSets[j][0])
-            fitStats["_NObs_"] = NObs
-
-            auc = metrics.roc_auc_score(dataSets[j][0], dataSets[j][1])
-            GINI = (2 * auc) - 1
-            fitStats["_GINI_"] = GINI
-
-            try:
-                from scipy.stats import gamma
-
-                _, _, scale = gamma.fit(dataSets[j][1])
-                fitStats["_GAMMA_"] = 1 / scale
-            except ImportError:
-                warnings.warn(
-                    "scipy was not installed, so the gamma calculation could"
-                    "not be computed."
-                )
-                fitStats["_GAMMA_"] = None
-
-            intPredict = [round(x) for x in dataSets[j][1]]
-            MCE = 1 - metrics.accuracy_score(dataSets[j][0], intPredict)
-            fitStats["_MCE_"] = MCE
-
-            ASE = metrics.mean_squared_error(dataSets[j][0], dataSets[j][1])
-            fitStats["_ASE_"] = ASE
-
-            MCLL = metrics.log_loss(dataSets[j][0], dataSets[j][1])
-            fitStats["_MCLL_"] = MCLL
-
-            KS = max(abs(fpr - tpr))
-            fitStats["_KS_"] = KS
-
-            KSPostCutoff = None
-            fitStats["_KSPostCutoff_"] = KSPostCutoff
-
-            DIV = len(dataSets[j][0])
-            fitStats["_DIV_"] = DIV
-
-            TAU = pd.Series(dataSets[j][0]).corr(
-                pd.Series(dataSets[j][1]), method="kendall"
-            )
-            fitStats["_TAU_"] = TAU
-
-            KSCut = None
-            fitStats["_KSCut_"] = KSCut
-
-            C = metrics.auc(fpr, tpr)
-            fitStats["_C_"] = C
-
-            nullJSONDict["data"][j]["dataMap"] = fitStats
-
-        with open(Path(jPath) / FITSTAT, "w") as jFile:
-            json.dump(nullJSONDict, jFile, indent=4)
-        if cls.notebook_output:
-            print(
-                f"{FITSTAT} was successfully written and saved to "
-                f"{Path(jPath) / FITSTAT}"
-            )
-
-    # noinspection PyCallingNonCallable,PyNestedDecorators
-    @deprecated(
-        "Please use the calculate_model_statistics method instead.",
-        version="1.9",
-        removed_in="1.10",
-    )
-    @classmethod
-    def generateROCLiftStat(
-        cls,
-        targetName,
-        targetValue,
-        swatConn,
-        validateData=None,
-        trainData=None,
-        testData=None,
-        jPath=Path.cwd(),
-    ):
-        """
-        Calculates the ROC and Lift curves from user data and model predictions and
-        the writes it to JSON files for importing in to the common model repository.
-
-        ROC and Lift calculations are completed by CAS through a SWAT call. Note that
-        if no data set is provided (validate, train, or test), this function raises
-        an error and does not create any JSON files.
-
-        This function outputs a pair of JSON files named "dmcas_lift.json" and
-        "dmcas_roc.json".
-
-        Parameters
-        ---------------
-        targetName: str
-            Target variable name to be predicted.
-        targetValue: int or float
-            Value of target variable that indicates an event.
-        swatConn: SWAT connection to CAS
-            Connection object to CAS service in SAS Model Manager through SWAT
-            authentication.
-        validateData : pandas dataframe, numpy array, or list, optional
-            Dataframe, array, or list of the validation data set, including both the
-            actual values and the calculated probabilities. The default value is None.
-        trainData : pandas dataframe, numpy array, or list, optional
-            Dataframe, array, or list of the train data set, including both the actual
-            values and the calculated probabilities. The default value is None.
-        testData : pandas dataframe, numpy array, or list, optional
-            Dataframe, array, or list of the test data set, including both the actual
-            values and the calculated probabilities. The default value is None.
-        jPath : string, optional
-            Location for the output JSON file. The default value is the current working
-            directory.
-        """
-        # If numpy inputs are supplied, then assume numpy is installed
-        try:
-            # noinspection PyPackageRequirements
-            import numpy as np
-        except ImportError:
-            np = None
-        try:
-            import swat
-        except ImportError:
-            raise RuntimeError(
-                "The 'swat' package is required to generate ROC and Lift charts with "
-                "this function. "
-            )
-
-        nullJSONROCPath = (
-            Path(__file__).resolve().parent / "template_files/dmcas_roc.json"
-        )
-        nullJSONROCDict = cls.read_json_file(nullJSONROCPath)
-
-        nullJSONLiftPath = (
-            Path(__file__).resolve().parent / "template_files/dmcas_lift.json"
-        )
-        nullJSONLiftDict = cls.read_json_file(nullJSONLiftPath)
-
-        dataSets = [pd.DataFrame(), pd.DataFrame(), pd.DataFrame()]
-        columns = ["actual", "predict"]
-
-        dataPartitionExists = []
-        # Check if a data partition exists, then convert to a pandas dataframe
-        for i, data in enumerate([validateData, trainData, testData]):
-            if data is not None:
-                dataPartitionExists.append(i)
-                if type(data) is list:
-                    dataSets[i][columns] = list(zip(*data))
-                elif type(data) is pd.core.frame.DataFrame:
-                    try:
-                        dataSets[i][columns[0]] = data.iloc[:, 0]
-                        dataSets[i][columns[1]] = data.iloc[:, 1]
-                    except NameError:
-                        dataSets[i] = pd.DataFrame(data=data.iloc[:, 0]).rename(
-                            columns={data.columns[0]: columns[0]}
-                        )
-                        dataSets[i][columns[1]] = data.iloc[:, 1]
-                elif type(data) is np.ndarray:
-                    try:
-                        dataSets[i][columns] = data
-                    except ValueError:
-                        dataSets[i][columns] = data.transpose()
-
-        if len(dataPartitionExists) == 0:
-            raise ValueError(
-                "No data was provided. Please provide the actual and predicted values "
-                "for at least one of the partitions (VALIDATE, TRAIN, or TEST)"
-            )
-
-        nullLiftRow = list(range(1, 64))
-        nullROCRow = list(range(1, 301))
-
-        swatConn.loadactionset("percentile")
-
-        for i in dataPartitionExists:
-            swatConn.read_frame(
-                dataSets[i][columns], casout=dict(name="SCOREDVALUES", replace=True)
-            )
-            swatConn.percentile.assess(
-                table="SCOREDVALUES",
-                inputs=[columns[1]],
-                casout=dict(name="SCOREASSESS", replace=True),
-                response=columns[0],
-                event=str(targetValue),
-            )
-            assessROC = swatConn.CASTable("SCOREASSESS_ROC").to_frame()
-            assessLift = swatConn.CASTable("SCOREASSESS").to_frame()
-
-            for j in range(100):
-                rowNumber = (i * 100) + j
-                nullROCRow.remove(rowNumber + 1)
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_Event_"] = targetValue
-                nullJSONROCDict["data"][rowNumber]["dataMap"][
-                    "_TargetName_"
-                ] = targetName
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_Cutoff_"] = str(
-                    assessROC["_Cutoff_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_TP_"] = str(
-                    assessROC["_TP_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_FP_"] = str(
-                    assessROC["_FP_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_FN_"] = str(
-                    assessROC["_FN_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_TN_"] = str(
-                    assessROC["_TN_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_Sensitivity_"] = str(
-                    assessROC["_Sensitivity_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_Specificity_"] = str(
-                    assessROC["_Specificity_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_KS_"] = str(
-                    assessROC["_KS_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_KS2_"] = str(
-                    assessROC["_KS2_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_FHALF_"] = str(
-                    assessROC["_FHALF_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_FPR_"] = str(
-                    assessROC["_FPR_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_ACC_"] = str(
-                    assessROC["_ACC_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_FDR_"] = str(
-                    assessROC["_FDR_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_F1_"] = str(
-                    assessROC["_F1_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_C_"] = str(
-                    assessROC["_C_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_GINI_"] = str(
-                    assessROC["_GINI_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_GAMMA_"] = str(
-                    assessROC["_GAMMA_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_TAU_"] = str(
-                    assessROC["_TAU_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"]["_MiscEvent_"] = str(
-                    assessROC["_MiscEvent_"][j]
-                )
-                nullJSONROCDict["data"][rowNumber]["dataMap"][
-                    "_OneMinusSpecificity_"
-                ] = str(1 - assessROC["_Specificity_"][j])
-
-            for j in range(21):
-                rowNumber = (i * 21) + j
-                nullLiftRow.remove(rowNumber + 1)
-                nullJSONLiftDict["data"][rowNumber]["dataMap"]["_Event_"] = str(
-                    targetValue
-                )
-                nullJSONLiftDict["data"][rowNumber]["dataMap"][
-                    "_TargetName_"
-                ] = targetName
-                if j != 0:
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"]["_Depth_"] = str(
-                        assessLift["_Depth_"][j - 1]
-                    )
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"]["_Value_"] = str(
-                        assessLift["_Value_"][j - 1]
-                    )
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"]["_NObs_"] = str(
-                        assessLift["_NObs_"][j - 1]
-                    )
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"]["_NEvents_"] = str(
-                        assessLift["_NEvents_"][j - 1]
-                    )
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"][
-                        "_NEventsBest_"
-                    ] = str(assessLift["_NEventsBest_"][j - 1])
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"]["_Resp_"] = str(
-                        assessLift["_Resp_"][j - 1]
-                    )
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"]["_RespBest_"] = str(
-                        assessLift["_RespBest_"][j - 1]
-                    )
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"]["_Lift_"] = str(
-                        assessLift["_Lift_"][j - 1]
-                    )
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"]["_LiftBest_"] = str(
-                        assessLift["_LiftBest_"][j - 1]
-                    )
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"]["_CumResp_"] = str(
-                        assessLift["_CumResp_"][j - 1]
-                    )
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"][
-                        "_CumRespBest_"
-                    ] = str(assessLift["_CumRespBest_"][j - 1])
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"]["_CumLift_"] = str(
-                        assessLift["_CumLift_"][j - 1]
-                    )
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"][
-                        "_CumLiftBest_"
-                    ] = str(assessLift["_CumLiftBest_"][j - 1])
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"]["_PctResp_"] = str(
-                        assessLift["_PctResp_"][j - 1]
-                    )
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"][
-                        "_PctRespBest_"
-                    ] = str(assessLift["_PctRespBest_"][j - 1])
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"][
-                        "_CumPctResp_"
-                    ] = str(assessLift["_CumPctResp_"][j - 1])
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"][
-                        "_CumPctRespBest_"
-                    ] = str(assessLift["_CumPctRespBest_"][j - 1])
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"]["_Gain_"] = str(
-                        assessLift["_Gain_"][j - 1]
-                    )
-                    nullJSONLiftDict["data"][rowNumber]["dataMap"]["_GainBest_"] = str(
-                        assessLift["_GainBest_"][j - 1]
-                    )
-
-        # If not all partitions are present, clean up the dicts for compliant formatting
-        if len(dataPartitionExists) < 3:
-            # Remove missing partitions from ROC and Lift dicts
-            for index, row in reversed(list(enumerate(nullJSONLiftDict["data"]))):
-                if int(row["rowNumber"]) in nullLiftRow:
-                    nullJSONLiftDict["data"].pop(index)
-            for index, row in reversed(list(enumerate(nullJSONROCDict["data"]))):
-                if int(row["rowNumber"]) in nullROCRow:
-                    nullJSONROCDict["data"].pop(index)
-
-            # Reassign the row number values to match what is left in each dict
-            for i, _ in enumerate(nullJSONLiftDict["data"]):
-                nullJSONLiftDict["data"][i]["rowNumber"] = i + 1
-            for i, _ in enumerate(nullJSONROCDict["data"]):
-                nullJSONROCDict["data"][i]["rowNumber"] = i + 1
-
-        with open(Path(jPath) / ROC, "w") as jFile:
-            json.dump(nullJSONROCDict, jFile, indent=4)
-        if cls.notebook_output:
-            print(f"{ROC} was successfully written and saved to {Path(jPath) / ROC}")
-
-        with open(Path(jPath) / LIFT, "w") as jFile:
-            json.dump(nullJSONLiftDict, jFile, indent=4)
-        if cls.notebook_output:
-            print(f"{LIFT} was successfully written and saved to {Path(jPath) / LIFT}")
-
     @staticmethod
     def read_json_file(path: Union[str, Path]) -> Any:
         """