diff --git a/app/appraiser.py b/app/appraiser.py index dc906c3..9f32ee1 100644 --- a/app/appraiser.py +++ b/app/appraiser.py @@ -1,4 +1,3 @@ -from numpy.typing import ArrayLike from typing import List class Appraiser(): diff --git a/app/baselines/datascope_wrapper.py b/app/baselines/datascope_wrapper.py index 86070c8..e3e72c0 100644 --- a/app/baselines/datascope_wrapper.py +++ b/app/baselines/datascope_wrapper.py @@ -3,7 +3,7 @@ from datascope.importance.common import SklearnModelAccuracy from datascope.importance.shapley import ShapleyImportance, ImportanceMethod from appraiser import Appraiser -from classifier import LogisticClassifier as Classifier +from classifier import XGBClassifier as Classifier utility_pipeline = pipeline.make_pipeline( Classifier() diff --git a/app/baselines/debugging.py b/app/baselines/debugging.py index 7f30aac..81dd84f 100644 --- a/app/baselines/debugging.py +++ b/app/baselines/debugging.py @@ -1,6 +1,6 @@ import numpy as np from sklearn.model_selection import train_test_split -from classifier import LogisticClassifier as Classifier +from classifier import XGBClassifier as Classifier from appraiser import Appraiser class CustomAppraiser(Appraiser): diff --git a/app/baselines/influence_function.py b/app/baselines/influence_function.py index a1c9d84..59998e3 100644 --- a/app/baselines/influence_function.py +++ b/app/baselines/influence_function.py @@ -1,5 +1,6 @@ import torch from appraiser import Appraiser +# we use LogisticClassifier here as a proxy from classifier import LogisticClassifier as Classifier from baselines.ptif.calc_influence_function import calc_img_wise from torch.utils.data import TensorDataset, DataLoader diff --git a/app/baselines/random_pick.py b/app/baselines/random_pick.py index b7fa5d7..e4ae42b 100644 --- a/app/baselines/random_pick.py +++ b/app/baselines/random_pick.py @@ -1,5 +1,4 @@ import numpy as np -from sklearn.model_selection import train_test_split from appraiser import Appraiser class RandomAppraiser(Appraiser): diff --git a/app/classifier.py b/app/classifier.py index f64eac9..805c6f7 100644 --- a/app/classifier.py +++ b/app/classifier.py @@ -1,4 +1,3 @@ -from loguru import logger import numpy as np from pyarrow import parquet as pq import sklearn @@ -6,8 +5,6 @@ import xgboost import torch.nn as nn from torch.utils.data import TensorDataset, DataLoader -from sklearn.linear_model import LogisticRegressionCV - class XGBClassifier(): def __init__(self): diff --git a/app/plotter_overall.py b/app/plotter_overall.py index 71a4b0a..a7c10dc 100644 --- a/app/plotter_overall.py +++ b/app/plotter_overall.py @@ -29,8 +29,6 @@ def aggregate_data(evaluation_file, result_folder): return data def plot(data, result_folder, score_metric='auc'): - data = data[data['method']!='my_debug'] - sns.set( font="DejaVu Sans", context="paper", diff --git a/app/utils.py b/app/utils.py index ab64847..4fa37a0 100644 --- a/app/utils.py +++ b/app/utils.py @@ -1,7 +1,7 @@ import numpy as np import pyarrow as pa -from sklearn import metrics from loguru import logger + def fix(proposed_fixes, train, budget, gt_df): if len(proposed_fixes) > budget: raise ValueError("Submission takes more budget than expected, {}>{}".format(len(proposed_fixes), budget))