Update dataset_loader.py and augments.py
JunyuYan authored Jul 25, 2024
1 parent acc73f3 commit 70c83e4
Showing 2 changed files with 100 additions and 34 deletions.
24 changes: 15 additions & 9 deletions algorithm/arguments.py
@@ -8,37 +8,43 @@ def parse_args():
     parser.add_argument('--arch', type=str, help='choose from resnext101,'
                         ' enet, resnet101, densenet or inception', default='densenet')
     # Setting training & test dataset
-    parser.add_argument('--dataset', type=str, help='choose from ISIC or Fitzpatrick17k', default='ISIC')
+    parser.add_argument('--task', type=str, help='choose from skin, xray, mri', default='xray')
     # Setting debiasing technique
     parser.add_argument('--debias-config', type=str, help='choose from baseline, LNTL, TABE, both,'
                         ' doubleTABE or doubleLNTL', default='baseline')
     # Bias to remove
     parser.add_argument('--attr', type=str, help='use to define which bias is to remove', default='skin_attribute')
-    parser.add_argument('--bias', type =str, help='use to define which bias function want to use, choose from spd, eodds, eopp', default='eodds')
+    parser.add_argument('--bias', type=str, help='use to define which bias function want to use, choose from spd, eodds, eopp', default='eodds')
     # Setting hyperparameters
     parser.add_argument('--seed', help='sets all random seeds', type=int, default=0)
     parser.add_argument('--batch-size', help='sets batch size', type=int, default=64)
     parser.add_argument('--num-workers', help='sets number of cpu workers', type=int, default=2)
     parser.add_argument('--lr-base', help='sets baseline learning rate', type=float, default=0.001)
-    parser.add_argument('--lr-forget', help='sets forget learning rate', type=float, default=0.0003)
-    parser.add_argument('--alpha', help='sets alpha for l1 sparsity', type=float, default=0.0005)
+    parser.add_argument('--lr-forget', help='sets forget learning rate', type=float, default=0.0001)
+    parser.add_argument('--alpha', help='sets alpha for l1 sparsity', type=float, default=0.05)
     parser.add_argument('--beta', help='sets beta to balance classification loss and fairness loss', type=float, default=0.1)
     parser.add_argument('--momentum', help='sets momentum', type=float, default=0.9)
     parser.add_argument('--weight-decay', help='sets weight decay', type=float, default=0.0005)
-    parser.add_argument('--unlearn-epochs', help='sets epochs to unlearn', type=int, default=5)
-    parser.add_argument('--n-epochs', help='sets epochs to finetune', type=int, default=4)
+    parser.add_argument('--unlearn-epochs', help='sets epochs to unlearn', type=int, default=10)
+    parser.add_argument('--n-epochs', help='sets epochs to finetune', type=int, default=10)
     parser.add_argument('--out-dim', help='sets main head output dimension', type=int, default=1)
     parser.add_argument('--pin-memory', help='use to pin an unpinned CPU tensor', default=False, action="store_true")
+    parser.add_argument('--mask-type', help='Choose different mask', type=str, default='fisher')
+    parser.add_argument('--mask-iter', help='Choose if using iteration to generate the mask', default=0)
+    parser.add_argument('--mask-scale', help='Choose from block, layer, weight', type=str, default='weight')
+    parser.add_argument('--subset-ratio', help='The ratio for the subset [0, 1]', type=float, default=0.1)
+    parser.add_argument('--algo', help='Type of algorithms', type=str, default='FullFT')
     # Setting directories
     parser.add_argument('--image-dir', help='path to image directory', type=str, default='./data/images')
-    parser.add_argument('--csv-dir', help='path to csv directory', type=str, default='./data/skin/csv/isic_val.csv')
+    parser.add_argument('--csv-dir', help='path to csv directory', type=str, default='')
     parser.add_argument('--test-csv-dir', help='path to test csv directory', type=str, default='./data/skin/csv/fitzpatrick17k.csv')
     parser.add_argument('--model-dir', help='path to load models from', type=str, default='./results/weights')
-    parser.add_argument('--output-dir', help='path to save plots to', type=str, default='./results/debias')
-    parser.add_argument('--save-path', help='path to save plots to', type=str, default='output.csv')
+    parser.add_argument('--output-dir', help='path to save plots to', type=str, default='./results/crossval_csv/')
+    parser.add_argument('--save-path', help='path to save plots to', type=str, default='debias_output_pad_skin.csv')
     parser.add_argument('--log-dir', help='path to save logs to', type=str, default='./results/logs')
     # Miscellaneous
     parser.add_argument('--image-size', type=int, default=256)
+    parser.add_argument('--run-times', type=int, default=5)
     parser.add_argument('--CUDA_VISIBLE_DEVICES', help='selecting GPUs to run on', type=str, default='0')

     args, _ = parser.parse_known_args()
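
Note (illustrative, not part of the commit): downstream code consumes this parser via parse_args(), as dataset_loader.py does below, and dashed flags surface as underscored attributes. A minimal sketch checking the new defaults, assuming parse_args() returns the args namespace and the repo root is on sys.path:

import sys
sys.argv = ['train.py']  # hypothetical script name, no CLI flags passed
from arguments import parse_args

args = parse_args()                         # parse_known_args() ignores unrecognized flags
print(args.task)                            # 'xray' (new flag, replacing --dataset)
print(args.mask_type, args.mask_scale)      # 'fisher', 'weight'
print(args.unlearn_epochs, args.n_epochs)   # 10, 10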
110 changes: 85 additions & 25 deletions algorithm/dataset_loader.py
@@ -9,6 +9,7 @@
 from arguments import *

 args = parse_args()
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


 class TestDataset(Dataset):
@@ -19,7 +20,7 @@ def __init__(self, csv, attr, transform) -> None:
         Initialize an `ATLASDataset`.
         """
-        self.csv = pd.read_csv(csv, low_memory=False).reset_index(drop=True)
+        self.csv = csv
         self.transform = transform
         self.attr = attr

@@ -35,10 +36,21 @@ def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         """
         row = self.csv.iloc[idx]
         # print(os.getcwd())
-        if 'data/skin/' not in row.filepath:
-            image_path = os.path.join('./data/skin/', row.filepath)
-        else:
-            image_path = os.path.join(row.filepath)
+        if args.task == 'skin':
+            if 'data/skin/' not in row.filepath:
+                if 'data/Skin/' in row.filepath:
+                    image_path = row.filepath
+                    image_path = image_path.replace('data/Skin/', 'data/skin/')
+                else:
+                    image_path = os.path.join('data/skin/', row.filepath)
+            else:
+                image_path = row.filepath
+        if args.task == 'xray':
+            if 'data/chestXray/' not in row.filepath:
+                image_path = os.path.join('data/chestXray/', row.filepath)
+            else:
+                image_path = row.filepath
+

         image = cv.imread(image_path)
         image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
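
Note (illustrative, not part of the commit): the new branching normalizes stored file paths per task, fixing the 'data/Skin/' capitalization and prefixing bare relative paths with the task's data root. The same rule as a hypothetical helper; unlike the committed code, which leaves image_path unset for any other task (e.g. mri) and would fail at cv.imread with an unbound local, this sketch returns the path unchanged:

import os

def resolve_image_path(filepath, task):
    if task == 'skin':
        filepath = filepath.replace('data/Skin/', 'data/skin/')  # normalize case
        return filepath if 'data/skin/' in filepath else os.path.join('data/skin/', filepath)
    if task == 'xray':
        return filepath if 'data/chestXray/' in filepath else os.path.join('data/chestXray/', filepath)
    return filepath  # committed code has no fallback for other tasks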
@@ -58,20 +70,60 @@ def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         return data, label, attr


-def criterion_func(df):
-    lst = df['target'].value_counts().sort_index().tolist()
-    sum_lst = sum(lst)
-    class_freq = []
-    for i in lst:
-        class_freq.append(i / sum_lst * 100)
-    weights = torch.tensor(class_freq, dtype=torch.float32)
-
-    weights = weights / weights.sum()
-    weights = 1.0 / weights
-    weights = weights / weights.sum()
-    weights = weights.to(device)
-
-    return weights
+def target_dataset_balance(df, target):
+    target0 = df[df[target] == 0]
+    target1 = df[df[target] == 1]
+    if len(target0) > len(target1):
+        # index = np.random.randint(len(target1), size=len(target0))
+        index = np.random.randint(len(target1), size=len(target1))
+        target1 = target1.iloc[list(index)]
+    else:
+        index = np.random.randint(len(target0), size=len(target1))
+        target0 = target0.iloc[list(index)]
+    csv = pd.concat([target0, target1], ignore_index=True)
+    return csv
+
+
+def dataset_balance(df, attribute, target, downsample=0):
+    group0 = df[df[attribute] == 0]
+    group1 = df[df[attribute] == 1]
+    if len(group0) > len(group1):
+        df0, df1 = split_dataset(group0, group1, target, downsample)
+    else:
+        df0, df1 = split_dataset(group1, group0, target, downsample)
+
+    csv = pd.concat([df0, df1], ignore_index=True)
+    return csv
+
+
+def split_dataset(df0, df1, target, downsample):
+    group0_target0 = df0[df0[target] == 0]
+    group0_target1 = df0[df0[target] == 1]
+    group1_target0 = df1[df1[target] == 0]
+    group1_target1 = df1[df1[target] == 1]
+    if downsample:
+        index_target0 = np.random.randint(len(group0_target0), size=len(group1_target0))
+        down_target0 = group0_target0.iloc[list(index_target0)]
+        index_target1 = np.random.randint(len(group0_target1), size=len(group1_target1))
+        down_target1 = group0_target1.iloc[list(index_target1)]
+        df0 = pd.concat([down_target0, down_target1], ignore_index=True)
+    else:
+        index_target0 = np.random.randint(len(group1_target0), size=len(group0_target0))
+        up_target0 = group1_target0.iloc[list(index_target0)]
+        index_target1 = np.random.randint(len(group1_target1), size=len(group0_target1))
+        up_target1 = group0_target1.iloc[list(index_target1)]
+        df1 = pd.concat([up_target0, up_target1], ignore_index=True)
+
+    return df0, df1
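
Note (illustrative, not part of the commit): dataset_balance routes the larger attribute group into split_dataset as df0; with downsample=1, each target cell of the larger group is resampled down to the size of the matching cell in the smaller group, so all four attribute-by-target cells end up equal. A toy run with illustrative values ('skin_attribute' matches the --attr default):

import numpy as np
import pandas as pd

np.random.seed(0)
toy = pd.DataFrame({
    'skin_attribute': [0, 0, 0, 0, 0, 0, 1, 1],
    'target':         [0, 0, 0, 1, 1, 1, 0, 1],
})
balanced = dataset_balance(toy, 'skin_attribute', 'target', downsample=1)
print(balanced.groupby(['skin_attribute', 'target']).size())  # one row in each cell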
+
+
+def cal_pos_weight(df):
+    pos_count = df['target'].sum() * 1.0 + 1e-10
+    neg_count = (len(df) - pos_count)*1.0
+    ratio = neg_count/pos_count
+    pos_weight = torch.tensor(ratio, device=device)
+
+    return pos_weight
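
Note (illustrative, not part of the commit): cal_pos_weight replaces the removed criterion_func's multi-class weight vector with a single negative-to-positive ratio, which is the form torch.nn.BCEWithLogitsLoss accepts as pos_weight for a binary head (arguments.py defaults --out-dim to 1). A hypothetical pairing, where train_df is an assumed DataFrame with a binary 'target' column:

import torch.nn as nn

pos_weight = cal_pos_weight(train_df)  # scalar tensor, neg_count / pos_count
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
# loss = criterion(logits.view(-1), labels.float())  # logits from the out-dim=1 head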


 def load_dataset(dataset, batch_size=64, shuffle=False) -> DataLoader[Any]:
@@ -84,13 +136,21 @@ def load_dataset(dataset, batch_size=64, shuffle=False) -> DataLoader[Any]:
     )


-def get_dataset(csv, attr, transform=None):
-    if transform is None:
+def get_dataset(csv, attr, transform=None, mode='train'):
+    if mode == 'train':
+        if transform is None:
+            transform = A.Compose([
+                A.Resize(256, 256),
+                A.HorizontalFlip(p=0.5),
+                A.VerticalFlip(p=0.5),
+                A.Transpose(p=0.5),
+                A.Normalize()
+            ])
+    else:
         transform = A.Compose([
-            A.Resize(256, 256),
-            A.HorizontalFlip(p=0.5),
-            A.Normalize()
-        ])
+            A.Resize(256, 256),
+            A.Normalize()
+        ])

     dataset = TestDataset(csv, attr, transform)
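
Note (illustrative, not part of the commit): taken together, TestDataset now expects an already-loaded DataFrame (self.csv = csv) instead of a CSV path, get_dataset applies flip/transpose augmentations only when mode='train' (any other mode gets plain resize-and-normalize), and load_dataset wraps the result in a DataLoader. A plausible end-to-end path; the downsample choice and the 'test' mode string are assumptions:

import pandas as pd

df = pd.read_csv(args.csv_dir, low_memory=False).reset_index(drop=True)
train_df = dataset_balance(df, args.attr, 'target', downsample=1)
train_loader = load_dataset(get_dataset(train_df, args.attr, mode='train'),
                            batch_size=args.batch_size, shuffle=True)

test_df = pd.read_csv(args.test_csv_dir, low_memory=False)
test_loader = load_dataset(get_dataset(test_df, args.attr, mode='test'),
                           batch_size=args.batch_size)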

