Skip to content

Commit

Permalink
update fine-tune scripts
Browse files (browse the repository at this point in the history)
  • Loading branch information
lukeyf committed Oct 11, 2024
1 parent 2703d84 commit a213697
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 15 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number | Diff line number | Diff line change
Expand Up @@ -172,5 +172,9 @@ selected_1000/
archive/
*.pth
*.csv
*.pdf
*.svg
*.png
*.json
submit_*
config*
39 changes: 31 additions & 8 deletions modelling/dino/evaluate.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,8 +33,9 @@ def evaluate(fold, model_name, target = "", use_checkpoint = False, model_not_na
checkpoint = f'{model_par_dir}{model_name}_{fold}_one_country_best_{imagery_source}{named_target}_.pth'
else:
raise Exception(mode)


print(f"Evaluating {model_name} on fold {fold} with target {target} using checkpoint {use_checkpoint}")
print(f"Evaluating {model_name} on fold {fold} with target {target} using checkpoint {checkpoint}")

if target == '':
eval_target = 'deprived_sev'
Expand All @@ -45,6 +46,7 @@ def evaluate(fold, model_name, target = "", use_checkpoint = False, model_not_na
target_size = 1
else:
target_size = 99


if imagery_source == 'L':
normalization = 30000.
Expand All @@ -55,14 +57,14 @@ def evaluate(fold, model_name, target = "", use_checkpoint = False, model_not_na

data_folder = r'survey_processing/processed_data/'
if mode == 'spatial':
train_df = pd.read_csv(f'{data_folder}train_fold_{fold}.csv', index_col=0)
test_df = pd.read_csv(f'{data_folder}test_fold_{fold}.csv', index_col=0)
train_df = pd.read_csv(f'{data_folder}train_fold_{fold}.csv')
test_df = pd.read_csv(f'{data_folder}test_fold_{fold}.csv')
elif mode == 'temporal':
train_df = pd.read_csv(f'{data_folder}before_2020.csv', index_col=0)
test_df = pd.read_csv(f'{data_folder}after_2020.csv', index_col=0)
train_df = pd.read_csv(f'{data_folder}before_2020.csv')
test_df = pd.read_csv(f'{data_folder}after_2020.csv')
elif mode == 'one_country':
train_df = pd.read_csv(f'{data_folder}train_fold_{fold}.csv', index_col=0)
test_df = pd.read_csv(f'{data_folder}test_fold_{fold}.csv', index_col=0)
train_df = pd.read_csv(f'{data_folder}train_fold_{fold}.csv')
test_df = pd.read_csv(f'{data_folder}test_fold_{fold}.csv')

available_imagery = []
import os
Expand Down Expand Up @@ -159,7 +161,7 @@ def __getitem__(self, idx):
train_dataset = CustomDataset(train_df, transform)
val_dataset = CustomDataset(test_df, transform)

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=4)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=4)
model.to(device)
model.eval()
Expand Down Expand Up @@ -190,6 +192,27 @@ def __getitem__(self, idx):
X_test.append(outputs.cpu()[0].numpy())
y_test.append(targets.cpu()[0].numpy())

# Convert lists to numpy arrays
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

# Convert to pandas DataFrames
df_X_train = pd.DataFrame(X_train)
df_y_train = pd.DataFrame(y_train, columns=['target'])
df_X_test = pd.DataFrame(X_test)
df_y_test = pd.DataFrame(y_test, columns=['target'])

results_folder = f'modelling/dino/results/split_{mode}{imagery_source}_{fold}/'
if not os.path.exists(results_folder):
os.makedirs(results_folder)
# Save to CSV files
df_X_train.to_csv(results_folder+'X_train.csv', index=False)
df_y_train.to_csv(results_folder+'y_train.csv', index=False)
df_X_test.to_csv(results_folder+'X_test.csv', index=False)
df_y_test.to_csv(results_folder+'y_test.csv', index=False)

alphas = np.logspace(-6, 6, 20)
# Define the model and pipeline
ridge_pipeline = Pipeline([
Expand Down
4 changes: 2 additions & 2 deletions modelling/dino/finetune_one_country.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,8 +33,8 @@ def main(country, model_name, target, imagery_path, imagery_source, emb_size, ba

data_folder = r'survey_processing/processed_data'

train_df = pd.read_csv(f'{data_folder}/train_fold_{country}.csv', index_col=0)
test_df = pd.read_csv(f'{data_folder}/test_fold_{country}.csv', index_col=0)
train_df = pd.read_csv(f'{data_folder}/train_fold_{country}.csv')
test_df = pd.read_csv(f'{data_folder}/test_fold_{country}.csv')

available_imagery = []
for d in os.listdir(imagery_path):
Expand Down
10 changes: 5 additions & 5 deletions modelling/dino/finetune_spatial.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -31,8 +31,8 @@ def main(fold, model_name, target, imagery_path, imagery_source, emb_size, batch
imagery_size = img_size
data_folder = r'survey_processing/processed_data'

train_df = pd.read_csv(f'{data_folder}/train_fold_{fold}.csv', index_col=0)
test_df = pd.read_csv(f'{data_folder}/test_fold_{fold}.csv', index_col=0)
train_df = pd.read_csv(f'{data_folder}/train_fold_{fold}.csv')
test_df = pd.read_csv(f'{data_folder}/test_fold_{fold}.csv')

available_imagery = []
for d in os.listdir(imagery_path):
Expand Down Expand Up @@ -74,6 +74,7 @@ def filter_contains(query):
train_df = train_df.dropna(subset=filtered_predict_target)
predict_target = sorted(filtered_predict_target)

print(train_df.shape)
def load_and_preprocess_image(path, grouped_bands=[4,3,2]):
with rasterio.open(path) as src:
b1 = src.read(grouped_bands[0])
Expand Down Expand Up @@ -104,8 +105,7 @@ def set_seed(seed):
# Set your desired seed
seed = 42
set_seed(seed)

train, validation = train_test_split(train_df, test_size=0.2, random_state=42)
train, validation = train_test_split(train_df, test_size=0.2, random_state=seed)

class CustomDataset(Dataset):
def __init__(self, dataframe, transform):
Expand Down Expand Up @@ -167,7 +167,7 @@ def forward(self, pixel_values):
if os.path.exists(last_model):
last_state_dict = torch.load(last_model)
best_error = torch.load(best_model)['loss']
epoch_ran = last_state_dict['epoch']
epochs_ran = last_state_dict['epoch']
model.load_state_dict(last_state_dict['model_state_dict'])
print('Found existing model')
else:
Expand Down

0 comments on commit a213697

Please sign in to comment.