# -*- coding: utf-8 -*-
"""image_augmentation_ben_pc.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1iHvkV1IOUOCNQH2MeZLeq6bX4--PZkXA
"""
# Commented out IPython magic to ensure Python compatibility.
# importing all the required libraries
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import skimage.io as io
from skimage.transform import rotate, AffineTransform, warp
import os
import pandas as pd
import matplotlib.pyplot as plt
# %matplotlib inline
path = '' #path to the data folder containing the scenario image folders (augmented images are saved here too)
#dir_path_0 = r'/content/drive/MyDrive/Hackerthon_data/0_img' #path to the image folder for scenario 0
#dir_path_1 = r'/content/drive/MyDrive/Hackerthon_data/1_img' #path to the image folder for scenario 1
#dir_path_2 = r'/content/drive/MyDrive/Hackerthon_data/2_img' #path to the image folder for scenario 2
# mount Google Drive so the training CSV and the scenario image folders can be read
from google.colab import drive
drive.mount('/content/drive')

df = pd.read_csv('/content/drive/MyDrive/Hackerthon_data/train.csv')
df['File_ID'] = df.example_path.str.extract(r'(\d+)')  # numeric file ID taken from the example path
df['version_no'] = np.zeros([len(df)])                 # augmentation version counter; 0 marks the original image
# split the training data into one dataframe per scenario label
df_0 = df[df['label'] == 0].copy().reset_index()
df_1 = df[df['label'] == 1].copy().reset_index()
df_2 = df[df['label'] == 2].copy().reset_index()
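# quick sanity check (a small addition, not from the original notebook): look at the starting
# class balance that percentage_matcher below is meant to even out
print(df['label'].value_counts(normalize=True))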
def percentage_matcher(n_iter, df, df_0, df_1, df_2, path):
    '''Augment the under-represented scenarios so that the percentage of files in each
    scenario becomes similar (to 2 d.p.).
    input n_iter: number of iterations of the augmentation loop
    input df: dataframe of all training data
    input df_0: dataframe of scenario 0
    input df_1: dataframe of scenario 1
    input df_2: dataframe of scenario 2
    input path: path to the folder holding the scenario image folders and the saved
                augmented images, e.g. /content/drive/MyDrive/Hackerthon_data/
    return lengths: the number of images in each scenario
    return percentages: the percentage of events in each scenario
    return df: dataframe of augmented training data'''
    len_0 = len(df_0)  #(len([entry for entry in os.listdir(dir_path_0) if os.path.isfile(os.path.join(dir_path_0, entry))])) #number of scenario 0 images
    len_1 = len(df_1)  #(len([entry for entry in os.listdir(dir_path_1) if os.path.isfile(os.path.join(dir_path_1, entry))])) #number of scenario 1 images
    len_2 = len(df_2)  #(len([entry for entry in os.listdir(dir_path_2) if os.path.isfile(os.path.join(dir_path_2, entry))])) #number of scenario 2 images
    lengths = np.array([len_0, len_1, len_2])  #put the starting number of files for each scenario into an array
    percentages = lengths/np.sum(lengths)      #starting percentage of images in each of the three scenarios
    folders = np.array(['0_img', '1_img', '2_img'])
    for i in range(n_iter):
        min_length = min(lengths)
        boolarr = lengths == min_length           #identify the scenario(s) with the fewest events
        lengths[boolarr] = lengths[boolarr] + 5   #each pass adds 5 augmented images (3 rotations + 2 flips)
        percentages = lengths/np.sum(lengths)
        scenario = folders[boolarr][0]            #augment the first scenario with the fewest events
        if scenario == '0_img':
            sample_img_no = np.array(df_0.sample())
        elif scenario == '1_img':
            sample_img_no = np.array(df_1.sample())
        elif scenario == '2_img':
            sample_img_no = np.array(df_2.sample())
        image_path = '{h}/{k}/{p}.png'.format(h=path, k=scenario, p=sample_img_no[0][-2])
        img = io.imread(image_path)
        io.imshow(img)
        rotations = [90, 180, 270]
        for j in range(len(rotations)):
            rotated = rotate(img, angle=rotations[j], mode='wrap')
            sample_img_no[0][-1] += 1   #bump the version number for this augmented copy
            sample_img_no_version = sample_img_no[0][1:]
            df_ev = pd.DataFrame([sample_img_no_version], columns=['label','latitude','longitude','year','example_path','File_ID','version_no'])
            df_ev['example_path'] = 'train_test_data/train/{p}_{q}.png'.format(p=df_ev['File_ID'].iloc[-1], q=int(df_ev['version_no'].iloc[-1]))
            df = pd.concat([df, df_ev], ignore_index=True)
            io.imsave('{h}/{k}/{p}_{q}.png'.format(h=path, k=scenario, p=df['File_ID'].iloc[-1], q=int(df['version_no'].iloc[-1])), arr=rotated)
        flipLR = np.fliplr(img)
        sample_img_no[0][-1] += 1
        sample_img_no_version = sample_img_no[0][1:]
        df_ev = pd.DataFrame([sample_img_no_version], columns=['label','latitude','longitude','year','example_path','File_ID','version_no'])
        df_ev['example_path'] = 'train_test_data/train/{p}_{q}.png'.format(p=df_ev['File_ID'].iloc[-1], q=int(df_ev['version_no'].iloc[-1]))
        df = pd.concat([df, df_ev], ignore_index=True)
        io.imsave('{h}/{k}/{p}_{q}.png'.format(h=path, k=scenario, p=df['File_ID'].iloc[-1], q=int(df['version_no'].iloc[-1])), arr=flipLR)
        flipUD = np.flipud(img)
        sample_img_no[0][-1] += 1
        sample_img_no_version = sample_img_no[0][1:]
        df_ev = pd.DataFrame([sample_img_no_version], columns=['label','latitude','longitude','year','example_path','File_ID','version_no'])
        df_ev['example_path'] = 'train_test_data/train/{p}_{q}.png'.format(p=df_ev['File_ID'].iloc[-1], q=int(df_ev['version_no'].iloc[-1]))
        df = pd.concat([df, df_ev], ignore_index=True)
        io.imsave('{h}/{k}/{p}_{q}.png'.format(h=path, k=scenario, p=df['File_ID'].iloc[-1], q=int(df['version_no'].iloc[-1])), arr=flipUD)
    return lengths, percentages, df
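# Illustrative preview (a small addition, not in the original notebook): show the five augmented
# copies (three rotations and two flips) that percentage_matcher writes for each sampled image.
# The example file name below is hypothetical; the block only runs if such a file exists.
_preview_path = '{h}/0_img/1234.png'.format(h=path)
if os.path.isfile(_preview_path):
    _sample = io.imread(_preview_path)
    _previews = [rotate(_sample, angle=a, mode='wrap') for a in (90, 180, 270)]
    _previews += [np.fliplr(_sample), np.flipud(_sample)]
    _fig, _axes = plt.subplots(1, len(_previews), figsize=(15, 3))
    for _ax, _im in zip(_axes, _previews):
        _ax.imshow(_im)
        _ax.axis('off')
    plt.show()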
lengths, percentages, df = percentage_matcher(1, df, df_0, df_1, df_2, path)
df
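# Possible follow-up (an assumption, not in the original script): persist the augmented metadata
# table alongside the images so later training runs can reuse it; the CSV name is hypothetical.
df.to_csv(os.path.join(path, 'train_augmented.csv'), index=False)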