-
Notifications
You must be signed in to change notification settings - Fork 0
/
SOM_Converged.R
241 lines (189 loc) · 9.89 KB
/
SOM_Converged.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
#This is the R Script for loading the data and creating the grid.
library(ggplot2)
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------
# 1) Reading the data and scaling it
data <- read.csv("binary.csv", header = T)
X <- scale(data[, -1])
data <- X
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------
# 2) A function to return the sum of squared distance between x and y.
euclidean_distance <- function(x, y) {
ret <- sqrt(sum((x - y)^2))
return(ret)
}
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------
# 3) Function to create a SOM grid.
# n is the number of neurons.
# p is the number of columns in the original dataframe.
# data table is used for faster computation.
create_grid <- function(x,y,p) {
ret <- matrix(data = rnorm(x * y * p), nrow = x * y, ncol = p)
return(ret)
}
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------
#Creating a 4*4 grid using the function defined above.
set.seed(222)
grid <- create_grid(4,4,3)
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------
#This is the R Script that contains functions to decay radius, learning rate and calculate influence.
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------
# 1) Function to decay the radius exponentially over time.
# rds is the initial radius that is passed.
# cur_iter represents the current iteration.
# time_constant is the time constant that is calculated before the
decay_radius_function <- function(radius, current_iteration, time_constant) {
ret <- radius * exp(-current_iteration / time_constant)
return(ret)
}
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------
# 2) Function to decay the learning rate.
# lr is the current learning rate.
# cur_iter is the current iteration
# n_iteration is the number of iterations.
decay_learning_rate <- function(learning_rate, current_iteration, n_iteration) {
ret <- learning_rate * exp(-current_iteration / n_iteration)
return(ret)
}
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------
# 3) A function to calculate influence over neighboring neurons
#dstnc is the lateral distance.
#rds is the current neighbourhood radius.
influence_calculation <- function(distance, radius) {
ret <- exp(-(distance^2) / (2 * (radius^2)))
return(ret)
}
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------
#R Script to find the winning neuron / Best Matching unit of a SOM grid
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------
# Simple BMU Implementation - Function to return the winning neuron.
# x is a single row of data and input_grid is the grid
BMU <- function(x, input_grid) {
distance <- 0
min_distance <- 10000000 # Setting high min dist value
min_ind <- -1 # Setting random min_ind value
for (e in 1:nrow(input_grid)) # Iterating through grid
{
distance <- euclidean_distance(x, input_grid[e, ]) # euclidean_distance distance
if (distance < min_distance) {
min_distance <- distance # Updating min distance for winning unit
min_ind <- e # Updating winning neuron
}
}
return(min_ind-1) #returns index of BMU
}
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------
#Fastest BMU Implementation using vectorisation.
#x is a single row of data and input_grid is the grid
BMU_Vectorised <- function(x, input_grid) {
dist_mtrx=rowSums(sweep(input_grid,2,x)^2) #Calculating the distance of this row from all the neurons using matrix operations.
min_ind=which.min(dist_mtrx) #Finding the location of the neuron with the minimum distance.
return (min_ind-1) #Returning the zero-indexed value of the winning neuron.
}
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------
#R Script to create a Self Organising Map.
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------
#Importing the Library
library(dplyr)
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------
#Fastest implmentation of a self organising map.
#x is the input and input_grid is the SOM grid that will be updated iteratively.
SOM <- function(x, input_grid) {
breaker <- 0
n_iteration <- nrow(x) # Defining number of iterations
initial_learning_rate <- 0.05 # Defining initial learning rate
initial_radius <- 3 # Defining initial radius
time_constant <- n_iteration / log(initial_radius) # Initializing time constant
lateral_distance_points=expand.grid(1:sqrt(nrow(input_grid)),1:sqrt(nrow(input_grid)))#Initialising physical locations of neurons to figure out lateral distance.
rows=sqrt(nrow(input_grid)) #The square grid is used here - so taking the number of rows as square root of number of entries in the grid.
n_epochs=40 #Defining the number of epochs.
new_radius <- initial_radius
l <- c()
for(ne in 1:n_epochs)
{
extra <- ((ne-1)*400)
for (i in 1:n_iteration) # Looping through for training
{
old_grid=input_grid
curr_i <- extra + i
sample_input_row <- as.vector(unlist(x[sample(1:nrow(x), size = 1, replace = F), ])) # Selecting random input row from given data set
new_radius <- decay_radius_function(initial_radius, curr_i, time_constant) # Decaying radius
new_learning_rate <- decay_learning_rate(initial_learning_rate,curr_i, n_iteration) # Decaying learning rate
index_temp <- BMU_Vectorised(sample_input_row, input_grid) # Finding best matching unit for given input row
index_new=c((as.integer(index_temp/rows)+1),(index_temp%%rows)+1) #Converting a 1D co-ordinate to a 2D co-ordinate for finding lateral distance on the map.
lateral_distance=sqrt(abs(rowSums(sweep(lateral_distance_points,2,index_new)^2))) #Finding Euclidean distance between the given best matching units and all units on the map.
rn=which(lateral_distance<=new_radius) #Finding neurons that are within the radius of the winning unit.
inf=influence_calculation(lateral_distance[rn],new_radius)#Calculating the influence of the winning neuron on neighbours.
if(length(rn)!=1) #Updating multiple rows if neighbourhood is large
{
#Calculating the influence of the winning neuron on neighbours.
diff_grid=(sweep(input_grid[rn,],2,sample_input_row))*-1 #A temporary matrix that stores the difference between the data point and the weights of the winning neuron & neighbours.
updated_weights=new_learning_rate*inf*diff_grid #The updating operation on the winning and neighbouring neurons.
input_grid[rn,]=input_grid[rn,]+updated_weights #Now updating those grid entries that are either the winning neuron or its neighbours.
}
else #Updating only winning neuron.
{
diff_row=(input_grid[rn,]-sample_input_row)*-1 #A temporary matrix that stores the difference between the data point and the weights of the winning neuron & neighbours.
updated_weights=new_learning_rate*inf*diff_row #The updating operation on the winning and neighbouring neurons.
input_grid[rn,]=input_grid[rn,]+updated_weights #Now updating those grid entries that are either the winning neuron or its neighbours.
}
l <- c(l,euclidean_distance(old_grid,input_grid))
if(isTRUE(all.equal(old_grid,input_grid)))
{
print(curr_i)
print("Converged")
breaker <- 1
break
}
}
if(breaker ==1)
{
break
}
}
return(list(input_grid,l)) #Returning the updated SOM weights.
}
y <- SOM(data,grid)
gridSOM <- y[1]
gridSOM
l <- y[2]
t=1:length(l[[1]])
plot(t,l[[1]])
l=unlist(l)
l[which.min(unlist(l))]
#rm(list=ls())
drawGrid<- function(weight,dimension){
# Converting to a matrix
weight<-as.matrix(weight, ncol = ncol(weight))
norm.matrix<-NULL
# Calculation of the norm
for(i in 1:length(weight[,1])){
a<-norm(weight[i,], type = "2")
norm.matrix<-rbind(norm.matrix,a)
}
## Mapping to range 400 to 700
input_start<-min(norm.matrix)
input_end<-max(norm.matrix)
output_start<-400
output_end<-700
## Calculating wavelength based on norm
color<-NULL
for(i in 1:length(norm.matrix)){
input = norm.matrix[i]
output = output_start + ((output_end - output_start) / (input_end - input_start)) * (input - input_start)
color<-rbind(color,output)
}
# Getting the colors (hex values) from the wavelength
color.rgb<-w_length2rgb(color)
# Plotting the grid
dim<-max(dimension)+1
plot(1:dim, type = "n")
for (i in 1:dimension[1]) {
for(j in 1:dimension[2]){
#draw.circle(i*2,j*6, radius =.5, col = color.rgb[i*dimension[1]+j - dimension[1]])
rect(i,j,i+1,j+1, col = color.rgb[i*dimension[1]+j - dimension[1]])
}
}
}
library(photobiology)
gridSOM=matrix(unlist(gridSOM),ncol=3)
drawGrid(gridSOM,c(4,4))