Merge branch 'voted-perceptron'

lalvim · Nov 26, 2017 · 0c16156 · 0c16156
2 parents 80d8bc4 + bcdf4b1
commit 0c16156
Show file tree

Hide file tree

Showing 11 changed files with 286 additions and 45 deletions.
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 Perceptrons.jl
 ======
 
-A package with several types of Perceptron classifiers. Perceptrons are fast classifiers and can be used even for big data. Up to now, this package contains a linear perceptron and a Kernel perceptron for binary classification problems. This project will have the following perceptron classifiers: Multiclass, Kernel, Structured, Voted, Average and Sparse. Some state-of-the-art must be included after these.
+A package with several types of Perceptron classifiers. Perceptrons are fast classifiers and can be used even for big data. So far, this package contains a linear perceptron, a voted perceptron and a kernel perceptron for binary classification problems. This project will have the following perceptron classifiers: Multiclass, Kernel, Structured, Voted, Average and Sparse. Some state-of-the-art variants will be included after these.
 
 [![Build Status](https://travis-ci.org/lalvim/Perceptrons.jl.svg?branch=master)](https://travis-ci.org/lalvim/Perceptrons.jl)
 
@@ -34,15 +34,22 @@ Examples
 
     println("[Perceptron] accuracy : $(acc(Y_train,Y_pred))")
 
+    # training a voted perceptron
+    model   = Perceptrons.fit(X_train,Y_train,centralize=true,mode="voted")
+    Y_pred  = Perceptrons.predict(model,X_test)
+
+    println("[Voted Perceptron] accuracy : $(acc(Y_train,Y_pred))")
+
+
     # training a kernel perceptron (XOR)
     X_train = [1.0 1.0; 0.0 1.0; 1.0 0.0; 0.0 0.0]
     Y_train = [0.0 ; 1.0; 1.0; 0.0]
-    X_test  = X .+ .03 # adding noise
+    X_test  = X_train .+ .03 # adding noise
 
-    model   = Perceptrons.fit(X_train,Y_train,centralize=true,kernel="rbf",width=.01)
+    model   = Perceptrons.fit(X_train,Y_train,centralize=true,mode="kernel",kernel="rbf",width=.01)
     Y_pred  = Perceptrons.predict(model,X_test)
 
-    println("[Perceptron] accuracy : $(acc(Y_train,Y_pred))")
+    println("[Kernel Perceptron] accuracy : $(acc(Y_train,Y_pred))")
 
 
     # if you want to save your model
@@ -59,7 +66,6 @@ What is Implemented
 
 What is Upcoming
 =======
-* Kernel Perceptron
 * Multiclass Perceptron
 * Voted Perceptron
 * Average Perceptron

diff --git a/experiments/readme_example.jl b/experiments/readme_example.jl
@@ -10,15 +10,22 @@ Y_pred         = Perceptrons.predict(model,X_test)
 
 println("[Perceptron] accuracy : $(acc(Y_train,Y_pred))")
 
+# training a voted perceptron
+model   = Perceptrons.fit(X_train,Y_train,centralize=true,mode="voted")
+Y_pred  = Perceptrons.predict(model,X_test)
+
+println("[Voted Perceptron] accuracy : $(acc(Y_train,Y_pred))")
+
+
 # training a kernel perceptron (XOR)
 X_train = [1.0 1.0; 0.0 1.0; 1.0 0.0; 0.0 0.0]
 Y_train = [0.0 ; 1.0; 1.0; 0.0]
-X_test  = X .+ .03 # adding noise
+X_test  = X_train .+ .03 # adding noise
 
-model   = Perceptrons.fit(X_train,Y_train,centralize=true,kernel="rbf",width=.01)
+model   = Perceptrons.fit(X_train,Y_train,centralize=true,mode="kernel",kernel="rbf",width=.01)
 Y_pred  = Perceptrons.predict(model,X_test)
 
-println("[Perceptron] accuracy : $(acc(Y_train,Y_pred))")
+println("[Kernel Perceptron] accuracy : $(acc(Y_train,Y_pred))")
 
 
 # if you want to save your model

diff --git a/src/Perceptrons.jl b/src/Perceptrons.jl
@@ -6,11 +6,13 @@ include("utils.jl")
 include("types.jl")
 include("linear_perceptron.jl")
 include("kernel_perceptron.jl")
+include("voted_perceptron.jl")
+
 
 
 
 """
-    fit(X::Matrix{:<AbstractFloat},Y::AbstractArray{:<AbstractFloat}; copydata::Bool=true, centralize::Bool=true, kernel="linear", width=1.0, alpha=1.0e-2, shuffle_epoch = true, random_state = true, max_epochs = 5 )
+    fit(X::Matrix{<:AbstractFloat},Y::AbstractArray{<:AbstractFloat}; copydata::Bool=true, centralize::Bool=true, kernel="linear", width=1.0, alpha=1.0e-2, shuffle_epoch = true, random_state = 42, max_epochs = 50, mode = "linear" )
 
 Perceptron algorithm.
 
@@ -23,6 +25,7 @@ Perceptron algorithm.
 - `shuffle_epoch::Bool = true`: Shuffle dataset for each epoch. Improves convergence.
 - `random_state::Int = 42`: Use a seed to force the same results through the same dataset.
 - `max_epochs::Int = 50`: Maximum epochs.
+- `mode::String = "linear"`: modes are "linear", "kernel" or "voted" perceptron.
 """
 function fit{T<:AbstractFloat}(X::AbstractArray{T},
                                Y::AbstractArray{T};
@@ -33,13 +36,14 @@ function fit{T<:AbstractFloat}(X::AbstractArray{T},
                                alpha::AbstractFloat   = 1.0e-2,
                                shuffle_epoch::Bool    = true,
                                random_state::Int      = 42,
-                               max_epochs::Int        = 50
+                               max_epochs::Int        = 50,
+                               mode                   = "linear"
                                )
     X = X[:,:]
     check_constant_cols(X)
     check_constant_cols(Y)
 
-    check_params(kernel)
+    check_params(kernel,mode)
 
     check_data(X, Y)
 
@@ -54,7 +58,8 @@ function fit{T<:AbstractFloat}(X::AbstractArray{T},
                   max_epochs,
                   centralize,
                   kernel,
-                  width)
+                  width,
+                  mode)
 
     Xi =  (centralize ? centralize_data(Xi,model.mx,model.sx) : Xi)
     model.centralize  = (centralize ? true: false)

diff --git a/src/kernel_perceptron.jl b/src/kernel_perceptron.jl
@@ -38,18 +38,8 @@ function ΦΦ{T<:AbstractFloat}(X::AbstractArray{T},
     K
 end
 
-@inline function ∑(λ,y,n,K)
-    #    sum = .0
-    #    for i=1:n
-    #        sum += λ[i]*y[i]*K[i,j]
-    #    end
-    #    return sum
-    return sum(λ .* y .* K)
-end
+# Sum of the elementwise products λ .* y .* K.
+# `n` is accepted for call compatibility but is not used in the computation.
+@inline function ∑(λ,y,n,K)
+    weighted = λ .* y .* K
+    return sum(weighted)
+end
 
-@inline function sign(val)
-    return  (val >=0 ? 1.0: -1.0 )
-end
 
 function trainer{T<:AbstractFloat}(model::KernelPerceptron{T},
 	                              X::AbstractArray{T},

diff --git a/src/linear_perceptron.jl b/src/linear_perceptron.jl
@@ -1,7 +1,6 @@
+# Hypothesis used by the linear perceptron: the activation Θ'x passed
+# through the `sinal` threshold.
+@inline function h(Θ,x)
+    activation = Θ'*x
+    return sinal(activation)
+end
 
-@inline function h(Θ,x)
-           return  (Θ'*x >=0 ? 1.0: 0.0 )
-end
 
 function trainer{T<:AbstractFloat}(model::LinearPerceptron{T},
 	                              X::AbstractArray{T},

diff --git a/src/types.jl b/src/types.jl
@@ -40,6 +40,48 @@ function LinearPerceptron{T<:AbstractFloat}(X::AbstractArray{T},
                            size(X,2))
 
 end
+####################################################################################
+
+#### Voted Perceptron type
+# Model state for the voted perceptron. The outer constructor fills in the
+# hyper-parameters and input statistics; `trainer` later stores Θ, c, k and history.
+mutable struct VotedPerceptron{T<:AbstractFloat} <: PerceptronModel{T}
+   α::T                   # learning rate used in each weight update
+   Θ#::Dict{Integer,Vector{T}}  (untyped: `nothing` until trained, then a Dict of weight vectors keyed 1..k)
+   c#::Dict{Integer,Integer}    (untyped: `nothing` until trained, then a Dict of per-vector vote weights)
+   k::Integer             # index of the last weight vector created by training (0 before training)
+   shuffle_epoch::Bool    # reshuffle the samples at the start of every epoch
+   random_state::Integer  # RNG seed; the trainer skips seeding when this is -1
+   max_epochs::Integer    # number of passes over the training data
+   last_epoch::Integer    # set to 0 by the constructor; the voted trainer does not update it
+   history::Vector{Integer} # number of misclassifications recorded for each epoch
+   mx::Matrix{T}          # column means of the input data (X), used for z-scoring
+   sx::Matrix{T}          # column standard deviations of the input data (X), used for z-scoring
+   centralize::Bool       # whether inputs are z-scored with mx/sx
+   nfeatures::Integer     # number of columns of the training matrix
+end
+
+# Outer constructor: builds an untrained VotedPerceptron from the training
+# matrix and the hyper-parameters. Θ and c start as `nothing`, k and
+# last_epoch start at 0; mx/sx are the column-wise mean and standard
+# deviation of X, and nfeatures is its number of columns.
+function VotedPerceptron{T<:AbstractFloat}(X::AbstractArray{T},
+                          alpha,
+                          shuffle_epoch,
+                          random_state,
+                          max_epochs,
+                          centralize)
+
+   history   = Vector{Integer}(1)   # placeholder, replaced by the trainer
+   col_mean  = mean(X,1)
+   col_std   = std(X,1)
+   nfeatures = size(X,2)
+
+   return VotedPerceptron(alpha, nothing, nothing, 0,
+                          shuffle_epoch, random_state, max_epochs, 0,
+                          history, col_mean, col_std,
+                          centralize, nfeatures)
+end
+
 
 ####################################################################################
 
@@ -90,23 +132,32 @@ function Model{T<:AbstractFloat}(X::AbstractArray{T},
                max_epochs,
                centralize,
                kernel,
-               width)
+               width,
+               mode)
 
-      if kernel == "linear"
+      if mode == "linear"
          return LinearPerceptron(X,
                                  alpha,
                                  shuffle_epoch,
                                  random_state,
                                  max_epochs,
                                  centralize)
-      elseif kernel == "rbf"
+      elseif mode == "kernel"
          return KernelPerceptron(X,
                                  max_epochs,
                                  centralize,
                                  kernel,
                                  width)
+      elseif mode == "voted"
+      return VotedPerceptron(X,
+                           alpha,
+                           shuffle_epoch,
+                           random_state,
+                           max_epochs,
+                           centralize)
+
       else
-         error("Invalid Kernel name: $(kernel)")
+         error("Invalid perceptron mode name: $(mode). \n Cadidates are: linear, kernel or voted")
       end
 end
 

diff --git a/src/utils.jl b/src/utils.jl
@@ -3,9 +3,13 @@
 ## Auxiliary functions
 export acc
 
-function acc(yt,yp)
-      count(x->x==true, yt .== yp)/length(yt)
-end
+# Accuracy: fraction of positions where `yt` and `yp` are equal,
+# relative to the length of `yt`.
+@inline function acc(yt,yp)
+    hits = count(identity, yt .== yp)
+    return hits/length(yt)
+end
+
+# 0/1 threshold: 1.0 when x is non-negative, 0.0 otherwise.
+# Used by the linear and voted perceptrons.
+@inline sinal(x) = ifelse(x >= 0, 1.0, 0.0)
+# ±1 threshold: 1.0 when val is non-negative (including zero), -1.0 otherwise.
+# Used by the kernel perceptron and by `vote` in the voted perceptron.
+@inline sign(val) = ifelse(val >= 0, 1.0, -1.0)
+
 
 ## checks PLS input data and params
 function check_data{T<:AbstractFloat}(X::Matrix{T},Y::Union{Vector{T},Matrix{T}})
@@ -25,8 +29,9 @@ function check_data{T<:AbstractFloat}(X::Matrix{T},nfeatures::Int)
 end
 
 
-function check_params(kernel::AbstractString)
-    kernel == "rbf" || kernel == "linear" || error("kernel must be kernel='linear' or kernel='rbf'")
+# Validates the `kernel` and `mode` options of `fit`.
+# Raises an error naming the allowed values when either one is not
+# recognised; returns `true` when both are valid.
+function check_params(kernel::AbstractString,mode::AbstractString)
+    if !(kernel in ("rbf","linear"))
+        error("kernel must be 'linear' or 'rbf'")
+    end
+    if !(mode in ("kernel","linear","voted"))
+        error("mode must be 'linear' or 'kernel' or 'voted'")
+    end
+    return true
 end
 
 ## checks constant columns

diff --git a/src/voted_perceptron.jl b/src/voted_perceptron.jl
@@ -0,0 +1,83 @@
+
+
+
+# Weighted vote of the first `k` weight vectors for one sample.
+#
+# Θ : collection of weight vectors indexed 1..k
+# x : sample (with the bias term already appended by the caller)
+# c : collection of vote weights indexed 1..k
+# k : number of weight vectors taking part in the vote
+#
+# Each vector contributes c[j] times the ±1.0 result of the `sign` helper
+# defined in utils.jl. Returns the accumulated Float64 score.
+@inline function vote(Θ,x,c,k)
+
+   # Float accumulator: each term is c[j] * ±1.0, so starting from an Int 0
+   # would change the variable's type on the first iteration.
+   s = 0.0
+   for j=1:k
+       s += c[j]*sign(Θ[j]'*x) # voting (+1 or -1 * c[j] weight)
+   end
+   return s
+end
+
+# Trains a VotedPerceptron in place.
+#
+# model : supplies shuffle_epoch, random_state, max_epochs and the learning rate α
+# X     : training samples, one per row; a column of ones is appended as bias
+# Y     : targets, compared against the 0.0/1.0 output of `sinal`
+#
+# Always runs `max_epochs` passes (there is no early stop). Whenever the
+# current weight vector misclassifies a sample, a new vector is created from
+# it; otherwise the current vector's vote weight is incremented. On exit the
+# weight vectors (Θ), their vote weights (c), the index of the last vector (k)
+# and the per-epoch error counts (history) are stored on `model`. A warning is
+# emitted when the last epoch still had errors.
+function trainer{T<:AbstractFloat}(model::VotedPerceptron{T},
+                                   X::AbstractArray{T},
+                                   Y::Vector{T})
+
+   shuffle_epoch = model.shuffle_epoch
+   random_state  = model.random_state
+   max_epochs    = model.max_epochs
+
+   # random_state == -1 leaves the global RNG unseeded
+   if random_state != -1
+      srand(random_state)
+   end
+
+   n,m     = size(X)
+   X       = hcat(X,ones(n,1)) # adding bias
+   history = Int[]
+   nerrors = Inf
+   epochs  = 0
+   k,Θ,c,α = 1,Dict(1=>rand(m+1)),Dict(1=>0),model.α
+
+   # Default to the data in its given order so the sample loop also works
+   # when shuffle_epoch is false (x and y used to be undefined in that case).
+   x,y     = X,Y
+
+   while epochs < max_epochs
+       # shuffle dataset
+       if shuffle_epoch
+          sind = shuffle(1:n)
+          x = X[sind,:]
+          y = Y[sind]
+       end
+       nerrors = 0
+       # weight updates for all samples
+       for i=1:n
+          xi = x[i,:]
+          ξ  = sinal(Θ[k]'*xi) - y[i]
+          if ξ==0
+             # current vector survived one more sample
+             c[k] += 1
+          else
+             # mistake: start a new vector from the corrected weights
+             nerrors += 1
+             c[k+1]   = 1
+             Θ[k+1]   = Θ[k] - α * ξ * xi
+             k       += 1
+          end
+       end
+       epochs += 1
+       push!(history,nerrors)
+   end
+   if nerrors > 0
+      warn("Perceptron: Not converged. Max epochs $(max_epochs) reached. Error history: $(history) \n Try to increase max_epochs or may be you have a non linear problem.")
+   end
+   model.Θ = Θ
+   model.c = c
+   model.k = k
+   model.history = history
+end
+
+# Predicts a label for every row of X with a trained VotedPerceptron.
+#
+# A column of ones is appended to X as bias, then each row's weighted vote
+# (see `vote`) is thresholded with `sinal`, giving 1.0 or 0.0 per sample.
+# Returns a Vector{Real} with one entry per row of X.
+function predictor{T<:AbstractFloat}(model::VotedPerceptron{T},
+                                     X::AbstractArray{T})
+
+   weights, counts, nvec = model.Θ, model.c, model.k
+
+   nrows = size(X,1)
+   Xb    = hcat(X,ones(nrows,1)) # adding bias
+
+   return Real[sinal(vote(weights,Xb[row,:],counts,nvec)) for row=1:nrows]
+end