Skip to content

Commit

Permalink
implement quantile regression estimator
Browse files Browse the repository at this point in the history
  • Loading branch information
jbytecode committed Dec 29, 2022
1 parent 92a670c commit 7be933b
Show file tree
Hide file tree
Showing 9 changed files with 181 additions and 4 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# v0.8.16 (Upcoming Release)
# v0.8.17 (Upcoming release)


# v0.8.16
- Quantile Regression implemented


# v0.8.15
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "LinRegOutliers"
uuid = "6d4de0fb-32d9-4c65-aac1-cc9ed8b94b1a"
authors = ["Mehmet Hakan Satman <[email protected]>", "Shreesh Adiga <[email protected]>", "Guillermo Angeris <[email protected]>", "Emre Akadal <[email protected]>"]
version = "0.8.15"
version = "0.8.16"

[deps]
Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ A Julia package for outlier detection in linear regression.
- Satman (2015)
- Setan & Halim & Mohd (2000)
- Least Absolute Deviations (LAD)
- Quantile Regression Parameter Estimation (quantileregression)
- Least Trimmed Absolute Deviations (LTA)
- Hadi (1992)
- Marchette & Solka (2003) Data Images
Expand Down
4 changes: 4 additions & 0 deletions docs/src/algorithms.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ LinRegOutliers.hadi1994
LinRegOutliers.cm97
```

## Quantile Regression
```@docs
LinRegOutliers.quantileregression
```



Expand Down
4 changes: 4 additions & 0 deletions src/LinRegOutliers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ import .Satman2015: satman2015, dominates
include("lad.jl")
import .LAD: lad

# Quantile Regression Estimator
include("quantileregression.jl")
import .QuantileRegression: quantileregression

# Least Trimmed Absolute Deviations estimator
include("lta.jl")
Expand Down Expand Up @@ -216,6 +219,7 @@ export py95, py95SuspectedObservations
export satman2013
export satman2015, dominates
export lad
export quantileregression
export lta
export hadi1992
export hadi1994
Expand Down
2 changes: 1 addition & 1 deletion src/lad.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Perform Least Absolute Deviations regression for a given regression setting.
- `setting::RegressionSetting`: RegressionSetting object with a formula and dataset.
# Description
The LAD estimator searches for regression parameters estimates that minimizes the sum of absolute residuals.
The LAD estimator searches for the regression parameter estimates that minimize the sum of absolute residuals.
The optimization problem is
Min z = u1(-) + u1(+) + u2(-) + u2(+) + .... + un(-) + un(+)
Expand Down
117 changes: 117 additions & 0 deletions src/quantileregression.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
module QuantileRegression

export quantileregression

using JuMP
using GLPK

import ..Basis:
    RegressionSetting, @extractRegressionSetting, designMatrix, responseVector, applyColumns

"""
    quantileregression(setting; tau = 0.5)

Perform Quantile Regression for a given regression setting (multiple linear regression).

# Arguments
- `setting::RegressionSetting`: RegressionSetting object with a formula and dataset.
- `tau::Real`: Quantile level in `[0, 1]`. Default is 0.5.

# Description
The Quantile Regression estimator searches for the regression parameter estimates that
minimize the asymmetrically weighted sum of absolute residuals. For the simple model
`y = beta0 + beta1 * x + u` the optimization problem is

    Min z = (1 - tau) (u1(-) + u2(-) + ... + un(-)) + tau (u1(+) + u2(+) + ... + un(+))

Subject to:

    y_1 - beta0 - beta1 * x_1 + u1(-) - u1(+) = 0
    y_2 - beta0 - beta1 * x_2 + u2(-) - u2(+) = 0
    .
    .
    .
    y_n - beta0 - beta1 * x_n + un(-) - un(+) = 0

where

    ui(-), ui(+) >= 0
    i = 1, 2, ..., n
    beta0, beta1 in R
    n : Number of observations

# Output
- `["betas"]`: Estimated regression coefficients
- `["residuals"]`: Regression residuals
- `["model"]`: Linear Programming Model

# Examples
```julia-repl
julia> reg0001 = createRegressionSetting(@formula(calls ~ year), phones);
julia> quantileregression(reg0001)
```
"""
function quantileregression(setting::RegressionSetting; tau::Real = 0.5)
    X, y = @extractRegressionSetting setting
    return quantileregression(X, y; tau = tau)
end


"""
    quantileregression(X, y; tau = 0.5)

Estimate the parameters of a linear regression with the Quantile Regression estimator
for a given design matrix and response vector.

# Arguments
- `X::AbstractMatrix{<:Real}`: Design matrix of the linear model.
- `y::AbstractVector{<:Real}`: Response vector of the linear model.
- `tau::Real`: Quantile level in `[0, 1]` (keyword argument). Default is 0.5.

# Output
- `["betas"]`: Estimated regression coefficients
- `["residuals"]`: Regression residuals, `y - X * betas`
- `["model"]`: Linear Programming Model

# Throws
- `DimensionMismatch`: if the number of rows of `X` differs from the length of `y`.
- `ArgumentError`: if `tau` is outside `[0, 1]`.

# Examples
```julia-repl
julia> income = [420.157651, 541.411707, 901.157457, 639.080229, 750.875606];
julia> foodexp = [255.839425, 310.958667, 485.680014, 402.997356, 495.560775];
julia> n = length(income)
julia> X = hcat(ones(Float64, n), income)
julia> result = quantileregression(X, foodexp, tau = 0.25)
```
"""
function quantileregression(
    X::AbstractMatrix{<:Real},
    y::AbstractVector{<:Real};
    tau::Real = 0.5,
)
    # The formulation below indexes rows and columns as 1:n and 1:p.
    Base.require_one_based_indexing(X, y)

    n, p = size(X)
    if length(y) != n
        throw(DimensionMismatch("X has $n rows but y has $(length(y)) elements"))
    end
    # A tau outside [0, 1] makes one of the two objective weights negative.
    if !(0 <= tau <= 1)
        throw(ArgumentError("tau must lie in [0, 1], got $tau"))
    end

    m = JuMP.Model(GLPK.Optimizer)

    # d[1:n] are the negative parts u(-) and d[n+1:2n] the positive parts u(+) of the
    # residuals; non-negativity is declared as a variable bound rather than as 2n
    # additional constraint rows.
    JuMP.@variable(m, d[1:(2n)] >= 0)
    JuMP.@variable(m, beta[1:p])

    JuMP.@objective(
        m,
        Min,
        (1 - tau) * sum(d[i] for i = 1:n) + tau * sum(d[n+i] for i = 1:n)
    )

    # y_i - x_i' * beta + u_i(-) - u_i(+) = 0 for every observation.
    for i = 1:n
        JuMP.@constraint(
            m,
            y[i] - sum(X[i, j] * beta[j] for j = 1:p) + d[i] - d[n+i] == 0
        )
    end

    JuMP.optimize!(m)

    betahats = JuMP.value.(beta)
    residuals = y .- X * betahats

    return Dict{String,Any}(
        "betas" => betahats,
        "residuals" => residuals,
        "model" => m,
    )
end

end # end of module QuantileRegression
2 changes: 1 addition & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ using LinearAlgebra
using LinRegOutliers
import Plots: RGB


include("testdiagnostics.jl")
include("testbasis.jl")
include("testols.jl")
Expand All @@ -20,6 +19,7 @@ include("testmvemcd.jl")
include("testbch.jl")
include("testpy95.jl")
include("testlad.jl")
include("testquantileregression.jl")
include("testga.jl")
include("testccf.jl")
include("testsatman2013.jl")
Expand Down
47 changes: 47 additions & 0 deletions test/testquantileregression.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
@testset "Quantile Regression" begin

    # Absolute tolerance for comparing estimates with the expected coefficients.
    # Named `tol` so that `Base.eps` is not shadowed inside the test set.
    tol = 0.001

    # Fixture shared by every quantile level: intercept column plus one regressor.
    income = [420.157651, 541.411707, 901.157457, 639.080229, 750.875606]
    foodexp = [255.839425, 310.958667, 485.680014, 402.997356, 495.560775]

    n = length(income)
    X = hcat(ones(Float64, n), income)

    # (tau, expected intercept, expected slope)
    cases = [
        (0.5, 55.0716060, 0.4778393),
        (0.25, 48.0057823, 0.4856801),
        (0.95, -48.7124077, 0.7248513),
    ]

    @testset "Quantile Regression - q = $tau" for (tau, intercept, slope) in cases
        result = quantileregression(X, foodexp, tau = tau)

        betas = result["betas"]
        @test betas[1] ≈ intercept atol = tol
        @test betas[2] ≈ slope atol = tol

        # One residual per observation is reported alongside the coefficients.
        @test length(result["residuals"]) == n
    end

end

2 comments on commit 7be933b

@jbytecode
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/74773

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.8.16 -m "<description of version>" 7be933b6185aaf1d8d2f26222baaf20e821e4f78
git push origin v0.8.16

Please sign in to comment.