From adf8925fb2f6dde0fb5485bf085348b3c17d1409 Mon Sep 17 00:00:00 2001 From: Mehmet Hakan Satman Date: Thu, 5 Jan 2023 21:26:29 +0300 Subject: [PATCH] update satman(2013) --- CHANGELOG.md | 3 ++- Project.toml | 2 +- src/lts.jl | 1 + src/satman2013.jl | 24 +++++++++++++++++++++--- test/testsatman2013.jl | 2 +- 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b70921..e782c8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Upcoming Release - +# v0.8.19 +- Update Satman(2013) algorithm # v0.8.18 diff --git a/Project.toml b/Project.toml index 31cf92f..612e713 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "LinRegOutliers" uuid = "6d4de0fb-32d9-4c65-aac1-cc9ed8b94b1a" authors = ["Mehmet Hakan Satman ", "Shreesh Adiga <16567adigashreesh@gmail.com>", "Guillermo Angeris ", "Emre Akadal "] -version = "0.8.18" +version = "0.8.19" [deps] Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" diff --git a/src/lts.jl b/src/lts.jl index 4bb06d6..02caf52 100644 --- a/src/lts.jl +++ b/src/lts.jl @@ -1,6 +1,7 @@ module LTS export lts +export iterateCSteps import ..Basis: RegressionSetting, @extractRegressionSetting, designMatrix, responseVector import ..OrdinaryLeastSquares: ols, coef, residuals, predict diff --git a/src/satman2013.jl b/src/satman2013.jl index 08d2dcd..da38cbe 100644 --- a/src/satman2013.jl +++ b/src/satman2013.jl @@ -7,7 +7,7 @@ export satman2013 import ..Basis: RegressionSetting, @extractRegressionSetting, designMatrix, responseVector, applyColumns import ..LTS: iterateCSteps -import ..OrdinaryLeastSquares: ols, coef +import ..OrdinaryLeastSquares: ols, coef, wls, residuals import ..Diagnostics: mahalanobisSquaredMatrix import Distributions: median import LinearAlgebra: diag @@ -30,6 +30,8 @@ stages. # Output - `["outliers"]`: Array of indices of outliers. +- `["betas"]`: Array of estimated regression coefficients. +- `["residuals"]`: Array of residuals. # Examples ```julia-repl @@ -37,7 +39,8 @@ julia> eg0001 = createRegressionSetting(@formula(y ~ x1 + x2 + x3), hbk); julia> satman2013(reg0001) Dict{Any,Any} with 1 entry: "outliers" => [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 47] - + "betas" => ... + "residuals" => ... ``` # References @@ -51,9 +54,13 @@ end function satman2013(X::Array{Float64,2}, y::Array{Float64,1}) + # Sample size and the number of regression parameters n, p = size(X) + + # A lower limit for the number of clean observations h = Int(floor((n + p + 1.0) / 2.0)) + # If the intercept is included, remove it from the data X0 = X p0 = p if X0[:, 1] == ones(n) @@ -63,8 +70,10 @@ function satman2013(X::Array{Float64,2}, y::Array{Float64,1}) allindices = collect(1:n) + # Initial covariance matrix covmat = zeros(p0, p0) + # Construct an estimation of the covariance matrix for i = 1:p0 for j = 1:p0 if i == j @@ -85,11 +94,18 @@ function satman2013(X::Array{Float64,2}, y::Array{Float64,1}) end md = sqrt.(md2) - sorted_indices = sortperm(md) + # Perform Weighted Least Squares using the weights based on Mahalanobis distances + wlsreg = wls(X, y, 1.0 ./ md) + wlsresiduals = residuals(wlsreg) + + # Find best h indices using the residuals obtained from WLS + sorted_indices = sortperm(abs.(wlsresiduals)) best_h_indices = sorted_indices[1:h] + # Iterate C-steps _, bestset = iterateCSteps(X, y, best_h_indices, h) + # Estimate the final regression parameters olsreg = ols(X[bestset, :], y[bestset]) betas = coef(olsreg) resids = y .- (X * betas) @@ -100,6 +116,8 @@ function satman2013(X::Array{Float64,2}, y::Array{Float64,1}) result = Dict() result["outliers"] = outlierset + result["betas"] = betas + result["residuals"] = resids return result end diff --git a/test/testsatman2013.jl b/test/testsatman2013.jl index 7006807..ca07f99 100644 --- a/test/testsatman2013.jl +++ b/test/testsatman2013.jl @@ -3,7 +3,7 @@ reg = createRegressionSetting(@formula(y ~ x1 + x2 + x3), df) result = satman2013(reg) outliers = result["outliers"] - for i = 1:14 + for i = 1:10 @test i in outliers end end