Skip to content

Commit

Permalink
linting
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb committed Jul 13, 2020
1 parent c92f4ff commit 0cd56ec
Showing 1 changed file with 74 additions and 132 deletions.
206 changes: 74 additions & 132 deletions R-package/demo/leaf_stability.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,87 @@

library(lightgbm)

# Second, we load our data
# define helper functions for creating plots

# 10-color diverging palette running from dark red to dark green, used to color points by bin.
# Output of `RColorBrewer::brewer.pal(10, "RdYlGn")`, hardcoded here to avoid a dependency.
# The plotting helpers below index it with `binned + 1L`, so a bin value of 0 selects the first color.
.diverging_palette <- c(
"#A50026", "#D73027", "#F46D43", "#FDAE61", "#FEE08B"
, "#D9EF8B", "#A6D96A", "#66BD63", "#1A9850", "#006837"
)

# Scatter plot of `df$Y` (axis label "Prediction Probability") against `df$X`
# (axis label "Leaf Bin"), with every point colored by its bin in `df$binned`.
# A legend in the top-right corner lists the bins present in `df` and their colors.
#
# df: data.frame with columns `X`, `Y` and `binned`; `binned + 1L` is used as an
#     index into `.diverging_palette`
.prediction_depth_plot <- function(df) {
    # resolve colors up front so the plot() and legend() calls stay short
    point_colors <- .diverging_palette[df$binned + 1L]
    legend_bins <- sort(unique(df$binned))
    legend_colors <- .diverging_palette[sort(unique(df$binned + 1L))]

    plot(
        x = df$X
        , y = df$Y
        , type = "p"
        , pch = 19L
        , col = point_colors
        , main = "Prediction Depth"
        , xlab = "Leaf Bin"
        , ylab = "Prediction Probability"
    )
    legend(
        x = "topright"
        , legend = legend_bins
        , title = "bin"
        , col = legend_colors
        , pch = 19L
        , cex = 0.7
    )
}

# Plot `df$Z` (axis label "Logloss") against the bin index `df$binned`, one point
# per row, colored by bin. The x axis is fixed to the range 0-9 and a legend in the
# top-right corner lists the bins present in `df`.
#
# df: data.frame with columns `Z` and `binned`; `binned + 1L` is used as an
#     index into `.diverging_palette`
.prediction_depth_spread_plot <- function(df) {
    # resolve colors up front so the plot() and legend() calls stay short
    point_colors <- .diverging_palette[df$binned + 1L]
    legend_bins <- sort(unique(df$binned))
    legend_colors <- .diverging_palette[sort(unique(df$binned + 1L))]

    plot(
        x = df$binned
        , y = df$Z
        , xlim = c(0L, 9L)
        , type = "p"
        , pch = 19L
        , col = point_colors
        , main = "Prediction Depth Spread"
        , xlab = "Leaf Bin"
        , ylab = "Logloss"
    )
    legend(
        x = "topright"
        , legend = legend_bins
        , title = "bin"
        , col = legend_colors
        , pch = 19L
        , cex = 0.7
    )
}

# Plot a kernel density estimate of `df$Y` (axis label "Prediction Probability")
# separately for every bin in `df$binned`, drawing one colored curve per bin.
#
# A single density of all of `df$Y` cannot be colored by bin: density() evaluates
# the estimate on its own grid of points, so a per-row color vector would be paired
# with grid points purely by position. Estimating one density per bin gives each
# curve a color that actually identifies its bin.
#
# All bins share one bandwidth (the default `bw.nrd0` choice for the full data) and
# each curve is scaled by the share of rows in its bin, so the curves are directly
# comparable and add up to the overall density of `df$Y`.
#
# df: data.frame with numeric columns `Y` and `binned`; `binned + 1L` is used as an
#     index into `.diverging_palette`
.depth_density_plot <- function(df) {
    bins <- sort(unique(df$binned))
    y_range <- c(min(df$Y), max(df$Y))
    shared_bw <- bw.nrd0(df$Y)

    bin_densities <- lapply(
        X = bins
        , FUN = function(bin) {
            # which() drops rows whose bin is NA instead of propagating NA indices
            rows_in_bin <- which(df$binned == bin)
            dens <- density(
                x = df$Y[rows_in_bin]
                , bw = shared_bw
                , from = y_range[1L]
                , to = y_range[2L]
            )
            # weight by bin share so that large and small bins are comparable
            dens$y <- dens$y * length(rows_in_bin) / nrow(df)
            dens
        }
    )
    y_max <- max(vapply(
        X = bin_densities
        , FUN = function(dens) max(dens$y)
        , FUN.VALUE = numeric(1L)
    ))

    # empty frame sized to hold every curve, then one layer of points per bin
    plot(
        x = y_range
        , y = c(0.0, y_max)
        , xlim = y_range
        , type = "n"
        , main = "Depth Density"
        , xlab = "Prediction Probability"
        , ylab = "Bin Density"
    )
    for (i in seq_along(bins)) {
        points(
            x = bin_densities[[i]]$x
            , y = bin_densities[[i]]$y
            , pch = 19L
            , col = .diverging_palette[bins[i] + 1L]
        )
    }
    legend(
        "topright"
        , title = "bin"
        , legend = bins
        , pch = 19L
        , col = .diverging_palette[bins + 1L]
        , cex = 0.7
    )
}

# load the agaricus train / test data shipped with lightgbm
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")
train <- agaricus.train
test <- agaricus.test

# training Dataset, plus a validation Dataset created from it
dtrain <- lgb.Dataset(data = train$data, label = train$label)
dtest <- lgb.Dataset.create.valid(dataset = dtrain, data = test$data, label = test$label)

# Third, we set up parameters and train a model
# set up parameters and train a model
params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(
Expand Down Expand Up @@ -64,105 +136,9 @@ table(new_data$binned)
# We can plot the binned content
# On the second plot, we clearly notice the lower the bin (the lower the leaf value), the higher the loss
# On the third plot, it is smooth!
# 10-color diverging palette running from dark red to dark green; the plotting
# helpers index it with `binned + 1`, so a bin value of 0 selects the first color
.diverging_palette <- c(
"#A50026", "#D73027", "#F46D43", "#FDAE61", "#FEE08B"
, "#D9EF8B", "#A6D96A", "#66BD63", "#1A9850", "#006837"
)

# Scatter plot of `df$Y` (axis label "Prediction Probability") against `df$X`
# (axis label "Leaf Bin"), with every point colored by its bin in `df$binned`.
# A legend in the top-right corner lists the bins present in `df` and their colors.
#
# df: data.frame with columns `X`, `Y` and `binned`; `binned + 1L` is used as an
#     index into `.diverging_palette`
.prediction_depth_plot <- function(df) {
    # resolve colors up front so the plot() and legend() calls stay short
    point_colors <- .diverging_palette[df$binned + 1L]
    legend_bins <- sort(unique(df$binned))
    legend_colors <- .diverging_palette[sort(unique(df$binned + 1L))]

    plot(
        x = df$X
        , y = df$Y
        , type = "p"
        , pch = 19L
        , col = point_colors
        , main = "Prediction Depth"
        , xlab = "Leaf Bin"
        , ylab = "Prediction Probability"
    )
    legend(
        x = "topright"
        , legend = legend_bins
        , title = "bin"
        , col = legend_colors
        , pch = 19L
        , cex = 0.7
    )
}


# first plot: Y against X for the first model, colored by bin (base graphics helper)
.prediction_depth_plot(df = new_data)

# the same view drawn with ggplot2: points of Y against X, colored by `binned`
# NOTE(review): ggplot2 is not attached in the visible part of this script - confirm it is loaded
ggplot(
data = new_data
, mapping = aes(x = X, y = Y, color = binned)
) + geom_point() +
theme_bw() +
labs(title = "Prediction Depth", x = "Leaf Bin", y = "Prediction Probability")


# Plot `df$Z` (axis label "Logloss") against the bin index `df$binned`, one point
# per row, colored by bin. The x axis is fixed to the range 0-9 and a legend in the
# top-right corner lists the bins present in `df`.
#
# df: data.frame with columns `Z` and `binned`; `binned + 1L` is used as an
#     index into `.diverging_palette`
.prediction_depth_spread_plot <- function(df) {
    # resolve colors up front so the plot() and legend() calls stay short
    point_colors <- .diverging_palette[df$binned + 1L]
    legend_bins <- sort(unique(df$binned))
    legend_colors <- .diverging_palette[sort(unique(df$binned + 1L))]

    plot(
        x = df$binned
        , y = df$Z
        , xlim = c(0L, 9L)
        , type = "p"
        , pch = 19L
        , col = point_colors
        , main = "Prediction Depth Spread"
        , xlab = "Leaf Bin"
        , ylab = "Logloss"
    )
    legend(
        x = "topright"
        , legend = legend_bins
        , title = "bin"
        , col = legend_colors
        , pch = 19L
        , cex = 0.7
    )
}


# second plot: Z against bin for the first model (base graphics helper)
.prediction_depth_spread_plot(df = new_data)
# the same data drawn with ggplot2 as one boxplot of Z per bin, filled by `binned`
# NOTE(review): ggplot2 is not attached in the visible part of this script - confirm it is loaded
ggplot(
data = new_data
, mapping = aes(x = binned, y = Z, fill = binned, group = binned)
) + geom_boxplot() +
theme_bw() +
labs(title = "Prediction Depth Spread", x = "Leaf Bin", y = "Logloss")

# Plot a kernel density estimate of `df$Y` (axis label "Prediction Probability")
# separately for every bin in `df$binned`, drawing one colored curve per bin.
#
# A single density of all of `df$Y` cannot be colored by bin: density() evaluates
# the estimate on its own grid of points, so a per-row color vector would be paired
# with grid points purely by position. Estimating one density per bin gives each
# curve a color that actually identifies its bin.
#
# All bins share one bandwidth (the default `bw.nrd0` choice for the full data) and
# each curve is scaled by the share of rows in its bin, so the curves are directly
# comparable and add up to the overall density of `df$Y`.
#
# df: data.frame with numeric columns `Y` and `binned`; `binned + 1L` is used as an
#     index into `.diverging_palette`
.depth_density_plot <- function(df) {
    bins <- sort(unique(df$binned))
    y_range <- c(min(df$Y), max(df$Y))
    shared_bw <- bw.nrd0(df$Y)

    bin_densities <- lapply(
        X = bins
        , FUN = function(bin) {
            # which() drops rows whose bin is NA instead of propagating NA indices
            rows_in_bin <- which(df$binned == bin)
            dens <- density(
                x = df$Y[rows_in_bin]
                , bw = shared_bw
                , from = y_range[1L]
                , to = y_range[2L]
            )
            # weight by bin share so that large and small bins are comparable
            dens$y <- dens$y * length(rows_in_bin) / nrow(df)
            dens
        }
    )
    y_max <- max(vapply(
        X = bin_densities
        , FUN = function(dens) max(dens$y)
        , FUN.VALUE = numeric(1L)
    ))

    # empty frame sized to hold every curve, then one layer of points per bin
    plot(
        x = y_range
        , y = c(0.0, y_max)
        , xlim = y_range
        , type = "n"
        , main = "Depth Density"
        , xlab = "Prediction Probability"
        , ylab = "Bin Density"
    )
    for (i in seq_along(bins)) {
        points(
            x = bin_densities[[i]]$x
            , y = bin_densities[[i]]$y
            , pch = 19L
            , col = .diverging_palette[bins[i] + 1L]
        )
    }
    legend(
        "topright"
        , title = "bin"
        , legend = bins
        , pch = 19L
        , pt.cex = 0.1
        , col = .diverging_palette[bins + 1L]
        , cex = 0.7
    )
}


# third plot: density of Y for the first model (base graphics helper)
.depth_density_plot(df = new_data)
# ggplot2 version: density of Y filled by `binned`, with the computed count as the
# y aesthetic and position = "fill" so the stacked bins fill the full height
# NOTE(review): ggplot2 is not attached in the visible part of this script - confirm it is loaded
ggplot(
data = new_data
, mapping = aes(x = Y, y = ..count.., fill = binned)
) + geom_density(position = "fill") +
theme_bw() +
labs(title = "Depth Density", x = "Prediction Probability", y = "Bin Density")


# Now, let's show with other parameters
model2 <- lgb.train(
Expand Down Expand Up @@ -213,29 +189,8 @@ table(new_data2$binned)
# real thus it is not an issue
# However, if the rules were not true, the loss would explode.
# the same three plots as for the first model, now for `new_data2`;
# each base graphics helper call is followed by a ggplot2 rendering of the same data
# NOTE(review): ggplot2 is not attached in the visible part of this script - confirm it is loaded
.prediction_depth_plot(df = new_data2)
# points of Y against X, colored by `binned`
ggplot(
data = new_data2
, mapping = aes(x = X, y = Y, color = binned)
) + geom_point() +
theme_bw() +
labs(title = "Prediction Depth", x = "Leaf Bin", y = "Prediction Probability")

.prediction_depth_spread_plot(df = new_data2)
# one boxplot of Z per bin, filled by `binned`
ggplot(
data = new_data2
, mapping = aes(x = binned, y = Z, fill = binned, group = binned)
) + geom_boxplot() +
theme_bw() +
labs(title = "Prediction Depth Spread", x = "Leaf Bin", y = "Logloss")

.depth_density_plot(df = new_data2)
# density of Y filled by `binned`; position = "fill" stacks the bins to the full height
ggplot(
data = new_data2
, mapping = aes(x = Y, y = ..count.., fill = binned)
) + geom_density(position = "fill") +
theme_bw() +
labs(title = "Depth Density", x = "Prediction Probability", y = "Bin Density")


# Now, try with very severe overfitting
model3 <- lgb.train(
Expand Down Expand Up @@ -285,19 +240,6 @@ table(new_data3$binned)
# are real thus it is not an issue.
# However, if the rules were not true, the loss would explode. See the sudden spikes?
# density plot for `new_data3`, first with the base graphics helper, then with ggplot2
# NOTE(review): ggplot2 is not attached in the visible part of this script - confirm it is loaded
.depth_density_plot(df = new_data3)
# density of Y filled by `binned`; position = "fill" stacks the bins to the full height
ggplot(
data = new_data3
, mapping = aes(x = Y, y = ..count.., fill = binned)
) +
geom_density(position = "fill") +
theme_bw() +
labs(title = "Depth Density", x = "Prediction Probability", y = "Bin Density")

# Compare with our second model, the difference is severe. This is smooth.
.depth_density_plot(df = new_data2)
# ggplot2 rendering of the same density for `new_data2`, for side-by-side comparison
ggplot(
data = new_data2
, mapping = aes(x = Y, y = ..count.., fill = binned)
) + geom_density(position = "fill") +
theme_bw() +
labs(title = "Depth Density", x = "Prediction Probability", y = "Bin Density")

0 comments on commit 0cd56ec

Please sign in to comment.