-
Notifications
You must be signed in to change notification settings - Fork 34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Overlapping histograms #114
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -89,8 +89,8 @@ Descriptives <- function(jaspResults, dataset, options) { | |||||
if (options$plotVariables) { | ||||||
if(is.null(jaspResults[["distributionPlots"]])) { | ||||||
jaspResults[["distributionPlots"]] <- createJaspContainer(gettext("Distribution Plots")) | ||||||
jaspResults[["distributionPlots"]]$dependOn(c("plotVariables", "splitby", "binWidthType", "distPlotDensity", | ||||||
"distPlotRug", "numberOfBins")) | ||||||
jaspResults[["distributionPlots"]]$dependOn(c("plotVariables", "splitby", "binWidthType", "overlay", | ||||||
"distPlotDensity", "distPlotRug", "numberOfBins")) | ||||||
jaspResults[["distributionPlots"]]$position <- 5 | ||||||
} | ||||||
|
||||||
|
@@ -99,9 +99,9 @@ Descriptives <- function(jaspResults, dataset, options) { | |||||
for (var in variables) { | ||||||
if(is.null(distPlots[[var]])) { | ||||||
if (makeSplit) { | ||||||
distPlots[[var]] <- .descriptivesFrequencyPlots(dataset = splitDat.factors, options = options, variable = var) | ||||||
distPlots[[var]] <- .descriptivesFrequencyPlots(dataset = splitDat.factors, options = options, variable = var, overlayFactor = splitFactor) | ||||||
} else { | ||||||
distPlots[[var]] <- .descriptivesFrequencyPlots(dataset = dataset.factors, options = options, variable = var) | ||||||
distPlots[[var]] <- .descriptivesFrequencyPlots(dataset = dataset.factors, options = options, variable = var, overlayFactor = splitFactor) | ||||||
} | ||||||
} | ||||||
} | ||||||
|
@@ -941,24 +941,55 @@ Descriptives <- function(jaspResults, dataset, options) { | |||||
} | ||||||
|
||||||
|
||||||
.descriptivesFrequencyPlots <- function(dataset, options, variable) { | ||||||
.descriptivesFrequencyPlots <- function(dataset, options, variable, overlayFactor) { | ||||||
if (options$splitby != "" ) { | ||||||
# return a collection | ||||||
split <- names(dataset) | ||||||
|
||||||
plotResult <- createJaspContainer(title = variable) | ||||||
plotResult$dependOn(options = "splitby", optionContainsValue = list(variables = variable)) | ||||||
if (options$overlay) { | ||||||
|
||||||
#overlayFactor = Descriptives.splitFactor#dataset[[.v(splitName)]] | ||||||
|
||||||
plotResult <- createJaspContainer(title = variable) | ||||||
plotResult$dependOn(c("splitby", "overlay"), optionContainsValue = list(variables = variable)) | ||||||
|
||||||
#for (l in split) { | ||||||
plotResult <- .descriptivesFrequencyPlots_SubFunc_Overlay(dataset = dataset, variable = variable, overlayFactor = overlayFactor, splitByName = options$splitby, width = options$plotWidth, height = options$plotHeight, displayDensity = options$distPlotDensity, rugs = options$distPlotRug, title = variable, binWidthType = options$binWidthType, numberOfBins = options$numberOfBins) | ||||||
plotResult$dependOn(optionsFromObject = plotResult) | ||||||
#} | ||||||
|
||||||
return(plotResult) | ||||||
|
||||||
} else { | ||||||
|
||||||
plotResult <- createJaspContainer(title = variable) | ||||||
plotResult$dependOn(options = "splitby", optionContainsValue = list(variables = variable)) | ||||||
|
||||||
for (l in split) { | ||||||
plotResult[[l]] <- .descriptivesFrequencyPlots_SubFunc(dataset = dataset[[l]], variable = variable, width = options$plotWidth, height = options$plotHeight, displayDensity = options$distPlotDensity, rugs = options$distPlotRug, title = l, binWidthType = options$binWidthType, numberOfBins = options$numberOfBins) | ||||||
plotResult[[l]]$dependOn(optionsFromObject = plotResult) | ||||||
} | ||||||
|
||||||
return(plotResult) | ||||||
|
||||||
for (l in split) { | ||||||
plotResult[[l]] <- .descriptivesFrequencyPlots_SubFunc(dataset = dataset[[l]], variable = variable, width = options$plotWidth, height = options$plotHeight, displayDensity = options$distPlotDensity, rugs = options$distPlotRug, title = l, binWidthType = options$binWidthType, numberOfBins = options$numberOfBins) | ||||||
plotResult[[l]]$dependOn(optionsFromObject = plotResult) | ||||||
} | ||||||
|
||||||
return(plotResult) | ||||||
} else { | ||||||
column <- dataset[[.v(variable)]] | ||||||
aPlot <- .descriptivesFrequencyPlots_SubFunc(dataset = dataset, variable = variable, width = options$plotWidth, height = options$plotHeight, displayDensity = options$distPlotDensity, rugs = options$distPlotRug, title = variable, binWidthType = options$binWidthType, numberOfBins = options$numberOfBins) | ||||||
aPlot$dependOn(options = "splitby", optionContainsValue = list(variables = variable)) | ||||||
|
||||||
if (options$overlay == TRUE) { | ||||||
overlayFactor = NULL | ||||||
column <- dataset[[.v(variable)]] | ||||||
aPlot <- .descriptivesFrequencyPlots_SubFunc_Overlay(dataset = dataset, variable = variable, overlayFactor=overlayFactor, width = options$plotWidth, height = options$plotHeight, displayDensity = options$distPlotDensity, rugs = options$distPlotRug, title = variable, binWidthType = options$binWidthType, numberOfBins = options$numberOfBins) | ||||||
aPlot$dependOn(c("splitby", "overlay"), optionContainsValue = list(variables = variable)) | ||||||
|
||||||
} else { | ||||||
|
||||||
column <- dataset[[.v(variable)]] | ||||||
aPlot <- .descriptivesFrequencyPlots_SubFunc(dataset = dataset, variable = variable, width = options$plotWidth, height = options$plotHeight, displayDensity = options$distPlotDensity, rugs = options$distPlotRug, title = variable, binWidthType = options$binWidthType, numberOfBins = options$numberOfBins) | ||||||
aPlot$dependOn(options = "splitby", optionContainsValue = list(variables = variable)) | ||||||
|
||||||
} | ||||||
|
||||||
|
||||||
return(aPlot) | ||||||
} | ||||||
|
@@ -981,6 +1012,25 @@ Descriptives <- function(jaspResults, dataset, options) { | |||||
return(freqPlot) | ||||||
} | ||||||
|
||||||
.descriptivesFrequencyPlots_SubFunc_Overlay <- function(dataset, variable, overlayFactor, splitByName = splitByName, width, height, displayDensity, rugs, title, binWidthType, numberOfBins) { | ||||||
freqPlot <- createJaspPlot(title = title, width = width, height = height) | ||||||
|
||||||
errorMessage <- NULL #.descriptivesCheckPlotErrors(dataset, variable, obsAmount = "< 3") | ||||||
column <- dataset[[variable]] | ||||||
column <- column[!is.na(column)] | ||||||
#print(do.call(rbind, dataset)) | ||||||
|
||||||
isDiscrete <- is.factor(column) || is.character(column) | ||||||
if (!is.null(errorMessage)) | ||||||
freqPlot$setError(gettextf("Plotting not possible: %s", errorMessage)) | ||||||
else if (length(column) > 0 && isDiscrete) | ||||||
freqPlot$plotObject <- .barplotJASP(column, variable) | ||||||
else #if (length(column) > 0 && !isDiscrete) | ||||||
freqPlot$plotObject <- .plotMarginal_Overlay(column = do.call(rbind, dataset), variableName = variable, overlayFactor, splitByName = splitByName, displayDensity = displayDensity, rugs = rugs, binWidthType = binWidthType, numberOfBins = numberOfBins) | ||||||
|
||||||
return(freqPlot) | ||||||
} | ||||||
|
||||||
.descriptivesSplitPlot <- function(dataset, options, variable) { | ||||||
depends <- c("splitPlotColour", "splitPlotViolin", "splitPlotBoxplot", "splitPlotJitter", "splitPlotOutlierLabel") | ||||||
|
||||||
|
@@ -1239,6 +1289,111 @@ Descriptives <- function(jaspResults, dataset, options) { | |||||
return(p) | ||||||
} | ||||||
|
||||||
|
||||||
|
||||||
.plotMarginal_Overlay <- function(column, variableName, overlayFactor, splitByName, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
We use camelCase, so no underscores in the names, please. I know that jaspDescriptives does not follow this convention, but it's good to start doing it here. |
||||||
rugs = FALSE, displayDensity = FALSE, binWidthType = c("doane", "fd", "scott", "sturges", "manual"), | ||||||
numberOfBins = NA, | ||||||
lwd = 1) { | ||||||
binWidthType <- match.arg(binWidthType) | ||||||
#column <- as.numeric(column) | ||||||
variable <- column[[variableName]] #na.omit(column) | ||||||
|
||||||
#message(variable) | ||||||
|
||||||
if (length(variable) == 0) | ||||||
return(NULL) | ||||||
|
||||||
if (binWidthType == "doane") { # https://en.wikipedia.org/wiki/Histogram#Doane's_formula | ||||||
sigma.g1 <- sqrt((6*(length(variable) - 2)) / ((length(variable) + 1)*(length(variable) + 3))) | ||||||
g1 <- mean(abs(variable)^3) | ||||||
k <- 1 + log2(length(variable)) + log2(1 + (g1 / sigma.g1)) | ||||||
binWidthType <- k | ||||||
} else if (binWidthType == "fd" && nclass.FD(variable) > 10000) { # FD-method will produce extreme number of bins and crash ggplot, mention this in footnote | ||||||
binWidthType <- 10000 | ||||||
} else if (binWidthType == "manual") { | ||||||
binWidthType <- numberOfBins | ||||||
} | ||||||
|
||||||
|
||||||
h <- hist(variable, plot = FALSE, breaks = binWidthType) | ||||||
|
||||||
if (!displayDensity) | ||||||
yhigh <- max(h$counts) | ||||||
else { | ||||||
dens <- density(variable) | ||||||
yhigh <- max(max(h$density), max(dens$y)) | ||||||
} | ||||||
|
||||||
ylow <- 0 | ||||||
xticks <- base::pretty(c(variable, h$breaks), min.n = 3) | ||||||
|
||||||
if (!displayDensity) { | ||||||
p <- | ||||||
jaspGraphs::drawAxis( | ||||||
xName = variableName, yName = gettext("Counts"), xBreaks = xticks, | ||||||
yBreaks = base::pretty(c(0, h$counts)), force = TRUE, xLabels = xticks | ||||||
) | ||||||
} else { | ||||||
p <- | ||||||
jaspGraphs::drawAxis( | ||||||
xName = variableName, yName = gettext("Density"), xBreaks = xticks, | ||||||
yBreaks = c(0, 1.05 * yhigh), force = TRUE, yLabels = NULL, | ||||||
xLabels = xticks | ||||||
) | ||||||
} | ||||||
|
||||||
|
||||||
if (displayDensity) { | ||||||
p <- p + | ||||||
ggplot2::geom_histogram( | ||||||
data = data.frame(x = variable, g = overlayFactor), | ||||||
mapping = ggplot2::aes(x = x, fill=factor(g), y = ..density..),#options$splitBy, y = ..density..), | ||||||
Comment on lines
+1350
to
+1351
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps it makes sense to incorporate these changes here? If you prefer, I can also do that (either before or after this PR). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good, I have a very slight preference for moving onto something else in the interest of getting broader exposure to different parts of JASP, but emphasis on the word 'slight' - I would also happily have a go at incorporating the changes in jaspGraphs. By the way I think it's worth mentioning that there were a couple of not-completely-resolved questions about whether this could be implemented more neatly, and what to do if there is no split factor. Are there differences between the required functionality or code standards in the jaspGraphs and jaspDescriptives modules? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Makes sense.
This can definitely be done in one function (and should probably be done in one function). .plotMarginal_Overlay <- function(column, variableName, overlayFactor = NULL, splitByName = NULL, ...) {
if (displayDensity) {
if (is.null(overlayFactor)) {
histogramData <- data.frame(x = variable, g = overlayFactor)
histogramMapping <- ggplot2::aes(x = x, fill=factor(g), y = ..density..)
} else {
histogramData <- data.frame(x = variable)
histogramMapping <- ggplot2::aes(x = x, y = ..density..)
}
p <- p +
ggplot2::geom_histogram(
data = histogramData,
mapping = histogramMapping,
...
) + ...
}
Since jaspGraphs is reused by all of JASP, I tend to be a bit more strict in terms of functionality and documentation (e.g., enforcing roxygen2 for documentation). Also, all code there must pass the R CMD CHECK. but I can also do that if you prefer to work on other things! |
||||||
breaks = h[["breaks"]], | ||||||
position = 'identity', | ||||||
alpha = 0.2, | ||||||
col = "black", | ||||||
size = .7 | ||||||
) + | ||||||
ggplot2::geom_line( | ||||||
data = data.frame(x = dens$x, y = dens$y), | ||||||
mapping = ggplot2::aes(x = x, y = y), | ||||||
lwd = lwd, | ||||||
col = "black" | ||||||
) + ggplot2::scale_fill_manual(name = splitByName, values = rainbow(nlevels(overlayFactor))) | ||||||
Comment on lines
+1358
to
+1363
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it would be also good to split the density plot by the group |
||||||
|
||||||
} else { | ||||||
|
||||||
|
||||||
p <- p + | ||||||
ggplot2::geom_histogram( | ||||||
data = data.frame(x = variable, g = overlayFactor), | ||||||
mapping = ggplot2::aes(x = x, fill = factor(g)), | ||||||
breaks = h[["breaks"]], | ||||||
position = "identity", | ||||||
alpha = 0.2, | ||||||
col = "black", | ||||||
size = .7 | ||||||
) + ggplot2::scale_fill_manual(name = splitByName, values = rainbow(nlevels(overlayFactor))) | ||||||
|
||||||
} | ||||||
|
||||||
if (rugs) | ||||||
p <- p + ggplot2::geom_rug(data = data.frame(variable), mapping = ggplot2::aes(x = variable), sides = "b") | ||||||
|
||||||
# JASP theme | ||||||
p <- jaspGraphs::themeJasp(p, legend.position = 'right', | ||||||
axisTickWidth = .7, | ||||||
bty = list(type = "n", ldwX = .7, lwdY = 1)) | ||||||
# TODO: Fix jaspgraphs axis width X vs Y. See @vandenman. | ||||||
|
||||||
if (displayDensity) | ||||||
p <- p + ggplot2::theme(axis.ticks.y = ggplot2::element_blank()) | ||||||
|
||||||
return(p) | ||||||
} | ||||||
|
||||||
|
||||||
.descriptivesDotPlots <- function(dataset, options, variable){ | ||||||
|
||||||
|
||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -83,6 +83,7 @@ Form | |||||
enabled: plotVariables.checked || plotCorrelationMatrix.checked | ||||||
|
||||||
indent: true | ||||||
CheckBox { name: "overlay"; label: qsTr("Overlay") } | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The question in general is whether this option should be presented here as is, as it affects only the distribution plots but not the correlation plots. The alternative option would be to implement this split for the correlation plots as well (which would be ideal, I think). But this is already a good step forward, so I would not be that opposed to let it be like this. We can revisit the correlation plots in the future. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, it would be good to add description of this button in the help file (you can find the file inside |
||||||
CheckBox { name: "distPlotDensity"; label: qsTr("Display density") } | ||||||
CheckBox { name: "distPlotRug"; label: qsTr("Display rug marks") } | ||||||
DropDown { | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[[
over$
avoids partial matching, so it's good to prefer that.