This post will take you from a plain summary of a numeric variable, such as the number of days to purchase a product:
To a state where you can visualize the cumulative proportion and easily see the median, ranging from red (0%), to yellow (50%, the median), to blue (100%):
Requirements
You will need a numeric variable in your Data Sets tree. Numeric variables are represented by a "2" next to their name:
Method
- From the toolbar, go to Calculation > Custom Code.
- Drag your cursor on the page where you would like to place the heated density plot.
- In the object inspector, go to Data > R CODE > Edit Code, paste in the following code, and then click Calculate.
# Draws a kernel density curve for a numeric vector in which the area under
# the curve is colored by the cumulative percentage of observations (the
# empirical CDF of x), so the median sits at the middle color of the palette.
#
# Arguments:
#   x            Numeric vector. Missing values are dropped with a warning.
#   title        Chart title.
#   x.title      Title of the x-axis; also used as the prefix of each tooltip.
#   colors       Colors interpolated from 0% (first) to 100% (last).
#   show.legend  Whether to show the color bar.
#   legend.title Title of the color bar.
#   n            Number of x positions at which the density is estimated.
#   n.breaks     Approximate number of x-axis tick labels (passed to pretty()).
#   ...          Further arguments passed to stats::density(), e.g. from = 0.
#
# Returns a plotly object. Stops with an error if fewer than 4 non-missing
# observations remain. Requires the 'plotly' and 'scales' packages.
HeatedDensityPlot <- function(x,
                              title = "",
                              x.title = "x",
                              colors = c('#d7191c','#fdae61','#ffffbf','#abdda4','#2b83ba'),
                              show.legend = TRUE,
                              legend.title = "Cumulative %",
                              n = 512, # Number of unique values of x in the heatmap/density estimation
                              n.breaks = 5, # Number of values of x to show on the x-axis. Final number may differ.
                              ...)
{
    # Checking inputs.
    if (any(nas <- is.na(x))) {
        warning(sum(nas), " observations with missing values have been removed.")
        x <- x[!nas]
    }
    n.obs <- length(x)
    if (n.obs < 4)
        stop(n.obs, " is too few observations for a valid density plot. See Silverman (1986) Density Estimation for Statistics and Data Analysis.")

    # Computing densities. 'n' must be forwarded to density(); otherwise the
    # density always has 512 points and every structure sized by 'n' below
    # would be inconsistent with it whenever a non-default 'n' is supplied.
    dens <- density(x, n = n, ...)
    # Use the number of points actually returned for all later sizing.
    n <- length(dens$x)
    y.max <- max(dens$y) * 1.1 # To ensure top of plot isn't too close to title
    x.to.plot <- dens$x
    y.seq <- c(0, y.max / 2, y.max)
    y.to.plot <- dens$y
    # Proportion of observations at or below each grid position (0 to 1).
    cum.dens <- ecdf(x)(x.to.plot)

    # Due to a plotly bug that misaligns the heatmap with the ensuing white
    # line, blank columns are put at the beginning of the data.
    n.blanks <- 10
    n.total <- n + n.blanks
    cum.dens <- c(rep(NA, n.blanks), cum.dens)
    # Extend the x grid to the left by n.blanks steps of the grid spacing.
    step <- x.to.plot[2] - x.to.plot[1]
    blanks <- x.to.plot[1] - step * (n.blanks:1)
    x.to.plot <- c(blanks, x.to.plot)
    y.to.plot <- c(rep(0, n.blanks), y.to.plot)

    # Creating the matrix of heatmap values: three identical rows (bottom,
    # middle and top of the plot), one column per x position.
    cum.perc <- cum.dens * 100
    z.mat <- matrix(cum.perc, byrow = TRUE, nrow = 3, ncol = n.total,
                    dimnames = list(y = y.seq, x = x.to.plot))

    # Specifying the colors. Blank (NA) columns are mapped to white.
    col.fun <- scales::col_numeric(colors, domain = 0:1, na.color = "white")
    x.as.colors <- col.fun(cum.dens)
    z.to.plot.scaled <- scales::rescale(cum.perc)
    color.lookup <- setNames(data.frame(z.to.plot.scaled, x.as.colors), NULL)

    # Creating the base heatmap. plotly functions are namespace-qualified so
    # that the package does not need to be attached (and so that layout() is
    # not confused with graphics::layout()).
    # NOTE(review): xsrc/ysrc are kept exactly as in the original call;
    # confirm whether they have any effect on the rendered heatmap.
    p <- plotly::plot_ly(z = z.mat,
                         xsrc = x.to.plot,
                         ysrc = y.seq,
                         type = "heatmap",
                         colorscale = color.lookup,
                         cauto = FALSE,
                         hoverinfo = "none",
                         colorbar = list(title = legend.title),
                         showscale = show.legend)

    # Placing white on top of the bits of the heatmap to hide: a closed shape
    # that runs along the density curve and back along a line above the plot.
    x.index <- seq_len(n.total)
    p <- plotly::add_trace(p,
                           x = c(x.index, rev(x.index)),
                           y = c(y.to.plot, rep(y.max * 1.10, n.total)),
                           fill = "tonexty",
                           hoverinfo = "none",
                           showlegend = FALSE,
                           type = "scatter",
                           mode = "lines",
                           showscale = FALSE,
                           line = list(color = "white", width = 0),
                           fillcolor = "white")

    # Adding the tooltips. x.title is pasted on outside of sprintf() so that
    # a '%' in the title is not interpreted as a format specification.
    tooltips <- paste0(x.title,
                       sprintf(": %.0f %% < %.1f", cum.perc, x.to.plot))
    p <- plotly::add_trace(p,
                           x = x.index,
                           y = y.to.plot,
                           name = "",
                           hoverinfo = "text",
                           text = tooltips,
                           type = "scatter",
                           mode = "lines",
                           line = list(color = "white", width = 0),
                           showlegend = FALSE,
                           showscale = FALSE)
    p <- plotly::config(p, displayModeBar = FALSE)

    # Formatting the x axis. The traces are positioned by column index, so
    # the pretty tick values are converted from data units to index units.
    x.text <- pretty(x.to.plot, n = n.breaks)
    x.tick <- 1 + (x.text - x.to.plot[1]) /
        (x.to.plot[n.total] - x.to.plot[1]) * (n.total - 1)
    p <- plotly::layout(p, title = title,
                        xaxis = list(title = x.title, tickmode = "array",
                                     tickvals = x.tick, ticktext = x.text),
                        yaxis = list(title = "", showline = FALSE, ticks = "",
                                     showticklabels = FALSE, range = c(0, y.max)),
                        margin = list(t = 30, l = 5, b = 50, r = 5))
    p
}
# Build the heated density plot for the chosen variable. The display options
# are matched by name; from = 0 is not an argument of HeatedDensityPlot
# itself, so it is forwarded to density() through '...'.
HeatedDensityPlot(
    x = density_var,
    title = "Heated density plot",
    x.title = "Number of days since trial started",
    legend.title = "% of buyers",
    from = 0
)
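In the code above, density_var is a placeholder for the numeric variable used to create the density plot; replace it with the name of the variable in your data set. The from = 0 argument is passed through to R's density() function so that the density estimate starts at zero rather than extending below it.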
Next
How to Create a Density Plot Using R