Please note these steps require a Displayr license.
library(BiasedUrn)
setup.flat.data = function(x, number.alternatives){
n = nrow(x)
number.sets = ncol(x) / number.alternatives
data = vector("list",n)
for (i in 1:n)
{
temp.respondent.data = matrix(x[i,],byrow=TRUE,ncol = number.alternatives)
respondent.data = vector("list",number.sets)
for (s in 1:number.sets)
respondent.data[[s]] = as.numeric(temp.respondent.data[s,])
data[[i]] = respondent.data
}
compress.data(data)}
compress.data = function(x){#Creates a vector for each set where the first entry was best and the last worst
compress = function(x){
x.valid = !is.na(x)
x.position = (1:length(x))[x.valid]
x.position[order(x[x.valid], decreasing = TRUE)]
}
n = length(x)
number.sets = length(x[[1]])
number.alternatives = length(x[[1]][[1]])
data = vector("list",n)
for (i in 1:n)
{
respondent.data = vector("list",number.sets)
for (s in 1:number.sets)
respondent.data[[s]] = compress(x[[i]][[s]])
data[[i]] = respondent.data
}
class(data) = "maxdiffData"
data}
d.marley = function(b,x){
b.vector = b[x]
k = length(b.vector)
ediffs = exp(matrix(b.vector,k,k,byrow=FALSE) - matrix(b.vector,k,k,byrow=TRUE))
ediffs[1,k] / (sum(ediffs) - sum(diag(ediffs)))}
d.rlogit = function(b,x){
eb = exp(b[x])
k = length(eb)
d.best = eb[1]/sum(eb)
d.not.worst = dMWNCHypergeo(c(rep(1,k-2),0), rep(1,k-1),k-2,eb[-1], precision = 1E-7)
d.best * d.not.worst}
d.repeated.maxdiff = function(b,x, method){
prod(as.numeric(lapply(x,b = b,switch(method,marley=d.marley, rlogit = d.rlogit))))}
ll.max.diff = function(b,x, maxdiff.method = c("marley","rlogit")[1]){
b[b > 100] = 100
b[b < -100] = -100
sum(log(as.numeric(lapply(x,b = c(0,b),method=maxdiff.method, d.repeated.maxdiff))))
}
max.diff.rank.ordered.logit.with.ties = function(stacked.data){
flat.data = setup.flat.data(stacked.data, ncol(stacked.data))
solution = optim(seq(.01,.02, length.out = ncol(stacked.data)-1), ll.max.diff, maxdiff.method = "rlogit", gr = NULL, x = flat.data, method = "BFGS", control = list(fnscale = -1, maxit = 1000, trace = FALSE), hessian = FALSE)
pars = c(0, solution$par)
names(pars) = dimnames(stacked.data)[[2]]
list(log.likelihood = solution$value, coef = pars)}
Reading the data into R
The code above assumes the data is in the stacked layout. Using the example used in Analyzing MaxDiff Using Standard Logit Models Using R:
mdData = matrix(c(NA,NA,1,0,-1,NA,1,NA,NA,0,-1,NA,NA,1,NA,0,NA,-1,0,NA,1,NA,NA,-1,0,-1,1,NA,NA,NA,NA,1,NA,NA,0,-1),6,byrow=TRUE, dimnames=list(Block=1:6,Alternatives = LETTERS[1:6]))
mdData
Alternatives Block A B C D E F 1 NA NA 1 0 -1 NA 2 1 NA NA 0 -1 NA 3 NA 1 NA 0 NA -1 4 0 NA 1 NA NA -1 5 0 -1 1 NA NA NA 6 NA 1 NA NA 0 -1
Estimating the model
The model is estimated using the following code:
max.diff.rank.ordered.logit.with.ties(mdData)
Interpreting the results
The results of the model are (this will differ from computer-to-computer, but the relativities will remain the same):
$log.likelihood [1] -3.34057e-08 $coef A B C D E F 0.00000 -18.32953 21.20740 -36.45326 -56.78844 -75.94036
A few points about the interpretation:
- The interpretation is identical to that from the logit model (click here for more information). In particular, the correct interpretation is to either look at ranks, or, to compute probabilities.
- As was the case with the standard logit model, and for the same reasons, it is not appropriate to directly compare the parameters that are estimated between different respondents.
- The actual substantive interpretation of these outputs, which is: C > A > B > D > E > F is identical to that obtained from the tricked multinomial logit model.
- The magnitudes of these numbers differs from those of the tricked multinomial logit model. This is unimportant and is due to different levels of precision used in the different algorithms.
- The log-likelihood computed using this model cannot be meaningfully compared with those from the other methods (they make different assumptions).
Next
How to Analyze MaxDiff Using Standard Logit Models Using R
How to Perform MaxDiff Multinomial Logit Analysis