I have been working a bit more on estimating simple factor models with JAGS. I posted the problem from my last post to the open discussion forum and received very helpful input from Vincent Arel-Bundock from the University of Michigan and one of his co-authors, Walter R. Mebane, which ultimately led to the code in the current post.
Here I wanted to know how to estimate a simple factor model for continuous manifest variables and three latent, intercorrelated factors. The code is simple and straightforward. I put a scaled inverse Wishart prior on the variance-covariance matrix of the factor values, following the approach in Gelman & Hill (2007, p. 376). To solve the invariance problem, I fixed the loading of one item on each factor to 1. In addition, the means of the latent trait distributions (factor values) were all constrained to zero. This is a very traditional approach that is also implemented in, e.g., Mplus, the Stata program GLLAMM, or the R package lavaan.
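In model notation (a sketch matching the JAGS code below, with c(j) the construct that item j loads on, and λ_j fixed to 1 for one marker item per factor), the model is:

y_ij = β_j + λ_j * η_{c(j),i} + ε_ij,   ε_ij ~ N(0, σ_j²)
(η_1i, η_2i, η_3i)' ~ N(0, Σ)

where, following the scaled inverse Wishart construction, Σ = Diag(ξ) Σ_raw Diag(ξ) with Σ_raw⁻¹ ~ Wishart(I₃, 4) and independent uniform priors on the scale factors ξ.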
In the following code I use the Holzinger & Swineford dataset from the lavaan tutorial for comparison. First I estimate the model with lavaan, and in a second step with JAGS. Both approaches give very comparable results, but keep in mind that this is somewhat experimental. When comparing coefficients, please note that I coded the model so that JAGS returns estimated standard deviations, not variances, for the manifest items and latent variables.
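As a minimal sketch (to be run after fitting `fit` in the script below), lavaan's variance estimates can be put on the same scale by taking square roots:

# Minimal comparison sketch (assumes the lavaan fit 'fit' from below):
# lavaan reports variances; take square roots to compare with the
# standard deviations monitored in JAGS (sigma.y, sigma.eta1-3).
pe <- parameterEstimates(fit)
vars <- pe[pe$op == "~~" & pe$lhs == pe$rhs, c("lhs", "est")]
vars$sd <- sqrt(vars$est)
vars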
OK, enough talk, here is the code. First, the R script:
## holzinger.R
# Experimental!
library(lavaan)
# From the lavaan tutorial
HS.model <- ' visual  =~ x1 + x2 + x3
              textual =~ x4 + x5 + x6
              speed   =~ x7 + x8 + x9 '
fit <- cfa(HS.model, data = HolzingerSwineford1939, meanstructure = TRUE)
summary(fit, standardized = TRUE)
library(R2jags)
data <- HolzingerSwineford1939
## Transform data to long format ----
# Dependent variable (stack the nine items)
y <- c(data$x1, data$x2, data$x3, data$x4, data$x5,
       data$x6, data$x7, data$x8, data$x9)
# Item identifier
item <- rep(1:9, each = 301)
# Construct identifier (1: visual, 2: textual, 3: speed)
const <- rep(1:3, each = 3 * 301)
# Person id
id <- rep(1:301, 9)
## ---------------------------------
# Fitting the model with JAGS
W <- diag(3)
dat <- list("y", "item", "const", "id", "W")
parameters <- c("beta", "sigma.y", "lam",
                "sigma.eta1", "sigma.eta2", "sigma.eta3",
                "rho_eta12", "rho_eta13", "rho_eta23", "eta")
output <- jags.parallel(dat, inits = NULL, parameters, model.file = "3fa.txt",
                        n.chains = 2, n.iter = 3000, n.burnin = 2000)
plot(output)
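After the run, it is worth checking the convergence diagnostics before comparing anything; a minimal sketch:

# Optional: convergence diagnostics for the R2jags run
print(output)      # posterior summaries, including Rhat and n.eff
traceplot(output)  # visual check of chain mixing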
Next, the model code for JAGS:
## 3fa.txt
# Experimental!
model{
# Likelihood
for(i in 1:2709) # Loop over observations (9 items x 301 persons)
{
# Linear decomposition; could probably be done more
# elegantly with matrix algebra. Cross-loadings are
# fixed to zero below, so only the item's own
# construct contributes to mu[i].
mu[i]<-beta[item[i]] +
lam[1, item[i]]*eta[1, id[i]]+
lam[2, item[i]]*eta[2, id[i]]+
lam[3, item[i]]*eta[3, id[i]]
# Distributional assumptions for the
# observed variables
y[i]~dnorm(mu[i], tau.y[item[i]])
}
# Priors for the items
for(i in 1:9)
{
beta[i]~dnorm(0,1.0E-3)
tau.y[i] <- pow(sigma.y[i], -2)
sigma.y[i] ~ dunif (0, 100)
}
# Matrix of loadings [construct, item]
# Constraints and priors
lam[1,1]<-1
lam[2,1]<-0
lam[3,1]<-0
lam[1,2]~dnorm(0,1.0E-3)
lam[2,2]<-0
lam[3,2]<-0
lam[1,3]~dnorm(0,1.0E-3)
lam[2,3]<-0
lam[3,3]<-0
lam[1,4]<-0
lam[2,4]<-1
lam[3,4]<-0
lam[1,5]<-0
lam[2,5]~dnorm(0,1.0E-3)
lam[3,5]<-0
lam[1,6]<-0
lam[2,6]~dnorm(0,1.0E-3)
lam[3,6]<-0
lam[1,7]<-0
lam[2,7]<-0
lam[3,7]<-1
lam[1,8]<-0
lam[2,8]<-0
lam[3,8]~dnorm(0,1.0E-3)
lam[1,9]<-0
lam[2,9]<-0
lam[3,9]~dnorm(0,1.0E-3)
# Scaled inverse Wishart prior for
# variance-covariance matrix of factor values
# after Gelman and Hill (2007)
for(i in 1:301)
{
eta[1,i]<-xi.eta1*eta.raw[i,1]
eta[2,i]<-xi.eta2*eta.raw[i,2]
eta[3,i]<-xi.eta3*eta.raw[i,3]
eta.raw[i,1:3]~dmnorm(eta.raw.hat[i,],tau.eta.raw[,])
eta.raw.hat[i,1]<-mu.eta1.raw
eta.raw.hat[i,2]<-mu.eta2.raw
eta.raw.hat[i,3]<-mu.eta3.raw
}
mu.eta1<-xi.eta1*mu.eta1.raw
mu.eta2<-xi.eta2*mu.eta2.raw
mu.eta3<-xi.eta3*mu.eta3.raw
mu.eta1.raw<-0
mu.eta2.raw<-0
mu.eta3.raw<-0
xi.eta1~dunif(0,100)
xi.eta2~dunif(0,100)
xi.eta3~dunif(0,100)
tau.eta.raw[1:3,1:3] ~ dwish(W[,],df)
df<-4
sigma.eta.raw[1:3,1:3]<-inverse(tau.eta.raw[,])
sigma.eta1<-xi.eta1*sqrt(sigma.eta.raw[1,1])
sigma.eta2<-xi.eta2*sqrt(sigma.eta.raw[2,2])
sigma.eta3<-xi.eta3*sqrt(sigma.eta.raw[3,3])
rho_eta12<-sigma.eta.raw[1,2]/sqrt(sigma.eta.raw[1,1]*sigma.eta.raw[2,2])
rho_eta13<-sigma.eta.raw[1,3]/sqrt(sigma.eta.raw[1,1]*sigma.eta.raw[3,3])
rho_eta23<-sigma.eta.raw[2,3]/sqrt(sigma.eta.raw[2,2]*sigma.eta.raw[3,3])
}
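To pull out the quantities of interest, e.g. the factor correlations, one can index into the R2jags summary table (a sketch, assuming the monitors set in the script above):

# Sketch: posterior summaries of the factor correlations,
# taken from the R2jags output object created in holzinger.R
output$BUGSoutput$summary[c("rho_eta12", "rho_eta13", "rho_eta23"), ]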