Display system information and load tidyverse
and faraway
packages
sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Catalina 10.15.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_3.6.2 magrittr_1.5 tools_3.6.2 htmltools_0.4.0
## [5] yaml_2.2.1 Rcpp_1.0.4.6 stringi_1.4.6 rmarkdown_2.1
## [9] knitr_1.28 stringr_1.4.0 xfun_0.13 digest_0.6.25
## [13] rlang_0.4.5 evaluate_0.14
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.4
## ✓ tibble 3.0.0 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(faraway)
psid
(Panel Study of Income Dynamics) data records the income of 85 individuals, who were aged 25-39 in 1968 and had complete data for at least 11 of the years between 1968 and 1990.psid <- as_tibble(psid) %>%
print(n = 30)
## # A tibble: 1,661 x 6
## age educ sex income year person
## <int> <int> <fct> <int> <int> <int>
## 1 31 12 M 6000 68 1
## 2 31 12 M 5300 69 1
## 3 31 12 M 5200 70 1
## 4 31 12 M 6900 71 1
## 5 31 12 M 7500 72 1
## 6 31 12 M 8000 73 1
## 7 31 12 M 8000 74 1
## 8 31 12 M 9600 75 1
## 9 31 12 M 9000 76 1
## 10 31 12 M 9000 77 1
## 11 31 12 M 23000 78 1
## 12 31 12 M 22000 79 1
## 13 31 12 M 8000 80 1
## 14 31 12 M 10000 81 1
## 15 31 12 M 21800 82 1
## 16 31 12 M 19000 83 1
## 17 29 16 M 7500 68 2
## 18 29 16 M 7300 69 2
## 19 29 16 M 9250 70 2
## 20 29 16 M 10300 71 2
## 21 29 16 M 10900 72 2
## 22 29 16 M 11900 73 2
## 23 29 16 M 13000 74 2
## 24 29 16 M 12900 75 2
## 25 29 16 M 19900 76 2
## 26 29 16 M 18900 77 2
## 27 29 16 M 19100 78 2
## 28 29 16 M 20300 79 2
## 29 29 16 M 31600 80 2
## 30 29 16 M 16600 81 2
## # … with 1,631 more rows
summary(psid)
## age educ sex income year
## Min. :25.00 Min. : 3.00 F:732 Min. : 3 Min. :68.00
## 1st Qu.:28.00 1st Qu.:10.00 M:929 1st Qu.: 4300 1st Qu.:73.00
## Median :34.00 Median :12.00 Median : 9000 Median :78.00
## Mean :32.19 Mean :11.84 Mean : 13575 Mean :78.61
## 3rd Qu.:36.00 3rd Qu.:13.00 3rd Qu.: 18050 3rd Qu.:84.00
## Max. :39.00 Max. :16.00 Max. :180000 Max. :90.00
## person
## Min. : 1.00
## 1st Qu.:20.00
## Median :42.00
## Mean :42.44
## 3rd Qu.:63.00
## Max. :85.00
psid %>%
filter(person <= 20) %>%
ggplot() +
geom_line(mapping = aes(x = year, y = income)) +
facet_wrap(~ person) +
labs(x = "Year", y = "Income")
psid %>%
filter(person <= 20) %>%
ggplot() +
geom_line(mapping = aes(x = year, y = income, group = person)) +
facet_wrap(~ sex) +
scale_y_log10()
year
by the median 78.library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
psid <- psid %>%
mutate(cyear = I(year - 78))
oldw <- getOption("warn")
options(warn = -1) # turn off warnings
ml <- lmList(log(income) ~ cyear | person, data = psid)
options(warn = oldw)
intercepts <- sapply(ml, coef)[1, ]
slopes <- sapply(ml, coef)[2, ]
tibble(int = intercepts,
slo = slopes) %>%
ggplot() +
geom_point(mapping = aes(x = int, y = slo)) +
labs(x = "Intercept", y = "Slope")
mmod <- lmer(log(income) ~ cyear * sex + age + educ + (cyear | person), data = psid)
summary(mmod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(income) ~ cyear * sex + age + educ + (cyear | person)
## Data: psid
##
## REML criterion at convergence: 3819.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -10.2310 -0.2134 0.0795 0.4147 2.8254
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## person (Intercept) 0.2817 0.53071
## cyear 0.0024 0.04899 0.19
## Residual 0.4673 0.68357
## Number of obs: 1661, groups: person, 85
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.674211 0.543323 12.284
## cyear 0.085312 0.008999 9.480
## sexM 1.150312 0.121292 9.484
## age 0.010932 0.013524 0.808
## educ 0.104209 0.021437 4.861
## cyear:sexM -0.026306 0.012238 -2.150
##
## Correlation of Fixed Effects:
## (Intr) cyear sexM age educ
## cyear 0.020
## sexM -0.104 -0.098
## age -0.874 0.002 -0.026
## educ -0.597 0.000 0.008 0.167
## cyear:sexM -0.003 -0.735 0.156 -0.010 -0.011
Exercise: interpretation of fixed effects.
To test the fixed effects, we can use the Kenward-Roger adjusted F-test. For example, to test the interaction term
library(pbkrtest)
mmod <- lmer(log(income) ~ cyear * sex + age + educ + (cyear | person), data = psid, REML = TRUE)
mmodr <- lmer(log(income) ~ cyear + sex + age + educ + (cyear | person), data = psid, REML = TRUE)
KRmodcomp(mmod, mmodr)
## F-test with Kenward-Roger approximation; time: 3.94 sec
## large : log(income) ~ cyear * sex + age + educ + (cyear | person)
## small : log(income) ~ cyear + sex + age + educ + (cyear | person)
## stat ndf ddf F.scaling p.value
## Ftest 4.6142 1.0000 81.3279 1 0.03468 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The interaction term is marginally significant.
confint(mmod, method = "boot")
## Computing bootstrap confidence intervals ...
##
## 6 warning(s): Model failed to converge with max|grad| = 0.00248527 (tol = 0.002, component 1) (and others)
## 2.5 % 97.5 %
## .sig01 0.41805778 0.6060715825
## .sig02 -0.08450882 0.4589312952
## .sig03 0.03728021 0.0561610294
## .sigma 0.65838714 0.7086585202
## (Intercept) 5.75391578 7.7808107375
## cyear 0.06865442 0.1022462412
## sexM 0.91269131 1.3954601702
## age -0.01812344 0.0359199565
## educ 0.06081642 0.1434301814
## cyear:sexM -0.04806158 -0.0003335703
All variance component parameters are significant except for the covariance (correlation) between random intercept and slope.
(diagd <- fortify(mmod))
## # A tibble: 1,661 x 10
## age educ sex income year person cyear .fitted .resid .scresid
## <int> <int> <fct> <int> <int> <int> <I<dbl>> <dbl> <dbl> <dbl>
## 1 31 12 M 6000 68 1 -10 8.63 0.0654 0.0956
## 2 31 12 M 5300 69 1 -9 8.71 -0.134 -0.196
## 3 31 12 M 5200 70 1 -8 8.78 -0.228 -0.333
## 4 31 12 M 6900 71 1 -7 8.86 -0.0198 -0.0290
## 5 31 12 M 7500 72 1 -6 8.93 -0.0114 -0.0167
## 6 31 12 M 8000 73 1 -5 9.01 -0.0218 -0.0319
## 7 31 12 M 8000 74 1 -4 9.08 -0.0968 -0.142
## 8 31 12 M 9600 75 1 -3 9.16 0.0105 0.0154
## 9 31 12 M 9000 76 1 -2 9.23 -0.129 -0.189
## 10 31 12 M 9000 77 1 -1 9.31 -0.204 -0.298
## # … with 1,651 more rows
diagd %>%
ggplot(mapping = aes(sample = .resid)) +
stat_qq() + facet_grid(~sex)
diagd$edulevel <- cut(psid$educ,
c(0, 8.5, 12.5, 20),
labels=c("lessHS", "HS", "moreHS"))
diagd %>%
ggplot(mapping = aes(x = .fitted, y = .resid)) +
geom_point(alpha = 0.3) +
geom_hline(yintercept = 0) +
facet_grid(~ edulevel) +
labs(x = "Fitted", ylab = "Residuals")
vision
dataacuity
. Each individual is tested on left and right eyes under 4 powers.vision <- as_tibble(vision) %>%
print(n = Inf)
## # A tibble: 56 x 4
## acuity power eye subject
## <dbl> <fct> <fct> <fct>
## 1 116 6/6 left 1
## 2 119 6/18 left 1
## 3 116 6/36 left 1
## 4 124 6/60 left 1
## 5 120 6/6 right 1
## 6 117 6/18 right 1
## 7 114 6/36 right 1
## 8 122 6/60 right 1
## 9 110 6/6 left 2
## 10 110 6/18 left 2
## 11 114 6/36 left 2
## 12 115 6/60 left 2
## 13 106 6/6 right 2
## 14 112 6/18 right 2
## 15 110 6/36 right 2
## 16 110 6/60 right 2
## 17 117 6/6 left 3
## 18 118 6/18 left 3
## 19 120 6/36 left 3
## 20 120 6/60 left 3
## 21 120 6/6 right 3
## 22 120 6/18 right 3
## 23 120 6/36 right 3
## 24 124 6/60 right 3
## 25 112 6/6 left 4
## 26 116 6/18 left 4
## 27 115 6/36 left 4
## 28 113 6/60 left 4
## 29 115 6/6 right 4
## 30 116 6/18 right 4
## 31 116 6/36 right 4
## 32 119 6/60 right 4
## 33 113 6/6 left 5
## 34 114 6/18 left 5
## 35 114 6/36 left 5
## 36 118 6/60 left 5
## 37 114 6/6 right 5
## 38 117 6/18 right 5
## 39 116 6/36 right 5
## 40 112 6/60 right 5
## 41 119 6/6 left 6
## 42 115 6/18 left 6
## 43 94 6/36 left 6
## 44 116 6/60 left 6
## 45 100 6/6 right 6
## 46 99 6/18 right 6
## 47 94 6/36 right 6
## 48 97 6/60 right 6
## 49 110 6/6 left 7
## 50 110 6/18 left 7
## 51 105 6/36 left 7
## 52 118 6/60 left 7
## 53 105 6/6 right 7
## 54 105 6/18 right 7
## 55 115 6/36 right 7
## 56 115 6/60 right 7
vision %>%
mutate(npower = rep(1:4, 14)) %>%
ggplot() +
geom_line(mapping = aes(x = npower, y = acuity, linetype = eye)) +
facet_wrap(~ subject) +
scale_x_continuous("Power", breaks = 1:4,
labels = c("6/6", "6/18", "6/36", "6/60"))
eyes
and subject
, since we don’t believe the eye difference is consistent across individuals.mmod <- lmer(acuity ~ power + (1 | subject) + (1 | subject:eye), data = vision)
summary(mmod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: acuity ~ power + (1 | subject) + (1 | subject:eye)
## Data: vision
##
## REML criterion at convergence: 328.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4240 -0.3232 0.0109 0.4408 2.4662
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject:eye (Intercept) 10.27 3.205
## subject (Intercept) 21.53 4.640
## Residual 16.60 4.075
## Number of obs: 56, groups: subject:eye, 14; subject, 7
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 112.6429 2.2349 50.401
## power6/18 0.7857 1.5400 0.510
## power6/36 -1.0000 1.5400 -0.649
## power6/60 3.2857 1.5400 2.134
##
## Correlation of Fixed Effects:
## (Intr) pw6/18 pw6/36
## power6/18 -0.345
## power6/36 -0.345 0.500
## power6/60 -0.345 0.500 0.500
power
, we can use the Kenward-Roger adjusted F-test. We find the power
is not significant at the 0.05 level.mmod <- lmer(acuity ~ power + (1 | subject) + (1 | subject:eye), data = vision, REML = TRUE)
nmod <- lmer(acuity ~ (1 | subject) + (1 | subject:eye), data = vision, REML = TRUE)
KRmodcomp(mmod, nmod)
## F-test with Kenward-Roger approximation; time: 0.18 sec
## large : acuity ~ power + (1 | subject) + (1 | subject:eye)
## small : acuity ~ (1 | subject) + (1 | subject:eye)
## stat ndf ddf F.scaling p.value
## Ftest 2.8263 3.0000 39.0000 1 0.05106 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mmod_mle <- lmer(acuity ~ power + (1 | subject) + (1 | subject:eye), data = vision, REML = FALSE)
nmod_mle <- lmer(acuity ~ (1 | subject) + (1 | subject:eye), data = vision, REML = FALSE)
(lrtstat <- as.numeric(2 * (logLik(mmod_mle, REML = FALSE) - logLik(nmod_mle, REML = FALSE))))
## [1] 8.262412
pchisq(lrtstat, 3, lower.tail = FALSE)
## [1] 0.04088862
library(pbkrtest)
set.seed(123)
PBmodcomp(mmod, nmod) %>%
summary()
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00254113 (tol = 0.002, component 1)
## Bootstrap test; time: 18.38 sec;samples: 1000; extremes: 51;
## large : acuity ~ power + (1 | subject) + (1 | subject:eye)
## small : acuity ~ (1 | subject) + (1 | subject:eye)
## stat df ddf p.value
## LRT 8.2909 3.0000 0.04037 *
## PBtest 8.2909 0.05195 .
## Gamma 8.2909 0.04810 *
## Bartlett 7.7732 3.0000 0.05094 .
## F 2.7636 3.0000 2.9092 0.21739
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
power
becomes significant.mmodr <- lmer(acuity ~ power + (1 | subject) + (1 | subject:eye), data = vision, REML = TRUE, subset = -43)
nmodr <- lmer(acuity ~ (1 | subject) + (1 | subject:eye), data = vision, REML = TRUE, subset = -43)
KRmodcomp(mmodr, nmodr)
## F-test with Kenward-Roger approximation; time: 0.20 sec
## large : acuity ~ power + (1 | subject) + (1 | subject:eye)
## small : acuity ~ (1 | subject) + (1 | subject:eye)
## stat ndf ddf F.scaling p.value
## Ftest 3.5956 3.0000 38.0373 1 0.02212 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PBmodcomp(mmodr, nmodr) %>%
summary()
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00311466 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00210241 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00245455 (tol = 0.002, component 1)
## Bootstrap test; time: 20.05 sec;samples: 1000; extremes: 31;
## large : acuity ~ power + (1 | subject) + (1 | subject:eye)
## small : acuity ~ (1 | subject) + (1 | subject:eye)
## stat df ddf p.value
## LRT 10.2751 3.0000 0.01637 *
## PBtest 10.2751 0.03197 *
## Gamma 10.2751 0.03378 *
## Bartlett 9.1212 3.0000 0.02772 *
## F 3.4250 3.0000 2.8405 0.17732
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mmod <- lmer(acuity ~ power + (1 | subject) + (1 | subject:eye), data = vision)
confint(mmod, method = "boot")
## Computing bootstrap confidence intervals ...
##
## 39 message(s): boundary (singular) fit: see ?isSingular
## 2.5 % 97.5 %
## .sig01 0.000000e+00 5.562805
## .sig02 1.395231e-06 7.855640
## .sigma 3.162019e+00 4.831459
## (Intercept) 1.079321e+02 116.746657
## power6/18 -1.962919e+00 3.979653
## power6/36 -3.743456e+00 2.087987
## power6/60 4.277295e-01 6.554199
qqnorm(ranef(mmodr)$"subject:eye"[[1]], main = "")
plot(resid(mmodr) ~ fitted(mmodr), xlab = "Fitted", ylab = "Residuals")
abline(h=0)