Display system information and load tidyverse and faraway packages

sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Catalina 10.15.5
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] compiler_3.6.2  magrittr_1.5    tools_3.6.2     htmltools_0.4.0
##  [5] yaml_2.2.1      Rcpp_1.0.4.6    stringi_1.4.6   rmarkdown_2.1  
##  [9] knitr_1.28      stringr_1.4.0   xfun_0.13       digest_0.6.25  
## [13] rlang_0.4.5     evaluate_0.14
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.4
## ✓ tibble  3.0.0     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(faraway)

Additive models

LA ozone concentration

ozone <- as_tibble(ozone) %>% print()
## # A tibble: 330 x 10
##       O3    vh  wind humidity  temp   ibh   dpg   ibt   vis   doy
##    <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1     3  5710     4       28    40  2693   -25    87   250    33
##  2     5  5700     3       37    45   590   -24   128   100    34
##  3     5  5760     3       51    54  1450    25   139    60    35
##  4     6  5720     4       69    35  1568    15   121    60    36
##  5     4  5790     6       19    45  2631   -33   123   100    37
##  6     4  5790     3       25    55   554   -28   182   250    38
##  7     6  5700     3       73    41  2083    23   114   120    39
##  8     7  5700     3       59    44  2654    -2    91   120    40
##  9     4  5770     8       27    54  5000   -19    92   120    41
## 10     6  5720     3       44    51   111     9   173   150    42
## # … with 320 more rows
summary(ozone)
##        O3              vh            wind           humidity    
##  Min.   : 1.00   Min.   :5320   Min.   : 0.000   Min.   :19.00  
##  1st Qu.: 5.00   1st Qu.:5690   1st Qu.: 3.000   1st Qu.:47.00  
##  Median :10.00   Median :5760   Median : 5.000   Median :64.00  
##  Mean   :11.78   Mean   :5750   Mean   : 4.848   Mean   :58.13  
##  3rd Qu.:17.00   3rd Qu.:5830   3rd Qu.: 6.000   3rd Qu.:73.00  
##  Max.   :38.00   Max.   :5950   Max.   :11.000   Max.   :93.00  
##       temp            ibh              dpg              ibt       
##  Min.   :25.00   Min.   : 111.0   Min.   :-69.00   Min.   :-25.0  
##  1st Qu.:51.00   1st Qu.: 877.5   1st Qu.: -9.00   1st Qu.:107.0  
##  Median :62.00   Median :2112.5   Median : 24.00   Median :167.5  
##  Mean   :61.75   Mean   :2572.9   Mean   : 17.37   Mean   :161.2  
##  3rd Qu.:72.00   3rd Qu.:5000.0   3rd Qu.: 44.75   3rd Qu.:214.0  
##  Max.   :93.00   Max.   :5000.0   Max.   :107.00   Max.   :332.0  
##       vis             doy       
##  Min.   :  0.0   Min.   : 33.0  
##  1st Qu.: 70.0   1st Qu.:120.2  
##  Median :120.0   Median :205.5  
##  Mean   :124.5   Mean   :209.4  
##  3rd Qu.:150.0   3rd Qu.:301.8  
##  Max.   :350.0   Max.   :390.0
ozone %>%
  ggplot(mapping = aes(x = temp, y = O3)) + 
  geom_point(size = 1) + 
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ozone %>%
  ggplot(mapping = aes(x = ibh, y = O3)) + 
  geom_point(size = 1) + 
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ozone %>%
  ggplot(mapping = aes(x = ibt, y = O3)) + 
  geom_point(size = 1) + 
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

olm <- lm(O3 ~ temp + ibh + ibt, data = ozone)
summary(olm)
## 
## Call:
## lm(formula = O3 ~ temp + ibh + ibt, data = ozone)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.3224  -3.1913  -0.2591   2.9635  13.2860 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.7279822  1.6216623  -4.765 2.84e-06 ***
## temp         0.3804408  0.0401582   9.474  < 2e-16 ***
## ibh         -0.0011862  0.0002567  -4.621 5.52e-06 ***
## ibt         -0.0058215  0.0101793  -0.572    0.568    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.748 on 326 degrees of freedom
## Multiple R-squared:  0.652,  Adjusted R-squared:  0.6488 
## F-statistic: 203.6 on 3 and 326 DF,  p-value: < 2.2e-16
library(mgcv)
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## This is mgcv 1.8-31. For overview type 'help("mgcv-package")'.
ammgcv <- gam(O3 ~ s(temp) + s(ibh) + s(ibt), data = ozone)
summary(ammgcv)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## O3 ~ s(temp) + s(ibh) + s(ibt)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  11.7758     0.2382   49.44   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##           edf Ref.df      F  p-value    
## s(temp) 3.386  4.259 20.681 6.93e-16 ***
## s(ibh)  4.174  5.076  7.338 1.38e-06 ***
## s(ibt)  2.112  2.731  1.400    0.214    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.708   Deviance explained = 71.7%
## GCV = 19.346  Scale est. = 18.72     n = 330

Effective degrees of freedom is calculated as the trace of projection matrices. An approximate F test gives the significance of predictors.

plot(ammgcv, residuals = TRUE, select = 1)

plot(ammgcv, residuals = TRUE, select = 2)

plot(ammgcv, residuals = TRUE, select = 3)

am1 <- gam(O3 ~ s(temp) + s(ibh), data = ozone)
am2 <- gam(O3 ~    temp + s(ibh), data = ozone)
anova(am2, am1, test = "F")
## Analysis of Deviance Table
## 
## Model 1: O3 ~ temp + s(ibh)
## Model 2: O3 ~ s(temp) + s(ibh)
##   Resid. Df Resid. Dev     Df Deviance      F    Pr(>F)    
## 1    322.74       6950                                     
## 2    319.11       6054 3.6237   895.98 13.109 3.146e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# te(x1, x2) play the role x1 * x2 in regular lm/glm
amint <- gam(O3 ~ te(temp, ibh), data = ozone)
summary(amint)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## O3 ~ te(temp, ibh)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  11.7758     0.2385   49.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                edf Ref.df     F p-value    
## te(temp,ibh) 10.45  13.02 61.39  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.708   Deviance explained = 71.7%
## GCV = 19.439  Scale est. = 18.764    n = 330
anova(am1, amint, test = "F")
## Analysis of Deviance Table
## 
## Model 1: O3 ~ s(temp) + s(ibh)
## Model 2: O3 ~ te(temp, ibh)
##   Resid. Df Resid. Dev     Df Deviance      F Pr(>F)
## 1    319.11     6054.0                              
## 2    315.98     5977.3 3.1312   76.643 1.3044 0.2725
plot(amint, select = 1)

vis.gam(amint, theta = -45, color = "gray")

predict(am1, data.frame(temp = 60, ibh = 2000, ibt = 100), se = T)
## $fit
##        1 
## 10.60479 
## 
## $se.fit
##         1 
## 0.7106163
# extrapolation
predict(am1, data.frame(temp = 120, ibh = 2000, ibt = 100), se = T)
## $fit
##        1 
## 39.11324 
## 
## $se.fit
##        1 
## 5.256285

Generalized additive model

  • Combining GLM and additive model yields the generalized additive models (GAMs). The systematic component (linear predictor) becomes \[ \eta = \beta_0 + \sum_{j=1}^p f_j(X_j). \]

  • For example, we an fit the ozone data using a Poisson additive model. Setting scale = -1 tells the function to fit overdispersion parameter as well.

gammgcv <- gam(O3 ~ s(temp) + s(ibh) + s(ibt), family = poisson, scale = -1, data = ozone)
summary(gammgcv)
## 
## Family: poisson 
## Link function: log 
## 
## Formula:
## O3 ~ s(temp) + s(ibh) + s(ibt)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.29269    0.02305   99.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##           edf Ref.df      F  p-value    
## s(temp) 3.816  4.736 16.803 3.97e-14 ***
## s(ibh)  3.737  4.568 10.594 1.13e-08 ***
## s(ibt)  1.348  1.623  0.238    0.635    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.712   Deviance explained = 72.9%
## GCV = 1.5062  Scale est. = 1.4585    n = 330
plot(gammgcv, residuals = TRUE, select = 1)

plot(gammgcv, residuals = TRUE, select = 2)

plot(gammgcv, residuals = TRUE, select = 3)

Generalized additive mixed model (GAMM)

egamm <- gamm(seizures ~ offset(timeadj) + treat * expind + s(age), 
              family   = poisson, 
              random   = list(id = ~1), 
              data     = epilepsy,  
              subset   = (id != 49))
## 
##  Maximum number of PQL iterations:  20
## iteration 1
## iteration 2
## iteration 3
## iteration 4
summary(egamm$gam)
## 
## Family: poisson 
## Link function: log 
## 
## Formula:
## seizures ~ offset(timeadj) + treat * expind + s(age)
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -4.871650   0.140365 -34.707  < 2e-16 ***
## treat        -0.007726   0.195377  -0.040    0.968    
## expind        4.725542   0.047200 100.117  < 2e-16 ***
## treat:expind -0.302384   0.070194  -4.308 2.27e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##        edf Ref.df     F p-value
## s(age)   1      1 0.339   0.561
## 
## R-sq.(adj) =  0.324   
##   Scale est. = 1         n = 290

Multivariate adaptive regression splines (MARS) (not covered in this course)

library(earth)
## Loading required package: Formula
## Loading required package: plotmo
## Loading required package: plotrix
## Loading required package: TeachingDemos
mmod <- earth(O3 ~ ., data = ozone)
summary(mmod)
## Call: earth(formula=O3~., data=ozone)
## 
##                coefficients
## (Intercept)      14.1595171
## h(5860-vh)       -0.0137728
## h(wind-3)        -0.3377222
## h(54-humidity)   -0.1349547
## h(temp-58)        0.2791320
## h(1105-ibh)      -0.0033837
## h(dpg-10)        -0.0991581
## h(ibt-120)        0.0326330
## h(150-vis)        0.0231881
## h(96-doy)        -0.1105145
## h(doy-96)         0.0406468
## h(doy-158)       -0.0836732
## 
## Selected 12 of 20 terms, and 9 of 9 predictors
## Termination condition: Reached nk 21
## Importance: temp, humidity, dpg, doy, vh, ibh, vis, ibt, wind
## Number of terms at each degree of interaction: 1 11 (additive model)
## GCV 14.61004    RSS 4172.671    GRSq 0.7730502    RSq 0.8023874
mmod <- earth(O3 ~ ., ozone, nk = 7)
summary(mmod)
## Call: earth(formula=O3~., data=ozone, nk=7)
## 
##             coefficients
## (Intercept)   12.3746135
## h(58-temp)    -0.1084870
## h(temp-58)     0.4962117
## h(ibh-1105)   -0.0012172
## h(96-doy)     -0.0988262
## h(doy-96)     -0.0130345
## 
## Selected 6 of 7 terms, and 3 of 9 predictors
## Termination condition: Reached nk 7
## Importance: temp, ibh, doy, vh-unused, wind-unused, humidity-unused, ...
## Number of terms at each degree of interaction: 1 5 (additive model)
## GCV 18.3112    RSS 5646.565    GRSq 0.715557    RSq 0.7325855
mmod <- earth(O3 ~ ., ozone, nk = 7, degree = 2)
summary(mmod)
## Call: earth(formula=O3~., data=ozone, degree=2, nk=7)
## 
##                             coefficients
## (Intercept)                   10.0034999
## h(58-temp)                    -0.1065473
## h(temp-58)                     0.4562717
## h(ibh-1105)                   -0.0011576
## h(55-humidity) * h(temp-58)   -0.0130712
## h(humidity-55) * h(temp-58)    0.0059811
## 
## Selected 6 of 7 terms, and 3 of 9 predictors
## Termination condition: Reached nk 7
## Importance: temp, ibh, humidity, vh-unused, wind-unused, dpg-unused, ...
## Number of terms at each degree of interaction: 1 3 2
## GCV 18.38791    RSS 5581.691    GRSq 0.7143654    RSq 0.7356579
plotmo(mmod)
##  plotmo grid:    vh wind humidity temp    ibh dpg   ibt vis   doy
##                5760    5       64   62 2112.5  24 167.5 120 205.5