exm-plot-fits.qmd

#### Example: PLOS Medicine title length data

(Adapted from @dobson4e, §6.7.1)

```{r}
#| fig-cap: "Number of authors versus title length in *PLOS Medicine* articles"
#| label: fig-plos
# Load the PLOS data set from the dobson package and draw the base
# scatterplot that the later figures in this example are built on.
data(PLOS, package = "dobson")
library(ggplot2)

# The legend settings (empty title, bottom position, three columns) only
# become visible once later chunks add colour-mapped fit lines.
fig1 = ggplot(PLOS, aes(x = authors, y = nchar)) +
  geom_point() +
  labs(col = "") +
  guides(col = guide_legend(ncol = 3)) +
  theme(legend.position = "bottom")
fig1
```

---

##### Linear fit

```{r}
# Straight-line model: title length (nchar) regressed on number of authors.
lm_PLOS_linear =
  lm(formula = nchar ~ authors, data = PLOS)
```

```{r}
#| fig-cap: "Number of authors versus title length in *PLOS Medicine*, with linear model fit"
#| label: fig-plos-lm
#| layout-ncol: 2
#| fig-subcap: 
#| - "Data and fit"
#| - "Residuals vs fitted"

# Overlay the straight-line fit on the base scatterplot.
# The formula is spelled out, as in the later polynomial and log layers, so
# that ggplot2 does not print its "`geom_smooth()` using formula = 'y ~ x'"
# message for this figure and for every later figure built on top of fig2.
# The constant colour aesthetic gives the line its own legend entry.
fig2 = fig1 +
  geom_smooth(
    method = "lm",
    fullrange = TRUE,
    formula = y ~ x,
    aes(col = "lm(y ~ x)")
  )
fig2

# Residuals-vs-fitted diagnostic for the linear model (ggfortify's autoplot
# method for lm objects; `which = 1` selects that single panel).
library(ggfortify)
autoplot(lm_PLOS_linear, which = 1, ncol = 1)
```

---

##### Quadratic fit {.smaller}

```{r}
# Quadratic model; I() makes `^` mean arithmetic squaring inside the formula.
lm_PLOS_quad =
  lm(formula = nchar ~ authors + I(authors^2), data = PLOS)
```

```{r}
#| fig-cap: "Number of authors versus title length in *PLOS Medicine*, with quadratic model fit"
#| label: fig-plos-lm-quad
#| layout-ncol: 2
#| fig-subcap: 
#| - "Data and fit"
#| - "Residuals vs fitted"
# Add the quadratic fit as a further smoother on top of the linear-fit figure.
fig3 = fig2 +
  geom_smooth(
    mapping = aes(col = "lm(y ~ x + I(x^2))"),
    formula = y ~ x + I(x ^ 2),
    method = "lm",
    fullrange = TRUE
  )
fig3

# Residuals-vs-fitted diagnostic for the quadratic model.
autoplot(lm_PLOS_quad, which = 1, ncol = 1)
```

---

##### Linear versus quadratic fits

```{r}
#| fig-cap: "Residuals versus fitted plot for linear and quadratic fits to `PLOS` data"
#| label: fig-plos-lm-resid2
#| layout-ncol: 2
#| fig-subcap: 
#| - "Linear"
#| - "Quadratic"
library(ggfortify)
# Residuals-vs-fitted panel for the linear model (first sub-figure).
autoplot(lm_PLOS_linear, which = 1, ncol = 1)

# Same diagnostic for the quadratic model (second sub-figure).
autoplot(lm_PLOS_quad, which = 1, ncol = 1)
```

---

##### Cubic fit

```{r}
# Cubic model: adds a third-degree term to the quadratic specification.
lm_PLOS_cub =
  lm(formula = nchar ~ authors + I(authors^2) + I(authors^3), data = PLOS)
```

```{r}
#| fig-cap: "Number of authors versus title length in *PLOS Medicine*, with cubic model fit"
#| label: fig-plos-lm-cubic
#| layout-ncol: 2
#| fig-subcap: 
#| - "Data and fit"
#| - "Residuals vs fitted"
# Add the cubic fit on top of the figure holding the linear and quadratic fits.
fig4 = fig3 +
  geom_smooth(
    mapping = aes(col = "lm(y ~ x + I(x^2) + I(x ^ 3))"),
    formula = y ~ x + I(x ^ 2) + I(x ^ 3),
    method = "lm",
    fullrange = TRUE
  )
fig4

# Residuals-vs-fitted diagnostic for the cubic model.
autoplot(lm_PLOS_cub, which = 1, ncol = 1)


```

---

##### Logarithmic fit

```{r}
# Title length regressed on the natural log of the number of authors.
lm_PLOS_log = lm(
  nchar ~ log(authors),
  data = PLOS
)
```

```{r}
#| layout-ncol: 2
#| fig-cap: "Number of authors versus title length in *PLOS Medicine*, with logarithmic model fit"
#| label: fig-plos-log
#| fig-subcap: 
#| - "Data and fit"
#| - "Residuals vs fitted"
# Add the logarithmic fit alongside the three polynomial fits.
fig5 = fig4 +
  geom_smooth(
    mapping = aes(col = "lm(y ~ log(x))"),
    formula = y ~ log(x),
    method = "lm",
    fullrange = TRUE
  )
fig5

# Residuals-vs-fitted diagnostic for the logarithmic model.
autoplot(lm_PLOS_log, which = 1, ncol = 1)
```

---

##### Model selection {.smaller}

```{r}
#| tbl-cap: "linear vs quadratic"
#| label: tbl-plos-lin-quad-anova
# F-test of the linear model against the quadratic model that nests it.
anova(lm_PLOS_linear, lm_PLOS_quad)
```


```{r}
#| tbl-cap: "quadratic vs cubic"
#| label: tbl-plos-quad-cub-anova
# F-test of the quadratic model against the cubic model that nests it.
anova(lm_PLOS_quad, lm_PLOS_cub)
```

---

##### AIC/BIC  {.smaller}

```{r}
#| code-fold: show
#| label: plos-aic-quad-cub
# AIC of the quadratic and cubic polynomial fits, printed one after the other.
AIC(lm_PLOS_quad)
AIC(lm_PLOS_cub)
```

```{r}
#| code-fold: show
#| label: plos-aic
# AIC of the cubic fit next to the logarithmic fit.
AIC(lm_PLOS_cub)
AIC(lm_PLOS_log)
```

```{r}
#| code-fold: show
#| label: plos-bic
# BIC for the same cubic-versus-logarithmic comparison.
BIC(lm_PLOS_cub)
BIC(lm_PLOS_log)
```

---

##### Extrapolation is dangerous

```{r}
#| fig-cap: "Number of authors versus title length in *PLOS Medicine*"
#| label: fig-plos-multifit

# Stretch the x-axis to 0-60 authors. Every smoother was added with
# `fullrange = TRUE`, so the fitted curves are drawn across the whole widened
# axis (presumably well past the observed author counts -- check the data).
fig_all = fig5 + xlim(0, 60)
fig_all
```