\documentclass[aspectratio=169]{beamer}
\mode<presentation>
% \usetheme{Warsaw}
% \usetheme{Goettingen}
\usetheme{Hannover}
% \useoutertheme{default}
% \useoutertheme{infolines}
\useoutertheme{sidebar}
\usecolortheme{dolphin}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{enumerate}
% some bold math symbols
\newcommand{\Cov}{\mathrm{Cov}}
\newcommand{\Cor}{\mathrm{Cor}}
\newcommand{\Var}{\mathrm{Var}}
\newcommand{\brho}{\boldsymbol{\rho}}
\newcommand{\bSigma}{\boldsymbol{\Sigma}}
\newcommand{\btheta}{\boldsymbol{\theta}}
\newcommand{\bbeta}{\boldsymbol{\beta}}
\newcommand{\bmu}{\boldsymbol{\mu}}
\newcommand{\bW}{\mathbf{W}}
\newcommand{\one}{\mathbf{1}}
\newcommand{\bH}{\mathbf{H}}
\newcommand{\by}{\mathbf{y}}
\newcommand{\bolde}{\mathbf{e}}
\newcommand{\bx}{\mathbf{x}}
\newcommand{\cpp}[1]{\texttt{#1}}
\title{Mathematical Biostatistics Boot Camp: Lecture 8, Asymptotics}
\author{Brian Caffo}
\date{\today}
\institute[Department of Biostatistics]{
Department of Biostatistics \\
Johns Hopkins Bloomberg School of Public Health\\
Johns Hopkins University
}
\begin{document}
\frame{\titlepage}
\section{Table of contents}
\frame{
\frametitle{Table of contents}
\tableofcontents
}
\section{Limits}
\begin{frame}\frametitle{Numerical limits}
\begin{itemize}
\item Imagine a sequence
\begin{itemize}
\item $a_1 = .9$,
\item $a_2 = .99$,
\item $a_3 = .999$, $\hdots$
\end{itemize}
\item Clearly this sequence converges to $1$
\item Definition of a limit: for any fixed distance, we can find a
  point in the sequence beyond which the sequence stays closer to the
  limit than that distance
\item $|a_n - 1| = 10^{-n}$
\end{itemize}
\end{frame}
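% A worked instance of the limit definition for the sequence above; the
% tolerance $\epsilon = 10^{-3}$ is chosen purely for illustration.
\begin{frame}\frametitle{The definition at work}
\begin{itemize}
\item For the sequence above, $a_n = 1 - 10^{-n}$, so $|a_n - 1| = 10^{-n}$
\item Take, say, the distance $\epsilon = 10^{-3}$; then for every $n \geq 4$
$$
|a_n - 1| = 10^{-n} \leq 10^{-4} < 10^{-3} = \epsilon
$$
\item The same works for any distance $\epsilon > 0$: pick any $N$ with
  $10^{-N} < \epsilon$ and the sequence stays within $\epsilon$ of $1$ from
  index $N$ onward
\item Hence $a_n \rightarrow 1$
\end{itemize}
\end{frame}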
\begin{frame}\frametitle{Limits of random variables}
\begin{itemize}
\item The problem is harder for random variables
\item Consider $\bar X_n$, the sample average of the first $n$ of
  a collection of iid observations
\begin{itemize}
\item Example $\bar X_n$ could be the average of the result of
$n$ coin flips (i.e. the sample proportion of heads)
\end{itemize}
\item We say that $\bar X_n$ {\bf converges in probability} to a limit
if for any fixed distance the {\em probability} of $\bar X_n$ being
closer (further away) than that distance from the limit converges to
one (zero)
\item $P(|\bar X_n - \mbox{limit}| < \epsilon) \rightarrow 1$
\end{itemize}
\end{frame}
\section{LLN}
\begin{frame}\frametitle{The Law of Large Numbers}
\begin{itemize}
\item Establishing that a random sequence converges to a limit is hard
\item Fortunately, we have a theorem that does all the work for us, called
the {\bf Law of Large Numbers}
\item The law of large numbers states that if $X_1, \ldots, X_n$ are iid
from a population with mean $\mu$ and variance $\sigma^2$ then $\bar
X_n$ converges in probability to $\mu$
\item (There are many variations on the LLN; we are using a particularly
lazy one)
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Proof using Chebyshev's inequality}
\begin{itemize}
\item Recall that Chebyshev's inequality states that the probability that a random
  variable is more than $k$ standard deviations from its mean is at most $1/k^2$
\item Therefore for the sample mean
$$
P\left\{|\bar X_n - \mu| \geq k ~\mbox{sd}(\bar X_n)\right\} \leq 1 / k ^ 2
$$
\item Pick a distance $\epsilon$ and let $k = \epsilon / \mbox{sd}(\bar X_n)$
$$
P(|\bar X_n - \mu| \geq \epsilon) \leq \frac{\mbox{sd}(\bar X_n)^2}{\epsilon^2}
= \frac{\sigma^2}{n\epsilon^2}
$$
\item The right hand side goes to $0$ as $n \rightarrow \infty$; hence, for
  any fixed $\epsilon$, the probability that $\bar X_n$ is farther than
  $\epsilon$ from $\mu$ vanishes, which is exactly convergence in probability
\end{itemize}
\end{frame}
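% A numerical illustration of the Chebyshev bound from the previous slide;
% the values of $\sigma^2$, $\epsilon$ and $n$ are hypothetical and chosen
% only to make the bound concrete.
\begin{frame}\frametitle{The bound in numbers}
\begin{itemize}
\item Suppose, for illustration, that $\sigma^2 = 1$ and take $\epsilon = 0.1$
\item The bound gives
$$
P(|\bar X_n - \mu| \geq 0.1) \leq \frac{\sigma^2}{n\epsilon^2} = \frac{1}{n \times 0.01} = \frac{100}{n}
$$
\item With $n = 10{,}000$ observations the bound is $0.01$, so $\bar X_n$ is
  within $0.1$ of $\mu$ with probability at least $0.99$
\item The bound is crude (it is vacuous for $n \leq 100$), but it goes to $0$
  for any fixed $\epsilon$, which is all the LLN needs
\end{itemize}
\end{frame}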
\begin{frame}
\includegraphics[width=3.5in]{lln.pdf}
\end{frame}
\begin{frame}\frametitle{Useful facts}
\begin{itemize}
\item Continuous functions of convergent random sequences converge to
  the function evaluated at the limit
\item This includes sums, products, differences, ...
\item Example: $(\bar X_n) ^2$ converges to $\mu^2$
\item Notice that this is different from $(\sum X_i^2) / n$, which converges
  to $E[X_i^2] = \sigma^2 + \mu^2$
\item We can use this to prove that the sample variance converges to
$\sigma^2$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Continued}
\begin{eqnarray*}
\sum (X_i - \bar X_n)^2 / (n - 1) & = & \frac{\sum X_i^2}{n - 1} - \frac{n (\bar X_n)^2}{n - 1} \\ \\
& = & \frac{n}{n-1}\times \frac{\sum X_i^2}{n} - \frac{n}{n-1} \times (\bar X_n)^ 2\\ \\
& \stackrel{p}{\rightarrow} & 1 \times (\sigma^2 + \mu^2) - 1 \times \mu^2 \\ \\
& = & \sigma^2
\end{eqnarray*}
Hence we also know that the sample standard deviation converges to
$\sigma$
\end{frame}
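% The square-root step mentioned above, spelled out; it is just the
% continuous mapping fact from the ``Useful facts'' slide applied to the
% sample variance.
\begin{frame}\frametitle{The sample standard deviation}
Since $x \mapsto \sqrt{x}$ is continuous,
$$
S_n = \sqrt{\frac{\sum (X_i - \bar X_n)^2}{n - 1}} \stackrel{p}{\rightarrow} \sqrt{\sigma^2} = \sigma.
$$
\end{frame}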
\begin{frame}\frametitle{Discussion}
\begin{itemize}
\item An estimator is {\bf consistent} if it converges in probability to what you want to estimate
\item The LLN basically states that the sample mean is consistent
\item We just showed that the sample variance and the sample standard deviation are
consistent as well
\item Recall that the sample mean and the sample variance are also unbiased
\item (The sample standard deviation is biased, by the way)
\end{itemize}
\end{frame}
\section{CLT}
\begin{frame}\frametitle{The Central Limit Theorem}
\begin{itemize}
\item The {\bf Central Limit Theorem} (CLT) is one of the most important theorems in statistics
\item For our purposes, the CLT states that the distribution of
averages of iid variables, properly normalized, becomes that of a
standard normal as the sample size increases
\item The CLT applies in an endless variety of settings
\end{itemize}
\end{frame}
\begin{frame}\frametitle{The CLT}
\begin{itemize}
\item Let $X_1,\ldots,X_n$ be a collection of iid random variables
with mean $\mu$ and variance $\sigma^2$
\item Let $\bar X_n$ be their sample average
\item Then
\begin{equation*}
P\left( \frac{\bar X_n - \mu}{\sigma / \sqrt{n}} \leq z \right) \rightarrow \Phi(z)
\end{equation*}
\item Notice the form of the normalized quantity
$$
\frac{\bar X_n - \mu}{\sigma / \sqrt{n}} =
\frac{\mbox{Estimate} - \mbox{Mean of estimate}}{\mbox{Std. Err. of estimate}}.
$$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Example}
\begin{itemize}
\item Simulate a standard normal random variable by rolling $n$
  (six sided) dice
\item Let $X_i$ be the outcome for die $i$
\item Then note that $\mu = E[X_i] = 3.5$
\item $\Var(X_i) = 35/12 \approx 2.92$
\item SE $\sqrt{2.92 / n} = 1.71 / \sqrt{n}$
\item Standardized mean
$$
\frac{\bar X_n - 3.5}{1.71/\sqrt{n}}
$$
\end{itemize}
\end{frame}
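% The variance quoted on the previous slide, worked out; these are standard
% computations for a fair six sided die.
\begin{frame}\frametitle{Where $2.92$ comes from}
\begin{itemize}
\item Each face $1, \ldots, 6$ has probability $1/6$
\item $E[X_i] = (1 + 2 + \cdots + 6)/6 = 3.5$
\item $E[X_i^2] = (1 + 4 + 9 + 16 + 25 + 36)/6 = 91/6$
\item Therefore
$$
\Var(X_i) = E[X_i^2] - (E[X_i])^2 = \frac{91}{6} - 3.5^2 = \frac{35}{12} \approx 2.92
$$
\item and $\mbox{sd}(X_i) = \sqrt{35/12} \approx 1.71$
\end{itemize}
\end{frame}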
\begin{frame}
\includegraphics[width=3.5in]{die.pdf}
\end{frame}
\begin{frame}\frametitle{Coin CLT}
\begin{itemize}
\item Let $X_i$ be the $0$ or $1$ result of the $i^{th}$ flip of a
possibly unfair coin
\item The sample proportion, say $\hat p$, is the average of the coin flips
\item $E[X_i] = p$ and $\Var(X_i) = p(1-p)$
\item Standard error of the mean is $\sqrt{p(1-p)/n}$
\item Then
$$
\frac{\hat p - p}{\sqrt{p(1-p)/n}}
$$
will be approximately normally distributed
\end{itemize}
\end{frame}
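% A numerical illustration of the coin CLT; the fair coin and $n = 100$
% flips are hypothetical round numbers chosen for easy arithmetic.
\begin{frame}\frametitle{Coin CLT in numbers}
\begin{itemize}
\item Take, for illustration, a fair coin ($p = .5$) and $n = 100$ flips
\item Then $\sqrt{p(1-p)/n} = \sqrt{.25/100} = .05$
\item The CLT approximation says
$$
P\left(\frac{\hat p - .5}{.05} \leq z\right) \approx \Phi(z)
$$
\item For instance,
$P(\hat p \geq .55) = P\left(\frac{\hat p - .5}{.05} \geq 1\right) \approx 1 - \Phi(1) \approx .16$
\end{itemize}
\end{frame}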
\begin{frame}
\includegraphics[width=3.5in]{coinCLT.pdf}
\end{frame}
\begin{frame}\frametitle{CLT in practice}
\begin{itemize}
\item In practice the CLT is mostly useful as an approximation
$$
P\left( \frac{\bar X_n - \mu}{\sigma / \sqrt{n}} \leq z \right) \approx \Phi(z).
$$
\item Recall $1.96$ is a good approximation to the $.975^{th}$
quantile of the standard normal
\item Consider
\begin{eqnarray*}
.95 & \approx & P\left( -1.96 \leq \frac{\bar X_n - \mu}{\sigma / \sqrt{n}} \leq 1.96 \right)\\ \\
& = & P\left(\bar X_n +1.96 \sigma/\sqrt{n} \geq \mu \geq \bar X_n - 1.96\sigma/\sqrt{n} \right),\\
\end{eqnarray*}
\end{itemize}
\end{frame}
\section{Confidence intervals}
\begin{frame}\frametitle{Confidence intervals}
\begin{itemize}
\item Therefore, according to the CLT, the probability that the random
  interval $$\bar X_n \pm z_{1-\alpha/2}\sigma / \sqrt{n}$$ contains $\mu$ is
  approximately $100(1-\alpha)\%$, where $z_{1-\alpha/2}$ is the $1-\alpha/2$ quantile of the
  standard normal distribution
\item For $\alpha = .05$, this is called a 95\% {\bf confidence interval} for $\mu$
\item {\bf Slutsky's theorem} allows us to replace the unknown $\sigma$ with $s$
\end{itemize}
\end{frame}
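% A worked confidence interval; the sample size, sample mean and sample
% standard deviation are hypothetical numbers used only to illustrate the
% formula.
\begin{frame}\frametitle{Worked example}
\begin{itemize}
\item Suppose, purely for illustration, that $n = 100$, $\bar X_n = 5$ and $s = 2$
\item The estimated standard error is $s / \sqrt{n} = 2 / 10 = .2$
\item An approximate 95\% confidence interval for $\mu$ is
$$
\bar X_n \pm 1.96 \, s/\sqrt{n} = 5 \pm 1.96 \times .2 = 5 \pm .392 \approx [4.61,~5.39]
$$
\item The statement is about the procedure: over repeated samples, about
  95\% of intervals constructed this way would contain $\mu$
\end{itemize}
\end{frame}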
\begin{frame}\frametitle{Sample proportions}
\begin{itemize}
\item In the event that each $X_i$ is $0$ or $1$ with common success probability $p$
then $\sigma^2 = p(1 - p)$
\item The interval takes the form
$$
\hat p \pm z_{1 - \alpha/2} \sqrt{\frac{p(1 - p)}{n}}
$$
\item Replacing $p$ by $\hat p$ in the standard error results in what is called a Wald confidence
interval for $p$
\item Also note that $p(1-p) \leq 1/4$ for $0 \leq p \leq 1$
\item Let $\alpha = .05$ so that $z_{1 -\alpha/2} = 1.96 \approx 2$ then
$$
2 \sqrt{\frac{p(1 - p)}{n}} \leq 2 \sqrt{\frac{1}{4n}} = \frac{1}{\sqrt{n}}
$$
\item Therefore $\hat p \pm \frac{1}{\sqrt{n}}$ is a quick CI estimate for $p$
\end{itemize}
\end{frame}
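% A numerical illustration of the Wald and quick intervals; the counts are
% hypothetical.
\begin{frame}\frametitle{Worked example}
\begin{itemize}
\item Suppose, for illustration, $60$ successes in $n = 100$ trials, so $\hat p = .6$
\item Wald interval:
$$
.6 \pm 1.96 \sqrt{\frac{.6 \times .4}{100}} = .6 \pm 1.96 \times .049 \approx [.50,~.70]
$$
\item Quick interval: $\hat p \pm 1/\sqrt{n} = .6 \pm .1 = [.5,~.7]$
\item The two agree closely here because $\hat p$ is near $1/2$, where
  $p(1-p)$ attains its maximum of $1/4$
\end{itemize}
\end{frame}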
\end{document}