\documentclass[aspectratio=169]{beamer}
\mode<presentation>
% \usetheme{Warsaw}
% \usetheme{Goettingen}
\usetheme{Hannover}
% \useoutertheme{default}
% \useoutertheme{infolines}
\useoutertheme{sidebar}
\usecolortheme{dolphin}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{enumerate}
% some bold math symbols
\newcommand{\Cov}{\mathrm{Cov}}
\newcommand{\Cor}{\mathrm{Cor}}
\newcommand{\Var}{\mathrm{Var}}
\newcommand{\brho}{\boldsymbol{\rho}}
\newcommand{\bSigma}{\boldsymbol{\Sigma}}
\newcommand{\btheta}{\boldsymbol{\theta}}
\newcommand{\bbeta}{\boldsymbol{\beta}}
\newcommand{\bmu}{\boldsymbol{\mu}}
\newcommand{\bW}{\mathbf{W}}
\newcommand{\one}{\mathbf{1}}
\newcommand{\bH}{\mathbf{H}}
\newcommand{\by}{\mathbf{y}}
\newcommand{\bolde}{\mathbf{e}}
\newcommand{\bx}{\mathbf{x}}
\newcommand{\cpp}[1]{\texttt{#1}}
\title{Mathematical Biostatistics Boot Camp: Lecture 5, Conditional Probability}
\author{Brian Caffo}
\date{\today}
\institute[Department of Biostatistics]{
Department of Biostatistics \\
Johns Hopkins Bloomberg School of Public Health\\
Johns Hopkins University
}
\begin{document}
\frame{\titlepage}
\section{Table of contents}
\frame{
\frametitle{Table of contents}
\tableofcontents
}
\section{Conditional probability}
\begin{frame}\frametitle{Conditional probability, motivation}
\begin{itemize}
\item The probability of getting a one when rolling a (standard) die
is usually assumed to be one sixth
\item Suppose you were given the extra information that the die roll
was an odd number (hence 1, 3 or 5)
\item {\em Conditional on this new information}, the probability of a
one is now one third
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Conditional probability, definition}
\begin{itemize}
\item Let $B$ be an event so that $P(B) > 0$
\item Then the conditional probability of an event $A$ given that $B$
has occurred is
$$
P(A ~|~ B) = \frac{P(A \cap B)}{P(B)}
$$
\item Notice that if $A$ and $B$ are independent, then
$$
P(A ~|~ B) = \frac{P(A) P(B)}{P(B)} = P(A)
$$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Example}
\begin{itemize}
\item Consider our die roll example
\item $B = \{1, 3, 5\}$
\item $A = \{1\}$
\begin{eqnarray*}
P(\mbox{one given that roll is odd}) & = & P(A ~|~ B) \\ \\
& = & \frac{P(A \cap B)}{P(B)} \\ \\
& = & \frac{P(A)}{P(B)} \\ \\
& = & \frac{1/6}{3/6} = \frac{1}{3}
\end{eqnarray*}
\end{itemize}
\end{frame}
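\begin{frame}[fragile]\frametitle{Example, a quick simulation check}
\begin{itemize}
\item The conditional probability above can also be approximated by simulation;
  the following is an illustrative sketch (shown here in Python, though any
  language would do)
\end{itemize}
\begin{verbatim}
import random

# simulate many die rolls and estimate P(roll is 1 | roll is odd)
rolls = [random.randint(1, 6) for _ in range(100000)]
odd_rolls = [r for r in rolls if r % 2 == 1]
print(sum(r == 1 for r in odd_rolls) / len(odd_rolls))  # about 1/3
\end{verbatim}
\end{frame}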
\section{Conditional densities}
\begin{frame}\frametitle{Conditional densities and mass functions}
\begin{itemize}
\item Conditional densities or mass functions describe the distribution of one
variable given the value of another
\item Let $f(x,y)$ be a bivariate density or mass function for random variables
$X$ and $Y$
\item Let $f(x)$ and $f(y)$ be the associated marginal mass functions
or densities obtained by disregarding the other variable
$$
f(y) = \int f(x, y)dx ~~~~\mbox{or}~~~~ f(y) = \sum_x f(x, y).
$$
\item Then the {\bf conditional} density or mass function {\em given that $Y = y$} is given by
$$
f(x ~|~ y) = f(x, y) / f(y)
$$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Notes}
\begin{itemize}
\item It is easy to see that, in the discrete case, this definition agrees
exactly with the definition of conditional probability for events, where
$A$ is the event that $X = x$ and $B$ is the event that $Y = y$
\item The continuous definition is a little harder to motivate, since
the events $X = x$ and $Y = y$ each have probability 0
\item However, a useful motivation can be obtained by taking the
appropriate limits, as follows
\item Define $A = \{X \leq x\}$ while $B = \{Y \in [y, y + \epsilon]\}$
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Continued}
\begin{eqnarray*}
P(X \leq x ~|~ Y \in [y, y + \epsilon]) & = & P(A ~|~ B) = \frac{P(A \cap B)}{P(B)} \\ \\ \\
& = & \frac{P(X \leq x, Y \in [y, y + \epsilon])}{P(Y \in [y, y + \epsilon])} \\ \\ \\
& = & \frac{\int_{y}^{y+\epsilon}\int_{-\infty}^{x}f(x,y)dxdy}
{\int_{y}^{y+\epsilon} f(y) dy} \\ \\ \\
& = & \frac{\frac{1}{\epsilon}\int_{y}^{y+\epsilon}\int_{-\infty}^{x}f(x,y)dxdy}
{\frac{1}{\epsilon}\int_{y}^{y+\epsilon} f(y) dy}
\end{eqnarray*}
\end{frame}
\begin{frame}
\frametitle{Continued}
\begin{eqnarray*}
& = & \frac{\frac{\int_{-\infty}^{y+\epsilon}\int_{-\infty}^{x}f(x,y)dxdy -
\int_{-\infty}^{y}\int_{-\infty}^{x}f(x,y)dxdy}{\epsilon}}
{\frac{\int_{-\infty}^{y+\epsilon} f(y) dy - \int_{-\infty}^{y} f(y) dy}{\epsilon}}\\ \\ \\
& = & \frac{\frac{g_1(y + \epsilon) - g_1(y)}{\epsilon}}{\frac{g_2(y + \epsilon) - g_2(y)}{\epsilon}}
\end{eqnarray*}
where
$$
g_1(y) = \int_{-\infty}^{y}\int_{-\infty}^{x}f(x,y)dxdy ~~\mbox{and}~~
g_2(y) = \int_{-\infty}^{y} f(y) dy.
$$
\end{frame}
\begin{frame}
\begin{itemize}
\item Notice that the numerator and denominator tend to
  $g_1'(y)$ and $g_2'(y)$, respectively, as $\epsilon$ gets smaller and smaller
\item Hence we have that the conditional distribution function is
$$
P(X \leq x ~|~ Y = y) = \frac{\int_{-\infty}^x f(x, y)dx}{f(y)}.
$$
\item Now, taking the derivative with respect to $x$ yields the
conditional density
$$
f(x ~|~ y) = \frac{f(x, y)}{f(y)}
$$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Geometrically}
\begin{itemize}
\item Geometrically, the conditional density is obtained by taking the
relevant slice of the joint density and appropriately renormalizing it
\item This idea extends to slices along any other line, or even along non-linear curves
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Example}
\begin{itemize}
\item Let $f(x, y) = ye^{-xy - y}$ for $0 \leq x$ and $0 \leq y$
\item Then note
$$
f(y) = \int_{0}^\infty f(x, y)dx = e^{-y}\int_{0}^\infty ye^{-xy}dx = e^{-y}
$$
\item Therefore
$$
f(x~|~ y) = f(x, y) / f(y) = \frac{ ye^{-xy - y}}{e^{-y}} = ye^{-xy}
$$
\end{itemize}
\end{frame}
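\begin{frame}[fragile]\frametitle{Example, numerical check}
\begin{itemize}
\item As a sanity check, the marginal and conditional densities above can be
  verified numerically for a fixed value of $y$; a rough sketch in Python
  (assuming \texttt{scipy} is available):
\end{itemize}
\begin{verbatim}
import numpy as np
from scipy.integrate import quad

y = 2.0  # any fixed y > 0
# marginal: integrate f(x, y) = y exp(-xy - y) over x in [0, infinity)
marginal, _ = quad(lambda x: y * np.exp(-x * y - y), 0, np.inf)
print(marginal, np.exp(-y))   # both approximately exp(-y)
# the conditional density y exp(-xy) should integrate to one
total, _ = quad(lambda x: y * np.exp(-x * y), 0, np.inf)
print(total)                  # approximately 1
\end{verbatim}
\end{frame}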
%\begin{frame}\frametitle{Example}
%\begin{itemize}
%\item Let $f(x, y) = 1 / (\pi r^2)$ for $x^2 + y^2 \leq r^2$
%\item $X$ and $Y$ are uniform on a disk of radius $r$
%\item What is the conditional density of $X$ given that $Y=0$?
%\item Probably easiest to think geometrically
%$$
%f(x ~|~ y = 0) \propto 1 ~~\mbox{for}~~ -r \leq x \leq r
%$$
%\item Therefore
%$$
%f(x ~|~ y = 0) = \frac{1}{2r}~~\mbox{for}~~ -r \leq x \leq r
%$$
%\end{itemize}
%\end{frame}
\section{Bayes' Rule}
\begin{frame}\frametitle{Bayes' rule}
\begin{itemize}
\item Let $f(x ~|~ y)$ be the conditional density or mass function for $X$ given
that $Y = y$
\item Let $f(y)$ be the marginal density or mass function for $Y$
\item Then if $Y$ is continuous
$$
f(y ~|~ x) = \frac{f(x ~|~ y) f(y)}{\int f(x ~|~ t) f(t) dt}
$$
\item If $Y$ is discrete
$$
f(y ~|~ x) = \frac{f(x ~|~ y) f(y)}{\sum_t f(x ~|~ t) f(t)}
$$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Notes}
\begin{itemize}
\item Bayes' rule expresses the conditional density $f(y ~|~ x)$ in terms of
  $f(x ~|~ y)$ and $f(y)$
\item A special case of this kind of relationship, for two events $A$ and $B$,
  yields
$$
P(B ~|~ A) = \frac{P(A ~|~ B) P(B)}{P(A ~|~ B) P(B) + P(A ~|~ B^c)P(B^c)}.
$$
Proof:
\begin{itemize}
\item Let $X$ be an indicator that event $A$ has occurred
\item Let $Y$ be an indicator that event $B$ has occurred
\item Plug into the discrete version of Bayes' rule (spelled out on the next slide)
\end{itemize}
\end{itemize}
\end{frame}
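\begin{frame}\frametitle{Notes continued}
\begin{itemize}
\item Spelling the proof out: with $X$ and $Y$ the indicators of $A$ and $B$,
  the value of $f(x ~|~ y)$ at $x = 1$, $y = 1$ is $P(A ~|~ B)$, at $x = 1$, $y = 0$
  it is $P(A ~|~ B^c)$, and $f(y)$ at $y = 1$ and $y = 0$ gives $P(B)$ and $P(B^c)$
\item The sum in the discrete version of Bayes' rule runs over $t \in \{0, 1\}$, so
$$
P(B ~|~ A) = \frac{P(A ~|~ B) P(B)}{P(A ~|~ B) P(B) + P(A ~|~ B^c) P(B^c)},
$$
which is the stated special case
\end{itemize}
\end{frame}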
\section{Diagnostic tests}
\begin{frame}\frametitle{Example: diagnostic tests}
\begin{itemize}
\item Let $+$ and $-$ be the events that the result of a diagnostic test
is positive or negative respectively
\item Let $D$ and $D^c$ be the event
that the subject of the test has or does not have the disease
respectively
\item The {\bf sensitivity} is the probability that the test is
positive given that the subject actually has the disease, $P(+ ~|~
D)$
\item The {\bf specificity} is the probability that the test is
negative given that the subject does not have the disease, $P(- ~|~ D^c)$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{More definitions}
\begin{itemize}
\item The {\bf positive predictive value} is the probability that the subject has the
disease given that the test is positive, $P(D ~|~ +)$
\item The {\bf negative predictive value} is the probability that the subject does not have
the disease given that the test is negative, $P(D^c ~|~ -)$
\item The {\bf prevalence of the disease} is the marginal probability of disease, $P(D)$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{More definitions}
\begin{itemize}
\item The {\bf diagnostic likelihood ratio of a positive test}, labeled $DLR_+$, is
  $P(+ ~|~ D) / P(+ ~|~ D^c)$, which equals $$\mbox{sensitivity} / (1 - \mbox{specificity})$$
\item The {\bf diagnostic likelihood ratio of a negative test},
  labeled $DLR_-$, is $P(- ~|~ D) / P(- ~|~ D^c)$, which equals
  $$(1 - \mbox{sensitivity}) / \mbox{specificity}$$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Example}
\begin{itemize}
\item A study comparing the efficacy of HIV tests reported that HIV antibody
  tests have a sensitivity of 99.7\% and a specificity of 98.5\%
\item Suppose that a subject, from a population with a .1\% prevalence
of HIV, receives a positive test result. What is the probability
that this subject has HIV?
\item Mathematically, we want $P(D ~|~ +)$ given the sensitivity, $P(+
~|~ D) = .997$, the specificity, $P(- ~|~ D^c) =.985$, and the
prevalence $P(D) = .001$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Using Bayes' formula}
\begin{eqnarray*}
P(D ~|~ +) & = &\frac{P(+~|~D)P(D)}{P(+~|~D)P(D) + P(+~|~D^c)P(D^c)}\\ \\
& = & \frac{P(+~|~D)P(D)}{P(+~|~D)P(D) + \{1-P(-~|~D^c)\}\{1 - P(D)\}} \\ \\
& = & \frac{.997\times .001}{.997 \times .001 + .015 \times .999}\\ \\
& = & .062
\end{eqnarray*}
\begin{itemize}
\item In this population a positive test result corresponds to only a 6\% probability
  that the subject has the disease
\item (The positive predictive value is about 6\% for this test in this population)
\end{itemize}
\end{frame}
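\begin{frame}[fragile]\frametitle{Checking the calculation}
\begin{itemize}
\item The same calculation takes only a few lines to check numerically;
  an illustrative sketch in Python:
\end{itemize}
\begin{verbatim}
sens, spec, prev = 0.997, 0.985, 0.001

# positive predictive value via Bayes' rule
ppv = sens * prev / (sens * prev + (1 - spec) * (1 - prev))
print(round(ppv, 3))  # 0.062
\end{verbatim}
\end{frame}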
\begin{frame}\frametitle{More on this example}
\begin{itemize}
\item The low positive predictive value is due to low prevalence of disease and the
somewhat modest specificity
\item Suppose it was known that the subject was an intravenous drug
  user and routinely had intercourse with an HIV infected partner
\item The relevant prevalence for such a subject is much higher, and so is the
  positive predictive value
\item Notice that the evidence implied by a positive test result does not change
  with the prevalence of disease in the subject's population; only our
  interpretation of that evidence changes
\end{itemize}
\end{frame}
\section{DLRs}
\begin{frame}\frametitle{Likelihood ratios}
\begin{itemize}
\item Using Bayes' rule, we have
$$
P(D ~|~ +) = \frac{P(+~|~D)P(D)}{P(+~|~D)P(D) + P(+~|~D^c)P(D^c)}
$$
and
$$
P(D^c ~|~ +) = \frac{P(+~|~D^c)P(D^c)}{P(+~|~D)P(D) + P(+~|~D^c)P(D^c)}.
$$
\item Therefore
$$
\frac{P(D ~|~ +)}{P(D^c ~|~ +)} = \frac{P(+~|~D)}{P(+~|~D^c)}\times \frac{P(D)}{P(D^c)}
$$
i.e.,
$$
\mbox{post-test odds of }D = DLR_+\times\mbox{pre-test odds of }D
$$
\item Similarly, $DLR_-$ relates the decrease in the odds of the
disease after a negative test result to the odds of disease prior to
the test.
\end{itemize}
\end{frame}
\begin{frame}\frametitle{HIV example revisited}
\begin{itemize}
\item Suppose a subject has a positive HIV test
\item $DLR_+ = .997 / (1 - .985) \approx 66$
\item The result of the positive test is that the odds of disease are
  now 66 times the pretest odds
\item Or, equivalently, the hypothesis of disease is 66 times more
supported by the data than the hypothesis of no disease
\end{itemize}
\end{frame}
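\begin{frame}\frametitle{From odds back to probability}
\begin{itemize}
\item As a check, this agrees with the earlier Bayes calculation
\item The pre-test odds are $P(D)/P(D^c) = .001/.999 \approx .001$
\item So the post-test odds are approximately $66 \times .001 \approx .066$
\item Converting odds back to a probability,
$$
P(D ~|~ +) = \frac{\mbox{post-test odds}}{1 + \mbox{post-test odds}}
\approx \frac{.066}{1.066} \approx .062
$$
as before
\end{itemize}
\end{frame}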
\begin{frame}\frametitle{HIV example revisited}
\begin{itemize}
\item Suppose that a subject has a negative test result
\item $DLR_- = (1 - .997) / .985 \approx .003$
\item Therefore, the post-test odds of disease are now $.3\%$ of the pretest
  odds given the negative test
\item Or, the hypothesis of disease is supported $.003$ times as well as
  the hypothesis of absence of disease given the negative test result
\end{itemize}
\end{frame}
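\begin{frame}\frametitle{From odds back to probability, negative test}
\begin{itemize}
\item The same conversion applies after a negative test
\item The post-test odds are approximately $.003 \times .001 \approx 3\times 10^{-6}$
\item Hence
$$
P(D ~|~ -) \approx \frac{3\times 10^{-6}}{1 + 3\times 10^{-6}} \approx 3\times 10^{-6},
$$
around three chances in a million in this population
\end{itemize}
\end{frame}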
\end{document}