\documentclass[twoside]{article}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{amssymb}


% Theorem-like environments (amsthm).
% Results (theorem, lemma, proposition, corollary) use the `plain' style and
% share the thm counter, which is numbered within sections.
\theoremstyle{plain}
\newtheorem{thm}{Theorem}[section]
\newtheorem{lem}[thm]{Lemma}
\newtheorem{prop}[thm]{Proposition}
\newtheorem{cor}[thm]{Corollary}


% Unnumbered definitions and examples use the `definition' style.
\theoremstyle{definition}
\newtheorem*{defn}{Definition}
\newtheorem*{exmp}{Example}

% Unnumbered remarks, notes and exercises use the `remark' style.
\theoremstyle{remark}
\newtheorem*{rem}{Remark}
\newtheorem*{note}{Note}
\newtheorem*{exer}{Exercise}

% Theorem style `citing': the head spec is just \thmnote{#3}, i.e. the
% heading consists solely of the optional note passed to the environment,
% set in \bfseries and followed by a period.
\newtheoremstyle{citing}% name
  {3pt}%      Space above
  {3pt}%      Space below
  {}% Body font (left empty)
  {}%         Indent amount (empty = no indent, \parindent = para indent)
  {\bfseries}% Thm head font
  {.}%        Punctuation after thm head
  {.5em}%     Space after thm head: " " = normal interword space;
        %       \newline = linebreak
  {\thmnote{#3}}% Thm head spec: print only the optional note

% varthm: unnumbered environment in the `citing' style, declared with an
% empty name, so the heading text comes from the optional argument.
\theoremstyle{citing}
\newtheorem*{varthm}{}


% \boxedeqn{<math>}: typesets <math> as a numbered equation inside a frame.
% The equation environment sits in a minipage wrapped by \fbox, and the whole
% box is placed in an unnumbered display (\[ ... \]).  Before the minipage is
% opened, \linewidth is reduced by 2\fboxsep + 2\fboxrule, the padding and
% rule thickness that \fbox adds on the two sides.
% Every line ends with % so that no stray spaces enter the box.
\newcommand{\boxedeqn}[1]{%
\[\fbox{%
\addtolength{\linewidth}{-2\fboxsep}%
\addtolength{\linewidth}{-2\fboxrule}%
\begin{minipage}{\linewidth}%
\begin{equation}#1\end{equation}%
\end{minipage}%
}\]%
}


% Page geometry for the two-sided layout.
% Text block
\setlength{\textwidth}{6.5in}
\setlength{\textheight}{8.5in}
% Side margins (odd and even pages are offset in opposite directions)
\setlength{\oddsidemargin}{0.25in}
\setlength{\evensidemargin}{-0.25in}
% Vertical placement of the running head
\setlength{\topmargin}{-0.6in}
\setlength{\headsep}{0.75in}
% Paragraphs: no first-line indent, separated by vertical space instead
\setlength{\parindent}{0in}
\setlength{\parskip}{0.1in}

% \lecture{<title>}{<lecturer>}{<scribe>}{<lecture number>}
% Starts a new lecture and prints its framed title box.
%   #1  title: centred in \Large in the box, and used for both running heads
%       via \markboth
%   #2  lecturer name
%   #3  scribe name
%   #4  lecture number: printed in the top line of the box and also stored in
%       the section counter
% The page counter is reset to 1; the first page uses the `plain' page style
% and the document is switched to `myheadings'.
% The spaces and line ends inside \framebox and the \hbox'es are part of the
% macro body; do not reformat it without checking the output.
\newcommand{\lecture}[4]{
   \pagestyle{myheadings}
   \thispagestyle{plain}
   \newpage
   \setcounter{page}{1}
   \setcounter{section}{#4}
   \noindent
   \begin{center}
   \framebox{
      \vbox{\vspace{2mm}
    \hbox to 6.28in { {\bf Stat205A:~Probability~Theory~(Fall 2002) \hfill Lecture: #4} }
       \vspace{6mm}
       \hbox to 6.28in { {\Large \hfill #1  \hfill} }
       \vspace{6mm}
       \hbox to 6.28in { {\it Lecturer: #2 \hfill Scribe: #3} }
      \vspace{2mm}}
   }
   \end{center}
   \markboth{#1}{#1}
   \vspace*{4mm}
}

\begin{document}
\lecture{Almost sure limits for sums of independent random variables. }{James W. Pitman}{Animesh Kumar \tt
animesh@eecs.berkeley.edu}{7}

We first note a few general facts about the various types of convergence we
know,
\begin{enumerate}
\item If $X_n \rightarrow X \quad \mbox{a.s.}$ then $X_n \rightarrow X \quad
\mbox{in P}$.
\item If $X_n \rightarrow X \quad \mbox{in P}$ then there exists a fixed
increasing subsequence $n_k$ such that $X_{n_k} \rightarrow X \quad
\mbox{a.s.}$.
\item $X_n \rightarrow X \quad \mbox{in P}$  iff for every subsequence $n_k$
there exists a further subsequence $n_{k}'$ so that $X_{n_{k}'} \rightarrow X
\quad \mbox{a.s.}$.
\end{enumerate}

Proofs of 2 and 3 are in the textbook~\cite{durrett95}. We begin with a technique
which first uses almost sure convergence along a subsequence of a
sequence of random variables, and then gets control over a maximum
taken between consecutive terms of that subsequence.
Let us start with the technique.

One can prove $X_n \rightarrow X \quad \mbox{a.s.}$  by first showing $X_{n_k}
\rightarrow X \quad \mbox{a.s.}$ for some $n_k$ (we choose $n_k$) and then
getting control over 
\begin{eqnarray*}
M_k = \max_{n_k \leq m < n_{k+1}} |X_m - X_{n_k}|
\end{eqnarray*}
In particular we must be able to show that $ M_k \rightarrow 0 \quad \mbox{a.s.}$
because if $\omega \in \Omega$ is such that both $X_{n_k}(\omega) \rightarrow X(\omega)$
and $M_k(\omega) \rightarrow 0$ then we get (using the triangle inequality and the
fact that a maximum is at least as large as each element of the set over which it is taken)
\begin{eqnarray*}
X_m(\omega) \rightarrow X(\omega)
\end{eqnarray*}
for all $\omega$ in a set of probability one. To illustrate how to use
the technique and how easy it is to use, we start with the example of SLLN with
a second moment condition,
\begin{thm} If $X, X_1, X_2, ...$ are IID\footnote{Independent and Identically
Distributed.} random variables with $E(X) = 0, \quad E(X^2) < \infty$, and $S_n := X_1 +X_2 +
... + X_n$, then,
\begin{equation}
\label{eqn:sllnwithvariance}
\frac{S_n}{n} \rightarrow 0 \quad \mbox{a.s.}
\end{equation}
\end{thm}
\begin{proof}
First we find a subsequence converging almost surely to the mean. For that we
use two tools, 
\begin{itemize}
\item Convergence in Probability or P.
\item Borel-Cantelli lemma.
\end{itemize}
WLOG\footnote{Without Loss of Generality.} we can assume that $E(X)=0$. From
Chebychev's inequality we get,
\begin{eqnarray*}
P\left( \left|\frac{S_n}{n}\right| > \epsilon\right) \leq
\frac{E(X^2)}{n\epsilon^2}
\end{eqnarray*}
This means that $\frac{S_n}{n} \rightarrow 0 \quad \mbox{in P}$. Notice that
$\sum_k \frac{1}{k^2}$ converges to a finite value, therefore for the
subsequence $n_k = k^2$ we get using Borel-Cantelli lemma
\begin{eqnarray*}
P\left( \left|\frac{S_{n^2}}{n^2}\right| > \epsilon \quad \mbox{i.o.} \right) = 0
\end{eqnarray*}
which means that $\frac{S_{n^2}}{n^2} \rightarrow 0 \quad \mbox{a.s.}$.

Now let us try to control $M_k$ as defined above. For convenience we define 
\begin{eqnarray*}
D_n := \max_{n^2 \leq k < (n+1)^2} |S_k - S_{n^2}|
\end{eqnarray*}
for $n^2 \leq k < (n+1)^2$, we have $|S_k| \leq |S_{n^2}| + D_n$ and
$\frac{1}{k} \leq \frac{1}{n^2}$. So we have the following inequality,
\begin{eqnarray*}
\left| \frac{S_k}{k}\right| \leq \left| \frac{S_{n^2}}{n^2}\right| +
\frac{D_n}{n^2}
\end{eqnarray*}
Finally, using the definition of $M_k$ with $n_k = k^2$ (and writing $n$ for the index of the block, since $k$ is used as the running index below), we get the following, 
\begin{eqnarray*}
M_n &\leq& \max_{n^2 \leq k < (n+1)^2} \left| \frac{S_k}{k}\right| + \left|
\frac{S_{n^2}}{n^2}\right|\\
  & \leq & 2\left|\frac{S_{n^2}}{n^2}\right| + \frac{D_n}{n^2}
\end{eqnarray*}
So all we need to prove is that $\frac{D_n}{n^2} \rightarrow 0 \quad
\mbox{a.s.}$. Let us define a new quantity $T_m = S_{n^2 + m} - S_{n^2}$.
Therefore, 
\begin{eqnarray*}
D_{n}^2 & =& \max_{1\leq m \leq 2n} T_{m}^2 \\
& \leq & \sum_{m = 1}^{2n} T_{m}^2
\end{eqnarray*}
Taking expectations on both sides, we get that, 
\begin{eqnarray*}
E(D_{n}^2)& \leq &\sum_{m = 1}^{2n} m\sigma^2  = n(2n+1) \sigma^2\\
&\leq & 4n^2 \sigma^2
\end{eqnarray*}
where $E(X^2) = \sigma^2$. Hence we get that 
\begin{eqnarray*}
P\left( \left|\frac{D_n}{n^2}\right| > \epsilon \right) &\leq&
\frac{E\left(\frac{D_n^2}{n^4}\right)} {\epsilon^2}\\
&\leq & \frac{4\sigma^2}{n^2 \epsilon^2}
\end{eqnarray*}
Again in conjunction with Borel-Cantelli lemma and  
\begin{eqnarray*}
\sum_n P\left( \left|\frac{D_n}{n^2}\right| > \epsilon \right) < \infty
\end{eqnarray*}
we get that $\frac{D_n}{n^2} \rightarrow 0 \quad \mbox{a.s.}$. Which completes
the proof.
\end{proof}
Now we proceed to the {\bf sums of independent random variables} which may not be
identically distributed. We first start with Kronecker's lemma (see text for
proof) 
\begin{lem}[Kronecker] Let $\{x_n\}$ be a sequence of reals and $s_n = x_1 + x_2
+...+x_n, \quad 0 < a_n \uparrow \infty$, then the lemma states that if
$\sum_n \frac{x_n}{a_n}$ converges to a finite limit then $\frac{s_n}{a_n}
\rightarrow 0$. 
\end{lem}
Now we start looking at sums like $\sum_{n = 1}^{\infty} X_n$ where $\{X_n\}$ is
a sequence of independent random variables. The first key fact that we will
prove is Kolmogorov's Zero-One law, which will be written henceforth as K'rov 0-1 law for
brevity. A first consequence of this law is that 
\begin{equation}
\label{eqn:01law}
P\left( \sum_{n=1}^{\infty} X_n \mbox{ converges }\right) = \left\{
\begin{array}{c}
1\\
0\\
\end{array}
\right\}
\end{equation}
which says that the set of $\omega$ for which the sum converges is either of
probability 0 or 1. 
\begin{defn}
Given a sequence of random variables $\{X_n\}$, the tail sigma field is defined
as 
\begin{eqnarray*}
{\cal T} := \bigcap_n \sigma(X_n, X_{n+1}, ... )
\end{eqnarray*}
\end{defn}
With this definition in mind, the K'rov 0-1 law says that if the $X_i$'s are
independent then for any $T\in
{\cal T}$, we have $P(T) = 0$ or $1$. 
\begin{proof} We start the proof of 0-1 law now. The trick is to show that any
such $T$ is independent of itself which sounds pretty bizarre but it turns out
to be true. With that aid one can show that $P(T) = P(T\cap T) = P^2(T)$ and
hence the result will follow. 

Take $T \in {\cal T}$ and let $F_n \in \sigma(X_1, X_2, ..., X_n)$, and $T_n \in
\sigma(X_{n+1}, X_{n+2}, ...)$. Then $F_n$ and $T_n$ are independent. So if
$T\in {\cal T}$ then $T \in \sigma(X_{n+1}, X_{n+2}, ...)$, and hence
\begin{eqnarray*}
P(T \cap F_n) = P(T)P(F_n)
\end{eqnarray*}
for all $F_n \in \sigma(X_1, X_2, ..., X_n)$. Now consider the set 
${\cal F} = \{F \in \sigma (X_1, X_2, X_3,...) : P(T\cap F) = P(T) P(F)\}$. This
can be verified to be a $\lambda$ system (use MCT for increasing sequences) and
the $\lambda$ system contains all sets in $\bigcup_n \sigma(X_1, X_2, X_3,
..., X_n)$  which is a field (and hence a $\pi$ system). Therefore, using the
$\pi-\lambda$ theorem, we get this property to be true for every $F \in \sigma(X_1, X_2,
X_3, ...)$. Since $T$ itself belongs to $\sigma(X_1, X_2, X_3, ...)$, we may take
$F = T$ to get $P(T) = P(T \cap T) = P(T)^2$, so $P(T)$ is $0$ or $1$, which completes the proof.
\end{proof}
Finally we arrive at Kolmogorov's inequality. We formally state it as follows,
\begin{thm}[Kolmogorov's Inequality] Let $X_1, X_2, ...$ be independent with
$E(X_i) = 0$ and $\sigma_{i}^2 =  E(X_{i}^2) < \infty$, and define $S_k = X_1 +
X_2 + ... +X_k$, then the inequality states that
\begin{equation}
\label{eqn:kolmogorovinequality}
P\left( \max_{1\leq k \leq n} |S_k| \geq \epsilon \right) \leq
\frac{E(S_{n}^2)}{\epsilon^2}
\end{equation}
\end{thm}
\begin{proof}
Decompose the event according to when we escape from the $\pm \epsilon$
strip. Let 
\begin{eqnarray*}
A_k = \{|S_m| < \epsilon \mbox{ for } 1 \leq m < k; \ |S_k| \geq \epsilon\}
\end{eqnarray*}
In words, $A_k$ is the event that the first escape out of the
$\pm\epsilon$ strip happens exactly at the $k$-th step. Also notice that all these
events are disjoint, and as a final remark we have $\bigcup_{k = 1}^{n} A_k =
\left( \max_{1\leq k \leq n} |S_k| \geq \epsilon \right)$. Finally we start on
a chain of inequalities, since we have all the pieces ready,
\begin{eqnarray*}
E(S_{n}^2) \geq E\left( S_{n}^2 1\left(\bigcup_{k=1}^n A_k\right) \right) =
\sum_{k=1}^{n} E\left(S_{n}^2 1_{A_k}\right)
\end{eqnarray*}
We can split $S_{n}^2 = S_{k}^2 +  (S_{n} - S_k)^2 + 2S_k(S_n - S_k)$, and write
the following,
\begin{eqnarray*}
E\left(S_{n}^2 1_{A_k}\right) &=& E\left( S_{k}^2 1_{A_k}\right) + E\left(
(S_n-S_k)^2 1_{A_k} \right) + E\left(2(S_n-S_k)S_k 1_{A_k}\right)\\
&\geq & \epsilon^2 P(A_k)
\end{eqnarray*}
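where the first term is at least $\epsilon^2 P(A_k)$ (since $|S_k| \geq \epsilon$ on $A_k$), the second term is nonnegative,
and the third term is the expectation of a product of two independent random variables, $S_n - S_k$ and $S_k 1_{A_k}$
(and hence equals the product of their expectations, which is zero because $E(S_n - S_k) = 0$).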

Finally put into the summation to get, 
\begin{eqnarray*}
E(S_{n}^2) \geq \sum_{k=1}^n P(A_k) \epsilon^2 = P\left( \max_{1\leq k \leq n}
|S_k| \geq \epsilon \right) \epsilon^2
\end{eqnarray*}
which easily leads to the result. 
\end{proof}
Hence the proof is complete.

\bibliographystyle{plain}
\bibliography{/saruman/accounts/fac/pitman/search/bm3,/saruman/accounts/fac/pitman/search/general,/saruman/accounts/fac/pitman/search/bm4,/saruman/accounts/fac/pitman/search/bessel,/saruman/accounts/fac/pitman/search/sizebias,/saruman/accounts/fac/pitman/search/pitman,/saruman/accounts/fac/pitman/search/comb,/saruman/accounts/fac/pitman/search/species,/saruman/accounts/fac/aldous/trees/trees,/saruman/accounts/fac/aldous/trees/rwgbook,/saruman/accounts/fac/aldous/trees/misc,/saruman/accounts/fac/aldous/trees/me,/saruman/accounts/fac/aldous/trees/coag}
\end{document}