\documentclass{article}
\usepackage{amssymb}
\usepackage{amsmath}
%\usepackage{slide-article}
\usepackage{slide-article-tom}

\usepackage{tocloft}

% Load graphicx without an explicit driver option: the package auto-detects
% the back end (pdfTeX, dvips, ...).  The former `\ifx\pdfoutput\undefined'
% switch chose the pdftex driver whenever \pdfoutput was merely *defined*,
% which is also true when a pdfTeX-based `latex' run writes DVI (\pdfoutput=0).
\usepackage{graphicx}
\ifx\pdfoutput\undefined
     % Engine has no \pdfoutput primitive: nothing PDF-specific to configure.
\else
     \ifnum\pdfoutput>0
%%     \usepackage{type1cm}
%%     \usepackage{color}
          % Only meaningful when PDF is produced directly: compression level 9.
          \pdfcompresslevel9
     \fi
\fi
%  \usepackage{epsfig}
%\usepackage{graphics}
\usepackage{hyperref}

%\definecolor{Emerald}{cmyk}{1,0,0.50,0}
% hyperref configuration: coloured link text, with internal links in blue.
% pdfpagemode takes a PDF page-mode name (UseNone, UseOutlines, UseThumbs,
% FullScreen, ...); the bare value `None' is not one of them, so the intended
% "no side panel on opening" setting is spelled UseNone.
\hypersetup{colorlinks,
            linkcolor=blue,
            %pdfpagemode=FullScreen
            pdfpagemode=UseNone
            }


%\usepackage{hyper}
%\usepackage{hthtml}
%\def\hyperref#1#2#3#4{\hturl{#1}}

% \pagedone: marks the end of a slide; expands to \newpage.
\def\pagedone{\newpage}

% \tthdump{<material>}: identity wrapper -- under LaTeX it expands to its
% argument unchanged.
% NOTE(review): presumably this marks LaTeX-only material that the TtH
% (TeX-to-HTML) translator should skip, as the counterpart of the `%%tth:'
% comment lines used throughout this file -- confirm against the TtH manual.
\def\tthdump#1{#1}

%%\tthdump{\def\sectionhead#1{\begin{center}{\LARGE\hypertarget{#1}
%%     {#1}\hyperlink{Our general topics:}{\hfil$\leftarrow$}}\end{center}}}
%%     
%%%%tth:\def\sectionhead#1{{\LARGE#1\hypertarget{#1}{#1}}
%%%%tth:     \special{html: <A NAME="#1"></A><a href="\#Top of file">       Top</a>}}

% \sectionhead{<title>}: slide heading.
%   - Typesets a centered, unnumbered \section* whose text is <title>
%     followed by a left arrow that hyperlinks to the target named "Contents".
%   - Because \section* adds no table-of-contents entry, one is added by hand
%     with <title> as its text.
%   - The final \vspace backs up by one \baselineskip.
%   #1: heading text, also used as the table-of-contents entry.
\tthdump{\def\sectionhead#1{\begin{center}\section*{#1\hyperlink{Contents}{\ \ $\leftarrow$}}\end{center}
     \addcontentsline{toc}{section}{#1}
     \vspace{-\baselineskip}
     }}

% \shortsectionhead{<title>}{<toc title>}: same layout as \sectionhead, but
% the table-of-contents entry uses a separate (typically shorter) text.
%   #1: heading text shown on the slide, followed by the back-link arrow to
%       the "Contents" target.
%   #2: text of the section entry written to the table of contents.
\tthdump{\def\shortsectionhead#1#2{\begin{center}\section*{#1\hyperlink{Contents}{\ \ $\leftarrow$}}\end{center}
     \addcontentsline{toc}{section}{#2}
     \vspace{-\baselineskip}
     }}


% \referencehead{<toc title>}: heading for the references slide.
%   The centered \section* contains only the back-link arrow to the "Contents"
%   target; #1 is NOT typeset in the heading and is used solely as the text of
%   the table-of-contents entry.
%   NOTE(review): presumably the visible title comes from the bibliography
%   environment that follows -- confirm at the call site.
\tthdump{\def\referencehead#1{\begin{center}\section*{ \hyperlink{Contents}{\ \ $\leftarrow$}}\end{center}
     \addcontentsline{toc}{section}{#1}
     \vspace{-\baselineskip}
     }}

%%tth:\def\sectionhead#1{\begin{center}{\special{html: <a href="\#Top of file">       <--</a>}\section{#1}\end{center}}}
%% %%tth:     \addcontentsline{toc}{section}{#1}}

% \exercises{<topic>}: heading for an exercises slide.
%   - Typesets a centered, unnumbered \subsection* reading "<topic> - exercises"
%     followed by a left arrow that hyperlinks to the "Contents" target.
%   - Adds a subsection-level table-of-contents entry "<topic> (ex)".
%   - The final \vspace backs up by one \baselineskip.
%   #1: topic name.
\tthdump{\def\exercises#1{\begin{center}\subsection*{#1 -~exercises
     \hyperlink{Contents}{\hfil$\leftarrow$}}\end{center}
     \addcontentsline{toc}{subsection}{#1 (ex)}
     \vspace{-\baselineskip}
     }}
     
%%tth:\def\exercises#1{\begin{center}\subsection{#1 -~exercises}\end{center}
%%tth:     {\special{html: <a href="\#Top of file">       <--</a>}}}

% \quotesection{<title>}: heading for a quotes slide.
%   Typesets <title> centered in \LARGE and makes it a hyperlink target named
%   <title>, so \hyperlink{<title>}{...} elsewhere jumps to this slide.  It is
%   followed by a two-headed left arrow that links to the target "The quotes",
%   i.e. the index slide created by \quotesection{The quotes}.
%   No table-of-contents entry is written.
%   #1: heading text, also used as the hyperlink target name.
\tthdump{\def\quotesection#1{\begin{center}{\LARGE\hypertarget{#1}
     {#1}\hyperlink{The quotes}{\hfil$\twoheadleftarrow$}}\end{center}}}
     
%%tth:\def\quotesection#1{{\LARGE#1\hypertarget{#1}{#1}}
%%tth:     \special{html: <A NAME="#1"></A><a href="\#The quotes">       <-</a>}}

%%tth:\def\makehyperlink#1{\special{html: <a href="\##1">}{\large#1}\special{html: </a>}}

%%tth:\def\binom#1#2{\left(\begin{array}{c}#1\\#2\end{array}\right)}

%\def\sectionhead#1{\begin{center}{\LARGE #1}\end{center}}
%\def\sectionhead#1{\section{#1}}

% \col{a}{b}: two-element column vector -- a stacked above b in a one-column
% centered array enclosed in auto-sized parentheses.  For use in math mode.
\def\col#1#2{\left(\begin{array}{c}#1\\#2\end{array}\right)}
% \tcol{a}{b}: the same vector written inline as a transposed row, (a, b)^T.
% For use in math mode.
\def\tcol#1#2{(#1, #2)^T}

\begin{document}
\raggedright
%%tth:\special{html: <A NAME="Top of file"></A>}

\pagestyle{myfooters}
%\pagestyle{plain}

\thispagestyle{empty}


%%tth:\special{html:<title> Econ 101</title>}

%Slide 1
\title{{\LARGE\bf Entropy, Power Laws, \newline and Economics}\newline}
\author{Tom Carter
\newline
\newline
\newline
Complex Systems Summer School \newline
SFI, 2007
\vfill
\tthdump{\href{http://astarte.csustan.edu/\~tom/}{http://astarte.csustan.edu/\~\ tom/}}
%%tth:\href{http://astarte.csustan.edu/~tom/}{http://astarte.csustan.edu/\~tom/}
\vfill
Santa Fe
%%\newline
}
\date{June, 2007}
\maketitle

\hypertarget{Contents}{}

\setlength{\cftbeforetoctitleskip}{-3cm}
\setlength{\cftbeforesecskip}{0.1cm}

\tableofcontents


%%%Slide 2
%%\sectionhead{Our general topics:}

%%%%tth:\item
%%\tthdump{\hyperlink{Mathematics of Information}
%%	        {$\circledcirc$ Mathematics of Information\newline}} 
%%%%tth:\makehyperlink{Mathematics of Information}
%%%%tth:\item
%%\tthdump{\hyperlink{Some entropy theory}
%%	        {$\circledcirc$ Some entropy theory\newline}} 
%%%%tth:\makehyperlink{Some entropy theory}

%%%%tth:\item
%%\tthdump{\hyperlink{A Maximum Entropy Principle}
%%	         {$\circledcirc$ A Maximum Entropy Principle\newline}} 
%%%%tth:\makehyperlink{A Maximum Entropy Principle}
%%%%tth:\item
%%\tthdump{\hyperlink{Application: Economics I (a Boltzmann Economy)}
%%	         {$\circledcirc$ Application: Economics I (a Boltzmann Economy)\newline}} 
%%%%tth:\makehyperlink{Application: Economics I (a Boltzmann Economy)}
%%%%tth:\item
%%\tthdump{\hyperlink{Fit to the Real World}
%%	         {$\circledcirc$ Fit to the Real World\newline}} 
%%%%tth:\makehyperlink{Fit to the Real World}
%%%%tth:\item
%%\tthdump{\hyperlink{Application: Economics II (a power law)}
%%	         {$\circledcirc$ Application: Economics II (a power law)\newline}} 
%%%%tth:\makehyperlink{Application: Economics II (a power law)}

%%%%tth:\item
%%\tthdump{\hyperlink{References}
%%	        {$\circledcirc$ References\newline}} 
%%%%tth:\makehyperlink{References}
%%%%tth:\end{itemize}

\pagedone

\quotesection{The quotes}
%%tth:\begin{itemize}

%%tth:\item
\tthdump{\hyperlink{Science, wisdom, and counting}
               {\ $\circledcirc$ Science, wisdom, and counting\newline}}
%%tth:\makehyperlink{Science, wisdom, and counting}
%%tth:\item
	 \tthdump{\hyperlink{Surprise, information, and miracles}
	        {$\circledcirc$ Surprise, information, and miracles\newline}}
%%tth:\makehyperlink{Surprise, information, and miracles}
%%tth:\item
	 \tthdump{\hyperlink{Information (and hope)}
	        {$\circledcirc$ Information (and hope)\newline}}
%%tth:\makehyperlink{Information (and hope)}
%%tth:\item
	 \tthdump{\hyperlink{H (or S) for Entropy}
	         {$\circledcirc$ H (or S) for Entropy\newline}}
%%tth:\makehyperlink{H (or S) for Entropy}
%%tth:\end{itemize}
%\thepage

\tthdump{\hyperlink{Contents}{\hfil To topics $\leftarrow$}}
%%tth:{\special{html: <a href="\#Top of file">       Back to top of file</a>}}
\pagedone

%Slide 3

\quotesection{Science, wisdom, and counting}
%%tth:\begin{quote}
``Science is organized knowledge. Wisdom is organized life.''

- Immanuel Kant 

``My own suspicion is that the universe is not only stranger than we suppose,
but stranger than we can suppose.''

- John Haldane

 ``Not everything that can be counted counts, and not everything that counts can be counted.'' 
 
      - Albert Einstein (1879-1955) 
      

``The laws of probability, so true in general, so fallacious in particular.''

- Edward Gibbon 

%%tth:\end{quote}

\pagedone


\quotesection{Surprise, information, and miracles}
%%tth:\begin{quote}
``The opposite of a correct statement is a false statement. The opposite of a profound truth may well be another profound truth.''
 
      - Niels Bohr (1885-1962)
      
``I heard someone tried the monkeys-on-typewriters bit trying for the plays of W. Shakespeare, but all they got was the collected works of Francis Bacon.'' 

- Bill Hirst 

``There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.''
 
      - Albert Einstein (1879-1955) 
%%tth:\end{quote}
      
\pagedone
\sectionhead{Mathematics of Information}
\begin{itemize}
	\item We would like to develop a usable measure of the {\em information} we get
	       from observing the occurrence of an event having probability $p$.  Our first reduction
                will be to ignore any particular features of the event, and only observe whether or
                not it happened.  Thus we will think of an event as the 
                observance of a symbol whose probability of occurring is $p$. We will thus be defining
                the {\em information} in terms of the probability $p$.
               
               The approach we will be taking here is axiomatic:  on the next page is a list of
               the four fundamental axioms we will use.  Note that we can apply this axiomatic
               system in any context in which we have available a set of non-negative real numbers.
               A specific special case of interest is {\em probabilities} (i.e., real numbers
               between 0 and 1), which motivated the selection of axioms \ldots

\pagedone
	       
	 \item      We will want our {\em information} measure $I(p)$ to have several properties:
	       \begin{enumerate}
	            \item Information is a non-negative quantity:  $I(p) \ge 0$.
		     \item If an event has probability 1, we get no information from the occurrence
		          of the event:  $I(1) = 0$.
		     \item If two independent events occur (whose joint probability is the product of
		          their individual probabilities), then the information we get from observing
		          the events is the sum of the two informations:
		          $I(p_1*p_2) = I(p_1) + I(p_2).$
		          (This is the critical property \ldots)
		     \item We will want our {\em information} measure to be a continuous (and, in fact,
		          monotonic) function of the probability (slight changes in probability should result
		          in slight changes in {\em information}).
             \end{enumerate}
\pagedone

     \item  We can therefore derive the following:
          \begin{enumerate}
               \item $I(p^2) = I(p*p) = I(p) + I(p) = 2*I(p)$
               \item Thus, further, $I(p^n) = n*I(p)$ \newline
                    (by induction \ldots)
               \item  $I(p) = I((p^{1/m})^m) = m * I(p^{1/m})$, so $I(p^{1/m}) = \frac{1}{m}*I(p)$
                      and thus in general
                      $$I(p^{n/m}) = \frac{n}{m}*I(p)$$
               \item  And thus, by continuity, we get, for $0 < p \le 1$, and $a>0$ a real number:
                      $$I(p^a) = a*I(p)$$
           \end{enumerate}
           
      \item  From this, we can derive the nice property:
           $$I(p) = -\log_b(p) = \log_b(1/p)$$
           for some base $b$.


\end{itemize}
\pagedone

%Slide 9
\begin{itemize}
       \item Summarizing: from the four properties,
       \begin{enumerate}
            \item $I(p) \ge 0$
            \item $I(p_1*p_2) = I(p_1) + I(p_2)$
            \item $I(p)$ is monotonic and continuous in $p$
            \item $I(1) = 0$
       \end{enumerate}
       we can derive that
       $$I(p) = \log_b(1/p) = - \log_b(p),$$
       for some positive constant $b$.  The base $b$ determines the units we
       are using.

       We can change the units by changing the base, using the formulas, for $b_1, b_2, x > 0$,
            $$x = b_1^{\log_{b_1}(x)}$$
       and therefore
       $$\log_{b_2}(x) = \log_{b_2}(b_1^{\log_{b_1}(x)}) = (\log_{b_2}(b_1))(\log_{b_1}(x)).$$

\end{itemize}
\pagedone

\begin{itemize}
       \item  Thus, using different bases for the logarithm results in {\em information} measures
            which are just constant multiples of each other, corresponding with measurements
            in different units:
            \begin{enumerate}
            \item $\log_2$ units are {\em bits} (from `binary')
            \item $\log_3$ units are {\em trits} (from `trinary')
            \item $\log_e$ units are {\em nats} (from `natural logarithm') (We'll use $\ln(x)$
                 for $\log_e(x)$)
            \item $\log_{10}$ units are {\em Hartleys}, after an early worker in the field.
            \end{enumerate}
      
      \item Unless we want to emphasize the units, we need not bother to specify
            the base for the logarithm, and will write $\log(p)$.  Typically, we will think
            in terms of $\log_2(p)$.
            
\end{itemize}
\pagedone

\begin{itemize}            
       \item  For example, flipping a fair coin once will give us events $h$ and $t$ each
            with probability $1/2$, and thus a single flip of a coin gives us $-\log_2(1/2) = 1$ bit
            of information (whether it comes up $h$ or $t$).
            
            Flipping a fair coin $n$ times (or, equivalently, flipping $n$ fair coins) gives us
            $-\log_2((1/2)^n) = \log_2(2^n) = n*\log_2(2) = n$ bits of information.
            
            We could enumerate a sequence of 25 flips as, for example:
            $$hthhtththhhthttththhhthtt$$
            or, using $1$ for $h$ and $0$ for $t$, the 25 bits
            $$1011001011101000101110100.$$
            
            We thus get the nice fact that $n$ flips of a fair coin gives us $n$ bits of information,
            and takes $n$ binary digits to specify.  That these two are the same reassures us that
            we have done a good job in our definition of our {\em information} measure \ldots
    
\end{itemize}

\pagedone

\quotesection{Information (and hope)}
%%tth:\begin{quote}
``In Cyberspace, the First Amendment is a local ordinance.''

- John Perry Barlow


``Groundless hope, like unconditional love, is the only kind worth having.''

- John Perry Barlow

``The most interesting facts are those which can be used several times,
those which have a chance of recurring.  \ldots  Which, then, are the facts that have a chance
of recurring?  In the first place, simple facts.''

- H. Poincar\'e, 1908
%%tth:\end{quote}

\pagedone
%Slide 10
\sectionhead{Some entropy theory}


     \begin{itemize}
          \item One question we might ask here is, what is the average amount of information
               we will get (per observation) from observing events from a probability
               distribution $P$?  In particular, what is the expected value of the
               information?
               
          \item  Suppose we have a discrete probability distribution
               $P = \{p_1, p_2, \ldots, p_n\}$, with $p_i \ge 0$ and $\sum_{i=1}^n p_i = 1$,
               or a continuous distribution $p(x)$ with $p(x) \ge 0$ and $\int p(x)dx = 1$,
               we can define the {\em expected value} of an associated discrete set
               $F = \{f_1, f_2, \ldots, f_n\}$ or function $F(x)$ by:
               $$ \langle F \rangle = \sum_{i=1}^n f_i p_i$$
               or
               $$ \langle F(x) \rangle = \int F(x) p(x) dx.$$

               With these ideas in mind, we can define the {\em entropy of a distribution} by:
               $$H(P) = \langle I(p) \rangle.$$
               
               In other words, we can define the {\em entropy} of a probability distribution as the
               {\em expected value} of the {\em information} of the distribution.
               
               In particular, for a discrete distribution $P = \{p_1, p_2, \ldots, p_n\}$, we
               have the entropy:
               $$H(P) = \sum_{i=1}^n p_i \log\left(\frac{1}{p_i}\right).$$

\end{itemize}

\pagedone

%Slide 12

     Several questions probably come to mind at this point:
     \begin{itemize}
          \item  What properties does the function $H(P)$ have?  For example,
               does it have a maximum, and if so where?
          \item  Is {\em entropy} a reasonable name for this?  In particular, the
               name {\em entropy} is already in use in thermodynamics.  How are
               these uses of the term related to each other?
          \item  What can we do with this new tool?
          \item  Let me start with an easy one.  Why use the letter $H$ for entropy?
               What follows is a slight variation of a footnote, p. 105, in the book
          {\em Spikes} by Rieke, et al. :-)
\end{itemize}

\pagedone


\quotesection{H (or S) for Entropy}
%%tth:\begin{quote}         
          ``The enthalpy is [often] written U.  V is the volume, and Z is the partition function.  P and
          Q are the position and momentum of a particle.  R is the gas constant, and of course T is
          temperature.  W is the number of ways of configuring our system (the number of states),
          and we have to keep X and Y in case we need more variables.  Going back to the first half
          of the alphabet, A, F, and G are all different kinds of free energies (the last named for
          Gibbs).  B is a virial coefficient or a magnetic field.  I will be used as a symbol for
          information; J and L are angular momenta.  K is Kelvin, which is the proper unit of T.
          M is magnetization, and N is a number, possibly Avogadro's, and O is too easily
          confused with 0.  This leaves S . . .'' and H.  In {\em Spikes} they also eliminate H
          (e.g., as the Hamiltonian).  I, on the other hand, along with Shannon and others, prefer
          to honor Hartley.  Thus, H for entropy . . .
%%tth:\end{quote}

\pagedone



\sectionhead{A Maximum Entropy Principle}

    \begin{itemize}
       \item  Suppose we have a system for which we can measure certain macroscopic
            characteristics.  Suppose further that the system is made up of many microscopic
            elements, and that the system is free to vary among various states.  Then (a generic
            version of) the Second Law of Thermodynamics says that with probability essentially equal to
            1, the system will be observed in states with maximum entropy.
            
            We will then sometimes be able to gain understanding of the system by
            applying a {\em maximum information entropy} principle (MEP), and, using
            Lagrange multipliers, derive formulae for aspects of the system.
\pagedone            
      \item  Suppose we have a set of macroscopic measurable characteristics $f_k$,
           $k = 1, 2, \ldots, M$
           (which we can think of as constraints on the system), which we assume are
           related to microscopic characteristics via:
           $$\sum_ip_i * f_i^{(k)} = f_k.$$
           Of course, we also have the constraints:
           $$p_i \ge 0,\ \mathrm{and}$$
           $$\sum_ip_i = 1.$$
           We want to maximize the entropy, $\sum_ip_i\log(1/p_i)$, subject to these
           constraints.  Using Lagrange multipliers $\lambda_k$ (one for each constraint),
           we have the general solution:
           $$p_i = \exp\left(- \lambda - \sum_k\lambda_kf_i^{(k)}\right).$$
           If we define $Z$, called the partition function, by
           $$Z(\lambda_1, \ldots, \lambda_M) = \sum_i\exp\left(-\sum_k\lambda_kf_i^{(k)}\right),$$
           then we have $e^\lambda = Z$, or $\lambda = \ln(Z)$.
           
    \end{itemize}
\pagedone

\shortsectionhead{Application: Economics I (a Boltzmann Economy)}{Application: Economics I}

    \begin{itemize}
        \item Our first example here is a very simple economy.  Suppose there is
            a fixed amount of money ($M$ dollars), and a fixed number of agents ($N$)
            in the economy.
            Suppose that during each time step, each agent randomly selects another
            agent and transfers one dollar to the selected agent.  An agent having
            no money doesn't go in debt.  What
            will the long term (stable) distribution of money be?
            
            This is not a very realistic economy -- there is no growth, only a redistribution
            of money (by a random process).  For the sake of argument, we can imagine
            that every agent starts with approximately the same amount of money, although
            in the long run, the starting distribution shouldn't matter.
            
\pagedone

        \item For this example, we are interested in looking at the distribution of money
            in the economy, so we are looking at the 
            probabilities $\{p_i\}$ that an agent has the amount of money $i$.  We are
            hoping to develop a model for the collection $\{p_i\}$.
            
            If we let $n_i$ be the number of agents who have $i$ dollars, we have two
            constraints:
            $$\sum_in_i * i = M$$
            and
            $$\sum_in_i = N.$$
            Phrased differently (using $p_i = \frac{n_i}{N}$), this says
            $$\sum_ip_i * i = \frac{M}{N}$$
            and
            $$\sum_ip_i = 1.$$
            
\pagedone

        \item  We now apply Lagrange multipliers:
            \begin{eqnarray*}
               L = \sum_ip_i\ln(1/p_i)
                & - &\lambda\left[\sum_ip_i*i - \frac{M}{N}\right] \\
                & - & \mu\left[\sum_ip_i - 1\right],
             \end{eqnarray*}
            from which we get
            $$\frac{\partial L}{\partial p_i} = -[1 + \ln(p_i)] - \lambda i - \mu = 0.$$
            
            We can solve this for $p_i$:
            $$\ln(p_i) = - \lambda i - (1 + \mu)$$
            and so
            $$p_i = e^{-\lambda_0}e^{-\lambda i}$$
            (where we have set $1 + \mu \equiv \lambda_0)$.
            
\pagedone
            
        \item Putting in constraints, we have
            \begin{eqnarray*}
                1 & = & \sum_i p_i \\
                  & = & \sum_i e^{-\lambda_0}e^{-\lambda i}\\
                  & = & e^{-\lambda_0} \sum_{i = 0}^M e^{-\lambda i},
            \end{eqnarray*}
            and
            \begin{eqnarray*}
                \frac{M}{N} & = & \sum_i p_i * i \\
                  & = & \sum_i e^{-\lambda_0}e^{-\lambda i} * i \\
                  & = & e^{-\lambda_0} \sum_{i = 0}^M e^{-\lambda i} * i.
            \end{eqnarray*}
            We can approximate (for large $M$)
            $$ \sum_{i = 0}^M e^{-\lambda i}  \approx \int_0^Me^{-\lambda x}dx
                    \approx \frac{1}{\lambda},$$
            and
            $$\sum_{i = 0}^M e^{-\lambda i} * i \approx \int_0^Mxe^{-\lambda x}dx
                \approx \frac{1}{\lambda^2}.$$
                
\pagedone

            From these we have (approximately)
            $$e^{\lambda_0} = \frac{1}{\lambda}$$
            and 
            $$e^{\lambda_0}\frac{M}{N} = \frac{1}{\lambda^2}.$$
            From this, we get
            $$\lambda = \frac{N}{M} = e^{-\lambda_0},$$
            and thus (letting $T = \frac{M}{N}$) we have:
            \begin{eqnarray*}
              p_i & = & e^{-\lambda_0}e^{-\lambda i} \\
                  & = & \frac{1}{T}e^{-\frac{i}{T}}.
            \end{eqnarray*}
            This is a Boltzmann-Gibbs distribution, where we can think of $T$ (the
            average amount of money per agent) as the
            ``temperature,'' and thus we have a ``Boltzmann economy'' \ldots
            
            Note: this distribution also solves the functional equation
            $$p(m_1)p(m_2) = p(m_1 + m_2).$$
            
\pagedone
            
        \item This example, and related topics, are discussed in
            
            {\em Statistical mechanics of money}
            
            by Adrian Dragulescu and Victor M. Yakovenko,
            
            \tthdump{\href{http://arxiv.org/abs/cond-mat/0001432}
                {http://arxiv.org/abs/cond-mat/0001432}}
%%tth:\href{http://arxiv.org/abs/cond-mat/0001432}
%%tth:{http://arxiv.org/abs/cond-mat/0001432}
            
            and
            
            {\em Statistical mechanics of money: How saving propensity affects its distribution}
            
            by Anirban Chakraborti and Bikas K. Chakrabarti
            
            \tthdump{\href{http://arxiv.org/abs/cond-mat/0004256}
                {http://arxiv.org/abs/cond-mat/0004256}}
%%tth:\href{http://arxiv.org/abs/cond-mat/0004256}
%%tth:{http://arxiv.org/abs/cond-mat/0004256}

    \end{itemize}
    
\pagedone

\shortsectionhead{Fit of this model to the Real World\texttrademark}{Fit to Real World\texttrademark}

     \begin{itemize}
          \item How well does this model seem to fit to the Real World?
          
               For a fairly large range of individuals, it actually does a decent job.
               Here is a graphical representation of U.S. census data for 1996:
               
               \centerline{\includegraphics[width = 5.5in]{income-distribution}}
               The black line is  $p(x) = \frac{1}{R}e^{-\frac{x}{R}}$.
               
\pagedone
          \item  However, for the wealthy it doesn't do such a good job.  Here are some
               graphical representations of U.K. and U.S. data for 1996-2001:
                
               \
               
               \centerline{\includegraphics[width = 7.0in]{wealth-distribution}}
               
               As can be seen on the left graph, the wealth distribution for the U.K. wealthy
               in 1996 is close to a linear fit in $\log$-$\log$ coordinates.
               
               Can we modify the model somewhat to capture other characteristics of the data?
               
 \pagedone
 
           \item  There are a wide variety of important distributions that are observed
                in data sets.  For example:
                
                \begin{itemize}
                  \item  Normal (gaussian) distribution:
                  $$ p(x) \sim \exp(-\frac{x^2}{2\sigma^2})$$
                  Natural explanation:  Central limit theorem; sum of random
                  variables (with finite second moment):
                  $$X_n = \sum_{i = 1}^n x_i$$
                  Many applications:
                  \begin{itemize}
                     \item Maxwell:  distribution of velocities of gas particles
                     \item IQ
                     \item heights of individuals
                  \end{itemize}
                  Distribution is {\em thin tailed} -- no one is 20 feet tall \ldots
                  
                  \item  Exponential distribution:
                  $$ p(x) \sim \exp(-x / x_0)$$
                  Natural explanation 1:  Survival time for constant probability decay.
                  
                  Natural explanation 2:  Equilibrium statistical mechanics (see above --
                    maximum entropy subject to constraint on mean).
                    
                  Many applications:
                  \begin{itemize}
                     \item Radioactive decay.
                     \item Equilibrium statistical mechanics (Boltzmann-Gibbs
                        distribution)
                  \end{itemize}
                    
                  Characteristic scale is $x_0$; distribution is thin tailed.
                  
                \item  Power law (see below):
                $$p(x) \sim x^{-\alpha}$$
                \end{itemize}
     \end{itemize}
     
\pagedone

\sectionhead{A bit about Power Laws}

     \begin{itemize}
          \item  Various researchers in various fields at various times have observed that many
               datasets seem to reflect a relationship of the form
               $$ p(x) \sim x^{-\alpha}$$
               for a fairly broad range of values of $x$.  These sorts of data relations are
               often called {\em power laws}, and have been the subject of fairly intensive
               interest and study.
               
               An early researcher, Vilfredo Pareto, observed in the late 1800s that pretty
               uniformly across geographical locations, wealth was distributed through
               the population according to a power law, and hence such distributions
               are often called {\em Pareto distributions}.

\pagedone

               A variety of other names
               have been applied to these distributions:
               
               \begin{itemize} 

                    \item Power law distribution 
                                  
                    \item Pareto's law
                    
                    \item Zipf's law 

                    \item Lotka's law 
 
                    \item Bradford's law 

                    \item Zeta distribution 

                    \item Scale free distribution 

                    \item Rank-size rule
                    
              \end{itemize}
              My general rule of thumb is that if something has lots of names,
              it is likely to be important \ldots
              
\pagedone

          \item  These distributions have been observed many places (as noted, for
               example, in Wikipedia):
          
               \begin{itemize}
                    	\item	Frequencies of words in longer texts
                    	\item	The size of human settlements (few cities,
                    	          many hamlets/villages)
                    	\item	File size distribution of Internet traffic which
                    	         uses the TCP protocol (many smaller files, few larger ones)
                    	\item	Clusters of Bose-Einstein condensate near absolute zero
                    	\item	The value of oil reserves in oil fields (a few large
                    	          fields, many small fields)
                    	\item	The length distribution in jobs assigned to supercomputers
                    	          (a few large ones, many small ones)
                    	\item	The standardized price returns on individual stocks
                    	\item	Size of sand particles
                    	\item   Number of species per genus (please note the
                    	          subjectivity involved: The tendency to divide a genus
                    	          into two or more increases with the number of species in it)
                    	\item	Areas burnt in forest fires
                    
               \end{itemize}
               
           \item  There are a variety of important properties of power laws:
             \begin{itemize}
                \item Distribution has fat / heavy tails (extreme events are
                   more likely than one might expect \ldots).  Stock market
                   volatility; sizes of storms / floods, etc.
                   
                \item A power law is a linear relation between logarithms:
                   \begin{eqnarray*}
                       p(x) & = & K x^{-\alpha} \\
                       \log(p(x)) & = & -\alpha \log(x) + \log(K)
                    \end{eqnarray*}
                    
\pagedone
                \item Power laws are scale invariant:
                
                   Sufficient:
                   \begin{eqnarray*}
                      p(x) & = & K x^{-\alpha} \\
                      x    & \to & cx \\
                      p(x) & \to & Kc^{-\alpha}x^{-\alpha} = c^{-\alpha}p(x)
                   \end{eqnarray*}
                   Necessary: Scale invariance is defined as
                   $$ p(cx) = K(c) p(x)$$
                   Power law is the only solution (0 and 1 are trivial solutions).
             \end{itemize}
             
             \item Power laws are actually asymptotic relations.  We can't
                define a power law on $[0, \infty]$:
                
                If $\alpha > 1$, not integrable at 0.
                
                If $\alpha \le 1$, not integrable at $\infty$.
                
                Thus, when we say something is a power law, we mean either within a range,
                or as $x \to 0$ or as $x \to \infty$.
                
 \pagedone
 
            \item Moments:  power laws have a threshold above which moments don't
                 exist.  For $p(x) \sim x^{-(\alpha+1)}$, when $m \ge \alpha$,
                 \begin{eqnarray*}
                   \gamma(m) & = & \int_a^\infty x^m p(x) dx \\
                             & = & \int_a^\infty x^m x^{-(\alpha+1)} dx \\
                             & = & \infty
                 \end{eqnarray*}
                 
            \item The lack of moments is conserved under aggregation \ldots
                 If $\alpha(x)$ is the {\em tail exponent} of the random
                 variable $x$ (the value above which moments don't exist), then
                 \begin{eqnarray*}
                      \alpha(x + y) & = & \min(\alpha(x), \alpha(y)) \\
                      \alpha(xy)  & = & \min(\alpha(x), \alpha(y)) \\
                      \alpha(x^k) & = & \alpha(x)/k.
                 \end{eqnarray*}
                 
 \pagedone
           \item Power laws are generic for heavy / fat tailed distributions.  In
                other words, any ``reasonable'' distribution with fat tails (i.e.,
                with moments that don't exist) is a power law:
                \begin{eqnarray*}
                     P(X > x) & = & 1 - \Phi_{\alpha}(x) \\
                              & = & 1 - \exp(-x^{-\alpha}) \\
                              & \approx & 1 - (1 - x^{-\alpha}) \\
                              & = & x^{-\alpha}
                \end{eqnarray*}
                (there is some discussion of {\em extreme value distributions} that
                goes here, with discussion of Fr\'echet, Weibull, and Gumbel
                distributions -- specifically Fr\'echet distributions (with fat
                tails) \ldots perhaps another place or time).
                
 \pagedone
           \item Some mechanisms for generating power laws:
              \begin{itemize}
                 \item Critical points and deterministic dynamics 
                 \item Non-equilibrium statistical mechanics 
                 \item Random processes 
                 \item Mixtures 
                 \item Maximization principles 
                 \item Preferential attachment 
                 \item Dimensional constraints 

              \end{itemize}
              
 \pagedone
           \item  Multiplicative (random) processes generate $\log$-normal distributions,
              which can look like power law distributions across various ranges of
              the variable.  If $a(t)$ is a random variable:
              \begin{eqnarray*}
                   x(t + 1) & = & a(t) x(t) \\
                   x(t)  & = & \prod_{i = 0}^{t - 1} a(i) x(0) \\
                   \log(x(t)) & = & \sum_{i = 0}^{t - 1} \log(a(i)) + \log(x(0)) \\
                   f(x) & = & \frac{1}{\sqrt{2\pi}\sigma x}e^{-(\log x - \mu)^2/2\sigma^2} \\
                   \log(f(x)) & = & -\frac{(\log(x))^2}{2\sigma^2} + (\frac{\mu}{\sigma^2} - 1)\log(x)
                                    + const
               \end{eqnarray*}
               
               In particular, if $\sigma$ is large in comparison with $\log(x)$,
               then it will look like
               $$\log(f(x)) \approx \log(x^{-1}),$$
               which is a one-over-x power law distribution \ldots
               
 \pagedone
           \item Other distributions that have power-law appearing regions:
           \begin{itemize}
                \item Mixed multiplicative / additive processes ({\em Kesten processes}):
                $$x(t + 1) = a(t)x(t) + b(t)$$
                
                \item Stable multiplicative random walk with reflecting barrier.
                
           \end{itemize}
           
              Both of these will look $\log$-normal in their bodies, and like
                power laws in their tails.
              
              \vfill
              
              (Various pieces of this section draw from lectures / notes by
              Doyne Farmer on power laws in financial markets -- my thanks to him \ldots)
              
     \end{itemize}

\pagedone

\shortsectionhead{Application: Economics II (a power law)}{Application: Economics II}

    \begin{itemize}
        \item Suppose that a (simple) economy is made up of many agents $a$, each with
            wealth at time $t$ in the amount of $w(a, t)$.  (I'll leave it to you to
            come up with a reasonable definition of ``wealth'' -- of course we will
            want to make sure that the definition of ``wealth'' is applied consistently
            across all the agents.)  We can also look at the total wealth in the economy
            $W(t) = \sum_aw(a,t)$.
            
            For this example, we are interested in looking at the distribution of wealth
            in the economy, so we will assume there is some collection $\{w_i\}$ of
            possible values for the wealth an agent can have, and associated probabilities
            $\{p_i\}$ that an agent has wealth $w_i$.  We are hoping to develop a
            model for the collection $\{p_i\}$.
            
\pagedone

        \item In order to apply the maximum entropy principle, we want to look at
            global (aggregate/macro) observables of the system that reflect (or are made
            up of) characteristics of (micro) elements of the system.
            
            For this example, we can look at the growth rate of the economy.  A reasonable
            way to think about this is to let $R_i = w_i(t_1) / w_i(t_0)$ and
            $R = W(t_1)/W(t_0)$ (where $t_0$ and $t_1$ represent time steps of the economy).
            The growth rate will then be $\ln(R)$.  We then have the two constraints on the
            $p_i$:
            $$\sum_ip_i * \ln(R_i) = \ln(R)$$
            and
            $$\sum_ip_i = 1.$$
            
\pagedone

        \item  We now apply Lagrange multipliers:
            \begin{eqnarray*}
               L = \sum_ip_i\ln(1/p_i)
                & - &\lambda\left[\sum_ip_i\ln(R_i) - \ln(R)\right] \\
                & - & \mu\left[\sum_ip_i - 1\right],
             \end{eqnarray*}
            from which we get
            $$\frac{\partial L}{\partial p_i} = -[1 + \ln(p_i)] - \lambda \ln(R_i) - \mu = 0.$$
            
            We can solve this for $p_i$:
            $$p_i = e^{-\lambda_0}e^{-\lambda\ln(R_i)} = e^{-\lambda_0}R_i^{-\lambda}$$
            (where we have set $1 + \mu \equiv \lambda_0$).
            
            Solving, we get $\lambda_0 = \ln(Z(\lambda))$, where $Z(\lambda) \equiv
            \sum_i R_i^{-\lambda}$ (the partition function) normalizes the probability
            distribution to sum to 1.  From this we see the power law (for $\lambda > 1$):
            $$p_i = \frac{R_i^{-\lambda}}{Z(\lambda)}.$$
            
\pagedone

        \item We might actually like to calculate specific values of $\lambda$, so we
            will do the process again in a continuous version.  In this version, we
            will let $R = w(T)/w(0)$ be the relative wealth at time $T$.  We want to
            find the probability density function $f(R)$, that is:
            $$\max_{\{f\}} H(f) = - \int_1^\infty f(R) \ln(f(R))dR,$$
            subject to
            \begin{eqnarray*}
                 \int_1^\infty f(R)dR & = & 1, \\
                 \int_1^\infty f(R)\ln(R)dR & = & C\ln(R),
            \end{eqnarray*}
            where $C$ is the average number of transactions per time step.
            
            We need to apply the calculus of variations to maximize over a class
            of functions.
\pagedone            
            When we are solving an extremal problem of the form
            $$\int F[x, f(x), f'(x)]dx,$$
            we work to solve
            $$\frac{\partial F}{\partial f(x)} - \frac{d}{dx}
                \left(\frac{\partial F}{\partial f'(x)}\right) = 0.$$
                
            Our Lagrangian is of the form
            \begin{eqnarray*}
                L \equiv & - & \int_1^\infty f(R) \ln(f(R)) dR -
                    \mu \left(\int_1^\infty f(R) dR - 1\right) \\
                    & - & \lambda\left(\int_1^\infty f(R)\ln(R)dR - C*\ln(R)\right).
            \end{eqnarray*}
            Since this does not depend on $f'(x)$, we look at:
            $$\frac{\partial[-f(R)\ln f(R) - \mu(f(R) - 1) - \lambda(f(R)\ln R - R)]}
                {\partial f(R)}$$
                $$= 0$$
            from which we get
            $$f(R) = e^{-(\lambda_0 + \lambda \ln(R))} = R^{-\lambda}e^{-\lambda_0},$$
            where again $\lambda_0 \equiv 1 + \mu$.
            
\pagedone

            We can use the first constraint to solve for $e^{\lambda_0}$:
            $$e^{\lambda_0} = \int_1^\infty R^{-\lambda}dR
                = \left[\frac{R^{-\lambda + 1}}{1 - \lambda}\right]_1^\infty
                = \frac{1}{\lambda - 1},
            $$
            assuming $\lambda > 1$.  We therefore have a power law distribution for
            wealth of the form:
            $$f(R) = (\lambda - 1)R^{-\lambda}.$$
            
            To solve for $\lambda$, we use:
            $$C * \ln(R) = (\lambda - 1)\int_1^\infty R^{-\lambda}\ln(R)dR.$$
            Using integration by parts, we get
            \begin{eqnarray*}
               C*\ln(R)
                 & = & (\lambda - 1)\left[\ln(R)\frac{R^{1 - \lambda}}{1 - \lambda}
                        \right]_1^\infty \\
                 &\ \ & - (\lambda - 1)\int_1^\infty\frac{R^{-\lambda}}{1 - \lambda}dR \\
                 & = & (\lambda - 1)\left[\ln(R)\frac{R^{1 - \lambda}}{1 - \lambda}
                        \right]_1^\infty
                        + \left[\frac{R^{1 - \lambda}}{1 - \lambda}\right]_1^\infty.
            \end{eqnarray*}
            
\pagedone

            By L'H\^opital's rule, the first term goes to zero as $R \to \infty$,
            so we are left with
            $$C * \ln(R) = \left[\frac{R^{1 - \lambda}}{1 - \lambda}\right]_1^\infty
                = \frac{1}{\lambda - 1},$$
            or, in other terms,
            $$\lambda - 1 = \frac{1}{C * \ln(R)}.$$
            
            For much more discussion of this example, see the paper {\em A Statistical
            Equilibrium Model of Wealth Distribution} by Mishael Milakovic, February, 2001,
            available on the web at:
            
% \tthdump{\href{http://www.econometricsociety.org/cgi-bin/conference/download.cgi?db\_name=SCE2001\&paper\_id=214}
%   {http://www.econometricsociety.org/
%       cgi-bin/conference/download.cgi
%       ?db\_name=SCE2001\&paper\_id=214}}
% %%tth:\href{http://www.econometricsociety.org/cgi-bin/conference/download.cgi?db\_name=SCE2001\&pape\r_id=214}
% %%tth:{http://www.econometricsociety.org/cgi-bin/conference/download.cgi?db\_name=SCE2001\&paper\_id=214}

\tthdump{\href{http://astarte.csustan.edu/\~tom/SFI-CSSS/Wealth/wealth-Milakovic.pdf}
  {http://astarte.csustan.edu/\~\ tom/SFI-CSSS/Wealth/wealth-Milakovic.pdf}}
%%tth:\href{http://astarte.csustan.edu/~tom/SFI-CSSS/Wealth/wealth-Milakovic.pdf}
%%tth:{http://astarte.csustan.edu/~tom/SFI-CSSS/Wealth/wealth-Milakovic.pdf}
                       
            
    \end{itemize}
    
\pagedone

 \footnotesize
 \bibliographystyle{plain}
 
 \referencehead{References}
 
%%\tthdump{\hypertarget{References}{}\hyperlink{Contents}{\hfil \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 
%%\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ back $\leftarrow$}}
%%%%tth:{\special{html: <A NAME="References"></A><a href="\#Top of file">       Top</a>}}

\vspace{-1.0 in}

\begin{thebibliography}{12}
%%tth:{\special{html: <font size="+0">}}

\bibitem{baryam}
Bar-Yam, Yaneer,
{\em Dynamics of Complex Systems (Studies in Nonlinearity) },
Westview Press, Boulder, 1997.

% \bibitem{bennett2}
% Bennett, C. H., 
% The Thermodynamics of Computation - a Review, 
% {\em International Journal of Theoretical Physics}, 
% {\bf 21}, No. 12, p. 905, 1982.
%  
% \bibitem{bennett3}
% Bennett, C. H. and Landauer, R., 
% The fundamental physical limits of computation,
% {\em Scientific American,} July 38--46, 1985.
 
%  \bibitem{bennett4}
%  Bennett, C. H., 
%  Demons, engines and the second law,
%  {\em Scientific American} {\bf 257} no. 5 (November) pp 88--96,
%  1987.

 \bibitem{brillouin}
 Brillouin, L.,
 {\em Science and information theory},
 Academic Press, New York, 1956.

\bibitem{brooks}
Brooks, Daniel R., and Wiley, E. O.,
{\em Evolution as Entropy}, Toward a Unified Theory of Biology,
Second Edition,
University of Chicago Press, Chicago, 1988.

\bibitem{campbell}
Campbell, Jeremy,
{\em Grammatical Man}, Information, Entropy, Language, and Life,
Simon and Schuster, New York, 1982.

% \bibitem{church}
% Church, Alonzo, 
% An unsolvable problem of elementary number theory,
% {\em Amer. J. Math.} {\bf 58} 345--363, 1936.

% \bibitem{clausen}
% Clausen, M., 
% Fast Generalized Fourier transforms, 
% {\em Theoret. Comput. Sci.} {\bf 56} 55--63 1989.

\bibitem{cover}
Cover, T. M., and Thomas, J. A.,
{\em Elements of Information Theory,}
John Wiley and Sons, New York, 1991.

\bibitem{delillo}
DeLillo, Don,
{\em White Noise},
Viking/Penguin, New York, 1984.

\bibitem{feller}
Feller, W.,
{\em An Introduction to Probability Theory and Its Applications},
Wiley, New York, 1957.
 
\bibitem{feynman-96}
Feynman, Richard,
{\em Feynman lectures on computation},
Addison-Wesley, Reading, 1996.

\bibitem{gatlin}
Gatlin, L. L.,
{\em Information Theory and the Living System},
Columbia University Press, New York, 1972.

\bibitem{greven}
Greven, A., Keller, G., Warnecke, G.,
{\em Entropy}, Princeton Univ. Press, Princeton, 2003.

% \bibitem{golay}
% Golay, M. J. E.,
% Notes on digital coding,
% {\em Proc. IEEE} {\bf 37} 657, 1949.
% 
% \bibitem{grey}
% Garey M R and Johnson D S,
% {\em Computers and Intractability}, 
% Freeman and Company, New York, 1979.

\bibitem{haken}
Haken, Hermann,
{\em Information and Self-Organization, a Macroscopic Approach to Complex Systems},
Springer-Verlag, Berlin/New York, 1988.

\bibitem{hamming1}
Hamming, R. W.,
Error detecting and error correcting codes,
{\em Bell Syst. Tech. J.} {\bf 29} 147, 1950.

\bibitem{hamming2}
Hamming, R. W.,
{\em Coding and information theory}, 2nd ed,
Prentice-Hall, Englewood Cliffs, 1986.

% \bibitem{hardy}
% Hardy, G. H. and Wright, E. M.,
% {\em An introduction to the theory of numbers}
% Clarendon Press, Oxford, 1979.

\bibitem{hill}
Hill, R.,
{\em A first course in coding theory},
Clarendon Press, Oxford, 1986.

\bibitem{hodges}
Hodges, A.,
{\em Alan Turing: the enigma},
Vintage, London, 1983.

\bibitem{hofstadter}
Hofstadter, Douglas R.,
{\em Metamagical Themas:  Questing for the Essence of Mind and Pattern},
Basic Books, New York, 1985.
 
\bibitem{jones}
Jones, D. S.,
{\em Elementary information theory},
Clarendon Press, Oxford, 1979.

% \bibitem{knuthd} 
% Knuth, D. E.,
% {\em The Art of Computer Programming, Vol. 2: Seminumerical Algorithms},
% 2nd ed, Addison-Wesley, Reading, 1981.

\bibitem{knuthe}
Knuth, Eldon L.,
{\em Introduction to Statistical Thermodynamics},
McGraw-Hill, New York, 1966.

\bibitem{landauer3}
Landauer, R.,
Information is physical,
{\em Phys. Today}, May 1991 23--29.

\bibitem{landauer4} 
Landauer, R.,
The physical nature of information,
{\em Phys. Lett. A}, {\bf 217} 188, 1996.

\bibitem{lint}
van Lint, J. H.,
{\em Coding Theory},
Springer-Verlag, New York/Berlin, 1982.

\bibitem{lipton}
Lipton, R. J., 
Using DNA to solve NP-complete problems, 
{\em Science}, {\bf 268} 542--545, Apr. 28, 1995.

\bibitem{macwilliams}
MacWilliams, F. J., and Sloane, N. J. A.,
{\em The theory of error correcting codes},
Elsevier Science, Amsterdam, 1977.

\bibitem{martin}
Martin, N. F. G., and England, J. W.,
{\em Mathematical Theory of Entropy},
Addison-Wesley, Reading, 1981.

\bibitem{maxwell}
Maxwell, J. C.,
{\em Theory of heat},
Longmans, Green and Co, London, 1871.

% \bibitem{minsky}
% Minsky, M. L.,
% {\em Computation: Finite and Infinite Machines}
% Prentice-Hall, Inc., Englewood Cliffs, N. J. (also London 1972), 1967.

\bibitem{neumann}
von Neumann, John,
Probabilistic logic and the synthesis of reliable organisms
		  from unreliable components,
in {\em Automata Studies} (Shannon, McCarthy, eds.), 1956.

\bibitem{papa}
Papadimitriou, C. H., 
{\em Computational Complexity}, 
Addison-Wesley, Reading, 1994.

\bibitem{pierce}
Pierce, John R.,
{\em An Introduction to Information Theory -- Symbols, Signals and Noise},
(second revised edition),
Dover Publications, New York, 1980.

% \bibitem{rabin79} 
% Rabin, M. O., 
% Probabilistic Algorithms,
% {\em  Algorithms and Complexity: New Directions 
% and Recent Results}, pp. 21-39,
%    Academic Press, 1976.

\bibitem{roman}
Roman, Steven, 
{\em Introduction to Coding and Information Theory},
Springer-Verlag, Berlin/New York, 1997.

\bibitem{sampson}
Sampson, Jeffrey R.,
{\em Adaptive Information Processing, an Introductory Survey},
Springer-Verlag, Berlin/New York, 1976.

\bibitem{schroeder1}
Schroeder, Manfred,
{\em Fractals, Chaos, Power Laws, Minutes from an Infinite Paradise},
W. H. Freeman, New York, 1991.

% \bibitem{schroeder2}
% Schroeder, M. R., 1984
% {\em Number theory in science and communication}
% Springer-Verlag, New York/Berlin/Heidelberg, 1984.

\bibitem{shannon}
Shannon, C. E.,
A mathematical theory of communication,
{\em Bell Syst. Tech. J.} {\bf 27} 379; also p. 623, 1948.

\bibitem{slepian}
Slepian, D., ed.,
{\em Key papers in the development of information theory},
IEEE Press, New York, 1974.

% \bibitem{szilard}
% Szilard L 1929 Z. Phys. {\bf 53} 840;
% translated in Wheeler and Zurek (1983).

\bibitem{turing}
Turing, A. M.,
On computable numbers, with an application to the
Entscheidungsproblem,
{\em Proc. Lond. Math. Soc. Ser. 2} {\bf 42}, 230 ; see also
{\em Proc. Lond. Math. Soc. Ser. 2} {\bf 43}, 544, 1936.

% \bibitem{vergis} 
% Vergis, A., Steiglitz, K., and Dickinson, B.,
% The Complexity of Analog Computation,
% {\em Math. Comput. Simulation 28}, pp. 91-113. 1986.

\bibitem{zurek2}
Zurek, W. H.,
Thermodynamic cost of computation, algorithmic complexity and the
information metric,
{\em Nature} {\bf 341} 119--124, 1989.

\end{thebibliography}

\tthdump{\hyperlink{Our general topics:}{\hfil To top $\leftarrow$}}
%%tth:{\special{html: <a href="\#Top of file">       Back to top of file</a>}}

% 
% \sectionhead{On-line references}
% 
% 
% Some of the references listed above are available on line.  They are listed again here for easy access:
% 
% %\bibitem{abrams2}
% Abrams D S and Lloyd S, 
% Non-Linear Quantum Mechanics implies Polynomial Time 
% solution for NP-complete and $\#$P problems,
% %in {\it LANL e-print} quant-ph/9801041, http://xxx.lanl.gov (1998)
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9801041}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9801041}
% {http://xxx.lanl.gov/abs/quant-ph/9801041}
% 
% 
% %\bibitem{aharonov5}
% Aharonov D, Beckman D, Chuang I and  Nielsen M,
% What Makes Quantum Computers Powerful? 
% \hyperref{http://wwwcas.phys.unm.edu/\~mnielsen/science.html}{}{}
% %\hyperURL{http}{wwwcas.phys.unm.edu/~mnielsen}{science.html}
% {http://wwwcas.phys.unm.edu/\~mnielsen/science.html}
% % 
% % 
% 
% %\bibitem{decoherence2}
%  Chuang I L, Laflamme R and Paz J P, 
% Effects of Loss and Decoherence on a Simple Quantum Computer,
% %in {\it LANL e-print} quant-ph/9602018,  http://xxx.lanl.gov (1996)
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9602018}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9602018}
% {http://xxx.lanl.gov/abs/quant-ph/9602018}
% 
% %\bibitem{grover2}
%  Grover L K, 
% A framework for fast quantum mechanical algorithms,
% %in {\it LANL e-print} quant-ph/9711043,  http://xxx.lanl.gov (1997)
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9711043}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9711043}
% {http://xxx.lanl.gov/abs/quant-ph/9711043}
% 
% %\bibitem{grover4}
%  Grover L K, 
% A fast quantum mechanical algorithm for estimating the median,
% %in {\it LANL e-print} quant-ph/9607024,  http://xxx.lanl.gov (1997)
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9607024}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9607024}
% {http://xxx.lanl.gov/abs/quant-ph/9607024}
% 
% 
% %\bibitem{knill4}
% Knill E, Laflamme R and Zurek W H 1997
% Resilient quantum computation: error models and thresholds
% %in {\it LANL e-print} quant-ph/9702058,  http://xxx.lanl.gov (1997)
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9702058}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9702058}
% {http://xxx.lanl.gov/abs/quant-ph/9702058}
% 
% \pagedone
% 
% 
% %\bibitem{preskill2}
% Preskill J 1997
% Fault tolerant quantum computation,
% %in {\it LANL e-print} quant-ph/9712048,  http://xxx.lanl.gov (1997), 
% to appear in {\it Introduction to Quantum
% Computation},  edited by H.-K. Lo, S. Popescu, and T. P. Spiller
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9712048}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9712048}
% {http://xxx.lanl.gov/abs/quant-ph/9712048}
% 
% %\bibitem{preskill3}
% Preskill J, Kitaev A, Course notes for Physics 229, Fall 1998, Caltech Univ.,
% \hyperref{http://www.theory.caltech.edu/people/preskill/ph229}{}{}
% %\hyperURL{http}{www.theory.caltech.edu/people/preskill}{ph229}
% {http://www.theory.caltech.edu/people/preskill/ph229}
% 
% 
% %\bibitem{rieffel}
% Rieffel E, Polak W
% An Introduction to Quantum Computing for Non-Physicists
% %{\it LANL e-print} quant-ph/9809016,  http://xxx.lanl.gov (1998),
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9809016}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9809016}
% {http://xxx.lanl.gov/abs/quant-ph/9809016}
% 
% %\bibitem{Steane-97}
% Steane A,
% Quantum Computation, Reports on Progress in Physics 61 (1998) 117,
% %(preprint in {\it LANL e-print} quant-ph/9708022, http://xxx.lanl.gov)
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9708022}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9708022}
% {http://xxx.lanl.gov/abs/quant-ph/9708022}
% 
% %\bibitem{zalka2}
% Zalka C, 
% Grover's quantum searching algorithm is optimal,
% %in {\it LANL e-print} quant-ph/9711070http://xxx.lanl.gov (1997)
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9711070}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9711070}
% {http://xxx.lanl.gov/abs/quant-ph/9711070}
% 

\end{document}
