From 07f6bda31f4e223e80e7756cba424ab907e6e29e Mon Sep 17 00:00:00 2001
From: Stefan van der Walt <stefanv@berkeley.edu>
Date: Tue, 6 Oct 2015 18:39:46 -0700
Subject: [PATCH 01/16] Add preliminary version of tutorial

---
 docs/conf.py      |  2 ++
 docs/tutorial.rst | 52 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/docs/conf.py b/docs/conf.py
index 34943d339..2bec3ace8 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -35,6 +35,7 @@
     'sphinx.ext.autodoc',
     'sphinx.ext.autosummary',
     'sphinx.ext.viewcode',
+    'IPython.sphinxext.ipython_directive'
 ]
 
 # Config autosummary
@@ -147,6 +148,7 @@
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = []
+ipython_savefig_dir = '../_build/html/_static'
 
 # Add any extra paths that contain custom files (such as robots.txt or
 # .htaccess) here, relative to this directory. These files are copied
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index 7aaed0f49..25ad56ad9 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -1,16 +1,66 @@
 Start Here: ``datascience`` Tutorial
 ====================================
 
-In progress.
+This is general introduction to the functionality in
+:py:mod:`datascience`.  For a reference guide, please see
+:ref:`tables-overview`.
 
 Introduction
 ------------
 
+First, load the :py:mod:`datascience` module:
+
+.. ipython:: python
+
+   import datascience as ds
+
+In the IPython notebook, type `ds.` followed by the TAB-key to see a list of members.
+The most important of these is the :py:class:`Table` class, which is
+the structure used to represent columns of data.
+
 Basic Table Usage
 -----------------
 
+A table is constructed as follows:
+
+.. ipython:: python
+
+   letters = ['a', 'b', 'c', 'z']
+   counts = [9, 3, 3, 1]
+   points = [1, 2, 2, 10]
+
+   t = ds.Table(columns=[letters, counts, points],
+                labels=['letter', 'count', 'points'])
+
+   print(t)
+
+Note how the first keyword, ``columns``, specifies the contents of the
+table, and how the second, ``labels``, gives a name to each column.
+
+A table could also be read from a CSV file (that can be exported from
+an Excel spreadsheet, for example).  Here's the content of the file:
+
+.. ipython:: python
+
+   cat mydata.csv
+
+And this is how we load it in as a :class:`Table`:
+
+.. ipython:: python
+
+   t = ds.Table.read_table('mydata.csv')
+   print(t)
+
 More Advanced Table Usage
 -------------------------
 
+Once a table has been constructed, we can do various queries on it.
+
+Print the first two entries:
+
+.. ipython:: python
+
+   print(t[:2])
+
 Drawing Maps
 ------------

From 3013c702c2e1fc5d1b6507b1ecbde5cf27cbb68c Mon Sep 17 00:00:00 2001
From: Sam Lau <samlau95@gmail.com>
Date: Mon, 21 Dec 2015 03:14:16 -0800
Subject: [PATCH 02/16] Write first few sections of the tutorial

The rest is to come.
---
 docs/sample.csv   |   4 ++
 docs/tutorial.rst | 115 ++++++++++++++++++++++++++++++++++++----------
 2 files changed, 94 insertions(+), 25 deletions(-)
 create mode 100644 docs/sample.csv

diff --git a/docs/sample.csv b/docs/sample.csv
new file mode 100644
index 000000000..ecee95a65
--- /dev/null
+++ b/docs/sample.csv
@@ -0,0 +1,4 @@
+x,y,z
+1,10,100
+2,11,101
+3,12,102
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index 25ad56ad9..82b4ce969 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -1,27 +1,57 @@
 Start Here: ``datascience`` Tutorial
 ====================================
 
-This is general introduction to the functionality in
-:py:mod:`datascience`.  For a reference guide, please see
+This is a brief introduction to the functionality in
+:py:mod:`datascience`.  For a complete reference guide, please see
 :ref:`tables-overview`.
 
-Introduction
-------------
+For other useful tutorials and examples, see:
+
+- `The textbook introduction to Tables`_
+- `Example notebooks`_
+
+.. _The textbook introduction to Tables: http://data8.org/text/1_data.html#tables
+.. _Example notebooks: https://github.com/deculler/TableDemos
 
-First, load the :py:mod:`datascience` module:
+Getting Started
+---------------
+
+The most important functionality in the package is the :py:class:`Table`
+class, which is the structure used to represent columns of data. You may load
+the class with:
 
 .. ipython:: python
 
-   import datascience as ds
+    from datascience import Table
 
-In the IPython notebook, type `ds.` followed by the TAB-key to see a list of members.
-The most important of these is the :py:class:`Table` class, which is
-the structure used to represent columns of data.
+In the IPython notebook, type ``Table.`` followed by the TAB-key to see a list of
+members.
 
-Basic Table Usage
------------------
+Note that for the Data Science 8 class we also import additional packages and
+settings for all assignments and labs. This is so that plots and other available
+packages mirror the ones in the textbook more closely. The exact code we use is:
+
+.. code-block:: python
+
+    # HIDDEN
+
+    import matplotlib
+    matplotlib.use('Agg')
+    from datascience import Table
+    %matplotlib inline
+    import matplotlib.pyplot as plots
+    import numpy as np
+    plots.style.use('fivethirtyeight')
+
+In particular, the lines involving ``matplotlib`` allow for plotting within the
+IPython notebook.
+
+Creating a Table
+----------------
+
+A Table is a sequence of labeled columns of data.
 
-A table is constructed as follows:
+The basic Table constructor works as follows:
 
 .. ipython:: python
 
@@ -29,38 +59,73 @@ A table is constructed as follows:
    counts = [9, 3, 3, 1]
    points = [1, 2, 2, 10]
 
-   t = ds.Table(columns=[letters, counts, points],
-                labels=['letter', 'count', 'points'])
+   t = Table(columns=[letters, counts, points],
+             labels=['letter', 'count', 'points'])
 
    print(t)
 
-Note how the first keyword, ``columns``, specifies the contents of the
-table, and how the second, ``labels``, gives a name to each column.
+Note how the first keyword, ``columns``, specifies the contents of the table,
+and how the second, ``labels``, gives a name to each column. See
+:meth:`~datascience.tables.Table.__init__` for more details.
 
-A table could also be read from a CSV file (that can be exported from
-an Excel spreadsheet, for example).  Here's the content of the file:
+------
+
+A table could also be read from a CSV file (that can be exported from an Excel
+spreadsheet, for example).  Here's the content of an example file:
 
 .. ipython:: python
 
    cat mydata.csv
 
-And this is how we load it in as a :class:`Table`:
+And this is how we load it in as a :class:`Table` using
+:meth:`~datascience.tables.Table.read_table`:
 
 .. ipython:: python
 
-   t = ds.Table.read_table('mydata.csv')
+   t = Table.read_table('sample.csv')
    print(t)
 
-More Advanced Table Usage
--------------------------
+CSVs from URLs are also valid inputs to
+:meth:`~datascience.tables.Table.read_table`:
+
+.. ipython:: python
 
-Once a table has been constructed, we can do various queries on it.
+   Table.read_table('http://data8.org/text/sat2014.csv')
 
-Print the first two entries:
+------
+
+For convenience, you can also initialize a Table from a dictionary of column
+names using
+:meth:`~datascience.tables.Table.from_columns_dict`.
 
 .. ipython:: python
 
-   print(t[:2])
+   Table.from_columns_dict({
+      'letter': letters,
+      'count': counts,
+      'points': points,
+   })
+
+This example illustrates the fact that built-in Python dictionaries don't
+preserve their key order. If you want to ensure the order of your columns, use
+an ``OrderedDict``.
+
+Accessing Values
+----------------
+To come.
+
+Manipulating Data
+-----------------
+To come.
+
+Visualizing Data
+----------------
+To come.
+
+An Example
+----------
+To come.
 
 Drawing Maps
 ------------
+To come.

From 440c377642102fddc86cec153b5026f78fe03edc Mon Sep 17 00:00:00 2001
From: Sam Lau <samlau95@gmail.com>
Date: Tue, 22 Dec 2015 13:10:45 -0800
Subject: [PATCH 03/16] Add highlighting for IPython blocks

---
 docs/conf.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/conf.py b/docs/conf.py
index 2bec3ace8..fba253690 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -35,6 +35,9 @@
     'sphinx.ext.autodoc',
     'sphinx.ext.autosummary',
     'sphinx.ext.viewcode',
+    # These IPython extensions allow for embedded IPython code that gets rerun
+    # at build time.
+    'IPython.sphinxext.ipython_console_highlighting',
     'IPython.sphinxext.ipython_directive'
 ]
 

From 86ace8854cb9800980a01d2271b50043f0929763 Mon Sep 17 00:00:00 2001
From: Chris Holdgraf <choldgraf@berkeley.edu>
Date: Tue, 22 Dec 2015 15:30:43 -0600
Subject: [PATCH 04/16] Added a check for python 3

---
 setup.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/setup.py b/setup.py
index 644dadf47..307f4953a 100644
--- a/setup.py
+++ b/setup.py
@@ -3,6 +3,8 @@
 from setuptools.command.test import test as TestCommand
 
 
+if sys.version_info < (3, 0):
+    raise ValueError('This package requires python >= 3.0')
 
 with open('requirements.txt') as fid:
     install_requires = [l.strip() for l in fid.readlines() if l]

From d0113418b5ac0db91d5bac3663ca46cbcf7096ce Mon Sep 17 00:00:00 2001
From: Sam Lau <samlau95@gmail.com>
Date: Tue, 22 Dec 2015 16:37:01 -0800
Subject: [PATCH 05/16] Write accessing values section

---
 docs/tutorial.rst | 77 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 56 insertions(+), 21 deletions(-)

diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index 82b4ce969..09940c672 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -24,8 +24,8 @@ the class with:
 
     from datascience import Table
 
-In the IPython notebook, type ``Table.`` followed by the TAB-key to see a list of
-members.
+In the IPython notebook, type ``Table.`` followed by the TAB-key to see a list
+of members.
 
 Note that for the Data Science 8 class we also import additional packages and
 settings for all assignments and labs. This is so that plots and other available
@@ -55,14 +55,14 @@ The basic Table constructor works as follows:
 
 .. ipython:: python
 
-   letters = ['a', 'b', 'c', 'z']
-   counts = [9, 3, 3, 1]
-   points = [1, 2, 2, 10]
+    letters = ['a', 'b', 'c', 'z']
+    counts = [9, 3, 3, 1]
+    points = [1, 2, 2, 10]
 
-   t = Table(columns=[letters, counts, points],
-             labels=['letter', 'count', 'points'])
+    t = Table(columns=[letters, counts, points],
+              labels=['letter', 'count', 'points'])
 
-   print(t)
+    print(t)
 
 Note how the first keyword, ``columns``, specifies the contents of the table,
 and how the second, ``labels``, gives a name to each column. See
@@ -75,22 +75,22 @@ spreadsheet, for example).  Here's the content of an example file:
 
 .. ipython:: python
 
-   cat mydata.csv
+    cat sample.csv
 
 And this is how we load it in as a :class:`Table` using
 :meth:`~datascience.tables.Table.read_table`:
 
 .. ipython:: python
 
-   t = Table.read_table('sample.csv')
-   print(t)
+    t = Table.read_table('sample.csv')
+    print(t)
 
 CSVs from URLs are also valid inputs to
 :meth:`~datascience.tables.Table.read_table`:
 
 .. ipython:: python
 
-   Table.read_table('http://data8.org/text/sat2014.csv')
+    Table.read_table('http://data8.org/text/sat2014.csv')
 
 ------
 
@@ -100,23 +100,58 @@ names using
 
 .. ipython:: python
 
-   Table.from_columns_dict({
-      'letter': letters,
-      'count': counts,
-      'points': points,
-   })
+    Table.from_columns_dict({
+       'letter': letters,
+       'count': counts,
+       'points': points,
+    })
 
 This example illustrates the fact that built-in Python dictionaries don't
-preserve their key order. If you want to ensure the order of your columns, use
-an ``OrderedDict``.
+preserve their key order -- the dictionary keys are ordered 'letter', 'count',
+then 'points', but the table columns are ordered 'points', 'count', then
+'letter'). If you want to ensure the order of your columns, use an
+``OrderedDict``.
 
 Accessing Values
 ----------------
-To come.
+
+To access values of columns in the table, use
+:meth:`~datascience.tables.Table.values`.
+
+.. ipython:: python
+
+    t
+
+    t.values('x')
+    t.values('y')
+
+    t['x'] # This is a shorthand for t.values('x')
+
+To access values by row, :meth:`~datascience.tables.Table.rows` returns an
+list-like :class:`~datascience.tables.Table.Rows` object that contains
+tuple-like :class:`~datascience.tables.Table.Row` objects.
+
+.. ipython:: python
+
+    t.rows
+    t.rows[0]
+
+    second = t.rows[1]
+    second
+    second[0]
+    second[1]
+
+To get the number of rows, use :meth:`~datascience.tables.Table.num_rows`.
+
+.. ipython:: python
+
+    t.num_rows
+
 
 Manipulating Data
 -----------------
-To come.
+
+
 
 Visualizing Data
 ----------------

From ba3fbf15660cf9a58881f84513b2f36b2019f1e4 Mon Sep 17 00:00:00 2001
From: Sam Lau <samlau95@gmail.com>
Date: Wed, 23 Dec 2015 16:47:23 -0800
Subject: [PATCH 06/16] Fix error in figure path for ipython plots

---
 docs/conf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/conf.py b/docs/conf.py
index fba253690..5bc561ff8 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -151,7 +151,7 @@
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = []
-ipython_savefig_dir = '../_build/html/_static'
+ipython_savefig_dir = './_build/html/_images'
 
 # Add any extra paths that contain custom files (such as robots.txt or
 # .htaccess) here, relative to this directory. These files are copied

From 8368b1d9d6b588dcbe1dd6a5fcb83675f3da01bf Mon Sep 17 00:00:00 2001
From: Sam Lau <samlau95@gmail.com>
Date: Wed, 23 Dec 2015 16:47:59 -0800
Subject: [PATCH 07/16] Write rest of tutorial

Except for the example
---
 .gitignore        |   1 +
 docs/tutorial.rst | 162 ++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 152 insertions(+), 11 deletions(-)

diff --git a/.gitignore b/.gitignore
index cc5c9d120..5747a1fc0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,3 +61,4 @@ cache/
 
 docs/_build/
 docs/_autosummary/
+docs/normal_data.csv
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index 09940c672..3585392f5 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -82,8 +82,7 @@ And this is how we load it in as a :class:`Table` using
 
 .. ipython:: python
 
-    t = Table.read_table('sample.csv')
-    print(t)
+    Table.read_table('sample.csv')
 
 CSVs from URLs are also valid inputs to
 :meth:`~datascience.tables.Table.read_table`:
@@ -107,10 +106,10 @@ names using
     })
 
 This example illustrates the fact that built-in Python dictionaries don't
-preserve their key order -- the dictionary keys are ordered 'letter', 'count',
-then 'points', but the table columns are ordered 'points', 'count', then
-'letter'). If you want to ensure the order of your columns, use an
-``OrderedDict``.
+preserve their key order -- the dictionary keys are ordered ``'letter'``,
+``'count'``, then ``'points'``, but the table columns are ordered ``'points'``,
+``'count'``, then ``'letter'``. If you want to ensure the order of your
+columns, use an ``OrderedDict``.
 
 Accessing Values
 ----------------
@@ -122,10 +121,10 @@ To access values of columns in the table, use
 
     t
 
-    t.values('x')
-    t.values('y')
+    t.values('letter')
+    t.values('count')
 
-    t['x'] # This is a shorthand for t.values('x')
+    t['letter'] # This is a shorthand for t.values('letter')
 
 To access values by row, :meth:`~datascience.tables.Table.rows` returns an
 list-like :class:`~datascience.tables.Table.Rows` object that contains
@@ -141,7 +140,7 @@ tuple-like :class:`~datascience.tables.Table.Row` objects.
     second[0]
     second[1]
 
-To get the number of rows, use :meth:`~datascience.tables.Table.num_rows`.
+To get the number of rows, use :attr:`~datascience.tables.Table.num_rows`.
 
 .. ipython:: python
 
@@ -151,11 +150,152 @@ To get the number of rows, use :meth:`~datascience.tables.Table.num_rows`.
 Manipulating Data
 -----------------
 
+Here are some of the most common operations on data. For the rest, see the
+reference (:ref:`tables-overview`).
+
+Adding a column with :meth:`~datascience.tables.Table.with_column`:
+
+.. ipython:: python
+
+    t
+    t.with_column('vowel?', ['yes', 'no', 'no', 'no'])
+    t # .with_column returns a new table without modifying the original
+
+    t.with_column('2 * count', t['count'] * 2) # A simple way to operate on columns
+
+Selecting columns with :meth:`~datascience.tables.Table.select`:
+
+.. ipython:: python
+
+    t.select('letter')
+    t.select(['letter', 'points'])
+
+Renaming columns with :meth:`~datascience.tables.Table.with_relabeling`:
+
+.. ipython:: python
+
+    t
+    t.with_relabeling('points', 'other name')
+    t
+    t.with_relabeling(['letter', 'count', 'points'], ['x', 'y', 'z'])
+
+Selecting out rows by index with :meth:`~datascience.tables.Table.take` and
+conditionally with :meth:`~datascience.tables.Table.where`:
+
+.. ipython:: python
+
+    t
+    t.take(2) # the third row
+    t.take[0:2] # the first and second rows
+
+.. ipython:: python
+
+    t.where('points', 2) # rows where points == 2
+    t.where(t['count'] < 8) # rows where count < 8
+
+    t['count'] < 8 # .where actually takes in an array of booleans
+    t.where([False, True, True, True]) # same as the last line
+
+Operate on table data with :meth:`~datascience.tables.Table.sort`,
+:meth:`~datascience.tables.Table.group`, and
+:meth:`~datascience.tables.Table.pivot`:
+
+.. ipython:: python
+
+    t
+    t.sort('count')
+    t.sort('letter', descending = True)
+
+.. ipython:: python
+
+    t.group('count')
+
+    # You may pass a reducing function into the collect arg
+    # Note the renaming of the points column because of the collect arg
+    t.select(['count', 'points']).group('count', collect = sum)
+
+.. ipython:: python
+
+    other_table = Table([
+        ['married', 'married', 'partner', 'partner', 'married'],
+        ['Working as paid', 'Working as paid', 'Not working', 'Not working', 'Not working'],
+        [1, 1, 1, 1, 1]
+    ],
+    ['mar_status', 'empl_status', 'count'])
+    other_table
 
+    other_table.pivot('mar_status', 'empl_status', 'count', collect = sum)
 
 Visualizing Data
 ----------------
-To come.
+
+We'll start with some data drawn at random from two normal distributions:
+
+.. ipython:: python
+
+    normal_data = Table(
+        [ np.random.normal(loc = 1, scale = 2, size = 100),
+          np.random.normal(loc = 4, scale = 3, size = 100) ],
+        ['data1', 'data2']
+    })
+
+    normal_data
+
+Draw histograms with :meth:`~datascience.tables.Table.hist`:
+
+.. ipython:: python
+
+    @savefig hist.png width=4in
+    normal_data.hist()
+
+.. ipython:: python
+
+    @savefig hist_binned.png width=4in
+    normal_data.hist(bins = range(-5, 10))
+
+.. ipython:: python
+
+    @savefig hist_overlay.png width=4in
+    normal_data.hist(bins = range(-5, 10), overlay = True)
+
+If we treat the ``normal_data`` table as a set of x-y points, we can
+:meth:`~datascience.tables.Table.plot` and
+:meth:`~datascience.tables.Table.scatter`:
+
+.. ipython:: python
+
+    @savefig plot.png width=4in
+    normal_data.sort('data1').plot('data1') # Sort first to make plot nicer
+
+.. ipython:: python
+
+    @savefig scatter.png width=4in
+    normal_data.scatter('data1')
+
+.. ipython:: python
+
+    @savefig scatter_line.png width=4in
+    normal_data.scatter('data1', fit_line = True)
+
+Use :meth:`~datascience.tables.Table.barh` to display categorical data.
+
+.. ipython:: python
+
+    t
+    t.barh('letter')
+
+Exporting
+---------
+
+Exporting to CSV is the most common operation and can be done by first
+converting to a pandas dataframe with :meth:`~datascience.tables.Table.to_df`:
+
+.. ipython:: python
+
+    normal_data
+
+    # index = False prevents row numbers from appearing in the resulting CSV
+    normal_data.to_df().to_csv('normal_data.csv', index = False)
 
 An Example
 ----------

From 9a675087c7512a070349f5ada811a97893057150 Mon Sep 17 00:00:00 2001
From: Sam Lau <samlau95@gmail.com>
Date: Wed, 23 Dec 2015 17:29:31 -0800
Subject: [PATCH 08/16] Fix typo in tutorial and silence matplotlib warning

---
 docs/conf.py      | 11 +++++++++++
 docs/tutorial.rst | 28 ++++++++++++++++++++++++++--
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 5bc561ff8..1e5fceac7 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -41,6 +41,17 @@
     'IPython.sphinxext.ipython_directive'
 ]
 
+# The following lines silence the matplotlib.use warnings since we import
+# matplotlib in each ipython directive block
+ipython_mplbackend = None
+ipython_execlines = [
+  'import matplotlib',
+  'matplotlib.use("Agg", warn=False)',
+  'import numpy as np',
+  'import matplotlib.pyplot as plt',
+  'plt.style.use("fivethirtyeight")',
+]
+
 # Config autosummary
 autosummary_generate = True
 
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index 3585392f5..7f7db8dd7 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -237,7 +237,7 @@ We'll start with some data drawn at random from two normal distributions:
         [ np.random.normal(loc = 1, scale = 2, size = 100),
           np.random.normal(loc = 4, scale = 3, size = 100) ],
         ['data1', 'data2']
-    })
+    )
 
     normal_data
 
@@ -299,7 +299,31 @@ converting to a pandas dataframe with :meth:`~datascience.tables.Table.to_df`:
 
 An Example
 ----------
-To come.
+
+Because most methods return a new Table, we can chain the above methods to
+work with data easily.
+
+We'll recreate the steps in `Chapter 3 of the textbook`_ to see if there is a
+significant difference in birth weights between smokers and non-smokers using a
+bootstrap test.
+
+.. _Chapter 3 of the textbook: http://data8.org/text/3_inference.html#Using-the-Bootstrap-Method-to-Test-Hypotheses
+
+From the text:
+
+    The table ``baby`` contains data on a random sample of 1,174 mothers and
+    their newborn babies. The column ``birthwt`` contains the birth weight of
+    the baby, in ounces; ``gest_days`` is the number of gestational days, that
+    is, the number of days the baby was in the womb. There is also data on
+    maternal age, maternal height, maternal pregnancy weight, and whether or not
+    the mother was a smoker.
+
+.. ipython:: python
+
+    baby = Table.read_table('http://data8.org/text/baby.csv')
+    baby # Let's take a peek at the table
+
+
 
 Drawing Maps
 ------------

From ae4dd7e6a266bddae3ca997105602d0989f97289 Mon Sep 17 00:00:00 2001
From: Sam Lau <samlau95@gmail.com>
Date: Wed, 23 Dec 2015 18:52:58 -0800
Subject: [PATCH 09/16] Add example and finish tutorial

---
 docs/tutorial.rst | 68 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 3 deletions(-)

diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index 7f7db8dd7..a3ac92457 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -282,6 +282,7 @@ Use :meth:`~datascience.tables.Table.barh` to display categorical data.
 .. ipython:: python
 
     t
+    @savefig barh.png width=4in
     t.barh('letter')
 
 Exporting
@@ -300,14 +301,14 @@ converting to a pandas dataframe with :meth:`~datascience.tables.Table.to_df`:
 An Example
 ----------
 
-Because most methods return a new Table, we can chain the above methods to
-work with data easily.
-
 We'll recreate the steps in `Chapter 3 of the textbook`_ to see if there is a
 significant difference in birth weights between smokers and non-smokers using a
 bootstrap test.
 
+For more examples, check out `the TableDemos repo`_.
+
 .. _Chapter 3 of the textbook: http://data8.org/text/3_inference.html#Using-the-Bootstrap-Method-to-Test-Hypotheses
+.. _the TableDemos repo: https://github.com/deculler/TableDemos
 
 From the text:
 
@@ -323,6 +324,67 @@ From the text:
     baby = Table.read_table('http://data8.org/text/baby.csv')
     baby # Let's take a peek at the table
 
+    # Select out columns we want.
+    smoker_and_wt = baby.select(['m_smoker', 'birthwt'])
+    smoker_and_wt
+
+Let's compare the number of smokers to non-smokers.
+
+.. ipython:: python
+
+    @savefig m_smoker.png width=4in
+    smoker_and_wt.select('m_smoker').hist(bins = [0, 1, 2]);
+
+We can also compare the distribution of birthweights between smokers and
+non-smokers.
+
+.. ipython:: python
+
+    # Non smokers
+    # We do this by grabbing the rows that correspond to mothers that don't
+    # smoke, then plotting a histogram of just the birthweights.
+    @savefig not_m_smoker_weights.png width=4in
+    smoker_and_wt.where('m_smoker', 0).select('birthwt').hist()
+
+    # Smokers
+    @savefig m_smoker_weights.png width=4in
+    smoker_and_wt.where('m_smoker', 1).select('birthwt').hist()
+
+What's the difference in mean birth weight of the two categories?
+
+.. ipython:: python
+
+    nonsmoking_mean = smoker_and_wt.where('m_smoker', 0).values('birthwt').mean()
+    smoking_mean = smoker_and_wt.where('m_smoker', 1).values('birthwt').mean()
+
+    observed_diff = nonsmoking_mean - smoking_mean
+    observed_diff
+
+Let's do the bootstrap test on the two categories.
+
+.. ipython:: python
+
+    num_nonsmokers = smoker_and_wt.where('m_smoker', 0).num_rows
+    def bootstrap_once():
+        """
+        Computes one bootstrapped difference in means.
+        The table.sample method lets us take random samples.
+        We then split according to the number of nonsmokers in the original sample.
+        """
+        resample = smoker_and_wt.sample(with_replacement = True)
+        bootstrap_diff = resample.values('birthwt')[:num_nonsmokers].mean() - \
+            resample.values('birthwt')[num_nonsmokers:].mean()
+        return bootstrap_diff
+
+    repetitions = 1000
+    bootstrapped_diff_means = np.array(
+        [ bootstrap_once() for _ in range(repetitions) ])
+
+    bootstrapped_diff_means[:10]
+
+    num_diffs_greater = (abs(bootstrapped_diff_means) > abs(observed_diff)).sum()
+    p_value = num_diffs_greater / len(bootstrapped_diff_means)
+    p_value
 
 
 Drawing Maps

From 994d8a0ae63739b19dc134f9b066b235203a617c Mon Sep 17 00:00:00 2001
From: Sam Lau <samlau95@gmail.com>
Date: Thu, 24 Dec 2015 00:00:30 -0800
Subject: [PATCH 10/16] Use plt instead of plots

---
 docs/tutorial.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index a3ac92457..1328bae9e 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -39,9 +39,9 @@ packages mirror the ones in the textbook more closely. The exact code we use is:
     matplotlib.use('Agg')
     from datascience import Table
     %matplotlib inline
-    import matplotlib.pyplot as plots
+    import matplotlib.pyplot as plt
     import numpy as np
-    plots.style.use('fivethirtyeight')
+    plt.style.use('fivethirtyeight')
 
 In particular, the lines involving ``matplotlib`` allow for plotting within the
 IPython notebook.

From 30eda1064631d5c65a24a16f4b445bcc9bc38584 Mon Sep 17 00:00:00 2001
From: Sam Lau <samlau95@gmail.com>
Date: Sun, 20 Dec 2015 21:57:52 -0800
Subject: [PATCH 11/16] Add methods we forgot to document before to docs

Table.exclude and Table.boxplot are available but were not put into the
docs. This commit fixes that.

I also reordered methods to better match the ordering in the docs.
---
 datascience/tables.py | 328 +++++++++++++++++++++---------------------
 docs/tables.rst       |   2 +
 2 files changed, 166 insertions(+), 164 deletions(-)

diff --git a/datascience/tables.py b/datascience/tables.py
index 3ed3dcf9e..50fb1d5cb 100644
--- a/datascience/tables.py
+++ b/datascience/tables.py
@@ -246,15 +246,6 @@ def __init__(self, columns=None, labels=None,
         self.take = _RowTaker(self)
         self.exclude = _RowExcluder(self)
 
-    # These, along with a snippet below, are necessary for Sphinx to
-    # correctly load the `take` and `exclude` docstrings.  The definitions
-    # will be over-ridden during class instantiation.
-    def take(self):
-        raise NotImplementedError()
-
-    def exclude(self):
-        raise NotImplementedError()
-
     @classmethod
     def empty(cls, column_labels=None):
         """Create an empty table. Column labels are optional
@@ -345,6 +336,18 @@ def _add_column_and_format(self, table, label, column):
         if label in self._formats:
             table._formats[label] = self._formats[label]
 
+    @classmethod
+    def from_df(cls, df):
+        """Convert a Pandas DataFrame into a Table."""
+        labels = df.columns
+        return Table([df[label].values for label in labels], labels)
+
+    @classmethod
+    def from_array(cls, arr):
+        """Convert a structured NumPy array into a Table."""
+        return Table([arr[f] for f in arr.dtype.names],
+                     labels=arr.dtype.names)
+
 
     #################
     # Magic Methods #
@@ -711,6 +714,15 @@ def select(self, column_label_or_labels):
             self._add_column_and_format(table, label, np.copy(self[label]))
         return table
 
+    # These, along with a snippet below, are necessary for Sphinx to
+    # correctly load the `take` and `exclude` docstrings.  The definitions
+    # will be over-ridden during class instantiation.
+    def take(self):
+        raise NotImplementedError()
+
+    def exclude(self):
+        raise NotImplementedError()
+
     def drop(self, column_label_or_labels):
         """Return a Table with only columns other than selected label or labels."""
         exclude = _as_labels(column_label_or_labels)
@@ -1298,18 +1310,6 @@ def index_by(self, column_or_label):
             index.setdefault(key, []).append(row)
         return index
 
-    @classmethod
-    def from_df(cls, df):
-        """Convert a Pandas DataFrame into a Table."""
-        labels = df.columns
-        return Table([df[label].values for label in labels], labels)
-
-    @classmethod
-    def from_array(cls, arr):
-        """Convert a structured NumPy array into a Table."""
-        return Table([arr[f] for f in arr.dtype.names],
-                     labels=arr.dtype.names)
-
     def to_df(self):
         """Convert the table to a Pandas DataFrame."""
         return pandas.DataFrame(self._columns)
@@ -1348,6 +1348,28 @@ def to_array(self):
         'alpha': 0.8,
     }
 
+    def _visualize(self, x_label, y_labels, ticks, overlay, draw, annotate, width=6, height=4):
+        """Generic visualization that overlays or separates the draw function."""
+        n = len(y_labels)
+        colors = list(itertools.islice(itertools.cycle(self.chart_colors), n))
+        if overlay:
+            _, axis = plt.subplots(figsize=(width, height))
+            for label, color in zip(y_labels, colors):
+                draw(axis, label, color)
+            if ticks is not None:
+                annotate(axis, ticks)
+            axis.legend(y_labels, bbox_to_anchor=(1.5, 1.0))
+        else:
+            fig, axes = plt.subplots(n, 1, figsize=(width, height * n))
+            if not isinstance(axes, collections.Iterable):
+                axes=[axes]
+            for axis, y_label, color in zip(axes, y_labels, colors):
+                draw(axis, y_label, color)
+                axis.set_ylabel(y_label, fontsize=16)
+                axis.set_xlabel(x_label, fontsize=16)
+                if ticks is not None:
+                    annotate(axis, ticks)
+
     def plot(self, column_for_xticks, overlay=False, **vargs):
         """Plot contents as lines."""
         options = self.default_options.copy()
@@ -1366,84 +1388,70 @@ def annotate(axis, ticks):
 
         self._visualize(column_for_xticks, y_labels, xticks, overlay, draw, annotate)
 
-    def scatter(self, column_for_x, overlay=False, fit_line=False, **vargs):
-        """Creates scatterplots, optionally adding a line of best fit.
+    def bar(self, column_for_categories=None, overlay=False, **vargs):
+        """Plots bar charts for the table.
 
-        All scatterplots use the values in ``column_for_x`` as the x-values. A
-        total of n - 1 scatterplots are created where n is the number of
-        columns in the table, one for every column other than ``column_for_x``.
+        Each chart is categorized using the values in `column_for_categories`
+        and one chart is produced for every other column in the table.
+        A total of n - 1 charts are created where n is the number of columns
+        in the table.
 
-        Requires all columns in the table to contain numerical values only.
-        If the columns contain other types, a ``ValueError`` is raised.
+        Requires every column except for `column_for_categories` to be
+        numerical. If the columns contain other types, a `ValueError` is
+        raised.
 
         Args:
-            ``column_for_x`` (str): The name to use for the x-axis values of the
-                scatter plots.
+            column_for_categories (str): The name to use for the bar chart
+                categories
 
         Kwargs:
-            ``overlay`` (bool): If True, creates one scatterplot with n - 1
-                y-values plotted, one for each column other than
-                ``column_for_x`` (instead of the default behavior of creating n
-                - 1 scatterplots. Also adds a legend that matches each dot
-                and best-fit line color to its column.
-
-            ``fit_line`` (bool): If True, draws a line of best fit for each
-                scatterplot drawn.
+            overlay (bool): If True, creates one chart with n - 1 bars for each
+                category, one for each column other than `column_for_categories`
+                (instead of the default behavior of creating n - 1 charts).
+                Also adds a legend that matches each bar color to its column.
 
-            ``vargs``: Additional arguments that get passed into `plt.scatter`.
-                See http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.scatter
+            vargs: Additional arguments that get passed into `plt.bar`.
+                See http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.bar
                 for additional arguments that can be passed into vargs. These
-                include: `marker` and `norm`, to name a couple.
+                include: `linewidth`, `xerr`, `yerr`, and `log`, to name a few.
 
         Returns:
             None
 
         Raises:
-            ``ValueError``: The table contains non-numerical values in columns.
-
-        >>> x = [9, 3, 3, 1]
-        >>> y = [1, 2, 2, 10]
-        >>> z = [3, 4, 5, 6]
-        >>> table = Table([x, y, z], ['x', 'y', 'z'])
-        >>> table
-        x    | y    | z
-        9    | 1    | 3
-        3    | 2    | 4
-        3    | 2    | 5
-        1    | 10   | 6
-        >>> table.scatter('x') # doctest: +SKIP
-        <scatterplot of values in y on x>
-        <scatterplot of values in z on x>
-
-        >>> table.scatter('x', overlay = True) # doctest: +SKIP
-        <scatterplot of values in y and z on x>
-
-        >>> table.scatter('x', fit_line = True) # doctest: +SKIP
-        <scatterplot of values in y on x with line of best fit>
-        <scatterplot of values in z on x with line of best fit>
+            ValueError: The Table contains non-numerical values in columns
+            other than `column_for_categories`
 
         """
-        # Check for non-numerical values and raise a ValueError if any found
-        for col in self:
-            if any(isinstance(cell, np.flexible) for cell in self[col]):
-                raise ValueError("The column '{0}' contains non-numerical "
-                    "values. A histogram cannot be drawn for this table."
-                    .format(col))
-
         options = self.default_options.copy()
         options.update(vargs)
-        xdata, y_labels =  self._split_by_column(column_for_x)
+
+        xticks, y_labels = self._split_by_column(column_for_categories)
+        for label in y_labels:
+            if any(isinstance(cell, np.flexible) for cell in self[label]):
+                raise ValueError("The column '{0}' contains non-numerical "
+                    "values. A bar graph cannot be drawn for this table."
+                    .format(label))
+
+        index = np.arange(self.num_rows)
+        margin = 0.1
+        width = 1 - 2 * margin
+        if overlay:
+            width /= len(y_labels)
 
         def draw(axis, label, color):
-            axis.scatter(xdata, self[label], color=color, **options)
-            if fit_line:
-                m,b = np.polyfit(xdata, self[label], 1)
-                minx, maxx = np.min(xdata),np.max(xdata)
-                axis.plot([minx,maxx],[m*minx+b,m*maxx+b])
+            # Overlay: shift each column's bars by its slot within the category.
+            xpos = index
+            if overlay:
+                xpos = index + margin + (1-2*margin)*y_labels.index(label)/len(y_labels)
+            axis.bar(xpos, self[label], 1.0, color=color, **options)
 
         def annotate(axis, ticks):
+            if (ticks is not None) :
+                tick_labels = [ticks[int(l)] for l in axis.get_xticks() if l<len(ticks)]
+                axis.set_xticklabels(tick_labels, stretch='ultra-condensed')
             return None
-        self._visualize(column_for_x, y_labels, None, overlay, draw, annotate)
+        self._visualize(column_for_categories, y_labels, xticks, overlay, draw, annotate)
 
     def barh(self, column_for_categories, overlay=False, **vargs):
         """Plots horizontal bar charts for the table.
@@ -1549,93 +1557,6 @@ def annotate(axis, ticks):
             height = vargs.pop('height')
         self._visualize(column_for_categories, y_labels, yticks, overlay, draw, annotate, height=height)
 
-    def bar(self, column_for_categories=None, overlay=False, **vargs):
-        """Plots bar charts for the table.
-
-        Each chart is categorized using the values in `column_for_categories`
-        and one chart is produced for every other column in the table.
-        A total of n - 1 charts are created where n is the number of columns
-        in the table.
-
-        Requires every column except for `column_for_categories` to be
-        numerical. If the columns contain other types, a `ValueError` is
-        raised.
-
-        Args:
-            column_for_categories (str): The name to use for the bar chart
-                categories
-
-        Kwargs:
-            overlay (bool): If True, creates one chart with n - 1 bars for each
-                category, one for each column other than `column_for_categories`
-                (instead of the default behavior of creating n - 1 charts).
-                Also adds a legend that matches each bar color to its column.
-
-            vargs: Additional arguments that get passed into `plt.bar`.
-                See http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.bar
-                for additional arguments that can be passed into vargs. These
-                include: `linewidth`, `xerr`, `yerr`, and `log`, to name a few.
-
-        Returns:
-            None
-
-        Raises:
-            ValueError: The Table contains non-numerical values in columns
-            other than `column_for_categories`
-
-        """
-        options = self.default_options.copy()
-        options.update(vargs)
-
-        xticks, y_labels = self._split_by_column(column_for_categories)
-        for label in y_labels:
-            if any(isinstance(cell, np.flexible) for cell in self[label]):
-                raise ValueError("The column '{0}' contains non-numerical "
-                    "values. A bar graph cannot be drawn for this table."
-                    .format(label))
-
-        index = np.arange(self.num_rows)
-        margin = 0.1
-        width = 1 - 2 * margin
-        if overlay:
-            width /= len(y_labels)
-
-        def draw(axis, label, color):
-            if overlay:
-                xpos = index + margin + (1-2*margin)*labels.index(label)/len(labels)
-            else:
-                xpos = index
-            axis.bar(xpos, self[label], 1.0, color=color, **options)
-
-        def annotate(axis, ticks):
-            if (ticks is not None) :
-                tick_labels = [ticks[int(l)] for l in axis.get_xticks() if l<len(ticks)]
-                axis.set_xticklabels(tick_labels, stretch='ultra-condensed')
-            return None
-        self._visualize(column_for_categories, y_labels, xticks, overlay, draw, annotate)
-
-    def _visualize(self, x_label, y_labels, ticks, overlay, draw, annotate, width=6, height=4):
-        """Generic visualization that overlays or separates the draw function."""
-        n = len(y_labels)
-        colors = list(itertools.islice(itertools.cycle(self.chart_colors), n))
-        if overlay:
-            _, axis = plt.subplots(figsize=(width, height))
-            for label, color in zip(y_labels, colors):
-                draw(axis, label, color)
-            if ticks is not None:
-                annotate(axis, ticks)
-            axis.legend(y_labels, bbox_to_anchor=(1.5, 1.0))
-        else:
-            fig, axes = plt.subplots(n, 1, figsize=(width, height * n))
-            if not isinstance(axes, collections.Iterable):
-                axes=[axes]
-            for axis, y_label, color in zip(axes, y_labels, colors):
-                draw(axis, y_label, color)
-                axis.set_ylabel(y_label, fontsize=16)
-                axis.set_xlabel(x_label, fontsize=16)
-                if ticks is not None:
-                    annotate(axis, ticks)
-
     def _split_by_column(self, column_or_label):
         """Return the specified column and labels of other columns."""
         labels = list(self.column_labels)
@@ -1827,6 +1748,85 @@ def points(self, column__lat, column__long, labels=None, colors=None, **kwargs)
         if colors is not None : colors = self._get_column(colors)
         return _maps.Circle.map(latitudes, longitudes, labels=labels, colors=colors, **kwargs)
 
+    def scatter(self, column_for_x, overlay=False, fit_line=False, **vargs):
+        """Creates scatterplots, optionally adding a line of best fit.
+
+        All scatterplots use the values in ``column_for_x`` as the x-values. A
+        total of n - 1 scatterplots are created where n is the number of
+        columns in the table, one for every column other than ``column_for_x``.
+
+        Requires all columns in the table to contain numerical values only.
+        If the columns contain other types, a ``ValueError`` is raised.
+
+        Args:
+            ``column_for_x`` (str): The name to use for the x-axis values of the
+                scatter plots.
+
+        Kwargs:
+            ``overlay`` (bool): If True, creates one scatterplot with n - 1
+                y-values plotted, one for each column other than
+                ``column_for_x`` (instead of the default behavior of creating n
+                - 1 scatterplots). Also adds a legend that matches each dot
+                and best-fit line color to its column.
+
+            ``fit_line`` (bool): If True, draws a line of best fit for each
+                scatterplot drawn, in the same color as that column's dots.
+
+            ``vargs``: Additional arguments that get passed into `plt.scatter`.
+                See http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.scatter
+                for additional arguments that can be passed into vargs. These
+                include: `marker` and `norm`, to name a couple.
+
+        Returns:
+            None
+
+        Raises:
+            ``ValueError``: The table contains non-numerical values in columns.
+
+        >>> x = [9, 3, 3, 1]
+        >>> y = [1, 2, 2, 10]
+        >>> z = [3, 4, 5, 6]
+        >>> table = Table([x, y, z], ['x', 'y', 'z'])
+        >>> table
+        x    | y    | z
+        9    | 1    | 3
+        3    | 2    | 4
+        3    | 2    | 5
+        1    | 10   | 6
+        >>> table.scatter('x') # doctest: +SKIP
+        <scatterplot of values in y on x>
+        <scatterplot of values in z on x>
+
+        >>> table.scatter('x', overlay = True) # doctest: +SKIP
+        <scatterplot of values in y and z on x>
+
+        >>> table.scatter('x', fit_line = True) # doctest: +SKIP
+        <scatterplot of values in y on x with line of best fit>
+        <scatterplot of values in z on x with line of best fit>
+
+        """
+        # Reject np.flexible cells in any column before a figure is created
+        for col in self:
+            if any(isinstance(cell, np.flexible) for cell in self[col]):
+                raise ValueError("The column '{0}' contains non-numerical "
+                    "values. A scatter plot cannot be drawn for this table."
+                    .format(col))
+
+        options = self.default_options.copy()
+        options.update(vargs)
+        xdata, y_labels = self._split_by_column(column_for_x)
+
+        def draw(axis, label, color):
+            axis.scatter(xdata, self[label], color=color, **options)
+            if fit_line:
+                m, b = np.polyfit(xdata, self[label], 1)
+                minx, maxx = np.min(xdata), np.max(xdata)
+                axis.plot([minx, maxx], [m*minx+b, m*maxx+b], color=color)
+
+        def annotate(axis, ticks):
+            return None
+        self._visualize(column_for_x, y_labels, None, overlay, draw, annotate)
+
     ###########
     # Support #
     ###########
diff --git a/docs/tables.rst b/docs/tables.rst
index a0579bd5a..8bd632024 100644
--- a/docs/tables.rst
+++ b/docs/tables.rst
@@ -86,6 +86,7 @@ Transformation (creates a new table)
     Table.select
     Table.drop
     Table.take
+    Table.exclude
     Table.where
     Table.sort
     Table.group
@@ -125,3 +126,4 @@ Visualizations
     Table.hist
     Table.points
     Table.scatter
+    Table.boxplot

From aef1001befd7bbecd67d3e3c464e01a9cae4dc08 Mon Sep 17 00:00:00 2001
From: Sam Lau <samlau95@gmail.com>
Date: Wed, 23 Dec 2015 19:05:11 -0800
Subject: [PATCH 12/16] Add to_csv method

This is a convenience method that is commonly requested.
---
 datascience/tables.py | 29 +++++++++++++++++++++++++++++
 docs/tables.rst       |  1 +
 2 files changed, 30 insertions(+)

diff --git a/datascience/tables.py b/datascience/tables.py
index 50fb1d5cb..23b118e84 100644
--- a/datascience/tables.py
+++ b/datascience/tables.py
@@ -1314,6 +1314,35 @@ def to_df(self):
         """Convert the table to a Pandas DataFrame."""
         return pandas.DataFrame(self._columns)
 
+    def to_csv(self, filename):
+        """Write the table to ``filename`` as a CSV file.
+
+        The output round-trips: after ``table.to_csv('my_table.csv')``, calling
+        ``Table.read_table('my_table.csv')`` recreates the same table.
+
+        Args:
+            ``filename`` (str): Path of the CSV file to write.
+
+        Returns:
+            None; the only effect is the file written at ``filename``.
+
+        >>> job = ['a', 'b', 'c', 'd']
+        >>> wage = [10, 20, 15, 8]
+        >>> some_table = Table([job, wage], ['job', 'wage'])
+        >>> some_table
+        job  | wage
+        a    | 10
+        b    | 20
+        c    | 15
+        d    | 8
+        >>> some_table.to_csv('my_table.csv') # doctest: +SKIP
+        <outputs a file called my_table.csv in the current directory>
+        """
+        frame = self.to_df()
+        # index=False suppresses the row-number column that pandas would
+        # otherwise emit as the first field of every line.
+        frame.to_csv(filename, index=False)
+
     def to_array(self):
         """Convert the table to a NumPy array."""
         dt = np.dtype(list(zip(self.column_labels,
diff --git a/docs/tables.rst b/docs/tables.rst
index 8bd632024..a3fe8ab54 100644
--- a/docs/tables.rst
+++ b/docs/tables.rst
@@ -113,6 +113,7 @@ Exporting / Displaying
     Table.index_by
     Table.to_array
     Table.to_df
+    Table.to_csv
 
 Visualizations
 

From ef6199c5525e1ae4e0e65bb9c850b75f83f66d72 Mon Sep 17 00:00:00 2001
From: Sam Lau <samlau95@gmail.com>
Date: Sat, 2 Jan 2016 16:19:38 -0800
Subject: [PATCH 13/16] Version 0.3.dev22

---
 datascience/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datascience/version.py b/datascience/version.py
index 1e70f4a0c..89d7e2955 100644
--- a/datascience/version.py
+++ b/datascience/version.py
@@ -1 +1 @@
-__version__ = '0.3.dev21'
+__version__ = '0.3.dev22'

From 20b5efb957c8d488ecc78384d4daaa9e9131f533 Mon Sep 17 00:00:00 2001
From: Sam Lau <samlau95@gmail.com>
Date: Mon, 4 Jan 2016 00:23:58 -0800
Subject: [PATCH 14/16] Add TOC to tutorial and try to fix Travis again

I'm getting desperate, as you can tell.
---
 .travis.yml       | 6 ++++++
 docs/tutorial.rst | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 630648fcd..a68a990b7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,6 +35,12 @@ install:
   # TODO(sam): Add --upgrade flag when it works again
   - python3 setup.py install
 
+# https://docs.travis-ci.com/user/gui-and-headless-browsers/#Using-xvfb-to-Run-Tests-That-Require-a-GUI
+before_script:
+  - "export DISPLAY=:99.0"
+  - "sh -e /etc/init.d/xvfb start"
+  - sleep 3 # give xvfb some time to start
+
 script:
   - coverage run setup.py test
   - cd docs && make html-raise-on-warning && cd ..
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index 1328bae9e..20cd18c1c 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -13,6 +13,10 @@ For other useful tutorials and examples, see:
 .. _The textbook introduction to Tables: http://data8.org/text/1_data.html#tables
 .. _Example notebooks: https://github.com/deculler/TableDemos
 
+.. contents:: Table of Contents
+    :depth: 2
+    :local:
+
 Getting Started
 ---------------
 

From 9e8b6eb70271adbaf2c6d4e6453e71a54578b7c0 Mon Sep 17 00:00:00 2001
From: Sam Lau <samlau95@gmail.com>
Date: Mon, 4 Jan 2016 00:47:02 -0800
Subject: [PATCH 15/16] Fix buggy travis cache and ensure _images folder exists

Really crossing my fingers now!
---
 .travis.yml   | 8 +++-----
 docs/Makefile | 2 ++
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index a68a990b7..28dfc1a8d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -26,7 +26,7 @@ addons:
 before_install:
   - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
   - chmod +x miniconda.sh
-  - ./miniconda.sh -b -p $HOME/miniconda
+  - ./miniconda.sh -b -f -p $HOME/miniconda
   - export PATH=/home/travis/miniconda/bin:$PATH
   - conda update --yes conda
 
@@ -36,6 +36,8 @@ install:
   - python3 setup.py install
 
 # https://docs.travis-ci.com/user/gui-and-headless-browsers/#Using-xvfb-to-Run-Tests-That-Require-a-GUI
+# sam: Not exactly sure why we need to initialize a display for this but it
+#      helps the tutorial plots build on Travis
 before_script:
   - "export DISPLAY=:99.0"
   - "sh -e /etc/init.d/xvfb start"
@@ -48,7 +50,3 @@ script:
 after_success:
   - coveralls
   - bash tools/deploy_docs.sh
-
-cache:
-  directories:
-  - /home/travis/virtualenv/python3.4.2/
diff --git a/docs/Makefile b/docs/Makefile
index fd5eddeb2..b05545c01 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -52,11 +52,13 @@ clean:
 	rm -rf $(BUILDDIR)/*
 
 html:
+	mkdir -p $(BUILDDIR)/html/_images
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 
 html-raise-on-warning:
+	mkdir -p $(BUILDDIR)/html/_images
 	$(SPHINXBUILD) -W -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 
 dirhtml:

From 88d21e17d2efa20d30d4f1401b6dbf330fcf9b9b Mon Sep 17 00:00:00 2001
From: Sam Lau <samlau95@gmail.com>
Date: Mon, 4 Jan 2016 01:08:36 -0800
Subject: [PATCH 16/16] Just use `make docs` instead of raise-on-warning

Generating the tutorial images emits warnings that I can't currently get
around, so the raise-on-warning build fails.
---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 28dfc1a8d..3ec57d10c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -45,7 +45,7 @@ before_script:
 
 script:
   - coverage run setup.py test
-  - cd docs && make html-raise-on-warning && cd ..
+  - make docs
 
 after_success:
   - coveralls