Interactions and ANOVAΒΆ

Link to Notebook GitHub

Note: This script is based heavily on Jonathan Taylor's class notes http://www.stanford.edu/class/stats191/interactions.html

Download and format data:

In [1]:
from __future__ import print_function
from statsmodels.compat import urlopen
import numpy as np
np.set_printoptions(precision=4, suppress=True)
import statsmodels.api as sm
import pandas as pd
pd.set_option("display.width", 100)
import matplotlib.pyplot as plt
from statsmodels.formula.api import ols
from statsmodels.graphics.api import interaction_plot, abline_plot
from statsmodels.stats.anova import anova_lm

try:
    salary_table = pd.read_csv('salary.table')
except:  # recent pandas can read URL without urlopen
    url = 'http://stats191.stanford.edu/data/salary.table'
    fh = urlopen(url)
    salary_table = pd.read_table(fh)
    salary_table.to_csv('salary.table')

E = salary_table.E
M = salary_table.M
X = salary_table.X
S = salary_table.S
---------------------------------------------------------------------------
URLError                                  Traceback (most recent call last)
<ipython-input-380-e34bb7e2457e> in <module>()
     15 except:  # recent pandas can read URL without urlopen
     16     url = 'http://stats191.stanford.edu/data/salary.table'
---> 17     fh = urlopen(url)
     18     salary_table = pd.read_table(fh)
     19     salary_table.to_csv('salary.table')

/usr/lib/python2.7/urllib2.pyc in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    152     else:
    153         opener = _opener
--> 154     return opener.open(url, data, timeout)
    155 
    156 def install_opener(opener):

/usr/lib/python2.7/urllib2.pyc in open(self, fullurl, data, timeout)
    427             req = meth(req)
    428 
--> 429         response = self._open(req, data)
    430 
    431         # post-process response

/usr/lib/python2.7/urllib2.pyc in _open(self, req, data)
    445         protocol = req.get_type()
    446         result = self._call_chain(self.handle_open, protocol, protocol +
--> 447                                   '_open', req)
    448         if result:
    449             return result

/usr/lib/python2.7/urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
    405             func = getattr(handler, meth_name)
    406 
--> 407             result = func(*args)
    408             if result is not None:
    409                 return result

/usr/lib/python2.7/urllib2.pyc in http_open(self, req)
   1226 
   1227     def http_open(self, req):
-> 1228         return self.do_open(httplib.HTTPConnection, req)
   1229 
   1230     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python2.7/urllib2.pyc in do_open(self, http_class, req, **http_conn_args)
   1196         except socket.error, err: # XXX what error?
   1197             h.close()
-> 1198             raise URLError(err)
   1199         else:
   1200             try:

URLError: <urlopen error [Errno -2] Name or service not known>

Take a look at the data:

In [2]:
plt.figure(figsize=(6,6))
symbols = ['D', '^']
colors = ['r', 'g', 'blue']
factor_groups = salary_table.groupby(['E','M'])
for values, group in factor_groups:
    i,j = values
    plt.scatter(group['X'], group['S'], marker=symbols[j], color=colors[i-1],
               s=144)
plt.xlabel('Experience');
plt.ylabel('Salary');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-381-66af8762e78f> in <module>()
      2 symbols = ['D', '^']
      3 colors = ['r', 'g', 'blue']
----> 4 factor_groups = salary_table.groupby(['E','M'])
      5 for values, group in factor_groups:
      6     i,j = values

NameError: name 'salary_table' is not defined
<matplotlib.figure.Figure at 0x7ff5c90e4a90>

Fit a linear model:

In [3]:
formula = 'S ~ C(E) + C(M) + X'
lm = ols(formula, salary_table).fit()
print(lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-382-95ded19c3a72> in <module>()
      1 formula = 'S ~ C(E) + C(M) + X'
----> 2 lm = ols(formula, salary_table).fit()
      3 print(lm.summary())

NameError: name 'salary_table' is not defined

Have a look at the created design matrix:

In [4]:
lm.model.exog[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-383-e9012a047d1f> in <module>()
----> 1 lm.model.exog[:5]

NameError: name 'lm' is not defined

Or since we initially passed in a DataFrame, we have a DataFrame available in

In [5]:
lm.model.data.orig_exog[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-384-0d890acdc576> in <module>()
----> 1 lm.model.data.orig_exog[:5]

NameError: name 'lm' is not defined

We keep a reference to the original untouched data in

In [6]:
lm.model.data.frame[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-385-7c0e717ac664> in <module>()
----> 1 lm.model.data.frame[:5]

NameError: name 'lm' is not defined

Influence statistics

In [7]:
infl = lm.get_influence()
print(infl.summary_table())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-386-a1537ce8cfa3> in <module>()
----> 1 infl = lm.get_influence()
      2 print(infl.summary_table())

NameError: name 'lm' is not defined

or get a dataframe

In [8]:
df_infl = infl.summary_frame()
In [9]:
df_infl[:5]
Out[9]:
dfb_const dfb_GNPDEFL dfb_GNP dfb_UNEMP dfb_ARMED dfb_POP dfb_YEAR cooks_d dffits dffits_internal hat_diag standard_resid student_resid
0 -0.016406 -0.234566 -0.045095 -0.121513 -0.149026 0.211057 0.013388 0.140840 1.014472 0.992915 0.424537 1.156014 1.181112
1 -0.020608 -0.289091 0.124453 0.156964 0.287700 -0.161890 0.025958 0.040561 -0.508591 -0.532850 0.564978 -0.467568 -0.446281
2 -0.008382 0.007161 -0.016799 0.009575 0.002227 0.014871 0.008103 0.002930 0.135299 0.143218 0.362075 0.190101 0.179590
3 0.018093 0.907968 -0.500022 -0.495996 0.089996 0.711142 -0.040056 0.244193 -1.495156 -1.307421 0.372228 -1.697900 -1.941705
4 1.871260 -0.219351 1.611418 1.561520 1.169337 -1.081513 -1.864186 0.613917 2.333153 2.073021 0.615511 1.638429 1.844027

Now plot the reiduals within the groups separately:

In [10]:
resid = lm.resid
plt.figure(figsize=(6,6));
for values, group in factor_groups:
    i,j = values
    group_num = i*2 + j - 1  # for plotting purposes
    x = [group_num] * len(group)
    plt.scatter(x, resid[group.index], marker=symbols[j], color=colors[i-1],
            s=144, edgecolors='black')
plt.xlabel('Group');
plt.ylabel('Residuals');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-389-1f49e6986775> in <module>()
----> 1 resid = lm.resid
      2 plt.figure(figsize=(6,6));
      3 for values, group in factor_groups:
      4     i,j = values
      5     group_num = i*2 + j - 1  # for plotting purposes

NameError: name 'lm' is not defined

Now we will test some interactions using anova or f_test

In [11]:
interX_lm = ols("S ~ C(E) * X + C(M)", salary_table).fit()
print(interX_lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-390-1875cf9774f0> in <module>()
----> 1 interX_lm = ols("S ~ C(E) * X + C(M)", salary_table).fit()
      2 print(interX_lm.summary())

NameError: name 'salary_table' is not defined

Do an ANOVA check

In [12]:
from statsmodels.stats.api import anova_lm

table1 = anova_lm(lm, interX_lm)
print(table1)

interM_lm = ols("S ~ X + C(E)*C(M)", data=salary_table).fit()
print(interM_lm.summary())

table2 = anova_lm(lm, interM_lm)
print(table2)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-391-0c9af2b17bde> in <module>()
      1 from statsmodels.stats.api import anova_lm
      2 
----> 3 table1 = anova_lm(lm, interX_lm)
      4 print(table1)
      5 

NameError: name 'lm' is not defined

The design matrix as a DataFrame

In [13]:
interM_lm.model.data.orig_exog[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-392-54036886e7c2> in <module>()
----> 1 interM_lm.model.data.orig_exog[:5]

NameError: name 'interM_lm' is not defined

The design matrix as an ndarray

In [14]:
interM_lm.model.exog
interM_lm.model.exog_names
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-393-f8ee67f5a751> in <module>()
----> 1 interM_lm.model.exog
      2 interM_lm.model.exog_names

NameError: name 'interM_lm' is not defined
In [15]:
infl = interM_lm.get_influence()
resid = infl.resid_studentized_internal
plt.figure(figsize=(6,6))
for values, group in factor_groups:
    i,j = values
    idx = group.index
    plt.scatter(X[idx], resid[idx], marker=symbols[j], color=colors[i-1],
            s=144, edgecolors='black')
plt.xlabel('X');
plt.ylabel('standardized resids');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-394-b9ade982a4ac> in <module>()
----> 1 infl = interM_lm.get_influence()
      2 resid = infl.resid_studentized_internal
      3 plt.figure(figsize=(6,6))
      4 for values, group in factor_groups:
      5     i,j = values

NameError: name 'interM_lm' is not defined

Looks like one observation is an outlier.

In [16]:
drop_idx = abs(resid).argmax()
print(drop_idx)  # zero-based index
idx = salary_table.index.drop(drop_idx)

lm32 = ols('S ~ C(E) + X + C(M)', data=salary_table, subset=idx).fit()

print(lm32.summary())
print('\n')

interX_lm32 = ols('S ~ C(E) * X + C(M)', data=salary_table, subset=idx).fit()

print(interX_lm32.summary())
print('\n')


table3 = anova_lm(lm32, interX_lm32)
print(table3)
print('\n')


interM_lm32 = ols('S ~ X + C(E) * C(M)', data=salary_table, subset=idx).fit()

table4 = anova_lm(lm32, interM_lm32)
print(table4)
print('\n')
226

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-395-a8a74da84db4> in <module>()
      1 drop_idx = abs(resid).argmax()
      2 print(drop_idx)  # zero-based index
----> 3 idx = salary_table.index.drop(drop_idx)
      4 
      5 lm32 = ols('S ~ C(E) + X + C(M)', data=salary_table, subset=idx).fit()

NameError: name 'salary_table' is not defined

Replot the residuals

In [17]:
try:
    resid = interM_lm32.get_influence().summary_frame()['standard_resid']
except:
    resid = interM_lm32.get_influence().summary_frame()['standard_resid']

plt.figure(figsize=(6,6))
for values, group in factor_groups:
    i,j = values
    idx = group.index
    plt.scatter(X[idx], resid[idx], marker=symbols[j], color=colors[i-1],
            s=144, edgecolors='black')
plt.xlabel('X[~[32]]');
plt.ylabel('standardized resids');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-396-b457d7767b76> in <module>()
      2     resid = interM_lm32.get_influence().summary_frame()['standard_resid']
      3 except:
----> 4     resid = interM_lm32.get_influence().summary_frame()['standard_resid']
      5 
      6 plt.figure(figsize=(6,6))

NameError: name 'interM_lm32' is not defined

Plot the fitted values

In [18]:
lm_final = ols('S ~ X + C(E)*C(M)', data = salary_table.drop([drop_idx])).fit()
mf = lm_final.model.data.orig_exog
lstyle = ['-','--']

plt.figure(figsize=(6,6))
for values, group in factor_groups:
    i,j = values
    idx = group.index
    plt.scatter(X[idx], S[idx], marker=symbols[j], color=colors[i-1],
                s=144, edgecolors='black')
    # drop NA because there is no idx 32 in the final model
    plt.plot(mf.X[idx].dropna(), lm_final.fittedvalues[idx].dropna(),
            ls=lstyle[j], color=colors[i-1])
plt.xlabel('Experience');
plt.ylabel('Salary');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-397-25e9e43f896a> in <module>()
----> 1 lm_final = ols('S ~ X + C(E)*C(M)', data = salary_table.drop([drop_idx])).fit()
      2 mf = lm_final.model.data.orig_exog
      3 lstyle = ['-','--']
      4 
      5 plt.figure(figsize=(6,6))

NameError: name 'salary_table' is not defined

From our first look at the data, the difference between Master's and PhD in the management group is different than in the non-management group. This is an interaction between the two qualitative variables management,M and education,E. We can visualize this by first removing the effect of experience, then plotting the means within each of the 6 groups using interaction.plot.

In [19]:
U = S - X * interX_lm32.params['X']

plt.figure(figsize=(6,6))
interaction_plot(E, M, U, colors=['red','blue'], markers=['^','D'],
        markersize=10, ax=plt.gca())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-398-1bfcb96c9336> in <module>()
----> 1 U = S - X * interX_lm32.params['X']
      2 
      3 plt.figure(figsize=(6,6))
      4 interaction_plot(E, M, U, colors=['red','blue'], markers=['^','D'],
      5         markersize=10, ax=plt.gca())

NameError: name 'S' is not defined

Minority Employment Data

In [20]:
try:
    minority_table = pd.read_table('minority.table')
except:  # don't have data already
    url = 'http://stats191.stanford.edu/data/minority.table'
    minority_table = pd.read_table(url)

factor_group = minority_table.groupby(['ETHN'])

fig, ax = plt.subplots(figsize=(6,6))
colors = ['purple', 'green']
markers = ['o', 'v']
for factor, group in factor_group:
    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
                marker=markers[factor], s=12**2)
ax.set_xlabel('TEST');
ax.set_ylabel('JPERF');
---------------------------------------------------------------------------
URLError                                  Traceback (most recent call last)
<ipython-input-399-83c26c0c5424> in <module>()
      3 except:  # don't have data already
      4     url = 'http://stats191.stanford.edu/data/minority.table'
----> 5     minority_table = pd.read_table(url)
      6 
      7 factor_group = minority_table.groupby(['ETHN'])

/usr/lib/python2.7/dist-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
    496                     skip_blank_lines=skip_blank_lines)
    497 
--> 498         return _read(filepath_or_buffer, kwds)
    499 
    500     parser_f.__name__ = name

/usr/lib/python2.7/dist-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    260     filepath_or_buffer, _, compression = get_filepath_or_buffer(filepath_or_buffer,
    261                                                                 encoding,
--> 262                                                                 compression=kwds.get('compression', None))
    263     kwds['compression'] = inferred_compression if compression == 'infer' else compression
    264 

/usr/lib/python2.7/dist-packages/pandas/io/common.pyc in get_filepath_or_buffer(filepath_or_buffer, encoding, compression)
    256 
    257     if _is_url(filepath_or_buffer):
--> 258         req = _urlopen(str(filepath_or_buffer))
    259         if compression == 'infer':
    260             content_encoding = req.headers.get('Content-Encoding', None)

/usr/lib/python2.7/urllib2.pyc in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    152     else:
    153         opener = _opener
--> 154     return opener.open(url, data, timeout)
    155 
    156 def install_opener(opener):

/usr/lib/python2.7/urllib2.pyc in open(self, fullurl, data, timeout)
    427             req = meth(req)
    428 
--> 429         response = self._open(req, data)
    430 
    431         # post-process response

/usr/lib/python2.7/urllib2.pyc in _open(self, req, data)
    445         protocol = req.get_type()
    446         result = self._call_chain(self.handle_open, protocol, protocol +
--> 447                                   '_open', req)
    448         if result:
    449             return result

/usr/lib/python2.7/urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
    405             func = getattr(handler, meth_name)
    406 
--> 407             result = func(*args)
    408             if result is not None:
    409                 return result

/usr/lib/python2.7/urllib2.pyc in http_open(self, req)
   1226 
   1227     def http_open(self, req):
-> 1228         return self.do_open(httplib.HTTPConnection, req)
   1229 
   1230     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python2.7/urllib2.pyc in do_open(self, http_class, req, **http_conn_args)
   1196         except socket.error, err: # XXX what error?
   1197             h.close()
-> 1198             raise URLError(err)
   1199         else:
   1200             try:

URLError: <urlopen error [Errno -2] Name or service not known>
In [21]:
min_lm = ols('JPERF ~ TEST', data=minority_table).fit()
print(min_lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-400-9d22ebbe63c2> in <module>()
----> 1 min_lm = ols('JPERF ~ TEST', data=minority_table).fit()
      2 print(min_lm.summary())

NameError: name 'minority_table' is not defined
In [22]:
fig, ax = plt.subplots(figsize=(6,6));
for factor, group in factor_group:
    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
                marker=markers[factor], s=12**2)

ax.set_xlabel('TEST')
ax.set_ylabel('JPERF')
fig = abline_plot(model_results = min_lm, ax=ax)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-401-d9e4c8602668> in <module>()
      1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5 

NameError: name 'factor_group' is not defined
Error in callback <function post_execute at 0x7ff5d7b48140> (for post_execute):

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/usr/lib/python2.7/dist-packages/matplotlib/pyplot.pyc in post_execute()
    145             def post_execute():
    146                 if matplotlib.is_interactive():
--> 147                     draw_all()
    148 
    149             # IPython >= 2

/usr/lib/python2.7/dist-packages/matplotlib/_pylab_helpers.pyc in draw_all(cls, force)
    148         for f_mgr in cls.get_all_fig_managers():
    149             if force or f_mgr.canvas.figure.stale:
--> 150                 f_mgr.canvas.draw_idle()
    151 
    152 atexit.register(Gcf.destroy_all)

/usr/lib/python2.7/dist-packages/matplotlib/backend_bases.pyc in draw_idle(self, *args, **kwargs)
   2024         if not self._is_idle_drawing:
   2025             with self._idle_draw_cntx():
-> 2026                 self.draw(*args, **kwargs)
   2027 
   2028     def draw_cursor(self, event):

/usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in draw(self)
    472 
    473         try:
--> 474             self.figure.draw(self.renderer)
    475         finally:
    476             RendererAgg.lock.release()

/usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
     59     def draw_wrapper(artist, renderer, *args, **kwargs):
     60         before(artist, renderer)
---> 61         draw(artist, renderer, *args, **kwargs)
     62         after(artist, renderer)
     63 

/usr/lib/python2.7/dist-packages/matplotlib/figure.pyc in draw(self, renderer)
   1157         dsu.sort(key=itemgetter(0))
   1158         for zorder, a, func, args in dsu:
-> 1159             func(*args)
   1160 
   1161         renderer.close_group('figure')

/usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
     59     def draw_wrapper(artist, renderer, *args, **kwargs):
     60         before(artist, renderer)
---> 61         draw(artist, renderer, *args, **kwargs)
     62         after(artist, renderer)
     63 

/usr/lib/python2.7/dist-packages/matplotlib/axes/_base.pyc in draw(self, renderer, inframe)
   2322 
   2323         for zorder, a in dsu:
-> 2324             a.draw(renderer)
   2325 
   2326         renderer.close_group('axes')

/usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
     59     def draw_wrapper(artist, renderer, *args, **kwargs):
     60         before(artist, renderer)
---> 61         draw(artist, renderer, *args, **kwargs)
     62         after(artist, renderer)
     63 

/usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in draw(self, renderer, *args, **kwargs)
   1106         ticks_to_draw = self._update_ticks(renderer)
   1107         ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw,
-> 1108                                                                 renderer)
   1109 
   1110         for tick in ticks_to_draw:

/usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in _get_tick_bboxes(self, ticks, renderer)
   1056         for tick in ticks:
   1057             if tick.label1On and tick.label1.get_visible():
-> 1058                 extent = tick.label1.get_window_extent(renderer)
   1059                 ticklabelBoxes.append(extent)
   1060             if tick.label2On and tick.label2.get_visible():

/usr/lib/python2.7/dist-packages/matplotlib/text.pyc in get_window_extent(self, renderer, dpi)
    959             raise RuntimeError('Cannot get window extent w/o renderer')
    960 
--> 961         bbox, info, descent = self._get_layout(self._renderer)
    962         x, y = self.get_unitless_position()
    963         x, y = self.get_transform().transform_point((x, y))

/usr/lib/python2.7/dist-packages/matplotlib/text.pyc in _get_layout(self, renderer)
    350         tmp, lp_h, lp_bl = renderer.get_text_width_height_descent('lp',
    351                                                          self._fontproperties,
--> 352                                                          ismath=False)
    353         offsety = (lp_h - lp_bl) * self._linespacing
    354 

/usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in get_text_width_height_descent(self, s, prop, ismath)
    227             fontsize = prop.get_size_in_points()
    228             w, h, d = texmanager.get_text_width_height_descent(s, fontsize,
--> 229                                                                renderer=self)
    230             return w, h, d
    231 

/usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in get_text_width_height_descent(self, tex, fontsize, renderer)
    673         else:
    674             # use dviread. It sometimes returns a wrong descent.
--> 675             dvifile = self.make_dvi(tex, fontsize)
    676             dvi = dviread.Dvi(dvifile, 72 * dpi_fraction)
    677             try:

/usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in make_dvi(self, tex, fontsize)
    420                      'string:\n%s\nHere is the full report generated by '
    421                      'LaTeX: \n\n' % repr(tex.encode('unicode_escape')) +
--> 422                      report))
    423             else:
    424                 mpl.verbose.report(report, 'debug')

RuntimeError: LaTeX was not able to process the following string:
'lp'
Here is the full report generated by LaTeX:

<matplotlib.figure.Figure at 0x7ff5c915fd10>
In [23]:
min_lm2 = ols('JPERF ~ TEST + TEST:ETHN',
        data=minority_table).fit()

print(min_lm2.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-402-f6a4c64c17ca> in <module>()
      1 min_lm2 = ols('JPERF ~ TEST + TEST:ETHN',
----> 2         data=minority_table).fit()
      3 
      4 print(min_lm2.summary())

NameError: name 'minority_table' is not defined
In [24]:
fig, ax = plt.subplots(figsize=(6,6));
for factor, group in factor_group:
    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
                marker=markers[factor], s=12**2)

fig = abline_plot(intercept = min_lm2.params['Intercept'],
                 slope = min_lm2.params['TEST'], ax=ax, color='purple');
fig = abline_plot(intercept = min_lm2.params['Intercept'],
        slope = min_lm2.params['TEST'] + min_lm2.params['TEST:ETHN'],
        ax=ax, color='green');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-403-f8432b7c2f24> in <module>()
      1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5 

NameError: name 'factor_group' is not defined
Error in callback <function post_execute at 0x7ff5d7b48140> (for post_execute):

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/usr/lib/python2.7/dist-packages/matplotlib/pyplot.pyc in post_execute()
    145             def post_execute():
    146                 if matplotlib.is_interactive():
--> 147                     draw_all()
    148 
    149             # IPython >= 2

/usr/lib/python2.7/dist-packages/matplotlib/_pylab_helpers.pyc in draw_all(cls, force)
    148         for f_mgr in cls.get_all_fig_managers():
    149             if force or f_mgr.canvas.figure.stale:
--> 150                 f_mgr.canvas.draw_idle()
    151 
    152 atexit.register(Gcf.destroy_all)

/usr/lib/python2.7/dist-packages/matplotlib/backend_bases.pyc in draw_idle(self, *args, **kwargs)
   2024         if not self._is_idle_drawing:
   2025             with self._idle_draw_cntx():
-> 2026                 self.draw(*args, **kwargs)
   2027 
   2028     def draw_cursor(self, event):

/usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in draw(self)
    472 
    473         try:
--> 474             self.figure.draw(self.renderer)
    475         finally:
    476             RendererAgg.lock.release()

/usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
     59     def draw_wrapper(artist, renderer, *args, **kwargs):
     60         before(artist, renderer)
---> 61         draw(artist, renderer, *args, **kwargs)
     62         after(artist, renderer)
     63 

/usr/lib/python2.7/dist-packages/matplotlib/figure.pyc in draw(self, renderer)
   1157         dsu.sort(key=itemgetter(0))
   1158         for zorder, a, func, args in dsu:
-> 1159             func(*args)
   1160 
   1161         renderer.close_group('figure')

/usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
     59     def draw_wrapper(artist, renderer, *args, **kwargs):
     60         before(artist, renderer)
---> 61         draw(artist, renderer, *args, **kwargs)
     62         after(artist, renderer)
     63 

/usr/lib/python2.7/dist-packages/matplotlib/axes/_base.pyc in draw(self, renderer, inframe)
   2322 
   2323         for zorder, a in dsu:
-> 2324             a.draw(renderer)
   2325 
   2326         renderer.close_group('axes')

/usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
     59     def draw_wrapper(artist, renderer, *args, **kwargs):
     60         before(artist, renderer)
---> 61         draw(artist, renderer, *args, **kwargs)
     62         after(artist, renderer)
     63 

/usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in draw(self, renderer, *args, **kwargs)
   1106         ticks_to_draw = self._update_ticks(renderer)
   1107         ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw,
-> 1108                                                                 renderer)
   1109 
   1110         for tick in ticks_to_draw:

/usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in _get_tick_bboxes(self, ticks, renderer)
   1056         for tick in ticks:
   1057             if tick.label1On and tick.label1.get_visible():
-> 1058                 extent = tick.label1.get_window_extent(renderer)
   1059                 ticklabelBoxes.append(extent)
   1060             if tick.label2On and tick.label2.get_visible():

/usr/lib/python2.7/dist-packages/matplotlib/text.pyc in get_window_extent(self, renderer, dpi)
    959             raise RuntimeError('Cannot get window extent w/o renderer')
    960 
--> 961         bbox, info, descent = self._get_layout(self._renderer)
    962         x, y = self.get_unitless_position()
    963         x, y = self.get_transform().transform_point((x, y))

/usr/lib/python2.7/dist-packages/matplotlib/text.pyc in _get_layout(self, renderer)
    350         tmp, lp_h, lp_bl = renderer.get_text_width_height_descent('lp',
    351                                                          self._fontproperties,
--> 352                                                          ismath=False)
    353         offsety = (lp_h - lp_bl) * self._linespacing
    354 

/usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in get_text_width_height_descent(self, s, prop, ismath)
    227             fontsize = prop.get_size_in_points()
    228             w, h, d = texmanager.get_text_width_height_descent(s, fontsize,
--> 229                                                                renderer=self)
    230             return w, h, d
    231 

/usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in get_text_width_height_descent(self, tex, fontsize, renderer)
    673         else:
    674             # use dviread. It sometimes returns a wrong descent.
--> 675             dvifile = self.make_dvi(tex, fontsize)
    676             dvi = dviread.Dvi(dvifile, 72 * dpi_fraction)
    677             try:

/usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in make_dvi(self, tex, fontsize)
    420                      'string:\n%s\nHere is the full report generated by '
    421                      'LaTeX: \n\n' % repr(tex.encode('unicode_escape')) +
--> 422                      report))
    423             else:
    424                 mpl.verbose.report(report, 'debug')

RuntimeError: LaTeX was not able to process the following string:
'lp'
Here is the full report generated by LaTeX:

<matplotlib.figure.Figure at 0x7ff5c90a11d0>
In [25]:
min_lm3 = ols('JPERF ~ TEST + ETHN', data = minority_table).fit()
print(min_lm3.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-404-5f2d6b39420c> in <module>()
----> 1 min_lm3 = ols('JPERF ~ TEST + ETHN', data = minority_table).fit()
      2 print(min_lm3.summary())

NameError: name 'minority_table' is not defined
In [26]:
fig, ax = plt.subplots(figsize=(6,6));
for factor, group in factor_group:
    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
                marker=markers[factor], s=12**2)

fig = abline_plot(intercept = min_lm3.params['Intercept'],
                 slope = min_lm3.params['TEST'], ax=ax, color='purple');
fig = abline_plot(intercept = min_lm3.params['Intercept'] + min_lm3.params['ETHN'],
        slope = min_lm3.params['TEST'], ax=ax, color='green');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-405-3e445d581db3> in <module>()
      1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5 

NameError: name 'factor_group' is not defined
Error in callback <function post_execute at 0x7ff5d7b48140> (for post_execute):

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/usr/lib/python2.7/dist-packages/matplotlib/pyplot.pyc in post_execute()
    145             def post_execute():
    146                 if matplotlib.is_interactive():
--> 147                     draw_all()
    148 
    149             # IPython >= 2

/usr/lib/python2.7/dist-packages/matplotlib/_pylab_helpers.pyc in draw_all(cls, force)
    148         for f_mgr in cls.get_all_fig_managers():
    149             if force or f_mgr.canvas.figure.stale:
--> 150                 f_mgr.canvas.draw_idle()
    151 
    152 atexit.register(Gcf.destroy_all)

/usr/lib/python2.7/dist-packages/matplotlib/backend_bases.pyc in draw_idle(self, *args, **kwargs)
   2024         if not self._is_idle_drawing:
   2025             with self._idle_draw_cntx():
-> 2026                 self.draw(*args, **kwargs)
   2027 
   2028     def draw_cursor(self, event):

/usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in draw(self)
    472 
    473         try:
--> 474             self.figure.draw(self.renderer)
    475         finally:
    476             RendererAgg.lock.release()

/usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
     59     def draw_wrapper(artist, renderer, *args, **kwargs):
     60         before(artist, renderer)
---> 61         draw(artist, renderer, *args, **kwargs)
     62         after(artist, renderer)
     63 

/usr/lib/python2.7/dist-packages/matplotlib/figure.pyc in draw(self, renderer)
   1157         dsu.sort(key=itemgetter(0))
   1158         for zorder, a, func, args in dsu:
-> 1159             func(*args)
   1160 
   1161         renderer.close_group('figure')

/usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
     59     def draw_wrapper(artist, renderer, *args, **kwargs):
     60         before(artist, renderer)
---> 61         draw(artist, renderer, *args, **kwargs)
     62         after(artist, renderer)
     63 

/usr/lib/python2.7/dist-packages/matplotlib/axes/_base.pyc in draw(self, renderer, inframe)
   2322 
   2323         for zorder, a in dsu:
-> 2324             a.draw(renderer)
   2325 
   2326         renderer.close_group('axes')

/usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
     59     def draw_wrapper(artist, renderer, *args, **kwargs):
     60         before(artist, renderer)
---> 61         draw(artist, renderer, *args, **kwargs)
     62         after(artist, renderer)
     63 

/usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in draw(self, renderer, *args, **kwargs)
   1106         ticks_to_draw = self._update_ticks(renderer)
   1107         ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw,
-> 1108                                                                 renderer)
   1109 
   1110         for tick in ticks_to_draw:

/usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in _get_tick_bboxes(self, ticks, renderer)
   1056         for tick in ticks:
   1057             if tick.label1On and tick.label1.get_visible():
-> 1058                 extent = tick.label1.get_window_extent(renderer)
   1059                 ticklabelBoxes.append(extent)
   1060             if tick.label2On and tick.label2.get_visible():

/usr/lib/python2.7/dist-packages/matplotlib/text.pyc in get_window_extent(self, renderer, dpi)
    959             raise RuntimeError('Cannot get window extent w/o renderer')
    960 
--> 961         bbox, info, descent = self._get_layout(self._renderer)
    962         x, y = self.get_unitless_position()
    963         x, y = self.get_transform().transform_point((x, y))

/usr/lib/python2.7/dist-packages/matplotlib/text.pyc in _get_layout(self, renderer)
    350         tmp, lp_h, lp_bl = renderer.get_text_width_height_descent('lp',
    351                                                          self._fontproperties,
--> 352                                                          ismath=False)
    353         offsety = (lp_h - lp_bl) * self._linespacing
    354 

/usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in get_text_width_height_descent(self, s, prop, ismath)
    227             fontsize = prop.get_size_in_points()
    228             w, h, d = texmanager.get_text_width_height_descent(s, fontsize,
--> 229                                                                renderer=self)
    230             return w, h, d
    231 

/usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in get_text_width_height_descent(self, tex, fontsize, renderer)
    673         else:
    674             # use dviread. It sometimes returns a wrong descent.
--> 675             dvifile = self.make_dvi(tex, fontsize)
    676             dvi = dviread.Dvi(dvifile, 72 * dpi_fraction)
    677             try:

/usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in make_dvi(self, tex, fontsize)
    420                      'string:\n%s\nHere is the full report generated by '
    421                      'LaTeX: \n\n' % repr(tex.encode('unicode_escape')) +
--> 422                      report))
    423             else:
    424                 mpl.verbose.report(report, 'debug')

RuntimeError: LaTeX was not able to process the following string:
'lp'
Here is the full report generated by LaTeX:

<matplotlib.figure.Figure at 0x7ff5c8bc6490>
In [27]:
min_lm4 = ols('JPERF ~ TEST * ETHN', data = minority_table).fit()
print(min_lm4.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-406-45bd21e99036> in <module>()
----> 1 min_lm4 = ols('JPERF ~ TEST * ETHN', data = minority_table).fit()
      2 print(min_lm4.summary())

NameError: name 'minority_table' is not defined
In [28]:
fig, ax = plt.subplots(figsize=(8,6));
for factor, group in factor_group:
    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
                marker=markers[factor], s=12**2)

fig = abline_plot(intercept = min_lm4.params['Intercept'],
                 slope = min_lm4.params['TEST'], ax=ax, color='purple');
fig = abline_plot(intercept = min_lm4.params['Intercept'] + min_lm4.params['ETHN'],
        slope = min_lm4.params['TEST'] + min_lm4.params['TEST:ETHN'],
        ax=ax, color='green');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-407-2572d5e3e956> in <module>()
      1 fig, ax = plt.subplots(figsize=(8,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5 

NameError: name 'factor_group' is not defined
Error in callback <function post_execute at 0x7ff5d7b48140> (for post_execute):

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/usr/lib/python2.7/dist-packages/matplotlib/pyplot.pyc in post_execute()
    145             def post_execute():
    146                 if matplotlib.is_interactive():
--> 147                     draw_all()
    148 
    149             # IPython >= 2

/usr/lib/python2.7/dist-packages/matplotlib/_pylab_helpers.pyc in draw_all(cls, force)
    148         for f_mgr in cls.get_all_fig_managers():
    149             if force or f_mgr.canvas.figure.stale:
--> 150                 f_mgr.canvas.draw_idle()
    151 
    152 atexit.register(Gcf.destroy_all)

/usr/lib/python2.7/dist-packages/matplotlib/backend_bases.pyc in draw_idle(self, *args, **kwargs)
   2024         if not self._is_idle_drawing:
   2025             with self._idle_draw_cntx():
-> 2026                 self.draw(*args, **kwargs)
   2027 
   2028     def draw_cursor(self, event):

/usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in draw(self)
    472 
    473         try:
--> 474             self.figure.draw(self.renderer)
    475         finally:
    476             RendererAgg.lock.release()

/usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
     59     def draw_wrapper(artist, renderer, *args, **kwargs):
     60         before(artist, renderer)
---> 61         draw(artist, renderer, *args, **kwargs)
     62         after(artist, renderer)
     63 

/usr/lib/python2.7/dist-packages/matplotlib/figure.pyc in draw(self, renderer)
   1157         dsu.sort(key=itemgetter(0))
   1158         for zorder, a, func, args in dsu:
-> 1159             func(*args)
   1160 
   1161         renderer.close_group('figure')

/usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
     59     def draw_wrapper(artist, renderer, *args, **kwargs):
     60         before(artist, renderer)
---> 61         draw(artist, renderer, *args, **kwargs)
     62         after(artist, renderer)
     63 

/usr/lib/python2.7/dist-packages/matplotlib/axes/_base.pyc in draw(self, renderer, inframe)
   2322 
   2323         for zorder, a in dsu:
-> 2324             a.draw(renderer)
   2325 
   2326         renderer.close_group('axes')

/usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
     59     def draw_wrapper(artist, renderer, *args, **kwargs):
     60         before(artist, renderer)
---> 61         draw(artist, renderer, *args, **kwargs)
     62         after(artist, renderer)
     63 

/usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in draw(self, renderer, *args, **kwargs)
   1106         ticks_to_draw = self._update_ticks(renderer)
   1107         ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw,
-> 1108                                                                 renderer)
   1109 
   1110         for tick in ticks_to_draw:

/usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in _get_tick_bboxes(self, ticks, renderer)
   1056         for tick in ticks:
   1057             if tick.label1On and tick.label1.get_visible():
-> 1058                 extent = tick.label1.get_window_extent(renderer)
   1059                 ticklabelBoxes.append(extent)
   1060             if tick.label2On and tick.label2.get_visible():

/usr/lib/python2.7/dist-packages/matplotlib/text.pyc in get_window_extent(self, renderer, dpi)
    959             raise RuntimeError('Cannot get window extent w/o renderer')
    960 
--> 961         bbox, info, descent = self._get_layout(self._renderer)
    962         x, y = self.get_unitless_position()
    963         x, y = self.get_transform().transform_point((x, y))

/usr/lib/python2.7/dist-packages/matplotlib/text.pyc in _get_layout(self, renderer)
    350         tmp, lp_h, lp_bl = renderer.get_text_width_height_descent('lp',
    351                                                          self._fontproperties,
--> 352                                                          ismath=False)
    353         offsety = (lp_h - lp_bl) * self._linespacing
    354 

/usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in get_text_width_height_descent(self, s, prop, ismath)
    227             fontsize = prop.get_size_in_points()
    228             w, h, d = texmanager.get_text_width_height_descent(s, fontsize,
--> 229                                                                renderer=self)
    230             return w, h, d
    231 

/usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in get_text_width_height_descent(self, tex, fontsize, renderer)
    673         else:
    674             # use dviread. It sometimes returns a wrong descent.
--> 675             dvifile = self.make_dvi(tex, fontsize)
    676             dvi = dviread.Dvi(dvifile, 72 * dpi_fraction)
    677             try:

/usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in make_dvi(self, tex, fontsize)
    420                      'string:\n%s\nHere is the full report generated by '
    421                      'LaTeX: \n\n' % repr(tex.encode('unicode_escape')) +
--> 422                      report))
    423             else:
    424                 mpl.verbose.report(report, 'debug')

RuntimeError: LaTeX was not able to process the following string:
'lp'
Here is the full report generated by LaTeX:

<matplotlib.figure.Figure at 0x7ff5c90a1690>
In [29]:
# is there any effect of ETHN on slope or intercept?
table5 = anova_lm(min_lm, min_lm4)
print(table5)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-408-129d94cb4fbc> in <module>()
      1 # is there any effect of ETHN on slope or intercept?
----> 2 table5 = anova_lm(min_lm, min_lm4)
      3 print(table5)

NameError: name 'min_lm' is not defined
In [30]:
# is there any effect of ETHN on intercept
table6 = anova_lm(min_lm, min_lm3)
print(table6)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-409-c31cb02af7ce> in <module>()
      1 # is there any effect of ETHN on intercept
----> 2 table6 = anova_lm(min_lm, min_lm3)
      3 print(table6)

NameError: name 'min_lm' is not defined
In [31]:
# is there any effect of ETHN on slope
table7 = anova_lm(min_lm, min_lm2)
print(table7)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-410-8848959d255f> in <module>()
      1 # is there any effect of ETHN on slope
----> 2 table7 = anova_lm(min_lm, min_lm2)
      3 print(table7)

NameError: name 'min_lm' is not defined
In [32]:
# is it just the slope or both?
table8 = anova_lm(min_lm2, min_lm4)
print(table8)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-411-b62ed8a937ec> in <module>()
      1 # is it just the slope or both?
----> 2 table8 = anova_lm(min_lm2, min_lm4)
      3 print(table8)

NameError: name 'min_lm2' is not defined

One-way ANOVA

In [33]:
try:
    rehab_table = pd.read_csv('rehab.table')
except:
    url = 'http://stats191.stanford.edu/data/rehab.csv'
    rehab_table = pd.read_table(url, delimiter=",")
    rehab_table.to_csv('rehab.table')

fig, ax = plt.subplots(figsize=(8,6))
fig = rehab_table.boxplot('Time', 'Fitness', ax=ax, grid=False)
---------------------------------------------------------------------------
URLError                                  Traceback (most recent call last)
<ipython-input-412-f592f2c5b222> in <module>()
      3 except:
      4     url = 'http://stats191.stanford.edu/data/rehab.csv'
----> 5     rehab_table = pd.read_table(url, delimiter=",")
      6     rehab_table.to_csv('rehab.table')
      7 

/usr/lib/python2.7/dist-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
    496                     skip_blank_lines=skip_blank_lines)
    497 
--> 498         return _read(filepath_or_buffer, kwds)
    499 
    500     parser_f.__name__ = name

/usr/lib/python2.7/dist-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    260     filepath_or_buffer, _, compression = get_filepath_or_buffer(filepath_or_buffer,
    261                                                                 encoding,
--> 262                                                                 compression=kwds.get('compression', None))
    263     kwds['compression'] = inferred_compression if compression == 'infer' else compression
    264 

/usr/lib/python2.7/dist-packages/pandas/io/common.pyc in get_filepath_or_buffer(filepath_or_buffer, encoding, compression)
    256 
    257     if _is_url(filepath_or_buffer):
--> 258         req = _urlopen(str(filepath_or_buffer))
    259         if compression == 'infer':
    260             content_encoding = req.headers.get('Content-Encoding', None)

/usr/lib/python2.7/urllib2.pyc in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    152     else:
    153         opener = _opener
--> 154     return opener.open(url, data, timeout)
    155 
    156 def install_opener(opener):

/usr/lib/python2.7/urllib2.pyc in open(self, fullurl, data, timeout)
    427             req = meth(req)
    428 
--> 429         response = self._open(req, data)
    430 
    431         # post-process response

/usr/lib/python2.7/urllib2.pyc in _open(self, req, data)
    445         protocol = req.get_type()
    446         result = self._call_chain(self.handle_open, protocol, protocol +
--> 447                                   '_open', req)
    448         if result:
    449             return result

/usr/lib/python2.7/urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
    405             func = getattr(handler, meth_name)
    406 
--> 407             result = func(*args)
    408             if result is not None:
    409                 return result

/usr/lib/python2.7/urllib2.pyc in http_open(self, req)
   1226 
   1227     def http_open(self, req):
-> 1228         return self.do_open(httplib.HTTPConnection, req)
   1229 
   1230     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python2.7/urllib2.pyc in do_open(self, http_class, req, **http_conn_args)
   1196         except socket.error, err: # XXX what error?
   1197             h.close()
-> 1198             raise URLError(err)
   1199         else:
   1200             try:

URLError: <urlopen error [Errno -2] Name or service not known>
In [34]:
rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()
table9 = anova_lm(rehab_lm)
print(table9)

print(rehab_lm.model.data.orig_exog)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-413-ba617da218fc> in <module>()
----> 1 rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()
      2 table9 = anova_lm(rehab_lm)
      3 print(table9)
      4 
      5 print(rehab_lm.model.data.orig_exog)

NameError: name 'rehab_table' is not defined
In [35]:
print(rehab_lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-414-2b2c1df214c7> in <module>()
----> 1 print(rehab_lm.summary())

NameError: name 'rehab_lm' is not defined

Two-way ANOVA

In [36]:
try:
    kidney_table = pd.read_table('./kidney.table')
except:
    url = 'http://stats191.stanford.edu/data/kidney.table'
    kidney_table = pd.read_table(url, delimiter=" *")
---------------------------------------------------------------------------
URLError                                  Traceback (most recent call last)
<ipython-input-415-b6b45eb9c5b0> in <module>()
      3 except:
      4     url = 'http://stats191.stanford.edu/data/kidney.table'
----> 5     kidney_table = pd.read_table(url, delimiter=" *")

/usr/lib/python2.7/dist-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
    496                     skip_blank_lines=skip_blank_lines)
    497 
--> 498         return _read(filepath_or_buffer, kwds)
    499 
    500     parser_f.__name__ = name

/usr/lib/python2.7/dist-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    260     filepath_or_buffer, _, compression = get_filepath_or_buffer(filepath_or_buffer,
    261                                                                 encoding,
--> 262                                                                 compression=kwds.get('compression', None))
    263     kwds['compression'] = inferred_compression if compression == 'infer' else compression
    264 

/usr/lib/python2.7/dist-packages/pandas/io/common.pyc in get_filepath_or_buffer(filepath_or_buffer, encoding, compression)
    256 
    257     if _is_url(filepath_or_buffer):
--> 258         req = _urlopen(str(filepath_or_buffer))
    259         if compression == 'infer':
    260             content_encoding = req.headers.get('Content-Encoding', None)

/usr/lib/python2.7/urllib2.pyc in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    152     else:
    153         opener = _opener
--> 154     return opener.open(url, data, timeout)
    155 
    156 def install_opener(opener):

/usr/lib/python2.7/urllib2.pyc in open(self, fullurl, data, timeout)
    427             req = meth(req)
    428 
--> 429         response = self._open(req, data)
    430 
    431         # post-process response

/usr/lib/python2.7/urllib2.pyc in _open(self, req, data)
    445         protocol = req.get_type()
    446         result = self._call_chain(self.handle_open, protocol, protocol +
--> 447                                   '_open', req)
    448         if result:
    449             return result

/usr/lib/python2.7/urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
    405             func = getattr(handler, meth_name)
    406 
--> 407             result = func(*args)
    408             if result is not None:
    409                 return result

/usr/lib/python2.7/urllib2.pyc in http_open(self, req)
   1226 
   1227     def http_open(self, req):
-> 1228         return self.do_open(httplib.HTTPConnection, req)
   1229 
   1230     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python2.7/urllib2.pyc in do_open(self, http_class, req, **http_conn_args)
   1196         except socket.error, err: # XXX what error?
   1197             h.close()
-> 1198             raise URLError(err)
   1199         else:
   1200             try:

URLError: <urlopen error [Errno -2] Name or service not known>

Explore the dataset

In [37]:
kidney_table.groupby(['Weight', 'Duration']).size()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-416-2e414de92632> in <module>()
----> 1 kidney_table.groupby(['Weight', 'Duration']).size()

NameError: name 'kidney_table' is not defined

Balanced panel

In [38]:
kt = kidney_table
plt.figure(figsize=(8,6))
fig = interaction_plot(kt['Weight'], kt['Duration'], np.log(kt['Days']+1),
        colors=['red', 'blue'], markers=['D','^'], ms=10, ax=plt.gca())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-417-e6704152aff6> in <module>()
----> 1 kt = kidney_table
      2 plt.figure(figsize=(8,6))
      3 fig = interaction_plot(kt['Weight'], kt['Duration'], np.log(kt['Days']+1),
      4         colors=['red', 'blue'], markers=['D','^'], ms=10, ax=plt.gca())

NameError: name 'kidney_table' is not defined

You have things available in the calling namespace available in the formula evaluation namespace

In [39]:
kidney_lm = ols('np.log(Days+1) ~ C(Duration) * C(Weight)', data=kt).fit()

table10 = anova_lm(kidney_lm)

print(anova_lm(ols('np.log(Days+1) ~ C(Duration) + C(Weight)',
                data=kt).fit(), kidney_lm))
print(anova_lm(ols('np.log(Days+1) ~ C(Duration)', data=kt).fit(),
               ols('np.log(Days+1) ~ C(Duration) + C(Weight, Sum)',
                   data=kt).fit()))
print(anova_lm(ols('np.log(Days+1) ~ C(Weight)', data=kt).fit(),
               ols('np.log(Days+1) ~ C(Duration) + C(Weight, Sum)',
                   data=kt).fit()))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-418-a2ff4752a520> in <module>()
----> 1 kidney_lm = ols('np.log(Days+1) ~ C(Duration) * C(Weight)', data=kt).fit()
      2 
      3 table10 = anova_lm(kidney_lm)
      4 
      5 print(anova_lm(ols('np.log(Days+1) ~ C(Duration) + C(Weight)',

NameError: name 'kt' is not defined

Sum of squares

Illustrates the use of different types of sums of squares (I,II,II) and how the Sum contrast can be used to produce the same output between the 3.

Types I and II are equivalent under a balanced design.

Don't use Type III with non-orthogonal contrast - ie., Treatment

In [40]:
sum_lm = ols('np.log(Days+1) ~ C(Duration, Sum) * C(Weight, Sum)',
            data=kt).fit()

print(anova_lm(sum_lm))
print(anova_lm(sum_lm, typ=2))
print(anova_lm(sum_lm, typ=3))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-419-46f87d0de498> in <module>()
      1 sum_lm = ols('np.log(Days+1) ~ C(Duration, Sum) * C(Weight, Sum)',
----> 2             data=kt).fit()
      3 
      4 print(anova_lm(sum_lm))
      5 print(anova_lm(sum_lm, typ=2))

NameError: name 'kt' is not defined
In [41]:
nosum_lm = ols('np.log(Days+1) ~ C(Duration, Treatment) * C(Weight, Treatment)',
            data=kt).fit()
print(anova_lm(nosum_lm))
print(anova_lm(nosum_lm, typ=2))
print(anova_lm(nosum_lm, typ=3))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-420-f63f83741440> in <module>()
      1 nosum_lm = ols('np.log(Days+1) ~ C(Duration, Treatment) * C(Weight, Treatment)',
----> 2             data=kt).fit()
      3 print(anova_lm(nosum_lm))
      4 print(anova_lm(nosum_lm, typ=2))
      5 print(anova_lm(nosum_lm, typ=3))

NameError: name 'kt' is not defined