Interactions and ANOVA

Note: This script is based heavily on Jonathan Taylor's class notes http://www.stanford.edu/class/stats191/interactions.html

Download and format data:

In [1]:
%matplotlib inline

from __future__ import print_function
from statsmodels.compat import urlopen
import numpy as np
np.set_printoptions(precision=4, suppress=True)
import statsmodels.api as sm
import pandas as pd
pd.set_option("display.width", 100)
import matplotlib.pyplot as plt
from statsmodels.formula.api import ols
from statsmodels.graphics.api import interaction_plot, abline_plot
from statsmodels.stats.anova import anova_lm

# Load the salary data, preferring a local cache over a network download.
try:
    salary_table = pd.read_csv('salary.table')
except (IOError, OSError):  # no local cache yet (FileNotFoundError is an OSError)
    # recent pandas can read URL without urlopen
    url = 'http://stats191.stanford.edu/data/salary.table'
    fh = urlopen(url)
    try:
        salary_table = pd.read_table(fh)
    finally:
        fh.close()  # release the HTTP response even if parsing fails
    # Write the cache without the index: otherwise the read_csv() above would
    # reload it with an extra, unnamed index column on the next run.
    salary_table.to_csv('salary.table', index=False)

# Column aliases used by later cells. The plots label X as 'Experience' and
# S as 'Salary'; the prose calls E education and M management.
E = salary_table.E
M = salary_table.M
X = salary_table.X
S = salary_table.S
/build/statsmodels-GE7Zhw/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-1-699e2abfd95f> in <module>()
     15 try:
---> 16     salary_table = pd.read_csv('salary.table')
     17 except:  # recent pandas can read URL without urlopen

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    708 
--> 709         return _read(filepath_or_buffer, kwds)
    710 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    448     # Create the parser.
--> 449     parser = TextFileReader(filepath_or_buffer, **kwds)
    450 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    817 
--> 818         self._make_engine(self.engine)
    819 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1048         if engine == 'c':
-> 1049             self._engine = CParserWrapper(self.f, **self.options)
   1050         else:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1694 
-> 1695         self._reader = parsers.TextReader(src, **kwds)
   1696 

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: File b'salary.table' does not exist

During handling of the above exception, another exception occurred:

ConnectionRefusedError                    Traceback (most recent call last)
/usr/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1317                 h.request(req.get_method(), req.selector, req.data, headers,
-> 1318                           encode_chunked=req.has_header('Transfer-encoding'))
   1319             except OSError as err: # timeout error

/usr/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1238         """Send a complete request to the server."""
-> 1239         self._send_request(method, url, body, headers, encode_chunked)
   1240 

/usr/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1284             body = _encode(body, 'body')
-> 1285         self.endheaders(body, encode_chunked=encode_chunked)
   1286 

/usr/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
   1233             raise CannotSendHeader()
-> 1234         self._send_output(message_body, encode_chunked=encode_chunked)
   1235 

/usr/lib/python3.6/http/client.py in _send_output(self, message_body, encode_chunked)
   1025         del self._buffer[:]
-> 1026         self.send(msg)
   1027 

/usr/lib/python3.6/http/client.py in send(self, data)
    963             if self.auto_open:
--> 964                 self.connect()
    965             else:

/usr/lib/python3.6/http/client.py in connect(self)
    935         self.sock = self._create_connection(
--> 936             (self.host,self.port), self.timeout, self.source_address)
    937         self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

/usr/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
    723     if err is not None:
--> 724         raise err
    725     else:

/usr/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
    712                 sock.bind(source_address)
--> 713             sock.connect(sa)
    714             # Break explicitly a reference cycle

ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-1-699e2abfd95f> in <module>()
     17 except:  # recent pandas can read URL without urlopen
     18     url = 'http://stats191.stanford.edu/data/salary.table'
---> 19     fh = urlopen(url)
     20     salary_table = pd.read_table(fh)
     21     salary_table.to_csv('salary.table')

/usr/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    221     else:
    222         opener = _opener
--> 223     return opener.open(url, data, timeout)
    224 
    225 def install_opener(opener):

/usr/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
    524             req = meth(req)
    525 
--> 526         response = self._open(req, data)
    527 
    528         # post-process response

/usr/lib/python3.6/urllib/request.py in _open(self, req, data)
    542         protocol = req.type
    543         result = self._call_chain(self.handle_open, protocol, protocol +
--> 544                                   '_open', req)
    545         if result:
    546             return result

/usr/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    502         for handler in handlers:
    503             func = getattr(handler, meth_name)
--> 504             result = func(*args)
    505             if result is not None:
    506                 return result

/usr/lib/python3.6/urllib/request.py in http_open(self, req)
   1344 
   1345     def http_open(self, req):
-> 1346         return self.do_open(http.client.HTTPConnection, req)
   1347 
   1348     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1318                           encode_chunked=req.has_header('Transfer-encoding'))
   1319             except OSError as err: # timeout error
-> 1320                 raise URLError(err)
   1321             r = h.getresponse()
   1322         except:

URLError: <urlopen error [Errno 111] Connection refused>

Take a look at the data:

In [2]:
# Salary vs. experience, one scatter call per (E, M) group.
# Marker shape is picked by M and colour by E; the lookups below assume
# M is coded 0/1 and E is coded 1..3 -- TODO confirm against the data.
plt.figure(figsize=(6, 6))
symbols = ['D', '^']
colors = ['r', 'g', 'blue']
factor_groups = salary_table.groupby(['E', 'M'])
for (educ, mgmt), cell in factor_groups:
    plt.scatter(cell['X'], cell['S'], s=144,
                marker=symbols[mgmt], color=colors[educ - 1])
plt.xlabel('Experience');
plt.ylabel('Salary');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-06b5e3c35d99> in <module>()
      2 symbols = ['D', '^']
      3 colors = ['r', 'g', 'blue']
----> 4 factor_groups = salary_table.groupby(['E','M'])
      5 for values, group in factor_groups:
      6     i,j = values

NameError: name 'salary_table' is not defined
<Figure size 432x432 with 0 Axes>

Fit a linear model:

In [3]:
# Additive model: E and M enter as categorical terms, X as a numeric term.
formula = 'S ~ C(E) + C(M) + X'
lm = ols(formula=formula, data=salary_table).fit()
print(lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-bd1483e9558c> in <module>()
      1 formula = 'S ~ C(E) + C(M) + X'
----> 2 lm = ols(formula, salary_table).fit()
      3 print(lm.summary())

NameError: name 'salary_table' is not defined

Have a look at the created design matrix:

In [4]:
# First five rows (all columns) of the design matrix built from the formula.
lm.model.exog[:5, :]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-15c89faef8a8> in <module>()
----> 1 lm.model.exog[:5]

NameError: name 'lm' is not defined

Or since we initially passed in a DataFrame, we have a DataFrame available in

In [5]:
# Same design matrix, as the DataFrame kept on the model (first five rows).
lm.model.data.orig_exog.head()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-136b4afd409d> in <module>()
----> 1 lm.model.data.orig_exog[:5]

NameError: name 'lm' is not defined

We keep a reference to the original untouched data in

In [6]:
# First five rows of the original data the model was fit on.
lm.model.data.frame.head()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-45902db7bdbe> in <module>()
----> 1 lm.model.data.frame[:5]

NameError: name 'lm' is not defined

Influence statistics

In [7]:
# Influence diagnostics for the additive fit; `infl` is reused by the next cell.
infl = lm.get_influence()
influence_report = infl.summary_table()
print(influence_report)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-7-cf91966f823d> in <module>()
----> 1 infl = lm.get_influence()
      2 print(infl.summary_table())

NameError: name 'lm' is not defined

or get a dataframe

In [8]:
# Same influence diagnostics, returned as a DataFrame instead of a printed table.
df_infl = infl.summary_frame()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-4a89b3b617d2> in <module>()
----> 1 df_infl = infl.summary_frame()

NameError: name 'infl' is not defined
In [9]:
# Peek at the first five rows of the influence frame.
df_infl.head()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-c7ae236e6fec> in <module>()
----> 1 df_infl[:5]

NameError: name 'df_infl' is not defined

Now plot the residuals within the groups separately:

In [10]:
# Residuals of the additive model, drawn in one vertical strip per (E, M) group.
resid = lm.resid
plt.figure(figsize=(6, 6));
for (educ, mgmt), cell in factor_groups:
    # Give every (E, M) pair its own x position -- for plotting purposes only.
    slot = educ * 2 + mgmt - 1
    plt.scatter([slot] * len(cell), resid[cell.index],
                s=144, edgecolors='black',
                marker=symbols[mgmt], color=colors[educ - 1])
plt.xlabel('Group');
plt.ylabel('Residuals');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-b1fbd2073437> in <module>()
----> 1 resid = lm.resid
      2 plt.figure(figsize=(6,6));
      3 for values, group in factor_groups:
      4     i,j = values
      5     group_num = i*2 + j - 1  # for plotting purposes

NameError: name 'lm' is not defined

Now we will test some interactions using anova or f_test

In [11]:
# Add the C(E):X interaction (C(E) * X expands to both main effects plus it).
interX_lm = ols(formula="S ~ C(E) * X + C(M)", data=salary_table).fit()
print(interX_lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-11-63ad3de263bb> in <module>()
----> 1 interX_lm = ols("S ~ C(E) * X + C(M)", salary_table).fit()
      2 print(interX_lm.summary())

NameError: name 'salary_table' is not defined

Do an ANOVA check

In [12]:
# anova_lm is already imported in the notebook's first cell
# (from statsmodels.stats.anova), so it is not imported a second time here.

# Compare the additive model against the one with the C(E):X interaction.
table1 = anova_lm(lm, interX_lm)
print(table1)

# Alternative extension: interaction between the two categorical factors.
interM_lm = ols("S ~ X + C(E)*C(M)", data=salary_table).fit()
print(interM_lm.summary())

# Compare the additive model against the C(E):C(M) interaction model.
table2 = anova_lm(lm, interM_lm)
print(table2)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-12-99371545e58d> in <module>()
      1 from statsmodels.stats.api import anova_lm
      2 
----> 3 table1 = anova_lm(lm, interX_lm)
      4 print(table1)
      5 

NameError: name 'lm' is not defined

The design matrix as a DataFrame

In [13]:
# First five rows of the interaction model's design matrix (DataFrame form).
interM_lm.model.data.orig_exog.head()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-13-1dd5303dacd2> in <module>()
----> 1 interM_lm.model.data.orig_exog[:5]

NameError: name 'interM_lm' is not defined

The design matrix as an ndarray

In [14]:
# A notebook cell only renders its last expression, so a bare
# `interM_lm.model.exog` followed by another expression is never displayed.
# Print the column names explicitly, then leave the ndarray as the cell's
# final expression so both are shown.
print(interM_lm.model.exog_names)
interM_lm.model.exog
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-14-127a7082e299> in <module>()
----> 1 interM_lm.model.exog
      2 interM_lm.model.exog_names

NameError: name 'interM_lm' is not defined
In [15]:
# Internally studentized residuals of the C(E)*C(M) model against X.
# `resid` is kept as a notebook-level name: the following cell reads it.
infl = interM_lm.get_influence()
resid = infl.resid_studentized_internal
plt.figure(figsize=(6, 6))
for (educ, mgmt), cell in factor_groups:
    idx = cell.index
    plt.scatter(X[idx], resid[idx], s=144, edgecolors='black',
                marker=symbols[mgmt], color=colors[educ - 1])
plt.xlabel('X');
plt.ylabel('standardized resids');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-15-4a4b0ea23d42> in <module>()
----> 1 infl = interM_lm.get_influence()
      2 resid = infl.resid_studentized_internal
      3 plt.figure(figsize=(6,6))
      4 for values, group in factor_groups:
      5     i,j = values

NameError: name 'interM_lm' is not defined

Looks like one observation is an outlier.

In [16]:
# Treat the observation with the largest absolute residual as the outlier
# and refit every model on the remaining rows.
drop_idx = np.abs(resid).argmax()
print(drop_idx)  # zero-based index
idx = salary_table.index.drop(drop_idx)

# Arguments shared by all refits: same data, restricted to the kept rows.
without_outlier = dict(data=salary_table, subset=idx)

lm32 = ols('S ~ C(E) + X + C(M)', **without_outlier).fit()
print(lm32.summary())
print('\n')

interX_lm32 = ols('S ~ C(E) * X + C(M)', **without_outlier).fit()
print(interX_lm32.summary())
print('\n')

# Additive vs. C(E):X interaction, outlier removed.
table3 = anova_lm(lm32, interX_lm32)
print(table3)
print('\n')

interM_lm32 = ols('S ~ X + C(E) * C(M)', **without_outlier).fit()

# Additive vs. C(E):C(M) interaction, outlier removed.
table4 = anova_lm(lm32, interM_lm32)
print(table4)
print('\n')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-16-b012081228e7> in <module>()
----> 1 drop_idx = abs(resid).argmax()
      2 print(drop_idx)  # zero-based index
      3 idx = salary_table.index.drop(drop_idx)
      4 
      5 lm32 = ols('S ~ C(E) + X + C(M)', data=salary_table, subset=idx).fit()

NameError: name 'resid' is not defined

Replot the residuals

In [17]:
# Standardized residuals of the refit that excludes the outlier.
# (The former try/except ran the identical statement in both branches, so it
# only masked the real error; it has been removed.)
resid = interM_lm32.get_influence().summary_frame()['standard_resid']
# The refit has no row for the dropped observation. Align the residuals to the
# full row index so that label is NaN (and simply not drawn) instead of making
# the label lookup below raise KeyError on recent pandas.
resid = resid.reindex(X.index)

plt.figure(figsize=(6,6))
for values, group in factor_groups:
    i,j = values
    idx = group.index
    plt.scatter(X[idx], resid[idx], marker=symbols[j], color=colors[i-1],
            s=144, edgecolors='black')
plt.xlabel('X[~[32]]');
plt.ylabel('standardized resids');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-17-3f5881af6ee4> in <module>()
      1 try:
----> 2     resid = interM_lm32.get_influence().summary_frame()['standard_resid']
      3 except:

NameError: name 'interM_lm32' is not defined

During handling of the above exception, another exception occurred:

NameError                                 Traceback (most recent call last)
<ipython-input-17-3f5881af6ee4> in <module>()
      2     resid = interM_lm32.get_influence().summary_frame()['standard_resid']
      3 except:
----> 4     resid = interM_lm32.get_influence().summary_frame()['standard_resid']
      5 
      6 plt.figure(figsize=(6,6))

NameError: name 'interM_lm32' is not defined

Plot the fitted values

In [18]:
# Final model: C(E)*C(M) interaction, fit with the outlier row removed.
lm_final = ols('S ~ X + C(E)*C(M)', data = salary_table.drop([drop_idx])).fit()
mf = lm_final.model.data.orig_exog
lstyle = ['-','--']

plt.figure(figsize=(6,6))
for values, group in factor_groups:
    i,j = values
    idx = group.index
    plt.scatter(X[idx], S[idx], marker=symbols[j], color=colors[i-1],
                s=144, edgecolors='black')
    # There is no idx 32 in the final model. reindex() tolerates the missing
    # label (plain [] lookup raises KeyError on recent pandas) and dropna()
    # then removes the resulting NaN before the line is drawn.
    x_fit = mf.X.reindex(idx).dropna()
    y_fit = lm_final.fittedvalues.reindex(idx).dropna()
    plt.plot(x_fit, y_fit, ls=lstyle[j], color=colors[i-1])
plt.xlabel('Experience');
plt.ylabel('Salary');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-18-799bf5928998> in <module>()
----> 1 lm_final = ols('S ~ X + C(E)*C(M)', data = salary_table.drop([drop_idx])).fit()
      2 mf = lm_final.model.data.orig_exog
      3 lstyle = ['-','--']
      4 
      5 plt.figure(figsize=(6,6))

NameError: name 'salary_table' is not defined

From our first look at the data, the difference between Master's and PhD in the management group is different than in the non-management group. This is an interaction between the two qualitative variables management, M, and education, E. We can visualize this by first removing the effect of experience, then plotting the means within each of the 6 groups using interaction_plot.

In [19]:
# Remove the linear effect of X from S using the X coefficient of interX_lm32,
# then plot the group means of the adjusted response.
x_coef = interX_lm32.params['X']
U = S - X * x_coef

plt.figure(figsize=(6, 6))
interaction_plot(E, M, U, ax=plt.gca(), markersize=10,
                 colors=['red', 'blue'], markers=['^', 'D'])
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-19-078f1a51de12> in <module>()
----> 1 U = S - X * interX_lm32.params['X']
      2 
      3 plt.figure(figsize=(6,6))
      4 interaction_plot(E, M, U, colors=['red','blue'], markers=['^','D'],
      5         markersize=10, ax=plt.gca())

NameError: name 'S' is not defined

Minority Employment Data

In [20]:
# Load the job-test data from a local file if present, else from the course site.
try:
    jobtest_table = pd.read_table('jobtest.table')
except (IOError, OSError):  # don't have data already
    url = 'http://stats191.stanford.edu/data/jobtest.table'
    jobtest_table = pd.read_table(url)

# Group by a single column name (not a one-element list): with a list, recent
# pandas yields 1-tuples as group keys, which cannot index colors/markers below.
factor_group = jobtest_table.groupby('ETHN')

fig, ax = plt.subplots(figsize=(6,6))
# colors/markers are indexed by the ETHN value, assumed to be 0 or 1 -- TODO confirm.
colors = ['purple', 'green']
markers = ['o', 'v']
for factor, group in factor_group:
    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
                marker=markers[factor], s=12**2)
ax.set_xlabel('TEST');
ax.set_ylabel('JPERF');
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-20-6329e9f7fab4> in <module>()
      1 try:
----> 2     jobtest_table = pd.read_table('jobtest.table')
      3 except:  # don't have data already

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    708 
--> 709         return _read(filepath_or_buffer, kwds)
    710 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    448     # Create the parser.
--> 449     parser = TextFileReader(filepath_or_buffer, **kwds)
    450 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    817 
--> 818         self._make_engine(self.engine)
    819 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1048         if engine == 'c':
-> 1049             self._engine = CParserWrapper(self.f, **self.options)
   1050         else:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1694 
-> 1695         self._reader = parsers.TextReader(src, **kwds)
   1696 

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: File b'jobtest.table' does not exist

During handling of the above exception, another exception occurred:

ConnectionRefusedError                    Traceback (most recent call last)
/usr/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1317                 h.request(req.get_method(), req.selector, req.data, headers,
-> 1318                           encode_chunked=req.has_header('Transfer-encoding'))
   1319             except OSError as err: # timeout error

/usr/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1238         """Send a complete request to the server."""
-> 1239         self._send_request(method, url, body, headers, encode_chunked)
   1240 

/usr/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1284             body = _encode(body, 'body')
-> 1285         self.endheaders(body, encode_chunked=encode_chunked)
   1286 

/usr/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
   1233             raise CannotSendHeader()
-> 1234         self._send_output(message_body, encode_chunked=encode_chunked)
   1235 

/usr/lib/python3.6/http/client.py in _send_output(self, message_body, encode_chunked)
   1025         del self._buffer[:]
-> 1026         self.send(msg)
   1027 

/usr/lib/python3.6/http/client.py in send(self, data)
    963             if self.auto_open:
--> 964                 self.connect()
    965             else:

/usr/lib/python3.6/http/client.py in connect(self)
    935         self.sock = self._create_connection(
--> 936             (self.host,self.port), self.timeout, self.source_address)
    937         self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

/usr/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
    723     if err is not None:
--> 724         raise err
    725     else:

/usr/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
    712                 sock.bind(source_address)
--> 713             sock.connect(sa)
    714             # Break explicitly a reference cycle

ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-20-6329e9f7fab4> in <module>()
      3 except:  # don't have data already
      4     url = 'http://stats191.stanford.edu/data/jobtest.table'
----> 5     jobtest_table = pd.read_table(url)
      6 
      7 factor_group = jobtest_table.groupby(['ETHN'])

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    707                     skip_blank_lines=skip_blank_lines)
    708 
--> 709         return _read(filepath_or_buffer, kwds)
    710 
    711     parser_f.__name__ = name

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    431     compression = _infer_compression(filepath_or_buffer, compression)
    432     filepath_or_buffer, _, compression = get_filepath_or_buffer(
--> 433         filepath_or_buffer, encoding, compression)
    434     kwds['compression'] = compression
    435 

/usr/lib/python3/dist-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression)
    188 
    189     if _is_url(filepath_or_buffer):
--> 190         req = _urlopen(filepath_or_buffer)
    191         content_encoding = req.headers.get('Content-Encoding', None)
    192         if content_encoding == 'gzip':

/usr/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    221     else:
    222         opener = _opener
--> 223     return opener.open(url, data, timeout)
    224 
    225 def install_opener(opener):

/usr/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
    524             req = meth(req)
    525 
--> 526         response = self._open(req, data)
    527 
    528         # post-process response

/usr/lib/python3.6/urllib/request.py in _open(self, req, data)
    542         protocol = req.type
    543         result = self._call_chain(self.handle_open, protocol, protocol +
--> 544                                   '_open', req)
    545         if result:
    546             return result

/usr/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    502         for handler in handlers:
    503             func = getattr(handler, meth_name)
--> 504             result = func(*args)
    505             if result is not None:
    506                 return result

/usr/lib/python3.6/urllib/request.py in http_open(self, req)
   1344 
   1345     def http_open(self, req):
-> 1346         return self.do_open(http.client.HTTPConnection, req)
   1347 
   1348     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1318                           encode_chunked=req.has_header('Transfer-encoding'))
   1319             except OSError as err: # timeout error
-> 1320                 raise URLError(err)
   1321             r = h.getresponse()
   1322         except:

URLError: <urlopen error [Errno 111] Connection refused>
In [21]:
# Baseline: one regression line of JPERF on TEST, ignoring ETHN.
min_lm = ols(formula='JPERF ~ TEST', data=jobtest_table).fit()
print(min_lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-21-2dda11467a64> in <module>()
----> 1 min_lm = ols('JPERF ~ TEST', data=jobtest_table).fit()
      2 print(min_lm.summary())

NameError: name 'jobtest_table' is not defined
In [22]:
# Scatter by ETHN group with the single fitted line from min_lm on top.
fig, ax = plt.subplots(figsize=(6, 6));
for ethn, rows in factor_group:
    ax.scatter(rows['TEST'], rows['JPERF'], s=12**2,
               color=colors[ethn], marker=markers[ethn])

ax.set_xlabel('TEST')
ax.set_ylabel('JPERF')
fig = abline_plot(model_results=min_lm, ax=ax)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-22-3bca97a45134> in <module>()
      1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5 

NameError: name 'factor_group' is not defined
In [23]:
# Adds only the TEST:ETHN interaction term (no ETHN main effect).
min_lm2 = ols(formula='JPERF ~ TEST + TEST:ETHN', data=jobtest_table).fit()

print(min_lm2.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-23-ad7cfc5a299c> in <module>()
      1 min_lm2 = ols('JPERF ~ TEST + TEST:ETHN',
----> 2         data=jobtest_table).fit()
      3 
      4 print(min_lm2.summary())

NameError: name 'jobtest_table' is not defined
In [24]:
# Scatter by ETHN group, then the two lines implied by min_lm2:
# both use the same intercept; the green line adds the TEST:ETHN slope term.
fig, ax = plt.subplots(figsize=(6, 6));
for ethn, rows in factor_group:
    ax.scatter(rows['TEST'], rows['JPERF'], s=12**2,
               color=colors[ethn], marker=markers[ethn])

coef = min_lm2.params
fig = abline_plot(intercept=coef['Intercept'], slope=coef['TEST'],
                  ax=ax, color='purple');
fig = abline_plot(intercept=coef['Intercept'],
                  slope=coef['TEST'] + coef['TEST:ETHN'],
                  ax=ax, color='green');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-24-f7126666bc31> in <module>()
      1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5 

NameError: name 'factor_group' is not defined
In [25]:
# Adds only the ETHN main effect (no interaction with TEST).
min_lm3 = ols(formula='JPERF ~ TEST + ETHN', data=jobtest_table).fit()
print(min_lm3.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-25-26270b8e0d6d> in <module>()
----> 1 min_lm3 = ols('JPERF ~ TEST + ETHN', data = jobtest_table).fit()
      2 print(min_lm3.summary())

NameError: name 'jobtest_table' is not defined
In [26]:
# Scatter by ETHN group, then the two lines implied by min_lm3:
# both use the same TEST slope; the green line adds the ETHN coefficient
# to the intercept.
fig, ax = plt.subplots(figsize=(6, 6));
for ethn, rows in factor_group:
    ax.scatter(rows['TEST'], rows['JPERF'], s=12**2,
               color=colors[ethn], marker=markers[ethn])

coef = min_lm3.params
fig = abline_plot(intercept=coef['Intercept'], slope=coef['TEST'],
                  ax=ax, color='purple');
fig = abline_plot(intercept=coef['Intercept'] + coef['ETHN'],
                  slope=coef['TEST'], ax=ax, color='green');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-26-5bc7e2ba2d68> in <module>()
      1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5 

NameError: name 'factor_group' is not defined
In [27]:
# Full model: TEST * ETHN gives both main effects plus the TEST:ETHN term.
min_lm4 = ols(formula='JPERF ~ TEST * ETHN', data=jobtest_table).fit()
print(min_lm4.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-27-0cc0b240e06d> in <module>()
----> 1 min_lm4 = ols('JPERF ~ TEST * ETHN', data = jobtest_table).fit()
      2 print(min_lm4.summary())

NameError: name 'jobtest_table' is not defined
In [28]:
# Scatter JPERF against TEST, one colour/marker per group in factor_group.
fig, ax = plt.subplots(figsize=(8,6));
for factor, group in factor_group:
    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
               marker=markers[factor], s=12**2)

# Overlay the fitted lines of min_lm4: intercept and slope both differ
# between the two lines (purple: ETHN = 0, green: ETHN = 1).
fig = abline_plot(intercept=min_lm4.params['Intercept'],
                  slope=min_lm4.params['TEST'], ax=ax, color='purple');
fig = abline_plot(intercept=min_lm4.params['Intercept'] + min_lm4.params['ETHN'],
                  slope=min_lm4.params['TEST'] + min_lm4.params['TEST:ETHN'],
                  ax=ax, color='green');

# Label the figure so it can be read without the surrounding code.
ax.set(xlabel='TEST', ylabel='JPERF', title='JPERF ~ TEST * ETHN');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-28-10026649b950> in <module>()
      1 fig, ax = plt.subplots(figsize=(8,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5 

NameError: name 'factor_group' is not defined
In [29]:
# Is there any effect of ETHN on slope or intercept?
# Compares min_lm against min_lm4 ('JPERF ~ TEST * ETHN'), which carries both
# the ETHN and the TEST:ETHN terms.
# NOTE(review): min_lm is fitted in an earlier cell; presumably it is the
# TEST-only baseline -- confirm.
table5 = anova_lm(min_lm, min_lm4)
print(table5)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-29-647f7d87fe70> in <module>()
      1 # is there any effect of ETHN on slope or intercept?
----> 2 table5 = anova_lm(min_lm, min_lm4)
      3 print(table5)

NameError: name 'min_lm' is not defined
In [30]:
# Is there any effect of ETHN on the intercept?
# Compares min_lm against min_lm3 ('JPERF ~ TEST + ETHN'), which adds only the
# additive ETHN term.
# NOTE(review): min_lm is fitted in an earlier cell; presumably it is the
# TEST-only baseline -- confirm.
table6 = anova_lm(min_lm, min_lm3)
print(table6)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-30-39645dd00862> in <module>()
      1 # is there any effect of ETHN on intercept
----> 2 table6 = anova_lm(min_lm, min_lm3)
      3 print(table6)

NameError: name 'min_lm' is not defined
In [31]:
# Is there any effect of ETHN on the slope?
# NOTE(review): min_lm and min_lm2 are both fitted in earlier cells;
# presumably min_lm2 adds only the TEST:ETHN term to min_lm -- confirm.
table7 = anova_lm(min_lm, min_lm2)
print(table7)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-31-e0085d24d296> in <module>()
      1 # is there any effect of ETHN on slope
----> 2 table7 = anova_lm(min_lm, min_lm2)
      3 print(table7)

NameError: name 'min_lm' is not defined
In [32]:
# Is it just the slope, or both slope and intercept?
# Compares min_lm2 against the full interaction model min_lm4
# ('JPERF ~ TEST * ETHN').
# NOTE(review): min_lm2 is fitted in an earlier cell; presumably it is the
# slope-only (TEST:ETHN) model -- confirm.
table8 = anova_lm(min_lm2, min_lm4)
print(table8)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-32-a9a6172af9d0> in <module>()
      1 # is it just the slope or both?
----> 2 table8 = anova_lm(min_lm2, min_lm4)
      3 print(table8)

NameError: name 'min_lm2' is not defined

One-way ANOVA

In [33]:
# Load the rehab data from the local cache if it exists; otherwise download it
# and cache it for the next run.
try:
    rehab_table = pd.read_csv('rehab.table')
except (IOError, OSError):
    # Only a missing/unreadable cache triggers the download; any other error
    # (and Ctrl-C) is allowed to surface instead of being swallowed.
    url = 'http://stats191.stanford.edu/data/rehab.csv'
    rehab_table = pd.read_csv(url)
    # index=False keeps the cached copy column-for-column identical to the
    # download (otherwise re-reading it yields an extra 'Unnamed: 0' column).
    rehab_table.to_csv('rehab.table', index=False)

# Box plot of Time grouped by Fitness. The return value of boxplot() is an
# Axes, so it is not assigned to `fig`; `fig` stays bound to the Figure.
fig, ax = plt.subplots(figsize=(8,6))
rehab_table.boxplot('Time', 'Fitness', ax=ax, grid=False);
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-33-d3ea2b9d3e10> in <module>()
      1 try:
----> 2     rehab_table = pd.read_csv('rehab.table')
      3 except:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    708 
--> 709         return _read(filepath_or_buffer, kwds)
    710 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    448     # Create the parser.
--> 449     parser = TextFileReader(filepath_or_buffer, **kwds)
    450 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    817 
--> 818         self._make_engine(self.engine)
    819 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1048         if engine == 'c':
-> 1049             self._engine = CParserWrapper(self.f, **self.options)
   1050         else:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1694 
-> 1695         self._reader = parsers.TextReader(src, **kwds)
   1696 

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: File b'rehab.table' does not exist

During handling of the above exception, another exception occurred:

ConnectionRefusedError                    Traceback (most recent call last)
/usr/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1317                 h.request(req.get_method(), req.selector, req.data, headers,
-> 1318                           encode_chunked=req.has_header('Transfer-encoding'))
   1319             except OSError as err: # timeout error

/usr/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1238         """Send a complete request to the server."""
-> 1239         self._send_request(method, url, body, headers, encode_chunked)
   1240 

/usr/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1284             body = _encode(body, 'body')
-> 1285         self.endheaders(body, encode_chunked=encode_chunked)
   1286 

/usr/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
   1233             raise CannotSendHeader()
-> 1234         self._send_output(message_body, encode_chunked=encode_chunked)
   1235 

/usr/lib/python3.6/http/client.py in _send_output(self, message_body, encode_chunked)
   1025         del self._buffer[:]
-> 1026         self.send(msg)
   1027 

/usr/lib/python3.6/http/client.py in send(self, data)
    963             if self.auto_open:
--> 964                 self.connect()
    965             else:

/usr/lib/python3.6/http/client.py in connect(self)
    935         self.sock = self._create_connection(
--> 936             (self.host,self.port), self.timeout, self.source_address)
    937         self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

/usr/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
    723     if err is not None:
--> 724         raise err
    725     else:

/usr/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
    712                 sock.bind(source_address)
--> 713             sock.connect(sa)
    714             # Break explicitly a reference cycle

ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-33-d3ea2b9d3e10> in <module>()
      3 except:
      4     url = 'http://stats191.stanford.edu/data/rehab.csv'
----> 5     rehab_table = pd.read_table(url, delimiter=",")
      6     rehab_table.to_csv('rehab.table')
      7 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    707                     skip_blank_lines=skip_blank_lines)
    708 
--> 709         return _read(filepath_or_buffer, kwds)
    710 
    711     parser_f.__name__ = name

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    431     compression = _infer_compression(filepath_or_buffer, compression)
    432     filepath_or_buffer, _, compression = get_filepath_or_buffer(
--> 433         filepath_or_buffer, encoding, compression)
    434     kwds['compression'] = compression
    435 

/usr/lib/python3/dist-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression)
    188 
    189     if _is_url(filepath_or_buffer):
--> 190         req = _urlopen(filepath_or_buffer)
    191         content_encoding = req.headers.get('Content-Encoding', None)
    192         if content_encoding == 'gzip':

/usr/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    221     else:
    222         opener = _opener
--> 223     return opener.open(url, data, timeout)
    224 
    225 def install_opener(opener):

/usr/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
    524             req = meth(req)
    525 
--> 526         response = self._open(req, data)
    527 
    528         # post-process response

/usr/lib/python3.6/urllib/request.py in _open(self, req, data)
    542         protocol = req.type
    543         result = self._call_chain(self.handle_open, protocol, protocol +
--> 544                                   '_open', req)
    545         if result:
    546             return result

/usr/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    502         for handler in handlers:
    503             func = getattr(handler, meth_name)
--> 504             result = func(*args)
    505             if result is not None:
    506                 return result

/usr/lib/python3.6/urllib/request.py in http_open(self, req)
   1344 
   1345     def http_open(self, req):
-> 1346         return self.do_open(http.client.HTTPConnection, req)
   1347 
   1348     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1318                           encode_chunked=req.has_header('Transfer-encoding'))
   1319             except OSError as err: # timeout error
-> 1320                 raise URLError(err)
   1321             r = h.getresponse()
   1322         except:

URLError: <urlopen error [Errno 111] Connection refused>
In [34]:
# One-way ANOVA: Time modelled on Fitness treated as a categorical factor.
rehab_lm = ols(formula='Time ~ C(Fitness)', data=rehab_table).fit()
table9 = anova_lm(rehab_lm)

# Show the ANOVA table first, then the design matrix the model was fitted on.
print(table9)
print(rehab_lm.model.data.orig_exog)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-34-d3bb1b06817c> in <module>()
----> 1 rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()
      2 table9 = anova_lm(rehab_lm)
      3 print(table9)
      4 
      5 print(rehab_lm.model.data.orig_exog)

NameError: name 'rehab_table' is not defined
In [35]:
# Regression summary for the one-way model 'Time ~ C(Fitness)' fitted above.
print(rehab_lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-35-99d31a5bc5c4> in <module>()
----> 1 print(rehab_lm.summary())

NameError: name 'rehab_lm' is not defined

Two-way ANOVA

In [36]:
try:
    kidney_table = pd.read_table('./kidney.table')
except:
    url = 'http://stats191.stanford.edu/data/kidney.table'
    kidney_table = pd.read_csv(url, delim_whitespace=True)
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-36-cbc31ddb699c> in <module>()
      1 try:
----> 2     kidney_table = pd.read_table('./kidney.table')
      3 except:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    708 
--> 709         return _read(filepath_or_buffer, kwds)
    710 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    448     # Create the parser.
--> 449     parser = TextFileReader(filepath_or_buffer, **kwds)
    450 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    817 
--> 818         self._make_engine(self.engine)
    819 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1048         if engine == 'c':
-> 1049             self._engine = CParserWrapper(self.f, **self.options)
   1050         else:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1694 
-> 1695         self._reader = parsers.TextReader(src, **kwds)
   1696 

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: File b'./kidney.table' does not exist

During handling of the above exception, another exception occurred:

ConnectionRefusedError                    Traceback (most recent call last)
/usr/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1317                 h.request(req.get_method(), req.selector, req.data, headers,
-> 1318                           encode_chunked=req.has_header('Transfer-encoding'))
   1319             except OSError as err: # timeout error

/usr/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1238         """Send a complete request to the server."""
-> 1239         self._send_request(method, url, body, headers, encode_chunked)
   1240 

/usr/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1284             body = _encode(body, 'body')
-> 1285         self.endheaders(body, encode_chunked=encode_chunked)
   1286 

/usr/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
   1233             raise CannotSendHeader()
-> 1234         self._send_output(message_body, encode_chunked=encode_chunked)
   1235 

/usr/lib/python3.6/http/client.py in _send_output(self, message_body, encode_chunked)
   1025         del self._buffer[:]
-> 1026         self.send(msg)
   1027 

/usr/lib/python3.6/http/client.py in send(self, data)
    963             if self.auto_open:
--> 964                 self.connect()
    965             else:

/usr/lib/python3.6/http/client.py in connect(self)
    935         self.sock = self._create_connection(
--> 936             (self.host,self.port), self.timeout, self.source_address)
    937         self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

/usr/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
    723     if err is not None:
--> 724         raise err
    725     else:

/usr/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
    712                 sock.bind(source_address)
--> 713             sock.connect(sa)
    714             # Break explicitly a reference cycle

ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-36-cbc31ddb699c> in <module>()
      3 except:
      4     url = 'http://stats191.stanford.edu/data/kidney.table'
----> 5     kidney_table = pd.read_csv(url, delim_whitespace=True)

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    707                     skip_blank_lines=skip_blank_lines)
    708 
--> 709         return _read(filepath_or_buffer, kwds)
    710 
    711     parser_f.__name__ = name

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    431     compression = _infer_compression(filepath_or_buffer, compression)
    432     filepath_or_buffer, _, compression = get_filepath_or_buffer(
--> 433         filepath_or_buffer, encoding, compression)
    434     kwds['compression'] = compression
    435 

/usr/lib/python3/dist-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression)
    188 
    189     if _is_url(filepath_or_buffer):
--> 190         req = _urlopen(filepath_or_buffer)
    191         content_encoding = req.headers.get('Content-Encoding', None)
    192         if content_encoding == 'gzip':

/usr/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    221     else:
    222         opener = _opener
--> 223     return opener.open(url, data, timeout)
    224 
    225 def install_opener(opener):

/usr/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
    524             req = meth(req)
    525 
--> 526         response = self._open(req, data)
    527 
    528         # post-process response

/usr/lib/python3.6/urllib/request.py in _open(self, req, data)
    542         protocol = req.type
    543         result = self._call_chain(self.handle_open, protocol, protocol +
--> 544                                   '_open', req)
    545         if result:
    546             return result

/usr/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    502         for handler in handlers:
    503             func = getattr(handler, meth_name)
--> 504             result = func(*args)
    505             if result is not None:
    506                 return result

/usr/lib/python3.6/urllib/request.py in http_open(self, req)
   1344 
   1345     def http_open(self, req):
-> 1346         return self.do_open(http.client.HTTPConnection, req)
   1347 
   1348     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1318                           encode_chunked=req.has_header('Transfer-encoding'))
   1319             except OSError as err: # timeout error
-> 1320                 raise URLError(err)
   1321             r = h.getresponse()
   1322         except:

URLError: <urlopen error [Errno 111] Connection refused>

Explore the dataset

In [37]:
# Peek at the first ten rows of the kidney data.
kidney_table.head(10)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-37-fff8acd40403> in <module>()
----> 1 kidney_table.head(10)

NameError: name 'kidney_table' is not defined

Balanced panel

In [38]:
# Short alias used by the model-fitting cells that follow.
kt = kidney_table

# Interaction plot of log(Days + 1): Weight on the x-axis, one trace per
# Duration level, drawn on a fresh 8x6 figure.
plt.figure(figsize=(8, 6))
fig = interaction_plot(
    x=kt['Weight'],
    trace=kt['Duration'],
    response=np.log(kt['Days']+1),
    colors=['red', 'blue'],
    markers=['D', '^'],
    ms=10,
    ax=plt.gca(),
)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-38-9312bae60782> in <module>()
----> 1 kt = kidney_table
      2 plt.figure(figsize=(8,6))
      3 fig = interaction_plot(kt['Weight'], kt['Duration'], np.log(kt['Days']+1),
      4         colors=['red', 'blue'], markers=['D','^'], ms=10, ax=plt.gca())

NameError: name 'kidney_table' is not defined

Names defined in the calling namespace (for example `np`) are also available in the namespace where the formula is evaluated.

In [39]:
# Full two-way model: both factors plus their interaction. `np` inside the
# formula is resolved from this cell's namespace.
kidney_lm = ols(formula='np.log(Days+1) ~ C(Duration) * C(Weight)',
                data=kt).fit()

table10 = anova_lm(kidney_lm)

# Reduced models used in the comparisons below.
additive_lm = ols(formula='np.log(Days+1) ~ C(Duration) + C(Weight)',
                  data=kt).fit()
additive_sum_lm = ols(formula='np.log(Days+1) ~ C(Duration) + C(Weight, Sum)',
                      data=kt).fit()
duration_only_lm = ols(formula='np.log(Days+1) ~ C(Duration)', data=kt).fit()
weight_only_lm = ols(formula='np.log(Days+1) ~ C(Weight)', data=kt).fit()

# Additive vs. full: does the interaction term matter?
print(anova_lm(additive_lm, kidney_lm))
# Duration-only vs. additive: does adding Weight matter?
print(anova_lm(duration_only_lm, additive_sum_lm))
# Weight-only vs. additive: does adding Duration matter?
print(anova_lm(weight_only_lm, additive_sum_lm))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-39-c7e1132390fe> in <module>()
----> 1 kidney_lm = ols('np.log(Days+1) ~ C(Duration) * C(Weight)', data=kt).fit()
      2 
      3 table10 = anova_lm(kidney_lm)
      4 
      5 print(anova_lm(ols('np.log(Days+1) ~ C(Duration) + C(Weight)',

NameError: name 'kt' is not defined

Sum of squares

Illustrates the use of the different types of sums of squares (I, II, III) and how the Sum contrast can be used to produce the same output for all three.

Types I and II are equivalent under a balanced design.

Don't use Type III with a non-orthogonal contrast, i.e., Treatment.

In [40]:
# Interaction model with Sum contrasts on both factors.
sum_lm = ols(formula='np.log(Days+1) ~ C(Duration, Sum) * C(Weight, Sum)',
             data=kt).fit()

# Print the Type I, II and III ANOVA tables in turn (typ=1 is what anova_lm
# uses when typ is omitted).
for ss_type in (1, 2, 3):
    print(anova_lm(sum_lm, typ=ss_type))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-40-e0c1ed608c29> in <module>()
      1 sum_lm = ols('np.log(Days+1) ~ C(Duration, Sum) * C(Weight, Sum)',
----> 2             data=kt).fit()
      3 
      4 print(anova_lm(sum_lm))
      5 print(anova_lm(sum_lm, typ=2))

NameError: name 'kt' is not defined
In [41]:
# Same interaction model, but with Treatment contrasts on both factors.
nosum_lm = ols(
    formula='np.log(Days+1) ~ C(Duration, Treatment) * C(Weight, Treatment)',
    data=kt,
).fit()

# Print the Type I, II and III ANOVA tables in turn (typ=1 is what anova_lm
# uses when typ is omitted).
for ss_type in (1, 2, 3):
    print(anova_lm(nosum_lm, typ=ss_type))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-41-95381847ac17> in <module>()
      1 nosum_lm = ols('np.log(Days+1) ~ C(Duration, Treatment) * C(Weight, Treatment)',
----> 2             data=kt).fit()
      3 print(anova_lm(nosum_lm))
      4 print(anova_lm(nosum_lm, typ=2))
      5 print(anova_lm(nosum_lm, typ=3))

NameError: name 'kt' is not defined