Interactions and ANOVA

Note: This script is based heavily on Jonathan Taylor's class notes http://www.stanford.edu/class/stats191/interactions.html

Download and format data:

In [1]:
%matplotlib inline

from __future__ import print_function
from statsmodels.compat import urlopen
import numpy as np
np.set_printoptions(precision=4, suppress=True)
import statsmodels.api as sm
import pandas as pd
pd.set_option("display.width", 100)
import matplotlib.pyplot as plt
from statsmodels.formula.api import ols
from statsmodels.graphics.api import interaction_plot, abline_plot
from statsmodels.stats.anova import anova_lm

# Load the salary data, preferring a locally cached copy and falling back to
# a download when no cache exists yet.
try:
    salary_table = pd.read_csv('salary.table')
except (IOError, OSError):
    # Only a missing/unreadable cache triggers the download; any other error
    # (e.g. a parsing problem) is surfaced instead of being silently masked.
    url = 'http://stats191.stanford.edu/data/salary.table'
    # pandas opens and closes the URL itself, so no file handle is left open.
    salary_table = pd.read_table(url)
    # index=False keeps the cache free of an extra unnamed index column, so a
    # later read_csv returns the same columns as the download.
    salary_table.to_csv('salary.table', index=False)

# Column shortcuts used throughout the notebook:
# E = education, M = management, X = experience, S = salary.
E = salary_table.E
M = salary_table.M
X = salary_table.X
S = salary_table.S
/build/statsmodels-IFPJo1/statsmodels-0.8.0/.pybuild/cpython3_3.7_statsmodels/build/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-1-699e2abfd95f> in <module>()
     15 try:
---> 16     salary_table = pd.read_csv('salary.table')
     17 except:  # recent pandas can read URL without urlopen

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)
    677 
--> 678         return _read(filepath_or_buffer, kwds)
    679 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    439     # Create the parser.
--> 440     parser = TextFileReader(filepath_or_buffer, **kwds)
    441 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    786 
--> 787         self._make_engine(self.engine)
    788 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1013         if engine == 'c':
-> 1014             self._engine = CParserWrapper(self.f, **self.options)
   1015         else:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1707 
-> 1708         self._reader = parsers.TextReader(src, **kwds)
   1709 

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: File b'salary.table' does not exist

During handling of the above exception, another exception occurred:

ConnectionRefusedError                    Traceback (most recent call last)
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1316                 h.request(req.get_method(), req.selector, req.data, headers,
-> 1317                           encode_chunked=req.has_header('Transfer-encoding'))
   1318             except OSError as err: # timeout error

/usr/lib/python3.7/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1228         """Send a complete request to the server."""
-> 1229         self._send_request(method, url, body, headers, encode_chunked)
   1230 

/usr/lib/python3.7/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1274             body = _encode(body, 'body')
-> 1275         self.endheaders(body, encode_chunked=encode_chunked)
   1276 

/usr/lib/python3.7/http/client.py in endheaders(self, message_body, encode_chunked)
   1223             raise CannotSendHeader()
-> 1224         self._send_output(message_body, encode_chunked=encode_chunked)
   1225 

/usr/lib/python3.7/http/client.py in _send_output(self, message_body, encode_chunked)
   1015         del self._buffer[:]
-> 1016         self.send(msg)
   1017 

/usr/lib/python3.7/http/client.py in send(self, data)
    955             if self.auto_open:
--> 956                 self.connect()
    957             else:

/usr/lib/python3.7/http/client.py in connect(self)
    927         self.sock = self._create_connection(
--> 928             (self.host,self.port), self.timeout, self.source_address)
    929         self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
    726     if err is not None:
--> 727         raise err
    728     else:

/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
    715                 sock.bind(source_address)
--> 716             sock.connect(sa)
    717             # Break explicitly a reference cycle

ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-1-699e2abfd95f> in <module>()
     17 except:  # recent pandas can read URL without urlopen
     18     url = 'http://stats191.stanford.edu/data/salary.table'
---> 19     fh = urlopen(url)
     20     salary_table = pd.read_table(fh)
     21     salary_table.to_csv('salary.table')

/usr/lib/python3.7/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    220     else:
    221         opener = _opener
--> 222     return opener.open(url, data, timeout)
    223 
    224 def install_opener(opener):

/usr/lib/python3.7/urllib/request.py in open(self, fullurl, data, timeout)
    523             req = meth(req)
    524 
--> 525         response = self._open(req, data)
    526 
    527         # post-process response

/usr/lib/python3.7/urllib/request.py in _open(self, req, data)
    541         protocol = req.type
    542         result = self._call_chain(self.handle_open, protocol, protocol +
--> 543                                   '_open', req)
    544         if result:
    545             return result

/usr/lib/python3.7/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    501         for handler in handlers:
    502             func = getattr(handler, meth_name)
--> 503             result = func(*args)
    504             if result is not None:
    505                 return result

/usr/lib/python3.7/urllib/request.py in http_open(self, req)
   1343 
   1344     def http_open(self, req):
-> 1345         return self.do_open(http.client.HTTPConnection, req)
   1346 
   1347     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1317                           encode_chunked=req.has_header('Transfer-encoding'))
   1318             except OSError as err: # timeout error
-> 1319                 raise URLError(err)
   1320             r = h.getresponse()
   1321         except:

URLError: <urlopen error [Errno 111] Connection refused>

Take a look at the data:

In [2]:
# Salary against experience, one marker/colour combination per (E, M) cell:
# marker shape encodes management (M), colour encodes education (E).
symbols = ['D', '^']
colors = ['r', 'g', 'blue']
factor_groups = salary_table.groupby(['E','M'])

plt.figure(figsize=(6, 6))
for (educ, mgmt), members in factor_groups:
    plt.scatter(
        members['X'],
        members['S'],
        s=144,
        marker=symbols[mgmt],
        color=colors[educ - 1],  # education levels start at 1
    )
plt.xlabel('Experience');
plt.ylabel('Salary');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-06b5e3c35d99> in <module>()
      2 symbols = ['D', '^']
      3 colors = ['r', 'g', 'blue']
----> 4 factor_groups = salary_table.groupby(['E','M'])
      5 for values, group in factor_groups:
      6     i,j = values

NameError: name 'salary_table' is not defined
<Figure size 432x432 with 0 Axes>

Fit a linear model:

In [3]:
# Additive model: salary on education and management (both wrapped in C(...)
# so the formula treats them as categorical) plus experience.
formula = 'S ~ C(E) + C(M) + X'
lm = ols(formula, salary_table).fit()
print(lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-bd1483e9558c> in <module>()
      1 formula = 'S ~ C(E) + C(M) + X'
----> 2 lm = ols(formula, salary_table).fit()
      3 print(lm.summary())

NameError: name 'salary_table' is not defined

Have a look at the created design matrix:

In [4]:
# First five rows of the design matrix built from the formula.
lm.model.exog[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-15c89faef8a8> in <module>()
----> 1 lm.model.exog[:5]

NameError: name 'lm' is not defined

Or, since we initially passed in a DataFrame, the design matrix is also available as a DataFrame in

In [5]:
# Same design matrix, first five rows, kept as a DataFrame.
lm.model.data.orig_exog[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-136b4afd409d> in <module>()
----> 1 lm.model.data.orig_exog[:5]

NameError: name 'lm' is not defined

We keep a reference to the original untouched data in

In [6]:
# First five rows of the original data the model was fitted from.
lm.model.data.frame[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-45902db7bdbe> in <module>()
----> 1 lm.model.data.frame[:5]

NameError: name 'lm' is not defined

Influence statistics

In [7]:
# Influence diagnostics for the additive fit; `infl` is reused by the next cell.
infl = lm.get_influence()
print(infl.summary_table())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-7-cf91966f823d> in <module>()
----> 1 infl = lm.get_influence()
      2 print(infl.summary_table())

NameError: name 'lm' is not defined

or get a dataframe

In [8]:
# Same influence diagnostics, as a DataFrame instead of a printed table.
df_infl = infl.summary_frame()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-4a89b3b617d2> in <module>()
----> 1 df_infl = infl.summary_frame()

NameError: name 'infl' is not defined
In [9]:
# First five rows of the influence DataFrame.
df_infl[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-c7ae236e6fec> in <module>()
----> 1 df_infl[:5]

NameError: name 'df_infl' is not defined

Now plot the residuals within the groups separately:

In [10]:
# Residuals of the additive fit, drawn in one vertical strip per (E, M) group.
resid = lm.resid
plt.figure(figsize=(6, 6));
for (educ, mgmt), members in factor_groups:
    # Map each (E, M) pair to its own x position, purely for plotting.
    group_num = educ * 2 + mgmt - 1
    x = [group_num for _ in range(len(members))]
    plt.scatter(
        x,
        resid[members.index],
        s=144,
        marker=symbols[mgmt],
        color=colors[educ - 1],
        edgecolors='black',
    )
plt.xlabel('Group');
plt.ylabel('Residuals');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-b1fbd2073437> in <module>()
----> 1 resid = lm.resid
      2 plt.figure(figsize=(6,6));
      3 for values, group in factor_groups:
      4     i,j = values
      5     group_num = i*2 + j - 1  # for plotting purposes

NameError: name 'lm' is not defined

Now we will test some interactions using anova or f_test

In [11]:
# Adds an education-by-experience interaction (C(E) * X) to the additive model.
interX_lm = ols("S ~ C(E) * X + C(M)", salary_table).fit()
print(interX_lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-11-63ad3de263bb> in <module>()
----> 1 interX_lm = ols("S ~ C(E) * X + C(M)", salary_table).fit()
      2 print(interX_lm.summary())

NameError: name 'salary_table' is not defined

Do an ANOVA check

In [12]:
# NOTE(review): anova_lm is already imported in the first cell (from
# statsmodels.stats.anova); this line rebinds the same name mid-notebook.
from statsmodels.stats.api import anova_lm

# Additive model versus the education-by-experience interaction model.
table1 = anova_lm(lm, interX_lm)
print(table1)

# Model with an education-by-management interaction plus experience.
interM_lm = ols("S ~ X + C(E)*C(M)", data=salary_table).fit()
print(interM_lm.summary())

# Additive model versus the education-by-management interaction model.
table2 = anova_lm(lm, interM_lm)
print(table2)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-12-99371545e58d> in <module>()
      1 from statsmodels.stats.api import anova_lm
      2 
----> 3 table1 = anova_lm(lm, interX_lm)
      4 print(table1)
      5 

NameError: name 'lm' is not defined

The design matrix as a DataFrame

In [13]:
# First five rows of the interaction model's design matrix, as a DataFrame.
interM_lm.model.data.orig_exog[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-13-1dd5303dacd2> in <module>()
----> 1 interM_lm.model.data.orig_exog[:5]

NameError: name 'interM_lm' is not defined

The design matrix as an ndarray

In [14]:
# Only the last expression of a cell is echoed: the array on the next line is
# evaluated but not displayed, and the column names are what gets shown.
interM_lm.model.exog
interM_lm.model.exog_names
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-14-127a7082e299> in <module>()
----> 1 interM_lm.model.exog
      2 interM_lm.model.exog_names

NameError: name 'interM_lm' is not defined
In [15]:
# Internally studentized residuals of the E-by-M interaction model against
# experience; `resid` is reused by the outlier-removal cell that follows.
infl = interM_lm.get_influence()
resid = infl.resid_studentized_internal
plt.figure(figsize=(6, 6))
for (educ, mgmt), members in factor_groups:
    idx = members.index
    plt.scatter(
        X[idx],
        resid[idx],
        s=144,
        marker=symbols[mgmt],
        color=colors[educ - 1],
        edgecolors='black',
    )
plt.xlabel('X');
plt.ylabel('standardized resids');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-15-4a4b0ea23d42> in <module>()
----> 1 infl = interM_lm.get_influence()
      2 resid = infl.resid_studentized_internal
      3 plt.figure(figsize=(6,6))
      4 for values, group in factor_groups:
      5     i,j = values

NameError: name 'interM_lm' is not defined

Looks like one observation is an outlier.

In [16]:
# Locate the observation with the largest absolute residual and refit the
# three candidate models without it.
# NOTE(review): `resid` is whatever the most recently run cell bound to it;
# presumably the studentized residuals of interM_lm from the previous cell.
drop_idx = abs(resid).argmax()
print(drop_idx)  # zero-based index
# Every row label except the one being dropped.
idx = salary_table.index.drop(drop_idx)

# Additive model on the reduced data.
lm32 = ols('S ~ C(E) + X + C(M)', data=salary_table, subset=idx).fit()

print(lm32.summary())
print('\n')

# Education-by-experience interaction model on the reduced data.
interX_lm32 = ols('S ~ C(E) * X + C(M)', data=salary_table, subset=idx).fit()

print(interX_lm32.summary())
print('\n')


# Additive versus education-by-experience interaction, outlier removed.
table3 = anova_lm(lm32, interX_lm32)
print(table3)
print('\n')


# Education-by-management interaction model on the reduced data.
interM_lm32 = ols('S ~ X + C(E) * C(M)', data=salary_table, subset=idx).fit()

# Additive versus education-by-management interaction, outlier removed.
table4 = anova_lm(lm32, interM_lm32)
print(table4)
print('\n')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-16-b012081228e7> in <module>()
----> 1 drop_idx = abs(resid).argmax()
      2 print(drop_idx)  # zero-based index
      3 idx = salary_table.index.drop(drop_idx)
      4 
      5 lm32 = ols('S ~ C(E) + X + C(M)', data=salary_table, subset=idx).fit()

NameError: name 'resid' is not defined

Replot the residuals

In [17]:
# Standardized residuals of the E-by-M interaction model refitted without the
# outlier. The former try/except ran the identical statement in both branches,
# so it only hid the real error; the lookup is now done once.
resid = interM_lm32.get_influence().summary_frame()['standard_resid']

plt.figure(figsize=(6,6))
for values, group in factor_groups:
    i,j = values
    idx = group.index
    # The dropped observation is still part of its group but has no residual.
    # reindex yields NaN for that label (which scatter does not draw) instead
    # of raising KeyError on a missing label as plain `resid[idx]` can.
    plt.scatter(X[idx], resid.reindex(idx), marker=symbols[j], color=colors[i-1],
            s=144, edgecolors='black')
plt.xlabel('X[~[32]]');
plt.ylabel('standardized resids');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-17-3f5881af6ee4> in <module>()
      1 try:
----> 2     resid = interM_lm32.get_influence().summary_frame()['standard_resid']
      3 except:

NameError: name 'interM_lm32' is not defined

During handling of the above exception, another exception occurred:

NameError                                 Traceback (most recent call last)
<ipython-input-17-3f5881af6ee4> in <module>()
      2     resid = interM_lm32.get_influence().summary_frame()['standard_resid']
      3 except:
----> 4     resid = interM_lm32.get_influence().summary_frame()['standard_resid']
      5 
      6 plt.figure(figsize=(6,6))

NameError: name 'interM_lm32' is not defined

Plot the fitted values

In [18]:
# Final model: E-by-M interaction plus experience, fitted without the outlier.
lm_final = ols('S ~ X + C(E)*C(M)', data = salary_table.drop([drop_idx])).fit()
mf = lm_final.model.data.orig_exog
lstyle = ['-','--']

plt.figure(figsize=(6,6))
for values, group in factor_groups:
    i,j = values
    idx = group.index
    # Observed salaries for this (E, M) group, outlier included.
    plt.scatter(X[idx], S[idx], marker=symbols[j], color=colors[i-1],
                s=144, edgecolors='black')
    # The dropped row is absent from the final model. reindex inserts NaN for
    # it (then removed by dropna) rather than failing on the missing label as
    # direct `[idx]` selection can.
    plt.plot(mf.X.reindex(idx).dropna(),
             lm_final.fittedvalues.reindex(idx).dropna(),
             ls=lstyle[j], color=colors[i-1])
plt.xlabel('Experience');
plt.ylabel('Salary');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-18-799bf5928998> in <module>()
----> 1 lm_final = ols('S ~ X + C(E)*C(M)', data = salary_table.drop([drop_idx])).fit()
      2 mf = lm_final.model.data.orig_exog
      3 lstyle = ['-','--']
      4 
      5 plt.figure(figsize=(6,6))

NameError: name 'salary_table' is not defined

From our first look at the data, the difference between Master's and PhD in the management group is different than in the non-management group. This is an interaction between the two qualitative variables management, M, and education, E. We can visualize this by first removing the effect of experience, then plotting the means within each of the 6 groups using `interaction_plot`.

In [19]:
# Salary with the fitted experience effect removed, using the X coefficient
# of the education-by-experience model fitted without the outlier.
U = S - X * interX_lm32.params['X']

plt.figure(figsize=(6,6))
# Mean of U within each education level, one trace per management level.
# The trailing ';' stops the returned Figure from being echoed as the cell
# result, which would render the same plot a second time.
interaction_plot(E, M, U, colors=['red','blue'], markers=['^','D'],
        markersize=10, ax=plt.gca());
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-19-078f1a51de12> in <module>()
----> 1 U = S - X * interX_lm32.params['X']
      2 
      3 plt.figure(figsize=(6,6))
      4 interaction_plot(E, M, U, colors=['red','blue'], markers=['^','D'],
      5         markersize=10, ax=plt.gca())

NameError: name 'S' is not defined

Minority Employment Data

In [20]:
# Load the job-test data from a local copy if present, else download it.
try:
    jobtest_table = pd.read_table('jobtest.table')
except (IOError, OSError):  # don't have data already
    # Only a missing/unreadable local file triggers the download; other
    # errors are surfaced instead of being swallowed by a bare except.
    url = 'http://stats191.stanford.edu/data/jobtest.table'
    jobtest_table = pd.read_table(url)

# Group by the scalar key (not a one-element list) so iteration yields plain
# ETHN values, which are used below as list indices. A one-element list makes
# newer pandas yield 1-tuples, and those cannot index a list.
factor_group = jobtest_table.groupby('ETHN')

# Scatter of job performance against test score, one style per ETHN group.
fig, ax = plt.subplots(figsize=(6,6))
colors = ['purple', 'green']
markers = ['o', 'v']
for factor, group in factor_group:
    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
                marker=markers[factor], s=12**2)
ax.set_xlabel('TEST');
ax.set_ylabel('JPERF');
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-20-6329e9f7fab4> in <module>()
      1 try:
----> 2     jobtest_table = pd.read_table('jobtest.table')
      3 except:  # don't have data already

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)
    677 
--> 678         return _read(filepath_or_buffer, kwds)
    679 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    439     # Create the parser.
--> 440     parser = TextFileReader(filepath_or_buffer, **kwds)
    441 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    786 
--> 787         self._make_engine(self.engine)
    788 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1013         if engine == 'c':
-> 1014             self._engine = CParserWrapper(self.f, **self.options)
   1015         else:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1707 
-> 1708         self._reader = parsers.TextReader(src, **kwds)
   1709 

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: File b'jobtest.table' does not exist

During handling of the above exception, another exception occurred:

ConnectionRefusedError                    Traceback (most recent call last)
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1316                 h.request(req.get_method(), req.selector, req.data, headers,
-> 1317                           encode_chunked=req.has_header('Transfer-encoding'))
   1318             except OSError as err: # timeout error

/usr/lib/python3.7/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1228         """Send a complete request to the server."""
-> 1229         self._send_request(method, url, body, headers, encode_chunked)
   1230 

/usr/lib/python3.7/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1274             body = _encode(body, 'body')
-> 1275         self.endheaders(body, encode_chunked=encode_chunked)
   1276 

/usr/lib/python3.7/http/client.py in endheaders(self, message_body, encode_chunked)
   1223             raise CannotSendHeader()
-> 1224         self._send_output(message_body, encode_chunked=encode_chunked)
   1225 

/usr/lib/python3.7/http/client.py in _send_output(self, message_body, encode_chunked)
   1015         del self._buffer[:]
-> 1016         self.send(msg)
   1017 

/usr/lib/python3.7/http/client.py in send(self, data)
    955             if self.auto_open:
--> 956                 self.connect()
    957             else:

/usr/lib/python3.7/http/client.py in connect(self)
    927         self.sock = self._create_connection(
--> 928             (self.host,self.port), self.timeout, self.source_address)
    929         self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
    726     if err is not None:
--> 727         raise err
    728     else:

/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
    715                 sock.bind(source_address)
--> 716             sock.connect(sa)
    717             # Break explicitly a reference cycle

ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-20-6329e9f7fab4> in <module>()
      3 except:  # don't have data already
      4     url = 'http://stats191.stanford.edu/data/jobtest.table'
----> 5     jobtest_table = pd.read_table(url)
      6 
      7 factor_group = jobtest_table.groupby(['ETHN'])

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)
    676                     skip_blank_lines=skip_blank_lines)
    677 
--> 678         return _read(filepath_or_buffer, kwds)
    679 
    680     parser_f.__name__ = name

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    422     compression = _infer_compression(filepath_or_buffer, compression)
    423     filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
--> 424         filepath_or_buffer, encoding, compression)
    425     kwds['compression'] = compression
    426 

/usr/lib/python3/dist-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
    193 
    194     if _is_url(filepath_or_buffer):
--> 195         req = _urlopen(filepath_or_buffer)
    196         content_encoding = req.headers.get('Content-Encoding', None)
    197         if content_encoding == 'gzip':

/usr/lib/python3.7/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    220     else:
    221         opener = _opener
--> 222     return opener.open(url, data, timeout)
    223 
    224 def install_opener(opener):

/usr/lib/python3.7/urllib/request.py in open(self, fullurl, data, timeout)
    523             req = meth(req)
    524 
--> 525         response = self._open(req, data)
    526 
    527         # post-process response

/usr/lib/python3.7/urllib/request.py in _open(self, req, data)
    541         protocol = req.type
    542         result = self._call_chain(self.handle_open, protocol, protocol +
--> 543                                   '_open', req)
    544         if result:
    545             return result

/usr/lib/python3.7/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    501         for handler in handlers:
    502             func = getattr(handler, meth_name)
--> 503             result = func(*args)
    504             if result is not None:
    505                 return result

/usr/lib/python3.7/urllib/request.py in http_open(self, req)
   1343 
   1344     def http_open(self, req):
-> 1345         return self.do_open(http.client.HTTPConnection, req)
   1346 
   1347     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1317                           encode_chunked=req.has_header('Transfer-encoding'))
   1318             except OSError as err: # timeout error
-> 1319                 raise URLError(err)
   1320             r = h.getresponse()
   1321         except:

URLError: <urlopen error [Errno 111] Connection refused>
In [21]:
# Baseline model: JPERF on TEST alone, with no ETHN term.
min_lm = ols('JPERF ~ TEST', data=jobtest_table).fit()
print(min_lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-21-2dda11467a64> in <module>()
----> 1 min_lm = ols('JPERF ~ TEST', data=jobtest_table).fit()
      2 print(min_lm.summary())

NameError: name 'jobtest_table' is not defined
In [22]:
# JPERF against TEST by ETHN group, with the TEST-only fitted line overlaid.
fig, ax = plt.subplots(figsize=(6, 6));
for ethn, rows in factor_group:
    ax.scatter(
        rows['TEST'],
        rows['JPERF'],
        s=144,
        color=colors[ethn],
        marker=markers[ethn],
    )

ax.set_xlabel('TEST')
ax.set_ylabel('JPERF')
fig = abline_plot(model_results=min_lm, ax=ax)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-22-3bca97a45134> in <module>()
      1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5 

NameError: name 'factor_group' is not defined
In [23]:
# Common intercept, with a TEST:ETHN term so the TEST slope can differ by ETHN.
min_lm2 = ols('JPERF ~ TEST + TEST:ETHN',
        data=jobtest_table).fit()

print(min_lm2.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-23-ad7cfc5a299c> in <module>()
      1 min_lm2 = ols('JPERF ~ TEST + TEST:ETHN',
----> 2         data=jobtest_table).fit()
      3 
      4 print(min_lm2.summary())

NameError: name 'jobtest_table' is not defined
In [24]:
# Scatter by ETHN group with the two fitted lines of the slope-only model:
# shared intercept, slope shifted by the TEST:ETHN coefficient for the green line.
fig, ax = plt.subplots(figsize=(6, 6));
for ethn, rows in factor_group:
    ax.scatter(
        rows['TEST'],
        rows['JPERF'],
        s=144,
        color=colors[ethn],
        marker=markers[ethn],
    )

shared_intercept = min_lm2.params['Intercept']
base_slope = min_lm2.params['TEST']
fig = abline_plot(intercept=shared_intercept, slope=base_slope,
                  ax=ax, color='purple');
fig = abline_plot(intercept=shared_intercept,
                  slope=base_slope + min_lm2.params['TEST:ETHN'],
                  ax=ax, color='green');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-24-f7126666bc31> in <module>()
      1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5 

NameError: name 'factor_group' is not defined
In [25]:
# Common TEST slope, with an ETHN term so the intercept can differ by ETHN.
min_lm3 = ols('JPERF ~ TEST + ETHN', data = jobtest_table).fit()
print(min_lm3.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-25-26270b8e0d6d> in <module>()
----> 1 min_lm3 = ols('JPERF ~ TEST + ETHN', data = jobtest_table).fit()
      2 print(min_lm3.summary())

NameError: name 'jobtest_table' is not defined
In [26]:
# Scatter by ETHN group with the two fitted lines of the intercept-only model:
# shared slope, intercept shifted by the ETHN coefficient for the green line.
fig, ax = plt.subplots(figsize=(6, 6));
for ethn, rows in factor_group:
    ax.scatter(
        rows['TEST'],
        rows['JPERF'],
        s=144,
        color=colors[ethn],
        marker=markers[ethn],
    )

base_intercept = min_lm3.params['Intercept']
shared_slope = min_lm3.params['TEST']
fig = abline_plot(intercept=base_intercept, slope=shared_slope,
                  ax=ax, color='purple');
fig = abline_plot(intercept=base_intercept + min_lm3.params['ETHN'],
                  slope=shared_slope, ax=ax, color='green');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-26-5bc7e2ba2d68> in <module>()
      1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5 

NameError: name 'factor_group' is not defined
In [27]:
# Full model: TEST * ETHN lets both the intercept and the TEST slope differ by ETHN.
min_lm4 = ols('JPERF ~ TEST * ETHN', data = jobtest_table).fit()
print(min_lm4.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-27-0cc0b240e06d> in <module>()
----> 1 min_lm4 = ols('JPERF ~ TEST * ETHN', data = jobtest_table).fit()
      2 print(min_lm4.summary())

NameError: name 'jobtest_table' is not defined
In [28]:
# Scatter by ETHN group with the two fitted lines of the full model: the green
# line shifts both intercept (ETHN) and slope (TEST:ETHN).
fig, ax = plt.subplots(figsize=(8, 6));
for ethn, rows in factor_group:
    ax.scatter(
        rows['TEST'],
        rows['JPERF'],
        s=144,
        color=colors[ethn],
        marker=markers[ethn],
    )

base_intercept = min_lm4.params['Intercept']
base_slope = min_lm4.params['TEST']
fig = abline_plot(intercept=base_intercept, slope=base_slope,
                  ax=ax, color='purple');
fig = abline_plot(intercept=base_intercept + min_lm4.params['ETHN'],
                  slope=base_slope + min_lm4.params['TEST:ETHN'],
                  ax=ax, color='green');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-28-10026649b950> in <module>()
      1 fig, ax = plt.subplots(figsize=(8,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5 

NameError: name 'factor_group' is not defined
In [29]:
# is there any effect of ETHN on slope or intercept?
# Compares min_lm against min_lm4 (the 'JPERF ~ TEST * ETHN' fit) with anova_lm.
# NOTE(review): min_lm is fitted in an earlier cell, presumably without any
# ETHN terms -- confirm.
table5 = anova_lm(min_lm, min_lm4)
print(table5)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-29-647f7d87fe70> in <module>()
      1 # is there any effect of ETHN on slope or intercept?
----> 2 table5 = anova_lm(min_lm, min_lm4)
      3 print(table5)

NameError: name 'min_lm' is not defined
In [30]:
# is there any effect of ETHN on intercept
# Compares min_lm against min_lm3 (the 'JPERF ~ TEST + ETHN' fit) with anova_lm.
# NOTE(review): min_lm is fitted in an earlier cell, presumably without any
# ETHN terms -- confirm.
table6 = anova_lm(min_lm, min_lm3)
print(table6)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-30-39645dd00862> in <module>()
      1 # is there any effect of ETHN on intercept
----> 2 table6 = anova_lm(min_lm, min_lm3)
      3 print(table6)

NameError: name 'min_lm' is not defined
In [31]:
# is there any effect of ETHN on slope
# Compares min_lm against min_lm2 with anova_lm.
# NOTE(review): both models are fitted in earlier cells; presumably min_lm2
# adds only a TEST:ETHN term to min_lm -- confirm.
table7 = anova_lm(min_lm, min_lm2)
print(table7)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-31-e0085d24d296> in <module>()
      1 # is there any effect of ETHN on slope
----> 2 table7 = anova_lm(min_lm, min_lm2)
      3 print(table7)

NameError: name 'min_lm' is not defined
In [32]:
# is it just the slope or both?
# Compares min_lm2 against min_lm4 (the 'JPERF ~ TEST * ETHN' fit) with anova_lm.
# NOTE(review): min_lm2 is fitted in an earlier cell; presumably it lacks the
# ETHN main effect that min_lm4 includes -- confirm.
table8 = anova_lm(min_lm2, min_lm4)
print(table8)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-32-a9a6172af9d0> in <module>()
      1 # is it just the slope or both?
----> 2 table8 = anova_lm(min_lm2, min_lm4)
      3 print(table8)

NameError: name 'min_lm2' is not defined

One-way ANOVA

In [33]:
# Load the rehab data, preferring a local cached copy and falling back to
# the course website when the cache is missing.
try:
    rehab_table = pd.read_csv('rehab.table')
except (IOError, OSError):
    # Only a missing/unreadable cache triggers the download; any other error
    # (e.g. a parse failure) is surfaced instead of being silently swallowed.
    url = 'http://stats191.stanford.edu/data/rehab.csv'
    rehab_table = pd.read_csv(url)
    # index=False keeps the cache identical to the downloaded table, so a
    # later read_csv('rehab.table') does not gain an extra 'Unnamed: 0' column.
    rehab_table.to_csv('rehab.table', index=False)

# Box plot of Time grouped by Fitness. The return value of boxplot is not
# assigned so that `fig` keeps referring to the Figure created here.
fig, ax = plt.subplots(figsize=(8,6))
rehab_table.boxplot(column='Time', by='Fitness', ax=ax, grid=False);
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-33-d3ea2b9d3e10> in <module>()
      1 try:
----> 2     rehab_table = pd.read_csv('rehab.table')
      3 except:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)
    677 
--> 678         return _read(filepath_or_buffer, kwds)
    679 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    439     # Create the parser.
--> 440     parser = TextFileReader(filepath_or_buffer, **kwds)
    441 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    786 
--> 787         self._make_engine(self.engine)
    788 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1013         if engine == 'c':
-> 1014             self._engine = CParserWrapper(self.f, **self.options)
   1015         else:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1707 
-> 1708         self._reader = parsers.TextReader(src, **kwds)
   1709 

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: File b'rehab.table' does not exist

During handling of the above exception, another exception occurred:

ConnectionRefusedError                    Traceback (most recent call last)
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1316                 h.request(req.get_method(), req.selector, req.data, headers,
-> 1317                           encode_chunked=req.has_header('Transfer-encoding'))
   1318             except OSError as err: # timeout error

/usr/lib/python3.7/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1228         """Send a complete request to the server."""
-> 1229         self._send_request(method, url, body, headers, encode_chunked)
   1230 

/usr/lib/python3.7/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1274             body = _encode(body, 'body')
-> 1275         self.endheaders(body, encode_chunked=encode_chunked)
   1276 

/usr/lib/python3.7/http/client.py in endheaders(self, message_body, encode_chunked)
   1223             raise CannotSendHeader()
-> 1224         self._send_output(message_body, encode_chunked=encode_chunked)
   1225 

/usr/lib/python3.7/http/client.py in _send_output(self, message_body, encode_chunked)
   1015         del self._buffer[:]
-> 1016         self.send(msg)
   1017 

/usr/lib/python3.7/http/client.py in send(self, data)
    955             if self.auto_open:
--> 956                 self.connect()
    957             else:

/usr/lib/python3.7/http/client.py in connect(self)
    927         self.sock = self._create_connection(
--> 928             (self.host,self.port), self.timeout, self.source_address)
    929         self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
    726     if err is not None:
--> 727         raise err
    728     else:

/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
    715                 sock.bind(source_address)
--> 716             sock.connect(sa)
    717             # Break explicitly a reference cycle

ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-33-d3ea2b9d3e10> in <module>()
      3 except:
      4     url = 'http://stats191.stanford.edu/data/rehab.csv'
----> 5     rehab_table = pd.read_table(url, delimiter=",")
      6     rehab_table.to_csv('rehab.table')
      7 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)
    676                     skip_blank_lines=skip_blank_lines)
    677 
--> 678         return _read(filepath_or_buffer, kwds)
    679 
    680     parser_f.__name__ = name

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    422     compression = _infer_compression(filepath_or_buffer, compression)
    423     filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
--> 424         filepath_or_buffer, encoding, compression)
    425     kwds['compression'] = compression
    426 

/usr/lib/python3/dist-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
    193 
    194     if _is_url(filepath_or_buffer):
--> 195         req = _urlopen(filepath_or_buffer)
    196         content_encoding = req.headers.get('Content-Encoding', None)
    197         if content_encoding == 'gzip':

/usr/lib/python3.7/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    220     else:
    221         opener = _opener
--> 222     return opener.open(url, data, timeout)
    223 
    224 def install_opener(opener):

/usr/lib/python3.7/urllib/request.py in open(self, fullurl, data, timeout)
    523             req = meth(req)
    524 
--> 525         response = self._open(req, data)
    526 
    527         # post-process response

/usr/lib/python3.7/urllib/request.py in _open(self, req, data)
    541         protocol = req.type
    542         result = self._call_chain(self.handle_open, protocol, protocol +
--> 543                                   '_open', req)
    544         if result:
    545             return result

/usr/lib/python3.7/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    501         for handler in handlers:
    502             func = getattr(handler, meth_name)
--> 503             result = func(*args)
    504             if result is not None:
    505                 return result

/usr/lib/python3.7/urllib/request.py in http_open(self, req)
   1343 
   1344     def http_open(self, req):
-> 1345         return self.do_open(http.client.HTTPConnection, req)
   1346 
   1347     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1317                           encode_chunked=req.has_header('Transfer-encoding'))
   1318             except OSError as err: # timeout error
-> 1319                 raise URLError(err)
   1320             r = h.getresponse()
   1321         except:

URLError: <urlopen error [Errno 111] Connection refused>
In [34]:
# One-way ANOVA: Time modelled on the categorical factor Fitness.
rehab_lm = ols(formula='Time ~ C(Fitness)', data=rehab_table).fit()
table9 = anova_lm(rehab_lm)

# Show the ANOVA table, a blank line, then the design matrix stored on the model.
print(table9, end='\n\n')
print(rehab_lm.model.data.orig_exog)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-34-d3bb1b06817c> in <module>()
----> 1 rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()
      2 table9 = anova_lm(rehab_lm)
      3 print(table9)
      4 
      5 print(rehab_lm.model.data.orig_exog)

NameError: name 'rehab_table' is not defined
In [35]:
# Full regression summary for rehab_lm (the 'Time ~ C(Fitness)' fit).
print(rehab_lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-35-99d31a5bc5c4> in <module>()
----> 1 print(rehab_lm.summary())

NameError: name 'rehab_lm' is not defined

Two-way ANOVA

In [36]:
# Load the kidney data from a local copy if present, otherwise from the
# course website. Both paths parse the file as whitespace-delimited so a
# local copy of the same file yields the same columns as the remote read.
try:
    kidney_table = pd.read_csv('./kidney.table', sep=r'\s+')
except (IOError, OSError):
    # Only a missing/unreadable local file triggers the download; other
    # errors are surfaced instead of being silently swallowed.
    url = 'http://stats191.stanford.edu/data/kidney.table'
    kidney_table = pd.read_csv(url, sep=r'\s+')
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-36-cbc31ddb699c> in <module>()
      1 try:
----> 2     kidney_table = pd.read_table('./kidney.table')
      3 except:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)
    677 
--> 678         return _read(filepath_or_buffer, kwds)
    679 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    439     # Create the parser.
--> 440     parser = TextFileReader(filepath_or_buffer, **kwds)
    441 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    786 
--> 787         self._make_engine(self.engine)
    788 

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1013         if engine == 'c':
-> 1014             self._engine = CParserWrapper(self.f, **self.options)
   1015         else:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1707 
-> 1708         self._reader = parsers.TextReader(src, **kwds)
   1709 

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: File b'./kidney.table' does not exist

During handling of the above exception, another exception occurred:

ConnectionRefusedError                    Traceback (most recent call last)
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1316                 h.request(req.get_method(), req.selector, req.data, headers,
-> 1317                           encode_chunked=req.has_header('Transfer-encoding'))
   1318             except OSError as err: # timeout error

/usr/lib/python3.7/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1228         """Send a complete request to the server."""
-> 1229         self._send_request(method, url, body, headers, encode_chunked)
   1230 

/usr/lib/python3.7/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1274             body = _encode(body, 'body')
-> 1275         self.endheaders(body, encode_chunked=encode_chunked)
   1276 

/usr/lib/python3.7/http/client.py in endheaders(self, message_body, encode_chunked)
   1223             raise CannotSendHeader()
-> 1224         self._send_output(message_body, encode_chunked=encode_chunked)
   1225 

/usr/lib/python3.7/http/client.py in _send_output(self, message_body, encode_chunked)
   1015         del self._buffer[:]
-> 1016         self.send(msg)
   1017 

/usr/lib/python3.7/http/client.py in send(self, data)
    955             if self.auto_open:
--> 956                 self.connect()
    957             else:

/usr/lib/python3.7/http/client.py in connect(self)
    927         self.sock = self._create_connection(
--> 928             (self.host,self.port), self.timeout, self.source_address)
    929         self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
    726     if err is not None:
--> 727         raise err
    728     else:

/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
    715                 sock.bind(source_address)
--> 716             sock.connect(sa)
    717             # Break explicitly a reference cycle

ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-36-cbc31ddb699c> in <module>()
      3 except:
      4     url = 'http://stats191.stanford.edu/data/kidney.table'
----> 5     kidney_table = pd.read_csv(url, delim_whitespace=True)

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)
    676                     skip_blank_lines=skip_blank_lines)
    677 
--> 678         return _read(filepath_or_buffer, kwds)
    679 
    680     parser_f.__name__ = name

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    422     compression = _infer_compression(filepath_or_buffer, compression)
    423     filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
--> 424         filepath_or_buffer, encoding, compression)
    425     kwds['compression'] = compression
    426 

/usr/lib/python3/dist-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
    193 
    194     if _is_url(filepath_or_buffer):
--> 195         req = _urlopen(filepath_or_buffer)
    196         content_encoding = req.headers.get('Content-Encoding', None)
    197         if content_encoding == 'gzip':

/usr/lib/python3.7/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    220     else:
    221         opener = _opener
--> 222     return opener.open(url, data, timeout)
    223 
    224 def install_opener(opener):

/usr/lib/python3.7/urllib/request.py in open(self, fullurl, data, timeout)
    523             req = meth(req)
    524 
--> 525         response = self._open(req, data)
    526 
    527         # post-process response

/usr/lib/python3.7/urllib/request.py in _open(self, req, data)
    541         protocol = req.type
    542         result = self._call_chain(self.handle_open, protocol, protocol +
--> 543                                   '_open', req)
    544         if result:
    545             return result

/usr/lib/python3.7/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    501         for handler in handlers:
    502             func = getattr(handler, meth_name)
--> 503             result = func(*args)
    504             if result is not None:
    505                 return result

/usr/lib/python3.7/urllib/request.py in http_open(self, req)
   1343 
   1344     def http_open(self, req):
-> 1345         return self.do_open(http.client.HTTPConnection, req)
   1346 
   1347     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1317                           encode_chunked=req.has_header('Transfer-encoding'))
   1318             except OSError as err: # timeout error
-> 1319                 raise URLError(err)
   1320             r = h.getresponse()
   1321         except:

URLError: <urlopen error [Errno 111] Connection refused>

Explore the dataset

In [37]:
# Display the first ten rows of the kidney data.
kidney_table.head(10)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-37-fff8acd40403> in <module>()
----> 1 kidney_table.head(10)

NameError: name 'kidney_table' is not defined

Balanced panel

In [38]:
# Short alias used by the model-fitting cells that follow.
kt = kidney_table

# Interaction plot of log(Days + 1) against Weight, one trace per Duration,
# drawn on the axes of a freshly created 8x6 figure.
fig = plt.figure(figsize=(8, 6))
fig = interaction_plot(
    kt['Weight'],
    kt['Duration'],
    np.log(kt['Days'] + 1),
    colors=['red', 'blue'],
    markers=['D', '^'],
    ms=10,
    ax=fig.gca(),
)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-38-9312bae60782> in <module>()
----> 1 kt = kidney_table
      2 plt.figure(figsize=(8,6))
      3 fig = interaction_plot(kt['Weight'], kt['Duration'], np.log(kt['Days']+1),
      4         colors=['red', 'blue'], markers=['D','^'], ms=10, ax=plt.gca())

NameError: name 'kidney_table' is not defined

Names available in the calling namespace (such as `np`) are also available in the formula evaluation namespace.

In [39]:
# Full two-way model with the Duration x Weight interaction.
kidney_lm = ols(formula='np.log(Days+1) ~ C(Duration) * C(Weight)',
                data=kt).fit()

table10 = anova_lm(kidney_lm)

# Additive model versus the full model.
print(anova_lm(
    ols('np.log(Days+1) ~ C(Duration) + C(Weight)', data=kt).fit(),
    kidney_lm,
))

# Each single-factor model versus the additive model (Sum coding on Weight).
for single_factor in ('C(Duration)', 'C(Weight)'):
    print(anova_lm(
        ols('np.log(Days+1) ~ ' + single_factor, data=kt).fit(),
        ols('np.log(Days+1) ~ C(Duration) + C(Weight, Sum)', data=kt).fit(),
    ))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-39-c7e1132390fe> in <module>()
----> 1 kidney_lm = ols('np.log(Days+1) ~ C(Duration) * C(Weight)', data=kt).fit()
      2 
      3 table10 = anova_lm(kidney_lm)
      4 
      5 print(anova_lm(ols('np.log(Days+1) ~ C(Duration) + C(Weight)',

NameError: name 'kt' is not defined

Sum of squares

Illustrates the use of different types of sums of squares (I, II, III) and how the Sum contrast can be used to produce the same output across all three.

Types I and II are equivalent under a balanced design.

Don't use Type III with a non-orthogonal contrast, e.g. Treatment.

In [40]:
# Interaction model with Sum contrasts on both factors.
sum_lm = ols(formula='np.log(Days+1) ~ C(Duration, Sum) * C(Weight, Sum)',
             data=kt).fit()

# Print the Type I, II and III tables in turn (typ=1 is anova_lm's default).
for ss_type in (1, 2, 3):
    print(anova_lm(sum_lm, typ=ss_type))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-40-e0c1ed608c29> in <module>()
      1 sum_lm = ols('np.log(Days+1) ~ C(Duration, Sum) * C(Weight, Sum)',
----> 2             data=kt).fit()
      3 
      4 print(anova_lm(sum_lm))
      5 print(anova_lm(sum_lm, typ=2))

NameError: name 'kt' is not defined
In [41]:
# Same interaction model, but with Treatment contrasts on both factors.
nosum_lm = ols(
    formula='np.log(Days+1) ~ C(Duration, Treatment) * C(Weight, Treatment)',
    data=kt,
).fit()

# Print the Type I, II and III tables in turn (typ=1 is anova_lm's default).
for ss_type in (1, 2, 3):
    print(anova_lm(nosum_lm, typ=ss_type))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-41-95381847ac17> in <module>()
      1 nosum_lm = ols('np.log(Days+1) ~ C(Duration, Treatment) * C(Weight, Treatment)',
----> 2             data=kt).fit()
      3 print(anova_lm(nosum_lm))
      4 print(anova_lm(nosum_lm, typ=2))
      5 print(anova_lm(nosum_lm, typ=3))

NameError: name 'kt' is not defined