Interactions and ANOVA

Note: This script is based heavily on Jonathan Taylor’s class notes http://www.stanford.edu/class/stats191/interactions.html

Download and format data:

[1]:
%matplotlib inline
[2]:
from urllib.request import urlopen
import numpy as np
np.set_printoptions(precision=4, suppress=True)

import pandas as pd
pd.set_option("display.width", 100)
import matplotlib.pyplot as plt
from statsmodels.formula.api import ols
from statsmodels.graphics.api import interaction_plot, abline_plot
from statsmodels.stats.anova import anova_lm

# Load the salary data: use the locally cached copy when it exists, otherwise
# download it once and cache it for later runs.
try:
    salary_table = pd.read_csv('salary.table')
except FileNotFoundError:  # no cached copy yet -> fetch it from the course site
    url = 'http://stats191.stanford.edu/data/salary.table'
    # pandas reads URLs directly and manages the connection itself, so no
    # manually opened urlopen handle is left unclosed.
    salary_table = pd.read_table(url)
    # index=False keeps the row index out of the cache; otherwise reading it
    # back with read_csv would add a spurious "Unnamed: 0" column.
    salary_table.to_csv('salary.table', index=False)

# Column shortcuts used by the plotting cells below.
E = salary_table.E  # education level
M = salary_table.M  # management indicator
X = salary_table.X  # experience
S = salary_table.S  # salary
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-2-132ce9110058> in <module>
     12 try:
---> 13     salary_table = pd.read_csv('salary.table')
     14 except:  # recent pandas can read URL without urlopen

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    684
--> 685         return _read(filepath_or_buffer, kwds)
    686

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    456     # Create the parser.
--> 457     parser = TextFileReader(fp_or_buf, **kwds)
    458

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    894
--> 895         self._make_engine(self.engine)
    896

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1134         if engine == "c":
-> 1135             self._engine = CParserWrapper(self.f, **self.options)
   1136         else:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1916
-> 1917         self._reader = parsers.TextReader(src, **kwds)
   1918         self.unnamed_cols = self._reader.unnamed_cols

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: [Errno 2] File b'salary.table' does not exist: b'salary.table'

During handling of the above exception, another exception occurred:

ConnectionRefusedError                    Traceback (most recent call last)
/usr/lib/python3.8/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1318             try:
-> 1319                 h.request(req.get_method(), req.selector, req.data, headers,
   1320                           encode_chunked=req.has_header('Transfer-encoding'))

/usr/lib/python3.8/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1229         """Send a complete request to the server."""
-> 1230         self._send_request(method, url, body, headers, encode_chunked)
   1231

/usr/lib/python3.8/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1275             body = _encode(body, 'body')
-> 1276         self.endheaders(body, encode_chunked=encode_chunked)
   1277

/usr/lib/python3.8/http/client.py in endheaders(self, message_body, encode_chunked)
   1224             raise CannotSendHeader()
-> 1225         self._send_output(message_body, encode_chunked=encode_chunked)
   1226

/usr/lib/python3.8/http/client.py in _send_output(self, message_body, encode_chunked)
   1003         del self._buffer[:]
-> 1004         self.send(msg)
   1005

/usr/lib/python3.8/http/client.py in send(self, data)
    943             if self.auto_open:
--> 944                 self.connect()
    945             else:

/usr/lib/python3.8/http/client.py in connect(self)
    914         """Connect to the host and port specified in __init__."""
--> 915         self.sock = self._create_connection(
    916             (self.host,self.port), self.timeout, self.source_address)

/usr/lib/python3.8/socket.py in create_connection(address, timeout, source_address)
    807         try:
--> 808             raise err
    809         finally:

/usr/lib/python3.8/socket.py in create_connection(address, timeout, source_address)
    795                 sock.bind(source_address)
--> 796             sock.connect(sa)
    797             # Break explicitly a reference cycle

ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-2-132ce9110058> in <module>
     14 except:  # recent pandas can read URL without urlopen
     15     url = 'http://stats191.stanford.edu/data/salary.table'
---> 16     fh = urlopen(url)
     17     salary_table = pd.read_table(fh)
     18     salary_table.to_csv('salary.table')

/usr/lib/python3.8/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    220     else:
    221         opener = _opener
--> 222     return opener.open(url, data, timeout)
    223
    224 def install_opener(opener):

/usr/lib/python3.8/urllib/request.py in open(self, fullurl, data, timeout)
    523
    524         sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
--> 525         response = self._open(req, data)
    526
    527         # post-process response

/usr/lib/python3.8/urllib/request.py in _open(self, req, data)
    540
    541         protocol = req.type
--> 542         result = self._call_chain(self.handle_open, protocol, protocol +
    543                                   '_open', req)
    544         if result:

/usr/lib/python3.8/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    500         for handler in handlers:
    501             func = getattr(handler, meth_name)
--> 502             result = func(*args)
    503             if result is not None:
    504                 return result

/usr/lib/python3.8/urllib/request.py in http_open(self, req)
   1346
   1347     def http_open(self, req):
-> 1348         return self.do_open(http.client.HTTPConnection, req)
   1349
   1350     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python3.8/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1320                           encode_chunked=req.has_header('Transfer-encoding'))
   1321             except OSError as err: # timeout error
-> 1322                 raise URLError(err)
   1323             r = h.getresponse()
   1324         except:

URLError: <urlopen error [Errno 111] Connection refused>

Take a look at the data:

[3]:
# Scatter salary against experience, one marker/colour per (E, M) group.
symbols = ['D', '^']          # looked up by the M value of the group
colors = ['r', 'g', 'blue']   # looked up by the E value of the group, minus one
factor_groups = salary_table.groupby(['E', 'M'])

plt.figure(figsize=(6, 6))
for (edu, mgmt), members in factor_groups:
    plt.scatter(members['X'], members['S'], s=144,
                marker=symbols[mgmt], color=colors[edu - 1])
plt.xlabel('Experience');
plt.ylabel('Salary');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-06b5e3c35d99> in <module>
      2 symbols = ['D', '^']
      3 colors = ['r', 'g', 'blue']
----> 4 factor_groups = salary_table.groupby(['E','M'])
      5 for values, group in factor_groups:
      6     i,j = values

NameError: name 'salary_table' is not defined
<Figure size 432x432 with 0 Axes>

Fit a linear model:

[4]:
# Additive model: salary on categorical E, categorical M and experience X.
formula = 'S ~ C(E) + C(M) + X'
lm = ols(formula=formula, data=salary_table).fit()
print(lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-bd1483e9558c> in <module>
      1 formula = 'S ~ C(E) + C(M) + X'
----> 2 lm = ols(formula, salary_table).fit()
      3 print(lm.summary())

NameError: name 'salary_table' is not defined

Have a look at the created design matrix:

[5]:
# First five rows of the design matrix that the formula produced for `lm`.
lm.model.exog[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-15c89faef8a8> in <module>
----> 1 lm.model.exog[:5]

NameError: name 'lm' is not defined

Or since we initially passed in a DataFrame, we have a DataFrame available in

[6]:
# First five rows of the design matrix kept in DataFrame form, available
# because a DataFrame was passed to ols.
lm.model.data.orig_exog[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-136b4afd409d> in <module>
----> 1 lm.model.data.orig_exog[:5]

NameError: name 'lm' is not defined

We keep a reference to the original untouched data in

[7]:
# First five rows of the original, untouched data the model was fitted on.
lm.model.data.frame[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-7-45902db7bdbe> in <module>
----> 1 lm.model.data.frame[:5]

NameError: name 'lm' is not defined

Influence statistics

[8]:
# Influence diagnostics for the additive model; `infl` is reused by the next
# cell to build a DataFrame of the same measures.
infl = lm.get_influence()
print(infl.summary_table())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-cf91966f823d> in <module>
----> 1 infl = lm.get_influence()
      2 print(infl.summary_table())

NameError: name 'lm' is not defined

or get a dataframe

[9]:
# Influence measures of `infl` as a DataFrame instead of a printed table.
df_infl = infl.summary_frame()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-4a89b3b617d2> in <module>
----> 1 df_infl = infl.summary_frame()

NameError: name 'infl' is not defined
[10]:
# Show only the first five rows of the influence DataFrame.
df_infl[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-c7ae236e6fec> in <module>
----> 1 df_infl[:5]

NameError: name 'df_infl' is not defined

Now plot the residuals within the groups separately:

[11]:
# Residuals of the additive model, drawn in one column per (E, M) group.
resid = lm.resid
plt.figure(figsize=(6, 6));
for (edu, mgmt), members in factor_groups:
    # Give every (E, M) combination its own x position, for plotting only.
    slot = 2 * edu + mgmt - 1
    plt.scatter([slot] * len(members), resid[members.index],
                s=144, edgecolors='black',
                marker=symbols[mgmt], color=colors[edu - 1])
plt.xlabel('Group');
plt.ylabel('Residuals');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-11-b1fbd2073437> in <module>
----> 1 resid = lm.resid
      2 plt.figure(figsize=(6,6));
      3 for values, group in factor_groups:
      4     i,j = values
      5     group_num = i*2 + j - 1  # for plotting purposes

NameError: name 'lm' is not defined

Now we will test some interactions using anova or f_test

[12]:
# Same model as before plus an education-by-experience interaction.
interX_lm = ols(formula="S ~ C(E) * X + C(M)", data=salary_table).fit()
print(interX_lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-12-63ad3de263bb> in <module>
----> 1 interX_lm = ols("S ~ C(E) * X + C(M)", salary_table).fit()
      2 print(interX_lm.summary())

NameError: name 'salary_table' is not defined

Do an ANOVA check

[13]:
# `anova_lm` is already imported in the notebook's import cell, so it is not
# re-imported here.

# Does adding the education-by-experience interaction improve on the additive model?
table1 = anova_lm(lm, interX_lm)
print(table1)

# Alternative extension: education-by-management interaction.
interM_lm = ols("S ~ X + C(E)*C(M)", data=salary_table).fit()
print(interM_lm.summary())

# Does adding the education-by-management interaction improve on the additive model?
table2 = anova_lm(lm, interM_lm)
print(table2)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-13-99371545e58d> in <module>
      1 from statsmodels.stats.api import anova_lm
      2
----> 3 table1 = anova_lm(lm, interX_lm)
      4 print(table1)
      5

NameError: name 'lm' is not defined

The design matrix as a DataFrame

[14]:
# First five rows of the E*M interaction model's design matrix, as a DataFrame.
interM_lm.model.data.orig_exog[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-14-1dd5303dacd2> in <module>
----> 1 interM_lm.model.data.orig_exog[:5]

NameError: name 'interM_lm' is not defined

The design matrix as an ndarray

[15]:
# A notebook cell only displays its last expression, so print the column
# names explicitly and leave the ndarray as the displayed value. Only the
# first five rows are shown, matching the other design-matrix cells.
print(interM_lm.model.exog_names)
interM_lm.model.exog[:5]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-15-127a7082e299> in <module>
----> 1 interM_lm.model.exog
      2 interM_lm.model.exog_names

NameError: name 'interM_lm' is not defined
[16]:
# Internally studentized residuals of the E*M interaction model against
# experience, styled per (E, M) group. `resid` is read again by the
# outlier-removal cell that follows.
infl = interM_lm.get_influence()
resid = infl.resid_studentized_internal
plt.figure(figsize=(6, 6))
for (edu, mgmt), members in factor_groups:
    rows = members.index
    plt.scatter(X[rows], resid[rows], s=144, edgecolors='black',
                marker=symbols[mgmt], color=colors[edu - 1])
plt.xlabel('X');
plt.ylabel('standardized resids');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-16-4a4b0ea23d42> in <module>
----> 1 infl = interM_lm.get_influence()
      2 resid = infl.resid_studentized_internal
      3 plt.figure(figsize=(6,6))
      4 for values, group in factor_groups:
      5     i,j = values

NameError: name 'interM_lm' is not defined

Looks like one observation is an outlier.

[17]:
# Find the observation with the largest absolute internally studentized
# residual in the E*M interaction model. It is computed straight from the
# fitted model rather than from the global `resid`, which other cells rebind,
# so this cell gives the same answer no matter which plotting cell ran last.
drop_idx = np.abs(interM_lm.get_influence().resid_studentized_internal).argmax()
print(drop_idx)  # zero-based index
idx = salary_table.index.drop(drop_idx)

# Refit the additive model without the outlier.
lm32 = ols('S ~ C(E) + X + C(M)', data=salary_table, subset=idx).fit()

print(lm32.summary())
print('\n')

# Refit the education-by-experience interaction model without the outlier.
interX_lm32 = ols('S ~ C(E) * X + C(M)', data=salary_table, subset=idx).fit()

print(interX_lm32.summary())
print('\n')


# Additive vs. education-by-experience interaction, outlier removed.
table3 = anova_lm(lm32, interX_lm32)
print(table3)
print('\n')


# Refit the education-by-management interaction model without the outlier.
interM_lm32 = ols('S ~ X + C(E) * C(M)', data=salary_table, subset=idx).fit()

# Additive vs. education-by-management interaction, outlier removed.
table4 = anova_lm(lm32, interM_lm32)
print(table4)
print('\n')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-17-b012081228e7> in <module>
----> 1 drop_idx = abs(resid).argmax()
      2 print(drop_idx)  # zero-based index
      3 idx = salary_table.index.drop(drop_idx)
      4
      5 lm32 = ols('S ~ C(E) + X + C(M)', data=salary_table, subset=idx).fit()

NameError: name 'resid' is not defined

Replot the residuals

[18]:
# Standardized residuals of the outlier-free E*M model, aligned to the full
# index of X; the row left out of the fit has no residual and becomes NaN.
resid = (
    interM_lm32.get_influence()
    .summary_frame()['standard_resid']
    .reindex(X.index)
)

plt.figure(figsize=(6, 6))
for (edu, mgmt), members in factor_groups:
    rows = members.index
    plt.scatter(X.loc[rows], resid.loc[rows], s=144, edgecolors='black',
                marker=symbols[mgmt], color=colors[edu - 1])
plt.xlabel('X[~[32]]');
plt.ylabel('standardized resids');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-18-9611737d9172> in <module>
----> 1 resid = interM_lm32.get_influence().summary_frame()['standard_resid']
      2
      3 plt.figure(figsize=(6,6))
      4 resid = resid.reindex(X.index)
      5 for values, group in factor_groups:

NameError: name 'interM_lm32' is not defined

Plot the fitted values

[19]:
# Final model: E*M interaction fitted with the flagged observation removed.
lm_final = ols(formula='S ~ X + C(E)*C(M)',
               data=salary_table.drop([drop_idx])).fit()
mf = lm_final.model.data.orig_exog
lstyle = ['-', '--']  # line style looked up by the M value of the group

plt.figure(figsize=(6, 6))
for (edu, mgmt), members in factor_groups:
    rows = members.index
    shade = colors[edu - 1]
    plt.scatter(X[rows], S[rows], s=144, edgecolors='black',
                marker=symbols[mgmt], color=shade)
    # The removed observation is not part of the final model, so reindexing
    # yields NaN for it; drop that entry before drawing the fitted line.
    fitted = lm_final.fittedvalues.reindex(rows).dropna()
    experience = mf.X.reindex(rows).dropna()
    plt.plot(experience, fitted, ls=lstyle[mgmt], color=shade)
plt.xlabel('Experience');
plt.ylabel('Salary');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-19-959e75ec2355> in <module>
----> 1 lm_final = ols('S ~ X + C(E)*C(M)', data = salary_table.drop([drop_idx])).fit()
      2 mf = lm_final.model.data.orig_exog
      3 lstyle = ['-','--']
      4
      5 plt.figure(figsize=(6,6))

NameError: name 'salary_table' is not defined

From our first look at the data, the difference between Master’s and PhD in the management group is different from that in the non-management group. This is an interaction between the two qualitative variables management, M, and education, E. We can visualize this by first removing the effect of experience, then plotting the means within each of the 6 groups using interaction_plot.

[20]:
# Salary with the linear experience effect (the X coefficient of the
# outlier-free E*X model) subtracted out.
U = S - X * interX_lm32.params['X']

# Mean of U within each (E, M) group. interaction_plot returns the Figure;
# the trailing semicolon stops the notebook from rendering it a second time
# as the cell's output value.
fig, ax = plt.subplots(figsize=(6, 6))
interaction_plot(E, M, U, colors=['red', 'blue'], markers=['^', 'D'],
                 markersize=10, ax=ax);
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-20-078f1a51de12> in <module>
----> 1 U = S - X * interX_lm32.params['X']
      2
      3 plt.figure(figsize=(6,6))
      4 interaction_plot(E, M, U, colors=['red','blue'], markers=['^','D'],
      5         markersize=10, ax=plt.gca())

NameError: name 'S' is not defined

Minority Employment Data

[21]:
# Load the job-test data from a local file when present, else from the web.
try:
    jobtest_table = pd.read_table('jobtest.table')
except FileNotFoundError:  # do not have data already
    url = 'http://stats191.stanford.edu/data/jobtest.table'
    jobtest_table = pd.read_table(url)

# Group by a scalar key, not a one-element list: with a list, pandas >= 2.0
# yields 1-tuples such as (0,) when iterating, which cannot index the
# colour/marker lists below.
factor_group = jobtest_table.groupby('MINORITY')

fig, ax = plt.subplots(figsize=(6, 6))
# NOTE: this rebinds `colors`, which the salary plots earlier in the notebook
# also use; rerun the cell that defines it before re-running those plots.
colors = ['purple', 'green']
markers = ['o', 'v']
# The MINORITY value of each group is used directly as a list index
# (assumes it is coded 0/1 -- TODO confirm against the data file).
for factor, group in factor_group:
    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
               marker=markers[factor], s=12**2)
ax.set_xlabel('TEST');
ax.set_ylabel('JPERF');
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-21-917f19c1181a> in <module>
      1 try:
----> 2     jobtest_table = pd.read_table('jobtest.table')
      3 except:  # do not have data already

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    684
--> 685         return _read(filepath_or_buffer, kwds)
    686

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    456     # Create the parser.
--> 457     parser = TextFileReader(fp_or_buf, **kwds)
    458

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    894
--> 895         self._make_engine(self.engine)
    896

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1134         if engine == "c":
-> 1135             self._engine = CParserWrapper(self.f, **self.options)
   1136         else:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1916
-> 1917         self._reader = parsers.TextReader(src, **kwds)
   1918         self.unnamed_cols = self._reader.unnamed_cols

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: [Errno 2] File b'jobtest.table' does not exist: b'jobtest.table'

During handling of the above exception, another exception occurred:

ConnectionRefusedError                    Traceback (most recent call last)
/usr/lib/python3.8/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1318             try:
-> 1319                 h.request(req.get_method(), req.selector, req.data, headers,
   1320                           encode_chunked=req.has_header('Transfer-encoding'))

/usr/lib/python3.8/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1229         """Send a complete request to the server."""
-> 1230         self._send_request(method, url, body, headers, encode_chunked)
   1231

/usr/lib/python3.8/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1275             body = _encode(body, 'body')
-> 1276         self.endheaders(body, encode_chunked=encode_chunked)
   1277

/usr/lib/python3.8/http/client.py in endheaders(self, message_body, encode_chunked)
   1224             raise CannotSendHeader()
-> 1225         self._send_output(message_body, encode_chunked=encode_chunked)
   1226

/usr/lib/python3.8/http/client.py in _send_output(self, message_body, encode_chunked)
   1003         del self._buffer[:]
-> 1004         self.send(msg)
   1005

/usr/lib/python3.8/http/client.py in send(self, data)
    943             if self.auto_open:
--> 944                 self.connect()
    945             else:

/usr/lib/python3.8/http/client.py in connect(self)
    914         """Connect to the host and port specified in __init__."""
--> 915         self.sock = self._create_connection(
    916             (self.host,self.port), self.timeout, self.source_address)

/usr/lib/python3.8/socket.py in create_connection(address, timeout, source_address)
    807         try:
--> 808             raise err
    809         finally:

/usr/lib/python3.8/socket.py in create_connection(address, timeout, source_address)
    795                 sock.bind(source_address)
--> 796             sock.connect(sa)
    797             # Break explicitly a reference cycle

ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-21-917f19c1181a> in <module>
      3 except:  # do not have data already
      4     url = 'http://stats191.stanford.edu/data/jobtest.table'
----> 5     jobtest_table = pd.read_table(url)
      6
      7 factor_group = jobtest_table.groupby(['MINORITY'])

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    683         )
    684
--> 685         return _read(filepath_or_buffer, kwds)
    686
    687     parser_f.__name__ = name

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    437     # though mypy handling of conditional imports is difficult.
    438     # See https://github.com/python/mypy/issues/1297
--> 439     fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
    440         filepath_or_buffer, encoding, compression
    441     )

/usr/lib/python3/dist-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
    194
    195     if _is_url(filepath_or_buffer):
--> 196         req = urlopen(filepath_or_buffer)
    197         content_encoding = req.headers.get("Content-Encoding", None)
    198         if content_encoding == "gzip":

/usr/lib/python3.8/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    220     else:
    221         opener = _opener
--> 222     return opener.open(url, data, timeout)
    223
    224 def install_opener(opener):

/usr/lib/python3.8/urllib/request.py in open(self, fullurl, data, timeout)
    523
    524         sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
--> 525         response = self._open(req, data)
    526
    527         # post-process response

/usr/lib/python3.8/urllib/request.py in _open(self, req, data)
    540
    541         protocol = req.type
--> 542         result = self._call_chain(self.handle_open, protocol, protocol +
    543                                   '_open', req)
    544         if result:

/usr/lib/python3.8/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    500         for handler in handlers:
    501             func = getattr(handler, meth_name)
--> 502             result = func(*args)
    503             if result is not None:
    504                 return result

/usr/lib/python3.8/urllib/request.py in http_open(self, req)
   1346
   1347     def http_open(self, req):
-> 1348         return self.do_open(http.client.HTTPConnection, req)
   1349
   1350     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python3.8/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1320                           encode_chunked=req.has_header('Transfer-encoding'))
   1321             except OSError as err: # timeout error
-> 1322                 raise URLError(err)
   1323             r = h.getresponse()
   1324         except:

URLError: <urlopen error [Errno 111] Connection refused>
[22]:
# Baseline: one regression line of JPERF on TEST, ignoring MINORITY.
min_lm = ols(formula='JPERF ~ TEST', data=jobtest_table).fit()
print(min_lm.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-22-2dda11467a64> in <module>
----> 1 min_lm = ols('JPERF ~ TEST', data=jobtest_table).fit()
      2 print(min_lm.summary())

NameError: name 'jobtest_table' is not defined
[23]:
# Data points per MINORITY group with the single fitted line of `min_lm`.
fig, ax = plt.subplots(figsize=(6, 6))
for minority, members in factor_group:
    ax.scatter(members['TEST'], members['JPERF'], s=144,
               color=colors[minority], marker=markers[minority])

ax.set_xlabel('TEST')
ax.set_ylabel('JPERF')
fig = abline_plot(model_results=min_lm, ax=ax)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-23-3bca97a45134> in <module>
      1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5

NameError: name 'factor_group' is not defined
../../../_images/examples_notebooks_generated_interactions_anova_41_1.png
[24]:
# Common intercept, with the TEST slope allowed to vary with MINORITY.
min_lm2 = ols(formula='JPERF ~ TEST + TEST:MINORITY', data=jobtest_table).fit()

print(min_lm2.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-24-6f5c2b95207a> in <module>
      1 min_lm2 = ols('JPERF ~ TEST + TEST:MINORITY',
----> 2         data=jobtest_table).fit()
      3
      4 print(min_lm2.summary())

NameError: name 'jobtest_table' is not defined
[25]:
# Data points per MINORITY group with the two lines implied by `min_lm2`:
# shared intercept, different slopes.
fig, ax = plt.subplots(figsize=(6, 6))
for minority, members in factor_group:
    ax.scatter(members['TEST'], members['JPERF'], s=144,
               color=colors[minority], marker=markers[minority])

params = min_lm2.params
# Line using the TEST slope alone.
fig = abline_plot(intercept=params['Intercept'], slope=params['TEST'],
                  ax=ax, color='purple');
# Line with the TEST:MINORITY term added to the slope.
fig = abline_plot(intercept=params['Intercept'],
                  slope=params['TEST'] + params['TEST:MINORITY'],
                  ax=ax, color='green');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-25-08231acf3acd> in <module>
      1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5

NameError: name 'factor_group' is not defined
../../../_images/examples_notebooks_generated_interactions_anova_43_1.png
[26]:
# Common slope, with the intercept allowed to shift with MINORITY.
min_lm3 = ols(formula='JPERF ~ TEST + MINORITY', data=jobtest_table).fit()
print(min_lm3.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-26-db2300cbbd0e> in <module>
----> 1 min_lm3 = ols('JPERF ~ TEST + MINORITY', data = jobtest_table).fit()
      2 print(min_lm3.summary())

NameError: name 'jobtest_table' is not defined
[27]:
# Data points per MINORITY group with the two lines implied by `min_lm3`:
# shared slope, different intercepts.
fig, ax = plt.subplots(figsize=(6, 6))
for minority, members in factor_group:
    ax.scatter(members['TEST'], members['JPERF'], s=144,
               color=colors[minority], marker=markers[minority])

params = min_lm3.params
# Line using the base intercept alone.
fig = abline_plot(intercept=params['Intercept'], slope=params['TEST'],
                  ax=ax, color='purple');
# Line with the MINORITY term added to the intercept.
fig = abline_plot(intercept=params['Intercept'] + params['MINORITY'],
                  slope=params['TEST'], ax=ax, color='green');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-27-772c42f45c76> in <module>
      1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5

NameError: name 'factor_group' is not defined
../../../_images/examples_notebooks_generated_interactions_anova_45_1.png
[28]:
# Full model: both intercept and TEST slope may vary with MINORITY.
min_lm4 = ols(formula='JPERF ~ TEST * MINORITY', data=jobtest_table).fit()
print(min_lm4.summary())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-28-ec41efc607af> in <module>
----> 1 min_lm4 = ols('JPERF ~ TEST * MINORITY', data = jobtest_table).fit()
      2 print(min_lm4.summary())

NameError: name 'jobtest_table' is not defined
[29]:
# Data points per MINORITY group with the two lines implied by `min_lm4`:
# different intercepts and different slopes.
fig, ax = plt.subplots(figsize=(8, 6))
for minority, members in factor_group:
    ax.scatter(members['TEST'], members['JPERF'], s=144,
               color=colors[minority], marker=markers[minority])

params = min_lm4.params
# Line using the base intercept and the TEST slope alone.
fig = abline_plot(intercept=params['Intercept'], slope=params['TEST'],
                  ax=ax, color='purple');
# Line with the MINORITY and TEST:MINORITY terms added.
fig = abline_plot(intercept=params['Intercept'] + params['MINORITY'],
                  slope=params['TEST'] + params['TEST:MINORITY'],
                  ax=ax, color='green');
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-29-636fa8a37910> in <module>
      1 fig, ax = plt.subplots(figsize=(8,6));
----> 2 for factor, group in factor_group:
      3     ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
      4                 marker=markers[factor], s=12**2)
      5

NameError: name 'factor_group' is not defined
../../../_images/examples_notebooks_generated_interactions_anova_47_1.png
[30]:
# Is there any effect of MINORITY on slope or intercept?
# min_lm is compared against min_lm4.
compared_models = (min_lm, min_lm4)
table5 = anova_lm(*compared_models)
print(table5)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-30-fbfd21e1cee1> in <module>
      1 # is there any effect of MINORITY on slope or intercept?
----> 2 table5 = anova_lm(min_lm, min_lm4)
      3 print(table5)

NameError: name 'min_lm' is not defined
[31]:
# Is there any effect of MINORITY on the intercept?
# min_lm is compared against min_lm3.
compared_models = (min_lm, min_lm3)
table6 = anova_lm(*compared_models)
print(table6)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-31-8194ba41b1ee> in <module>
      1 # is there any effect of MINORITY on intercept
----> 2 table6 = anova_lm(min_lm, min_lm3)
      3 print(table6)

NameError: name 'min_lm' is not defined
[32]:
# Is there any effect of MINORITY on the slope?
# min_lm is compared against min_lm2.
compared_models = (min_lm, min_lm2)
table7 = anova_lm(*compared_models)
print(table7)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-32-b4c0ae44d20e> in <module>
      1 # is there any effect of MINORITY on slope
----> 2 table7 = anova_lm(min_lm, min_lm2)
      3 print(table7)

NameError: name 'min_lm' is not defined
[33]:
# Is it just the slope, or both slope and intercept?
# min_lm2 is compared against min_lm4.
compared_models = (min_lm2, min_lm4)
table8 = anova_lm(*compared_models)
print(table8)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-33-a9a6172af9d0> in <module>
      1 # is it just the slope or both?
----> 2 table8 = anova_lm(min_lm2, min_lm4)
      3 print(table8)

NameError: name 'min_lm2' is not defined

One-way ANOVA

[34]:
# Load the rehab data from the local cache file, downloading it when the
# cache does not exist yet.
try:
    rehab_table = pd.read_csv('rehab.table')
except FileNotFoundError:
    # Only a missing cache file triggers the download; any other failure
    # (e.g. an unreadable cache) is left to surface instead of being hidden.
    url = 'http://stats191.stanford.edu/data/rehab.csv'
    rehab_table = pd.read_csv(url)
    # index=False keeps the row index out of the cache, so re-reading it
    # does not pick up a spurious 'Unnamed: 0' column.
    rehab_table.to_csv('rehab.table', index=False)

# Box plot of the 'Time' column grouped by 'Fitness'.
fig, ax = plt.subplots(figsize=(8,6))
fig = rehab_table.boxplot('Time', 'Fitness', ax=ax, grid=False)
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-34-d3ea2b9d3e10> in <module>
      1 try:
----> 2     rehab_table = pd.read_csv('rehab.table')
      3 except:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    684
--> 685         return _read(filepath_or_buffer, kwds)
    686

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    456     # Create the parser.
--> 457     parser = TextFileReader(fp_or_buf, **kwds)
    458

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    894
--> 895         self._make_engine(self.engine)
    896

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1134         if engine == "c":
-> 1135             self._engine = CParserWrapper(self.f, **self.options)
   1136         else:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1916
-> 1917         self._reader = parsers.TextReader(src, **kwds)
   1918         self.unnamed_cols = self._reader.unnamed_cols

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: [Errno 2] File b'rehab.table' does not exist: b'rehab.table'

During handling of the above exception, another exception occurred:

ConnectionRefusedError                    Traceback (most recent call last)
/usr/lib/python3.8/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1318             try:
-> 1319                 h.request(req.get_method(), req.selector, req.data, headers,
   1320                           encode_chunked=req.has_header('Transfer-encoding'))

/usr/lib/python3.8/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1229         """Send a complete request to the server."""
-> 1230         self._send_request(method, url, body, headers, encode_chunked)
   1231

/usr/lib/python3.8/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1275             body = _encode(body, 'body')
-> 1276         self.endheaders(body, encode_chunked=encode_chunked)
   1277

/usr/lib/python3.8/http/client.py in endheaders(self, message_body, encode_chunked)
   1224             raise CannotSendHeader()
-> 1225         self._send_output(message_body, encode_chunked=encode_chunked)
   1226

/usr/lib/python3.8/http/client.py in _send_output(self, message_body, encode_chunked)
   1003         del self._buffer[:]
-> 1004         self.send(msg)
   1005

/usr/lib/python3.8/http/client.py in send(self, data)
    943             if self.auto_open:
--> 944                 self.connect()
    945             else:

/usr/lib/python3.8/http/client.py in connect(self)
    914         """Connect to the host and port specified in __init__."""
--> 915         self.sock = self._create_connection(
    916             (self.host,self.port), self.timeout, self.source_address)

/usr/lib/python3.8/socket.py in create_connection(address, timeout, source_address)
    807         try:
--> 808             raise err
    809         finally:

/usr/lib/python3.8/socket.py in create_connection(address, timeout, source_address)
    795                 sock.bind(source_address)
--> 796             sock.connect(sa)
    797             # Break explicitly a reference cycle

ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-34-d3ea2b9d3e10> in <module>
      3 except:
      4     url = 'http://stats191.stanford.edu/data/rehab.csv'
----> 5     rehab_table = pd.read_table(url, delimiter=",")
      6     rehab_table.to_csv('rehab.table')
      7

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    683         )
    684
--> 685         return _read(filepath_or_buffer, kwds)
    686
    687     parser_f.__name__ = name

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    437     # though mypy handling of conditional imports is difficult.
    438     # See https://github.com/python/mypy/issues/1297
--> 439     fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
    440         filepath_or_buffer, encoding, compression
    441     )

/usr/lib/python3/dist-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
    194
    195     if _is_url(filepath_or_buffer):
--> 196         req = urlopen(filepath_or_buffer)
    197         content_encoding = req.headers.get("Content-Encoding", None)
    198         if content_encoding == "gzip":

/usr/lib/python3.8/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    220     else:
    221         opener = _opener
--> 222     return opener.open(url, data, timeout)
    223
    224 def install_opener(opener):

/usr/lib/python3.8/urllib/request.py in open(self, fullurl, data, timeout)
    523
    524         sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
--> 525         response = self._open(req, data)
    526
    527         # post-process response

/usr/lib/python3.8/urllib/request.py in _open(self, req, data)
    540
    541         protocol = req.type
--> 542         result = self._call_chain(self.handle_open, protocol, protocol +
    543                                   '_open', req)
    544         if result:

/usr/lib/python3.8/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    500         for handler in handlers:
    501             func = getattr(handler, meth_name)
--> 502             result = func(*args)
    503             if result is not None:
    504                 return result

/usr/lib/python3.8/urllib/request.py in http_open(self, req)
   1346
   1347     def http_open(self, req):
-> 1348         return self.do_open(http.client.HTTPConnection, req)
   1349
   1350     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python3.8/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1320                           encode_chunked=req.has_header('Transfer-encoding'))
   1321             except OSError as err: # timeout error
-> 1322                 raise URLError(err)
   1323             r = h.getresponse()
   1324         except:

URLError: <urlopen error [Errno 111] Connection refused>
[35]:
# One-way model: Time explained by the categorical factor Fitness.
rehab_model = ols('Time ~ C(Fitness)', data=rehab_table)
rehab_lm = rehab_model.fit()

# ANOVA table for the fitted model.
table9 = anova_lm(rehab_lm)
print(table9)

# Show the exogenous data (orig_exog) stored on the fitted model.
print(rehab_lm.model.data.orig_exog)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-35-d3bb1b06817c> in <module>
----> 1 rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()
      2 table9 = anova_lm(rehab_lm)
      3 print(table9)
      4
      5 print(rehab_lm.model.data.orig_exog)

NameError: name 'rehab_table' is not defined
[36]:
# Regression summary for the one-way model.
rehab_summary = rehab_lm.summary()
print(rehab_summary)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-36-99d31a5bc5c4> in <module>
----> 1 print(rehab_lm.summary())

NameError: name 'rehab_lm' is not defined

Two-way ANOVA

[37]:
# Load the kidney data, preferring a local copy and falling back to the
# course website when no local file exists.
try:
    # Parse the local copy the same way as the remote one (whitespace
    # delimited) so both sources yield the same columns.
    kidney_table = pd.read_csv('./kidney.table', sep=r'\s+')
except FileNotFoundError:
    # Only a missing local file triggers the download; other errors surface.
    url = 'http://stats191.stanford.edu/data/kidney.table'
    # sep=r'\s+' is the non-deprecated spelling of delim_whitespace=True.
    kidney_table = pd.read_csv(url, sep=r'\s+')
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-37-cbc31ddb699c> in <module>
      1 try:
----> 2     kidney_table = pd.read_table('./kidney.table')
      3 except:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    684
--> 685         return _read(filepath_or_buffer, kwds)
    686

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    456     # Create the parser.
--> 457     parser = TextFileReader(fp_or_buf, **kwds)
    458

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    894
--> 895         self._make_engine(self.engine)
    896

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1134         if engine == "c":
-> 1135             self._engine = CParserWrapper(self.f, **self.options)
   1136         else:

/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1916
-> 1917         self._reader = parsers.TextReader(src, **kwds)
   1918         self.unnamed_cols = self._reader.unnamed_cols

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: [Errno 2] File b'./kidney.table' does not exist: b'./kidney.table'

During handling of the above exception, another exception occurred:

ConnectionRefusedError                    Traceback (most recent call last)
/usr/lib/python3.8/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1318             try:
-> 1319                 h.request(req.get_method(), req.selector, req.data, headers,
   1320                           encode_chunked=req.has_header('Transfer-encoding'))

/usr/lib/python3.8/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1229         """Send a complete request to the server."""
-> 1230         self._send_request(method, url, body, headers, encode_chunked)
   1231

/usr/lib/python3.8/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1275             body = _encode(body, 'body')
-> 1276         self.endheaders(body, encode_chunked=encode_chunked)
   1277

/usr/lib/python3.8/http/client.py in endheaders(self, message_body, encode_chunked)
   1224             raise CannotSendHeader()
-> 1225         self._send_output(message_body, encode_chunked=encode_chunked)
   1226

/usr/lib/python3.8/http/client.py in _send_output(self, message_body, encode_chunked)
   1003         del self._buffer[:]
-> 1004         self.send(msg)
   1005

/usr/lib/python3.8/http/client.py in send(self, data)
    943             if self.auto_open:
--> 944                 self.connect()
    945             else:

/usr/lib/python3.8/http/client.py in connect(self)
    914         """Connect to the host and port specified in __init__."""
--> 915         self.sock = self._create_connection(
    916             (self.host,self.port), self.timeout, self.source_address)

/usr/lib/python3.8/socket.py in create_connection(address, timeout, source_address)
    807         try:
--> 808             raise err
    809         finally:

/usr/lib/python3.8/socket.py in create_connection(address, timeout, source_address)
    795                 sock.bind(source_address)
--> 796             sock.connect(sa)
    797             # Break explicitly a reference cycle

ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-37-cbc31ddb699c> in <module>
      3 except:
      4     url = 'http://stats191.stanford.edu/data/kidney.table'
----> 5     kidney_table = pd.read_csv(url, delim_whitespace=True)

/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    683         )
    684
--> 685         return _read(filepath_or_buffer, kwds)
    686
    687     parser_f.__name__ = name

/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    437     # though mypy handling of conditional imports is difficult.
    438     # See https://github.com/python/mypy/issues/1297
--> 439     fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
    440         filepath_or_buffer, encoding, compression
    441     )

/usr/lib/python3/dist-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
    194
    195     if _is_url(filepath_or_buffer):
--> 196         req = urlopen(filepath_or_buffer)
    197         content_encoding = req.headers.get("Content-Encoding", None)
    198         if content_encoding == "gzip":

/usr/lib/python3.8/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    220     else:
    221         opener = _opener
--> 222     return opener.open(url, data, timeout)
    223
    224 def install_opener(opener):

/usr/lib/python3.8/urllib/request.py in open(self, fullurl, data, timeout)
    523
    524         sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
--> 525         response = self._open(req, data)
    526
    527         # post-process response

/usr/lib/python3.8/urllib/request.py in _open(self, req, data)
    540
    541         protocol = req.type
--> 542         result = self._call_chain(self.handle_open, protocol, protocol +
    543                                   '_open', req)
    544         if result:

/usr/lib/python3.8/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    500         for handler in handlers:
    501             func = getattr(handler, meth_name)
--> 502             result = func(*args)
    503             if result is not None:
    504                 return result

/usr/lib/python3.8/urllib/request.py in http_open(self, req)
   1346
   1347     def http_open(self, req):
-> 1348         return self.do_open(http.client.HTTPConnection, req)
   1349
   1350     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python3.8/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1320                           encode_chunked=req.has_header('Transfer-encoding'))
   1321             except OSError as err: # timeout error
-> 1322                 raise URLError(err)
   1323             r = h.getresponse()
   1324         except:

URLError: <urlopen error [Errno 111] Connection refused>

Explore the dataset

[38]:
# Preview the first ten rows of the kidney data.
kidney_table.head(n=10)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-38-fff8acd40403> in <module>
----> 1 kidney_table.head(10)

NameError: name 'kidney_table' is not defined

Balanced panel

[39]:
# Short alias used by the following cells.
kt = kidney_table

# Response plotted on the log scale: log(Days + 1).
log_days = np.log(kt['Days']+1)

plt.figure(figsize=(8,6))
fig = interaction_plot(
    kt['Weight'],
    kt['Duration'],
    log_days,
    colors=['red', 'blue'],
    markers=['D','^'],
    ms=10,
    ax=plt.gca(),
)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-39-9312bae60782> in <module>
----> 1 kt = kidney_table
      2 plt.figure(figsize=(8,6))
      3 fig = interaction_plot(kt['Weight'], kt['Duration'], np.log(kt['Days']+1),
      4         colors=['red', 'blue'], markers=['D','^'], ms=10, ax=plt.gca())

NameError: name 'kidney_table' is not defined

Names available in the calling namespace (such as `np` below) are also available in the formula evaluation namespace.

[40]:
# Model with both factors and their interaction.
kidney_lm = ols('np.log(Days+1) ~ C(Duration) * C(Weight)', data=kt).fit()

table10 = anova_lm(kidney_lm)

# Additive model compared against the interaction model.
additive_lm = ols('np.log(Days+1) ~ C(Duration) + C(Weight)', data=kt).fit()
print(anova_lm(additive_lm, kidney_lm))

# Both single-factor models are compared against the same additive model
# that uses the Sum contrast for Weight.
additive_sum_formula = 'np.log(Days+1) ~ C(Duration) + C(Weight, Sum)'

duration_lm = ols('np.log(Days+1) ~ C(Duration)', data=kt).fit()
print(anova_lm(duration_lm, ols(additive_sum_formula, data=kt).fit()))

weight_lm = ols('np.log(Days+1) ~ C(Weight)', data=kt).fit()
print(anova_lm(weight_lm, ols(additive_sum_formula, data=kt).fit()))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-40-c7e1132390fe> in <module>
----> 1 kidney_lm = ols('np.log(Days+1) ~ C(Duration) * C(Weight)', data=kt).fit()
      2
      3 table10 = anova_lm(kidney_lm)
      4
      5 print(anova_lm(ols('np.log(Days+1) ~ C(Duration) + C(Weight)',

NameError: name 'kt' is not defined

Sum of squares

Illustrates the use of different types of sums of squares (I, II, III) and how the Sum contrast can be used to produce the same output across all three.

Types I and II are equivalent under a balanced design.

Do not use Type III with a non-orthogonal contrast, i.e., Treatment.

[41]:
# Interaction model with the Sum contrast on both factors.
sum_formula = 'np.log(Days+1) ~ C(Duration, Sum) * C(Weight, Sum)'
sum_lm = ols(sum_formula, data=kt).fit()

# Type I is anova_lm's default, so typ=1 matches the bare call.
for ss_type in (1, 2, 3):
    print(anova_lm(sum_lm, typ=ss_type))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-41-e0c1ed608c29> in <module>
      1 sum_lm = ols('np.log(Days+1) ~ C(Duration, Sum) * C(Weight, Sum)',
----> 2             data=kt).fit()
      3
      4 print(anova_lm(sum_lm))
      5 print(anova_lm(sum_lm, typ=2))

NameError: name 'kt' is not defined
[42]:
# Interaction model with the Treatment contrast on both factors.
nosum_formula = 'np.log(Days+1) ~ C(Duration, Treatment) * C(Weight, Treatment)'
nosum_lm = ols(nosum_formula, data=kt).fit()

# Type I is anova_lm's default, so typ=1 matches the bare call.
for ss_type in (1, 2, 3):
    print(anova_lm(nosum_lm, typ=ss_type))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-42-95381847ac17> in <module>
      1 nosum_lm = ols('np.log(Days+1) ~ C(Duration, Treatment) * C(Weight, Treatment)',
----> 2             data=kt).fit()
      3 print(anova_lm(nosum_lm))
      4 print(anova_lm(nosum_lm, typ=2))
      5 print(anova_lm(nosum_lm, typ=3))

NameError: name 'kt' is not defined