Linear Mixed Effects Models¶
Linear Mixed Effects models are used for regression analyses involving dependent data. Such data arise when working with longitudinal and other study designs in which multiple observations are made on each subject. Two specific mixed effects models are random intercepts models, where all responses in a single group are additively shifted by a value that is specific to the group, and random slopes models, where the values follow a mean trajectory that is linear in observed covariates, with both the slopes and intercept being specific to the group. The Statsmodels MixedLM implementation allows arbitrary random effects design matrices to be specified for the groups, so these and other types of random effects models can all be fit.
The Statsmodels LME framework currently supports post-estimation inference via Wald tests and confidence intervals on the coefficients, profile likelihood analysis, likelihood ratio testing, and AIC. Some limitations of the current implementation are that it does not support structure more complex on the residual errors (they are always homoscedastic), and it does not support crossed random effects. We hope to implement these features for the next release.
Examples¶
In [1]: import statsmodels.api as sm
In [2]: import statsmodels.formula.api as smf
In [3]: data = sm.datasets.get_rdataset("dietox", "geepack", cache=True).data
---------------------------------------------------------------------------
gaierror Traceback (most recent call last)
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1316 h.request(req.get_method(), req.selector, req.data, headers,
-> 1317 encode_chunked=req.has_header('Transfer-encoding'))
1318 except OSError as err: # timeout error
/usr/lib/python3.7/http/client.py in request(self, method, url, body, headers, encode_chunked)
1228 """Send a complete request to the server."""
-> 1229 self._send_request(method, url, body, headers, encode_chunked)
1230
/usr/lib/python3.7/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
1274 body = _encode(body, 'body')
-> 1275 self.endheaders(body, encode_chunked=encode_chunked)
1276
/usr/lib/python3.7/http/client.py in endheaders(self, message_body, encode_chunked)
1223 raise CannotSendHeader()
-> 1224 self._send_output(message_body, encode_chunked=encode_chunked)
1225
/usr/lib/python3.7/http/client.py in _send_output(self, message_body, encode_chunked)
1015 del self._buffer[:]
-> 1016 self.send(msg)
1017
/usr/lib/python3.7/http/client.py in send(self, data)
955 if self.auto_open:
--> 956 self.connect()
957 else:
/usr/lib/python3.7/http/client.py in connect(self)
1383
-> 1384 super().connect()
1385
/usr/lib/python3.7/http/client.py in connect(self)
927 self.sock = self._create_connection(
--> 928 (self.host,self.port), self.timeout, self.source_address)
929 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
706 err = None
--> 707 for res in getaddrinfo(host, port, 0, SOCK_STREAM):
708 af, socktype, proto, canonname, sa = res
/usr/lib/python3.7/socket.py in getaddrinfo(host, port, family, type, proto, flags)
747 addrlist = []
--> 748 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
749 af, socktype, proto, canonname, sa = res
gaierror: [Errno -2] Name or service not known
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
<ipython-input-3-dd9bcc886be5> in <module>()
----> 1 data = sm.datasets.get_rdataset("dietox", "geepack", cache=True).data
/build/statsmodels-IFPJo1/statsmodels-0.8.0/.pybuild/cpython3_3.7_statsmodels/build/statsmodels/datasets/utils.py in get_rdataset(dataname, package, cache)
288 "master/doc/"+package+"/rst/")
289 cache = _get_cache(cache)
--> 290 data, from_cache = _get_data(data_base_url, dataname, cache)
291 data = read_csv(data, index_col=0)
292 data = _maybe_reset_index(data)
/build/statsmodels-IFPJo1/statsmodels-0.8.0/.pybuild/cpython3_3.7_statsmodels/build/statsmodels/datasets/utils.py in _get_data(base_url, dataname, cache, extension)
219 url = base_url + (dataname + ".%s") % extension
220 try:
--> 221 data, from_cache = _urlopen_cached(url, cache)
222 except HTTPError as err:
223 if '404' in str(err):
/build/statsmodels-IFPJo1/statsmodels-0.8.0/.pybuild/cpython3_3.7_statsmodels/build/statsmodels/datasets/utils.py in _urlopen_cached(url, cache)
210 # not using the cache or didn't find it in cache
211 if not from_cache:
--> 212 data = urlopen(url).read()
213 if cache is not None: # then put it in the cache
214 _cache_it(data, cache_path)
/usr/lib/python3.7/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):
/usr/lib/python3.7/urllib/request.py in open(self, fullurl, data, timeout)
523 req = meth(req)
524
--> 525 response = self._open(req, data)
526
527 # post-process response
/usr/lib/python3.7/urllib/request.py in _open(self, req, data)
541 protocol = req.type
542 result = self._call_chain(self.handle_open, protocol, protocol +
--> 543 '_open', req)
544 if result:
545 return result
/usr/lib/python3.7/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
501 for handler in handlers:
502 func = getattr(handler, meth_name)
--> 503 result = func(*args)
504 if result is not None:
505 return result
/usr/lib/python3.7/urllib/request.py in https_open(self, req)
1358 def https_open(self, req):
1359 return self.do_open(http.client.HTTPSConnection, req,
-> 1360 context=self._context, check_hostname=self._check_hostname)
1361
1362 https_request = AbstractHTTPHandler.do_request_
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1317 encode_chunked=req.has_header('Transfer-encoding'))
1318 except OSError as err: # timeout error
-> 1319 raise URLError(err)
1320 r = h.getresponse()
1321 except:
URLError: <urlopen error [Errno -2] Name or service not known>
In [4]: md = smf.mixedlm("Weight ~ Time", data, groups=data["Pig"])