Generalized Estimating Equations¶
Generalized Estimating Equations estimate generalized linear models for panel, cluster or repeated measures data when the observations are possibly correlated withing a cluster but uncorrelated across clusters. It supports estimation of the same one-parameter exponential families as Generalized Linear models (GLM).
See Module Reference for commands and arguments.
Examples¶
The following illustrates a Poisson regression with exchangeable correlation within clusters using data on epilepsy seizures.
In [1]: import statsmodels.api as sm
In [2]: import statsmodels.formula.api as smf
In [3]: data = sm.datasets.get_rdataset('epil', package='MASS', cache=True).data
---------------------------------------------------------------------------
gaierror Traceback (most recent call last)
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1316 h.request(req.get_method(), req.selector, req.data, headers,
-> 1317 encode_chunked=req.has_header('Transfer-encoding'))
1318 except OSError as err: # timeout error
/usr/lib/python3.7/http/client.py in request(self, method, url, body, headers, encode_chunked)
1228 """Send a complete request to the server."""
-> 1229 self._send_request(method, url, body, headers, encode_chunked)
1230
/usr/lib/python3.7/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
1274 body = _encode(body, 'body')
-> 1275 self.endheaders(body, encode_chunked=encode_chunked)
1276
/usr/lib/python3.7/http/client.py in endheaders(self, message_body, encode_chunked)
1223 raise CannotSendHeader()
-> 1224 self._send_output(message_body, encode_chunked=encode_chunked)
1225
/usr/lib/python3.7/http/client.py in _send_output(self, message_body, encode_chunked)
1015 del self._buffer[:]
-> 1016 self.send(msg)
1017
/usr/lib/python3.7/http/client.py in send(self, data)
955 if self.auto_open:
--> 956 self.connect()
957 else:
/usr/lib/python3.7/http/client.py in connect(self)
1383
-> 1384 super().connect()
1385
/usr/lib/python3.7/http/client.py in connect(self)
927 self.sock = self._create_connection(
--> 928 (self.host,self.port), self.timeout, self.source_address)
929 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
706 err = None
--> 707 for res in getaddrinfo(host, port, 0, SOCK_STREAM):
708 af, socktype, proto, canonname, sa = res
/usr/lib/python3.7/socket.py in getaddrinfo(host, port, family, type, proto, flags)
747 addrlist = []
--> 748 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
749 af, socktype, proto, canonname, sa = res
gaierror: [Errno -2] Name or service not known
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
<ipython-input-3-c0fa50268696> in <module>()
----> 1 data = sm.datasets.get_rdataset('epil', package='MASS', cache=True).data
/build/statsmodels-IFPJo1/statsmodels-0.8.0/.pybuild/cpython3_3.7_statsmodels/build/statsmodels/datasets/utils.py in get_rdataset(dataname, package, cache)
288 "master/doc/"+package+"/rst/")
289 cache = _get_cache(cache)
--> 290 data, from_cache = _get_data(data_base_url, dataname, cache)
291 data = read_csv(data, index_col=0)
292 data = _maybe_reset_index(data)
/build/statsmodels-IFPJo1/statsmodels-0.8.0/.pybuild/cpython3_3.7_statsmodels/build/statsmodels/datasets/utils.py in _get_data(base_url, dataname, cache, extension)
219 url = base_url + (dataname + ".%s") % extension
220 try:
--> 221 data, from_cache = _urlopen_cached(url, cache)
222 except HTTPError as err:
223 if '404' in str(err):
/build/statsmodels-IFPJo1/statsmodels-0.8.0/.pybuild/cpython3_3.7_statsmodels/build/statsmodels/datasets/utils.py in _urlopen_cached(url, cache)
210 # not using the cache or didn't find it in cache
211 if not from_cache:
--> 212 data = urlopen(url).read()
213 if cache is not None: # then put it in the cache
214 _cache_it(data, cache_path)
/usr/lib/python3.7/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):
/usr/lib/python3.7/urllib/request.py in open(self, fullurl, data, timeout)
523 req = meth(req)
524
--> 525 response = self._open(req, data)
526
527 # post-process response
/usr/lib/python3.7/urllib/request.py in _open(self, req, data)
541 protocol = req.type
542 result = self._call_chain(self.handle_open, protocol, protocol +
--> 543 '_open', req)
544 if result:
545 return result
/usr/lib/python3.7/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
501 for handler in handlers:
502 func = getattr(handler, meth_name)
--> 503 result = func(*args)
504 if result is not None:
505 return result
/usr/lib/python3.7/urllib/request.py in https_open(self, req)
1358 def https_open(self, req):
1359 return self.do_open(http.client.HTTPSConnection, req,
-> 1360 context=self._context, check_hostname=self._check_hostname)
1361
1362 https_request = AbstractHTTPHandler.do_request_
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1317 encode_chunked=req.has_header('Transfer-encoding'))
1318 except OSError as err: # timeout error
-> 1319 raise URLError(err)
1320 r = h.getresponse()
1321 except:
URLError: <urlopen error [Errno -2] Name or service not known>
In [4]: fam = sm.families.Poisson()
In [5]: ind = sm.cov_struct.Exchangeable()
In [6]: mod = smf.gee("y ~ age + trt + base", "subject", data,
...: cov_struct=ind, family=fam)
...:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-6-12bac5f45363> in <module>()
1 mod = smf.gee("y ~ age + trt + base", "subject", data,
----> 2 cov_struct=ind, family=fam)
/build/statsmodels-IFPJo1/statsmodels-0.8.0/.pybuild/cpython3_3.7_statsmodels/build/statsmodels/genmod/generalized_estimating_equations.py in from_formula(cls, formula, groups, data, subset, time, offset, exposure, *args, **kwargs)
668
669 if type(groups) == str:
--> 670 groups = data[groups]
671
672 if type(time) == str:
KeyError: 'subject'
In [7]: res = mod.fit()