Package cherrypy :: Package lib :: Module httputil
[hide private]
[frames | no frames]

Source Code for Module cherrypy.lib.httputil

  1  """HTTP library functions. 
  2   
  3  This module contains functions for building an HTTP application 
  4  framework: any one, not just one whose name starts with "Ch". ;) If you 
  5  reference any modules from some popular framework inside *this* module, 
  6  FuManChu will personally hang you up by your thumbs and submit you 
  7  to a public caning. 
  8  """ 
  9   
 10  from binascii import b2a_base64 
 11  from cherrypy._cpcompat import BaseHTTPRequestHandler, HTTPDate, ntob, ntou 
 12  from cherrypy._cpcompat import basestring, bytestr, iteritems, nativestr 
 13  from cherrypy._cpcompat import reversed, sorted, unicodestr, unquote_qs 
 14  response_codes = BaseHTTPRequestHandler.responses.copy() 
 15   
 16  # From https://bitbucket.org/cherrypy/cherrypy/issue/361 
 17  response_codes[500] = ('Internal Server Error', 
 18                         'The server encountered an unexpected condition ' 
 19                         'which prevented it from fulfilling the request.') 
 20  response_codes[503] = ('Service Unavailable', 
 21                         'The server is currently unable to handle the ' 
 22                         'request due to a temporary overloading or ' 
 23                         'maintenance of the server.') 
 24   
 25  import re 
 26  import urllib 
 27   
 28   
def urljoin(*atoms):
    """Join the given path atoms into a single URL path.

    Empty atoms are skipped, so a SCRIPT_NAME and a PATH_INFO can be joined
    back into the original URL even when either one is blank. Runs of
    slashes are collapsed to a single "/", and an empty result is returned
    as "/".
    """
    joined = "/".join(filter(None, atoms))
    # One replace pass can leave doubled slashes behind ("///" -> "//"),
    # so keep collapsing until none remain.
    while joined.find("//") != -1:
        joined = joined.replace("//", "/")
    if not joined:
        # Special-case the empty URL.
        return "/"
    return joined
40 41
def urljoin_bytes(*atoms):
    """Join the given path atoms into a single URL path (bytes flavour).

    Works like ``urljoin`` but builds every separator with ``ntob``, so
    the atoms are expected to be of the type ``ntob`` returns. Empty atoms
    are skipped, runs of slashes are collapsed, and an empty result is
    returned as a single slash.
    """
    slash = ntob("/")
    double_slash = ntob("//")
    joined = slash.join(filter(None, atoms))
    # One replace pass can leave doubled slashes behind, so repeat.
    while double_slash in joined:
        joined = joined.replace(double_slash, slash)
    # Special-case the empty URL and return "/" instead.
    return joined or slash
53 54
def protocol_from_http(protocol_str):
    """Return a (major, minor) protocol tuple from an 'HTTP/x.y' string.

    The text after the five-character "HTTP/" prefix is split on the first
    "." and both halves are converted with ``int``, so multi-digit version
    numbers such as "HTTP/1.10" are handled (RFC 2616 defines the version
    as ``1*DIGIT "." 1*DIGIT``).

    Raises ValueError if the string does not contain two integers
    separated by a dot after the prefix.
    """
    major, minor = protocol_str[5:].split(".", 1)
    return int(major), int(minor)
58 59
def get_ranges(headervalue, content_length):
    """Return a list of (start, stop) indices from a Range header, or None.

    Each (start, stop) tuple will be composed of two ints, which are suitable
    for use in a slicing operation. That is, the header "Range: bytes=3-6",
    if applied against a Python string, is requesting resource[3:7]. This
    function will return the list [(3, 7)].

    None is returned when the header is empty or syntactically invalid
    (no "=", a range without "-", non-numeric positions, or a last position
    smaller than the first); the caller should then behave as if no Range
    header had been sent.

    If this function returns an empty list, you should return HTTP 416.
    """

    if not headervalue:
        return None

    # Without "=" there is no range set to parse at all.
    if "=" not in headervalue:
        return None
    byteranges = headervalue.split("=", 1)[1]

    result = []
    for brange in byteranges.split(","):
        if "-" not in brange:
            return None
        start, stop = [x.strip() for x in brange.split("-", 1)]

        if start:
            try:
                first = int(start)
                if stop:
                    last = int(stop)
                else:
                    # Open-ended range: runs to the final byte.
                    last = content_length - 1
            except ValueError:
                return None

            if first >= content_length:
                # From rfc 2616 sec 14.16:
                # "If the server receives a request (other than one
                # including an If-Range request-header field) with an
                # unsatisfiable Range request-header field (that is,
                # all of whose byte-range-spec values have a first-byte-pos
                # value greater than the current length of the selected
                # resource), it SHOULD return a response code of 416
                # (Requested range not satisfiable)."
                continue
            if last < first:
                # From rfc 2616 sec 14.16:
                # "If the server ignores a byte-range-spec because it
                # is syntactically invalid, the server SHOULD treat
                # the request as if the invalid Range header field
                # did not exist. (Normally, this means return a 200
                # response containing the full entity)."
                return None
            result.append((first, last + 1))
        else:
            if not stop:
                # Neither position given ("-"): invalid, see rfc quote above.
                return None
            try:
                suffix_length = int(stop)
            except ValueError:
                return None
            if suffix_length < 0:
                # e.g. "bytes=--5" is not a valid suffix-byte-range-spec.
                return None
            # Suffix range (last N bytes). From rfc 2616 sec 14.35.1:
            # "If the entity is shorter than the specified suffix-length,
            # the entire entity-body is used."  Without this clamp the
            # start index would go negative.
            if suffix_length > content_length:
                suffix_length = content_length
            result.append((content_length - suffix_length, content_length))

    return result
109 110
class HeaderElement(object):

    """An element (with parameters) from an HTTP header's element list.

    ``value`` holds the element's initial token and ``params`` is a dict of
    its ``;key=val`` parameters. Instances are ordered by ``value``.
    """

    def __init__(self, value, params=None):
        self.value = value
        if params is None:
            # Build a fresh dict per instance instead of sharing a default.
            params = {}
        self.params = params

    def __cmp__(self, other):
        # Ordering hook consulted by Python 2 only.
        return cmp(self.value, other.value)

    def __lt__(self, other):
        return self.value < other.value

    def __str__(self):
        p = [";%s=%s" % (k, v) for k, v in iteritems(self.params)]
        return str("%s%s" % (self.value, "".join(p)))

    def __bytes__(self):
        return ntob(self.__str__())

    def __unicode__(self):
        return ntou(self.__str__())

    def parse(elementstr):
        """Transform 'token;key=val' to ('token', {'key': 'val'}).

        A parameter without "=" (or with nothing after it) maps to the
        empty string. Parameters that are empty once whitespace is
        stripped, such as a lone "=", are skipped.
        """
        # Split the element into a value and parameters. The 'value' may
        # be of the form, "token=token", but we don't split that here.
        atoms = [x.strip() for x in elementstr.split(";") if x.strip()]
        if not atoms:
            initial_value = ''
        else:
            initial_value = atoms.pop(0).strip()
        params = {}
        for atom in atoms:
            atom = [x.strip() for x in atom.split("=", 1) if x.strip()]
            if not atom:
                # A bare "=" has neither key nor value; popping from the
                # empty list would raise IndexError on malformed input.
                continue
            key = atom.pop(0)
            if atom:
                val = atom[0]
            else:
                val = ""
            params[key] = val
        return initial_value, params
    parse = staticmethod(parse)

    def from_str(cls, elementstr):
        """Construct an instance from a string of the form 'token;key=val'."""
        ival, params = cls.parse(elementstr)
        return cls(ival, params)
    from_str = classmethod(from_str)
# Matches ";q=" (optional spaces after the ";" and around the "q").
q_separator = re.compile(r'; *q *=')


class AcceptElement(HeaderElement):

    """An element (with parameters) from an Accept* header's element list.

    AcceptElement objects are comparable; the more-preferred object will be
    "less than" the less-preferred object. They are also therefore sortable;
    if you sort a list of AcceptElement objects, they will be listed in
    priority order; the most preferred value will be first. Yes, it should
    have been the other way around, but it's too late to fix now.
    """

    def from_str(cls, elementstr):
        """Construct an instance from a string such as 'text/html;q=0.8'.

        Everything after the first "q" separator is parsed as its own
        HeaderElement and stored under params["q"].
        """
        # The first "q" parameter (if any) separates the initial
        # media-range parameter(s) (if any) from the accept-params.
        pieces = q_separator.split(elementstr, 1)
        media_range = pieces[0].strip()

        qvalue = None
        if len(pieces) > 1:
            # The qvalue for an Accept header can have extensions. The other
            # headers cannot, but it's easier to parse them as if they did.
            qvalue = HeaderElement.from_str(pieces[1].strip())

        media_type, params = cls.parse(media_range)
        if qvalue is not None:
            params["q"] = qvalue
        return cls(media_type, params)
    from_str = classmethod(from_str)

    def qvalue(self):
        try:
            raw = self.params["q"]
        except KeyError:
            # No explicit q parameter: fall back to "1".
            raw = "1"
        if isinstance(raw, HeaderElement):
            raw = raw.value
        return float(raw)
    qvalue = property(qvalue, doc="The qvalue, or priority, of this value.")

    def __cmp__(self, other):
        # Compare by qvalue first; equal qvalues fall back to the string
        # forms. Consulted by Python 2 only.
        return (cmp(self.qvalue, other.qvalue) or
                cmp(str(self), str(other)))

    def __lt__(self, other):
        mine, theirs = self.qvalue, other.qvalue
        if mine != theirs:
            return mine < theirs
        # Equal qvalues: order by the string forms instead.
        return str(self) < str(other)
# Splits on commas that lie outside double-quoted strings: the lookahead
# only accepts a comma that is followed by an even number of '"' characters.
RE_HEADER_SPLIT = re.compile(',(?=(?:[^"]*"[^"]*")*[^"]*$)')


def header_elements(fieldname, fieldvalue):
    """Return a sorted HeaderElement list from a comma-separated header string.

    Headers whose name starts with "Accept", and the "TE" header, are parsed
    into AcceptElement objects; all others into HeaderElement objects. The
    elements are sorted and then reversed before being returned. An empty
    or missing fieldvalue gives an empty list.
    """
    if not fieldvalue:
        return []

    # The element class depends only on the header name, so pick it once.
    if fieldname.startswith("Accept") or fieldname == 'TE':
        element_class = AcceptElement
    else:
        element_class = HeaderElement

    parsed = [element_class.from_str(chunk)
              for chunk in RE_HEADER_SPLIT.split(fieldvalue)]
    parsed.sort()
    parsed.reverse()
    return parsed
231 232
def decode_TEXT(value):
    r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> "f\xfcr").

    ``value`` is split into atoms by ``email.header.decode_header``. Atoms
    that carry a charset are decoded with it. Atoms without a charset are
    used as they are when they are already native strings; otherwise they
    are decoded as ISO-8859-1 so that values mixing plain text and encoded
    words can be joined into one string.
    """
    try:
        # Python 3
        from email.header import decode_header
    except ImportError:
        from email.Header import decode_header

    pieces = []
    for atom, charset in decode_header(value):
        if charset is not None:
            atom = atom.decode(charset)
        elif not isinstance(atom, str):
            # On Python 3, decode_header() hands back bytes for the
            # unencoded parts whenever the value also contains an encoded
            # word; concatenating those to a str would raise TypeError.
            atom = atom.decode("ISO-8859-1")
        pieces.append(atom)
    return "".join(pieces)
247 248
def valid_status(status):
    """Return legal HTTP status Code, Reason-phrase and Message.

    The status arg must be an int, or a str that begins with an int.
    A false status (None, 0, "") is treated as 200.

    If status is an int, or a str and no reason-phrase is supplied,
    a default reason-phrase will be provided.

    Raises ValueError if the code is not an integer or lies outside the
    range 100-599.
    """
    text = str(status or 200)

    # Everything after the first space, if there is one, is the reason.
    space = text.find(" ")
    if space == -1:
        code_text, reason = text, None
    else:
        code_text, reason = text[:space], text[space + 1:].strip()

    try:
        code = int(code_text)
    except ValueError:
        raise ValueError("Illegal response status from server "
                         "(%s is non-numeric)." % repr(code_text))

    if not (100 <= code <= 599):
        raise ValueError("Illegal response status from server "
                         "(%s is out of range)." % repr(code))

    # A code missing from the table is unknown but not illegal; it gets an
    # empty default reason and message.
    default_reason, message = response_codes.get(code, ("", ""))

    if reason is None:
        reason = default_reason

    return code, reason, message
# NOTE: the parse_qs functions that follow are modified versions of those
# in the python3.0 source - we need to pass through an encoding to the unquote
# method, but the default parse_qs function doesn't allow us to. These do.
297 -def _parse_qs(qs, keep_blank_values=0, strict_parsing=0, encoding='utf-8'):
298 """Parse a query given as a string argument. 299 300 Arguments: 301 302 qs: URL-encoded query string to be parsed 303 304 keep_blank_values: flag indicating whether blank values in 305 URL encoded queries should be treated as blank strings. A 306 true value indicates that blanks should be retained as blank 307 strings. The default false value indicates that blank values 308 are to be ignored and treated as if they were not included. 309 310 strict_parsing: flag indicating what to do with parsing errors. If 311 false (the default), errors are silently ignored. If true, 312 errors raise a ValueError exception. 313 314 Returns a dict, as G-d intended. 315 """ 316 pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] 317 d = {} 318 for name_value in pairs: 319 if not name_value and not strict_parsing: 320 continue 321 nv = name_value.split('=', 1) 322 if len(nv) != 2: 323 if strict_parsing: 324 raise ValueError("bad query field: %r" % (name_value,)) 325 # Handle case of a control-name with no equal sign 326 if keep_blank_values: 327 nv.append('') 328 else: 329 continue 330 if len(nv[1]) or keep_blank_values: 331 name = unquote_qs(nv[0], encoding) 332 value = unquote_qs(nv[1], encoding) 333 if name in d: 334 if not isinstance(d[name], list): 335 d[name] = [d[name]] 336 d[name].append(value) 337 else: 338 d[name] = value 339 return d
# Matches "<digits>,<digits>" at the start of a string. It is not anchored
# at the end, so it also matches when other text follows the second number.
image_map_pattern = re.compile(r"[0-9]+,[0-9]+")


def parse_query_string(query_string, keep_blank_values=True, encoding='utf-8'):
    """Build a params dictionary from a query_string.

    Duplicate key/value pairs in the provided query_string will be
    returned as {'key': [val1, val2, ...]}. Single key/values will
    be returned as strings: {'key': 'value'}.

    A query string that starts with "<int>,<int>" is treated as a
    server-side image map and returned as {'x': int, 'y': int}. If such a
    string turns out not to hold two clean integers (for example
    "10,20&a=b"), it is parsed as an ordinary query string instead of
    raising ValueError.
    """
    if image_map_pattern.match(query_string):
        # Server-side image map. Map the coords to 'x' and 'y'
        # (like CGI::Request does).
        coords = query_string.split(",")
        try:
            return {'x': int(coords[0]), 'y': int(coords[1])}
        except ValueError:
            # The pattern only checks the start of the string; trailing
            # text after the second number makes int() fail. Fall through
            # to normal parsing.
            pass
    return _parse_qs(query_string, keep_blank_values, encoding=encoding)
360 361
class CaseInsensitiveDict(dict):

    """A case-insensitive dict subclass.

    Each key is changed on entry to str(key).title().

    Note that ``__init__`` is not overridden, so keys passed straight to
    the constructor are stored as given.
    """

    # Sentinel that lets pop() tell "no default given" apart from None.
    _pop_missing = object()

    def __getitem__(self, key):
        return dict.__getitem__(self, str(key).title())

    def __setitem__(self, key, value):
        dict.__setitem__(self, str(key).title(), value)

    def __delitem__(self, key):
        dict.__delitem__(self, str(key).title())

    def __contains__(self, key):
        return dict.__contains__(self, str(key).title())

    def get(self, key, default=None):
        return dict.get(self, str(key).title(), default)

    # Only provide has_key where the built-in dict still has it.
    if hasattr({}, 'has_key'):
        def has_key(self, key):
            return str(key).title() in self

    def update(self, E=None, **F):
        """Copy items into self, normalising every key.

        Like dict.update, E may be a mapping (anything with a ``keys``
        method) or an iterable of (key, value) pairs, and extra items may
        be given as keyword arguments.
        """
        if E is not None:
            if hasattr(E, "keys"):
                for k in E.keys():
                    self[str(k).title()] = E[k]
            else:
                for k, v in E:
                    self[str(k).title()] = v
        for k in F:
            self[str(k).title()] = F[k]

    def fromkeys(cls, seq, value=None):
        """Return a new instance with the normalised keys of seq."""
        newdict = cls()
        for k in seq:
            newdict[str(k).title()] = value
        return newdict
    fromkeys = classmethod(fromkeys)

    def setdefault(self, key, x=None):
        key = str(key).title()
        try:
            return self[key]
        except KeyError:
            self[key] = x
            return x

    def pop(self, key, default=_pop_missing):
        """Remove key and return its value.

        If the key is absent, return default when one was given and raise
        KeyError otherwise, as dict.pop does.
        """
        if default is CaseInsensitiveDict._pop_missing:
            return dict.pop(self, str(key).title())
        return dict.pop(self, str(key).title(), default)
# From RFC 2616:
#
#   TEXT = <any OCTET except CTLs, but including LWS>
#
#   "A CRLF is allowed in the definition of TEXT only as part of a header
#   field continuation. It is expected that the folding LWS will be
#   replaced with a single SP before interpretation of the TEXT value."
#
# The constants below are passed to translate() to delete the control
# characters (0-31 and 127) from header names and values.
if nativestr == bytestr:
    # Native strings are byte strings: supply an identity table so that
    # translate() changes nothing apart from the deletions.
    header_translate_table = ''.join(map(chr, xrange(256)))
    header_translate_deletechars = ''.join(map(chr, xrange(32))) + chr(127)
else:
    header_translate_table = None
    header_translate_deletechars = bytes(list(range(32)) + [127])
class HeaderMap(CaseInsensitiveDict):

    """A dict subclass for HTTP request and response headers.

    Each key is changed on entry to str(key).title(). This allows headers
    to be case-insensitive and avoid duplicates.

    Values are header values (decoded according to :rfc:`2047` if necessary).
    """

    # HTTP version as a (major, minor) tuple; consulted by encode().
    protocol = (1, 1)
    # Charsets tried, in order, by encode().
    encodings = ["ISO-8859-1"]

    # Someday, when http-bis is done, this will probably get dropped
    # since few servers, clients, or intermediaries do it. But until then,
    # we're going to obey the spec as is.
    # "Words of *TEXT MAY contain characters from character sets other than
    # ISO-8859-1 only when encoded according to the rules of RFC 2047."
    use_rfc_2047 = True

    def elements(self, key):
        """Return a sorted list of HeaderElements for the given header."""
        name = str(key).title()
        return header_elements(name, self.get(name))

    def values(self, key):
        """Return a sorted list of HeaderElement.value for the given header."""
        found = []
        for element in self.elements(key):
            found.append(element.value)
        return found

    def output(self):
        """Transform self into a list of (name, value) tuples."""
        return [pair for pair in self.encode_header_items(self.items())]

    def encode_header_items(cls, header_items):
        """
        Prepare the sequence of name, value tuples into a form suitable for
        transmitting on the wire for HTTP.

        This is a generator: unicode names and values are passed through
        ``encode``, non-string values are converted with ``str`` first, and
        the characters listed in header_translate_deletechars are removed
        from both before each pair is yielded.
        """
        for name, value in header_items:
            if isinstance(name, unicodestr):
                name = cls.encode(name)

            if not isinstance(value, basestring):
                value = str(value)

            if isinstance(value, unicodestr):
                value = cls.encode(value)

            # See header_translate_* constants above.
            # Replace only if you really know what you're doing.
            yield (
                name.translate(header_translate_table,
                               header_translate_deletechars),
                value.translate(header_translate_table,
                                header_translate_deletechars),
            )
    encode_header_items = classmethod(encode_header_items)

    def encode(cls, v):
        """Return the given header name or value, encoded for HTTP output.

        Each charset in ``encodings`` is tried in turn. If none can
        represent the text, it is sent as an RFC 2047 base64 encoded word
        when the protocol is (1, 1) and ``use_rfc_2047`` is set; otherwise
        ValueError is raised.
        """
        for charset in cls.encodings:
            try:
                encoded = v.encode(charset)
            except UnicodeEncodeError:
                pass
            else:
                return encoded

        if cls.protocol != (1, 1) or not cls.use_rfc_2047:
            raise ValueError("Could not encode header part %r using "
                             "any of the encodings %r." %
                             (v, cls.encodings))

        # Encode RFC-2047 TEXT
        # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?=").
        # We do our own here instead of using the email module
        # because we never want to fold lines--folding has
        # been deprecated by the HTTP working group.
        payload = b2a_base64(v.encode('utf-8')).strip(ntob('\n'))
        return ntob('=?utf-8?b?') + payload + ntob('?=')
    encode = classmethod(encode)
505 506
class Host(object):

    """An internet address.

    name
        Should be the client's host name. If not available (because no DNS
        lookup is performed), the IP address should be used instead.

    """

    # Class-level defaults; __init__ always sets all three on the instance.
    ip = "0.0.0.0"
    port = 80
    name = "unknown.tld"

    def __init__(self, ip, port, name=None):
        self.ip, self.port = ip, port
        if name is not None:
            self.name = name
        else:
            # No host name supplied: fall back to the IP address.
            self.name = ip

    def __repr__(self):
        fields = (self.ip, self.port, self.name)
        return "httputil.Host(%r, %r, %r)" % fields
530