Package cherrypy :: Package lib :: Module encoding
[hide private]
[frames | no frames]

Source Code for Module cherrypy.lib.encoding

  1  import struct 
  2  import time 
  3   
  4  import cherrypy 
  5  from cherrypy._cpcompat import basestring, BytesIO, ntob, set, unicodestr 
  6  from cherrypy.lib import file_generator 
  7  from cherrypy.lib import set_vary_header 
  8   
  9   
def decode(encoding=None, default_encoding='utf-8'):
    """Replace or extend the list of charsets used to decode a request entity.

    Either argument may be a single string or a list of strings.

    encoding
        If not None, restricts the set of charsets attempted while decoding
        a request entity to the given set (even if a different charset is
        given in the Content-Type request header).

    default_encoding
        Only in effect if the 'encoding' argument is not given.
        If given, the set of charsets attempted while decoding a request
        entity is *extended* with the given value(s).

    """
    def as_list(value):
        # Anything that is not already a list is treated as one item.
        if isinstance(value, list):
            return value
        return [value]

    entity = cherrypy.request.body

    if encoding is not None:
        # An explicit encoding replaces the attempted charsets outright.
        entity.attempt_charsets = as_list(encoding)
        return

    if default_encoding:
        # Otherwise the defaults are appended to whatever is already there.
        entity.attempt_charsets = (
            entity.attempt_charsets + as_list(default_encoding))
35
class UTF8StreamEncoder:
    """Iterator wrapper that UTF-8 encodes unicode items as they are read.

    Items of the wrapped iterator that are instances of ``unicodestr`` are
    encoded to UTF-8; every other item is passed through untouched.
    Attribute access that this wrapper does not satisfy itself is forwarded
    to the wrapped iterator.
    """

    def __init__(self, iterator):
        # The underlying iterator; must support the builtin next().
        self._iterator = iterator

    def __iter__(self):
        return self

    def next(self):
        # Python 2 spelling of the iterator protocol.
        return self.__next__()

    def __next__(self):
        res = next(self._iterator)
        if isinstance(res, unicodestr):
            res = res.encode('utf-8')
        return res

    def __getattr__(self, attr):
        # __getattr__ only runs when normal lookup fails.  Double-underscore
        # names are never forwarded.  '_iterator' must be refused as well:
        # if it is missing from the instance (object created without
        # __init__, e.g. via __new__, copy or unpickling), the
        # 'self._iterator' lookup below would re-enter this method forever.
        if attr.startswith('__') or attr == '_iterator':
            raise AttributeError(self, attr)
        return getattr(self._iterator, attr)
56 57
class ResponseEncoder:
    """Page-handler wrapper that encodes the response body to a charset.

    On construction the instance installs itself as ``request.handler``
    (when one is set) and remembers the previous handler.  When called it
    runs that handler, normalises the result into an iterable body and,
    depending on the Content-Type response header, picks a charset from the
    Accept-Charset request header and encodes the body with it.
    """

    # Charset tried when the client expresses no preference or sends "*".
    default_encoding = 'utf-8'
    # Message template for the 500 raised when default_encoding fails.
    failmsg = "Response body could not be encoded with %r."
    # If not None, the only charset that will be attempted.
    encoding = None
    # Error-handling scheme passed to each chunk's .encode() call.
    errors = 'strict'
    # If True, only "text/*" content types get a charset.
    text_only = True
    # If False, __call__ never negotiates or sets a charset.
    add_charset = True
    # If True, log each decision under the 'TOOLS.ENCODE' context.
    debug = False

    def __init__(self, **kwargs):
        """Apply keyword overrides and hook into the current request."""
        for k, v in kwargs.items():
            setattr(self, k, v)

        self.attempted_charsets = set()
        request = cherrypy.serving.request
        if request.handler is not None:
            # Replace request.handler with self
            if self.debug:
                cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE')
            self.oldhandler = request.handler
            request.handler = self

    def encode_stream(self, encoding):
        """Encode a streaming response body.

        The body is wrapped in a generator, so chunks are encoded lazily
        while the stream is written out; encoding errors for individual
        chunks therefore cannot be detected here.

        Returns False if 'encoding' was already attempted or names an
        unknown codec, True once the wrapper has been installed.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)

        # Reject unknown codecs up front.  Without this check an unknown
        # charset from the Accept-Charset header would be reported as the
        # chosen one and only fail with LookupError mid-stream.
        import codecs
        try:
            codecs.lookup(encoding)
        except (LookupError, UnicodeError):
            return False

        def encoder(body):
            for chunk in body:
                if isinstance(chunk, unicodestr):
                    chunk = chunk.encode(encoding, self.errors)
                yield chunk
        self.body = encoder(self.body)
        return True

    def encode_string(self, encoding):
        """Encode a buffered response body.

        Returns False if 'encoding' was already attempted or any chunk
        cannot be encoded with it (self.body is then left unchanged);
        otherwise replaces self.body with the encoded list and returns True.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)
        body = []
        for chunk in self.body:
            if isinstance(chunk, unicodestr):
                try:
                    chunk = chunk.encode(encoding, self.errors)
                except (LookupError, UnicodeError):
                    return False
            body.append(chunk)
        self.body = body
        return True

    def find_acceptable_charset(self):
        """Encode self.body with an acceptable charset and return its name.

        Candidates are tried in this order: the forced self.encoding (if
        set and permitted by Accept-Charset); otherwise default_encoding
        when no Accept-Charset header was sent; otherwise each
        Accept-Charset element with a positive qvalue, then ISO-8859-1 if
        neither it nor "*" was listed.

        Raises cherrypy.HTTPError 500 when no Accept-Charset was sent and
        default_encoding fails, and 406 when nothing else succeeds.
        """
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        if self.debug:
            cherrypy.log('response.stream %r' %
                         response.stream, 'TOOLS.ENCODE')
        if response.stream:
            encoder = self.encode_stream
        else:
            encoder = self.encode_string
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                # Encoded strings may be of different lengths from their
                # unicode equivalents, and even from each other. For example:
                # >>> t = u"\u7007\u3040"
                # >>> len(t)
                # 2
                # >>> len(t.encode("UTF-8"))
                # 6
                # >>> len(t.encode("utf7"))
                # 8
                del response.headers["Content-Length"]

        # Parse the Accept-Charset request header, and try to provide one
        # of the requested charsets (in order of user preference).
        encs = request.headers.elements('Accept-Charset')
        charsets = [enc.value.lower() for enc in encs]
        if self.debug:
            cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE')

        if self.encoding is not None:
            # If specified, force this encoding to be used, or fail.
            encoding = self.encoding.lower()
            if self.debug:
                cherrypy.log('Specified encoding %r' %
                             encoding, 'TOOLS.ENCODE')
            if (not charsets) or "*" in charsets or encoding in charsets:
                if self.debug:
                    cherrypy.log('Attempting encoding %r' %
                                 encoding, 'TOOLS.ENCODE')
                if encoder(encoding):
                    return encoding
        else:
            if not encs:
                if self.debug:
                    cherrypy.log('Attempting default encoding %r' %
                                 self.default_encoding, 'TOOLS.ENCODE')
                # Any character-set is acceptable.
                if encoder(self.default_encoding):
                    return self.default_encoding
                else:
                    raise cherrypy.HTTPError(500, self.failmsg %
                                             self.default_encoding)
            else:
                for element in encs:
                    if element.qvalue > 0:
                        if element.value == "*":
                            # Matches any charset. Try our default.
                            if self.debug:
                                cherrypy.log('Attempting default encoding due '
                                             'to %r' % element, 'TOOLS.ENCODE')
                            if encoder(self.default_encoding):
                                return self.default_encoding
                        else:
                            encoding = element.value
                            if self.debug:
                                cherrypy.log('Attempting encoding %s (qvalue >'
                                             '0)' % element, 'TOOLS.ENCODE')
                            if encoder(encoding):
                                return encoding

                if "*" not in charsets:
                    # If no "*" is present in an Accept-Charset field, then all
                    # character sets not explicitly mentioned get a quality
                    # value of 0, except for ISO-8859-1, which gets a quality
                    # value of 1 if not explicitly mentioned.
                    iso = 'iso-8859-1'
                    if iso not in charsets:
                        if self.debug:
                            cherrypy.log('Attempting ISO-8859-1 encoding',
                                         'TOOLS.ENCODE')
                        if encoder(iso):
                            return iso

        # No suitable encoding found.
        ac = request.headers.get('Accept-Charset')
        if ac is None:
            msg = "Your client did not send an Accept-Charset header."
        else:
            msg = "Your client sent this Accept-Charset header: %s." % ac
        _charsets = ", ".join(sorted(self.attempted_charsets))
        msg += " We tried these charsets: %s." % (_charsets,)
        raise cherrypy.HTTPError(406, msg)

    def __call__(self, *args, **kwargs):
        """Run the wrapped handler and return its (possibly encoded) body."""
        response = cherrypy.serving.response
        self.body = self.oldhandler(*args, **kwargs)

        if isinstance(self.body, basestring):
            # strings get wrapped in a list because iterating over a single
            # item list is much faster than iterating over every character
            # in a long string.
            if self.body:
                self.body = [self.body]
            else:
                # [''] doesn't evaluate to False, so replace it with [].
                self.body = []
        elif hasattr(self.body, 'read'):
            self.body = file_generator(self.body)
        elif self.body is None:
            self.body = []

        ct = response.headers.elements("Content-Type")
        if self.debug:
            cherrypy.log('Content-Type: %r' % [str(h)
                         for h in ct], 'TOOLS.ENCODE')
        if ct and self.add_charset:
            # Only the first Content-Type element is considered.
            ct = ct[0]
            if self.text_only:
                if ct.value.lower().startswith("text/"):
                    if self.debug:
                        cherrypy.log(
                            'Content-Type %s starts with "text/"' % ct,
                            'TOOLS.ENCODE')
                    do_find = True
                else:
                    if self.debug:
                        cherrypy.log('Not finding because Content-Type %s '
                                     'does not start with "text/"' % ct,
                                     'TOOLS.ENCODE')
                    do_find = False
            else:
                if self.debug:
                    cherrypy.log('Finding because not text_only',
                                 'TOOLS.ENCODE')
                do_find = True

            if do_find:
                # Set "charset=..." param on response Content-Type header
                ct.params['charset'] = self.find_acceptable_charset()
                if self.debug:
                    cherrypy.log('Setting Content-Type %s' % ct,
                                 'TOOLS.ENCODE')
                response.headers["Content-Type"] = str(ct)

        return self.body
262 263 # GZIP 264 265
def compress(body, compress_level):
    """Compress 'body' at the given compress_level.

    Generator: yields the gzip header fields, then one deflate chunk per
    item of 'body', then the flushed remainder and the CRC32/ISIZE trailer.
    """
    import zlib

    low_32_bits = 0xFFFFFFFF

    # Header layout: see http://www.gzip.org/zlib/rfc-gzip.html
    yield b'\x1f\x8b'  # ID1 and ID2: gzip marker
    yield b'\x08'      # CM: compression method
    yield b'\x00'      # FLG: none set
    # MTIME: 4 bytes
    yield struct.pack("<L", int(time.time()) & low_32_bits)
    yield b'\x02'      # XFL: max compression, slowest algo
    yield b'\xff'      # OS: unknown

    checksum = zlib.crc32(b"")
    total_length = 0
    # Negative window bits: raw deflate data, no zlib header or trailer.
    deflater = zlib.compressobj(compress_level,
                                zlib.DEFLATED, -zlib.MAX_WBITS,
                                zlib.DEF_MEM_LEVEL, 0)
    for piece in body:
        total_length += len(piece)
        checksum = zlib.crc32(piece, checksum)
        yield deflater.compress(piece)
    yield deflater.flush()

    # CRC32: 4 bytes
    yield struct.pack("<L", checksum & low_32_bits)
    # ISIZE: 4 bytes
    yield struct.pack("<L", total_length & low_32_bits)
294 295
def decompress(body):
    """Return the data obtained by gunzipping the byte string 'body'.

    Whatever gzip.GzipFile raises for malformed input propagates to the
    caller; the GzipFile is closed either way.
    """
    # Imported locally: the module-level name 'gzip' is the gzip() function
    # defined in this module, not the standard-library module.
    import gzip
    import io

    zfile = gzip.GzipFile(mode='rb', fileobj=io.BytesIO(body))
    try:
        return zfile.read()
    finally:
        # Close even when read() fails on corrupt data.
        zfile.close()
306 307
def _gzip_mime_type_matches(ct, mime_types):
    """Return True if content type 'ct' is covered by 'mime_types'.

    An entry covers 'ct' when it is equal to it, when it is 'type/*' with
    the same type, or when it is 'type/*+suffix' and 'ct' has the same type
    and ends in '+suffix'.  Parameters on 'ct' are not supported.
    """
    if ct in mime_types:
        return True
    if '/' not in ct:
        return False

    # maxsplit=1 so that stray extra separators cannot raise ValueError.
    ct_media_type, ct_sub_type = ct.split('/', 1)
    for mime_type in mime_types:
        if '/' not in mime_type:
            continue
        media_type, sub_type = mime_type.split('/', 1)
        if ct_media_type != media_type:
            continue
        if sub_type == '*':
            return True
        if '+' in sub_type and '+' in ct_sub_type:
            # The suffix is whatever follows the last '+', so subtypes such
            # as 'vnd.a+b+xml' are handled instead of failing to unpack.
            ct_right = ct_sub_type.rsplit('+', 1)[1]
            left, right = sub_type.rsplit('+', 1)
            if left == '*' and ct_right == right:
                return True
    return False


def gzip(compress_level=5, mime_types=['text/html', 'text/plain'],
         debug=False):
    """Try to gzip the response body if Content-Type in mime_types.

    cherrypy.response.headers['Content-Type'] must be set to one of the
    values in the mime_types arg before calling this function.

    The provided list of mime-types must be of one of the following form:
        * type/subtype
        * type/*
        * type/*+subtype

    No compression is performed if any of the following hold:
        * The client sends no Accept-Encoding request header
        * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header
        * No 'gzip' or 'x-gzip' with a qvalue > 0 is present
        * The 'identity' value is given with a qvalue > 0.

    If the Accept-Encoding header is present but is exhausted without
    reaching one of those outcomes, the response is replaced by a
    406 "identity, gzip" error.

    The 'mime_types' default is a shared list; it is only read here, never
    mutated.
    """
    request = cherrypy.serving.request
    response = cherrypy.serving.response

    set_vary_header(response, "Accept-Encoding")

    if not response.body:
        # Response body is empty (might be a 304 for instance)
        if debug:
            cherrypy.log('No response body', context='TOOLS.GZIP')
        return

    # If returning cached content (which should already have been gzipped),
    # don't re-zip.
    if getattr(request, "cached", False):
        if debug:
            cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP')
        return

    acceptable = request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        if debug:
            cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP')
        return

    # Drop any parameters, and the optional whitespace before ';', so that
    # 'text/html ; charset=utf-8' still compares equal to 'text/html'.
    ct = response.headers.get('Content-Type', '').split(';')[0].strip()
    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            if debug:
                cherrypy.log('Non-zero identity qvalue: %s' % coding,
                             context='TOOLS.GZIP')
            return
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                if debug:
                    cherrypy.log('Zero gzip qvalue: %s' % coding,
                                 context='TOOLS.GZIP')
                return

            # If the list of provided mime-types contains tokens
            # such as 'text/*' or 'application/*+xml', the helper goes
            # through them looking for one that covers the content-type.
            if not _gzip_mime_type_matches(ct, mime_types):
                if debug:
                    cherrypy.log('Content-Type %s not in mime_types %r' %
                                 (ct, mime_types), context='TOOLS.GZIP')
                return

            if debug:
                cherrypy.log('Gzipping', context='TOOLS.GZIP')
            # Return a generator that compresses the page
            response.headers['Content-Encoding'] = 'gzip'
            response.body = compress(response.body, compress_level)
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                del response.headers["Content-Length"]

            return

    # NOTE(review): this log call uses context 'GZIP' while every other
    # call in this function uses 'TOOLS.GZIP'; left as-is in case log
    # consumers rely on it.
    if debug:
        cherrypy.log('No acceptable encoding found.', context='GZIP')
    cherrypy.HTTPError(406, "identity, gzip").set_response()
417