pion  5.0.6
parser.hpp
1 // ---------------------------------------------------------------------
2 // pion: a Boost C++ framework for building lightweight HTTP interfaces
3 // ---------------------------------------------------------------------
4 // Copyright (C) 2007-2014 Splunk Inc. (https://github.com/splunk/pion)
5 //
6 // Distributed under the Boost Software License, Version 1.0.
7 // See http://www.boost.org/LICENSE_1_0.txt
8 //
9 
10 #ifndef __PION_HTTP_PARSER_HEADER__
11 #define __PION_HTTP_PARSER_HEADER__
12 
13 #include <string>
14 #include <boost/noncopyable.hpp>
15 #include <boost/function/function2.hpp>
16 #include <boost/logic/tribool.hpp>
17 #include <boost/system/error_code.hpp>
18 #include <boost/thread/once.hpp>
19 #include <pion/config.hpp>
20 #include <pion/logger.hpp>
21 #include <pion/http/message.hpp>
22 
23 #ifndef BOOST_SYSTEM_NOEXCEPT
24  #define BOOST_SYSTEM_NOEXCEPT BOOST_NOEXCEPT
25 #endif
26 
27 
28 namespace pion { // begin namespace pion
29 namespace http { // begin namespace http
30 
31 
32 // forward declarations used for finishing HTTP messages
33 class request;
34 class response;
35 
39 class PION_API parser :
40  private boost::noncopyable
41 {
42 
43 public:
44 
46  static const std::size_t DEFAULT_CONTENT_MAX;
47 
49  typedef boost::function2<void, const char *, std::size_t> payload_handler_t;
50 
53  ERROR_METHOD_CHAR = 1,
54  ERROR_METHOD_SIZE,
55  ERROR_URI_CHAR,
56  ERROR_URI_SIZE,
57  ERROR_QUERY_CHAR,
58  ERROR_QUERY_SIZE,
59  ERROR_VERSION_EMPTY,
60  ERROR_VERSION_CHAR,
61  ERROR_STATUS_EMPTY,
62  ERROR_STATUS_CHAR,
63  ERROR_HEADER_CHAR,
64  ERROR_HEADER_NAME_SIZE,
65  ERROR_HEADER_VALUE_SIZE,
66  ERROR_INVALID_CONTENT_LENGTH,
67  ERROR_CHUNK_CHAR,
68  ERROR_MISSING_CHUNK_DATA,
69  ERROR_MISSING_HEADER_DATA,
70  ERROR_MISSING_TOO_MUCH_CONTENT,
71  };
72 
75  : public boost::system::error_category
76  {
77  public:
78  const char *name() const BOOST_SYSTEM_NOEXCEPT { return "parser"; }
79  std::string message(int ev) const {
80  switch (ev) {
81  case ERROR_METHOD_CHAR:
82  return "invalid method character";
83  case ERROR_METHOD_SIZE:
84  return "method exceeds maximum size";
85  case ERROR_URI_CHAR:
86  return "invalid URI character";
87  case ERROR_URI_SIZE:
88  return "method exceeds maximum size";
89  case ERROR_QUERY_CHAR:
90  return "invalid query string character";
91  case ERROR_QUERY_SIZE:
92  return "query string exceeds maximum size";
93  case ERROR_VERSION_EMPTY:
94  return "HTTP version undefined";
95  case ERROR_VERSION_CHAR:
96  return "invalid version character";
97  case ERROR_STATUS_EMPTY:
98  return "HTTP status undefined";
99  case ERROR_STATUS_CHAR:
100  return "invalid status character";
101  case ERROR_HEADER_CHAR:
102  return "invalid header character";
103  case ERROR_HEADER_NAME_SIZE:
104  return "header name exceeds maximum size";
105  case ERROR_HEADER_VALUE_SIZE:
106  return "header value exceeds maximum size";
107  case ERROR_INVALID_CONTENT_LENGTH:
108  return "invalid Content-Length header";
109  case ERROR_CHUNK_CHAR:
110  return "invalid chunk character";
111  case ERROR_MISSING_HEADER_DATA:
112  return "missing header data";
113  case ERROR_MISSING_CHUNK_DATA:
114  return "missing chunk data";
115  case ERROR_MISSING_TOO_MUCH_CONTENT:
116  return "missing too much content";
117  }
118  return "parser error";
119  }
120  };
121 
129  parser(const bool is_request, std::size_t max_content_length = DEFAULT_CONTENT_MAX)
130  : m_logger(PION_GET_LOGGER("pion.http.parser")), m_is_request(is_request),
131  m_read_ptr(NULL), m_read_end_ptr(NULL), m_message_parse_state(PARSE_START),
132  m_headers_parse_state(is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H),
133  m_chunked_content_parse_state(PARSE_CHUNK_SIZE_START), m_status_code(0),
134  m_bytes_content_remaining(0), m_bytes_content_read(0),
135  m_bytes_last_read(0), m_bytes_total_read(0),
136  m_max_content_length(max_content_length),
137  m_parse_headers_only(false), m_save_raw_headers(false)
138  {}
139 
141  virtual ~parser() {}
142 
154  boost::tribool parse(http::message& http_msg, boost::system::error_code& ec);
155 
168  boost::tribool parse_missing_data(http::message& http_msg, std::size_t len,
169  boost::system::error_code& ec);
170 
176  void finish(http::message& http_msg) const;
177 
184  inline void set_read_buffer(const char *ptr, size_t len) {
185  m_read_ptr = ptr;
186  m_read_end_ptr = ptr + len;
187  }
188 
195  inline void load_read_pos(const char *&read_ptr, const char *&read_end_ptr) const {
196  read_ptr = m_read_ptr;
197  read_end_ptr = m_read_end_ptr;
198  }
199 
208  inline bool check_premature_eof(http::message& http_msg) {
209  if (m_message_parse_state != PARSE_CONTENT_NO_LENGTH)
210  return true;
211  m_message_parse_state = PARSE_END;
212  http_msg.concatenate_chunks();
213  finish(http_msg);
214  return false;
215  }
216 
222  inline void parse_headers_only(bool b = true) { m_parse_headers_only = b; }
223 
229  inline void skip_header_parsing(http::message& http_msg) {
230  boost::system::error_code ec;
231  finish_header_parsing(http_msg, ec);
232  }
233 
235  inline void reset(void) {
236  m_message_parse_state = PARSE_START;
237  m_headers_parse_state = (m_is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H);
238  m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
239  m_status_code = 0;
240  m_status_message.erase();
241  m_method.erase();
242  m_resource.erase();
243  m_query_string.erase();
244  m_raw_headers.erase();
245  m_bytes_content_read = m_bytes_last_read = m_bytes_total_read = 0;
246  }
247 
249  inline bool eof(void) const { return m_read_ptr == NULL || m_read_ptr >= m_read_end_ptr; }
250 
252  inline std::size_t bytes_available(void) const { return (eof() ? 0 : (std::size_t)(m_read_end_ptr - m_read_ptr)); }
253 
255  inline std::size_t gcount(void) const { return m_bytes_last_read; }
256 
258  inline std::size_t get_total_bytes_read(void) const { return m_bytes_total_read; }
259 
261  inline std::size_t get_content_bytes_read(void) const { return m_bytes_content_read; }
262 
264  inline std::size_t get_max_content_length(void) const { return m_max_content_length; }
265 
267  inline const std::string& get_raw_headers(void) const { return m_raw_headers; }
268 
270  inline bool get_save_raw_headers(void) const { return m_save_raw_headers; }
271 
273  inline bool get_parse_headers_only(void) { return m_parse_headers_only; }
274 
276  inline bool is_parsing_request(void) const { return m_is_request; }
277 
279  inline bool is_parsing_response(void) const { return ! m_is_request; }
280 
282  inline void set_payload_handler(payload_handler_t& h) { m_payload_handler = h; }
283 
285  inline void set_max_content_length(std::size_t n) { m_max_content_length = n; }
286 
288  inline void reset_max_content_length(void) { m_max_content_length = DEFAULT_CONTENT_MAX; }
289 
291  inline void set_save_raw_headers(bool b) { m_save_raw_headers = b; }
292 
294  inline void set_logger(logger log_ptr) { m_logger = log_ptr; }
295 
297  inline logger get_logger(void) { return m_logger; }
298 
299 
312  static bool parse_uri(const std::string& uri, std::string& proto,
313  std::string& host, boost::uint16_t& port, std::string& path,
314  std::string& query);
315 
326  static bool parse_url_encoded(ihash_multimap& dict,
327  const char *ptr, const std::size_t len);
328 
340  static bool parse_multipart_form_data(ihash_multimap& dict,
341  const std::string& content_type,
342  const char *ptr, const std::size_t len);
343 
355  static bool parse_cookie_header(ihash_multimap& dict,
356  const char *ptr, const std::size_t len,
357  bool set_cookie_header);
358 
369  static inline bool parse_cookie_header(ihash_multimap& dict,
370  const std::string& cookie_header, bool set_cookie_header)
371  {
372  return parse_cookie_header(dict, cookie_header.c_str(), cookie_header.size(), set_cookie_header);
373  }
374 
384  static inline bool parse_url_encoded(ihash_multimap& dict,
385  const std::string& query)
386  {
387  return parse_url_encoded(dict, query.c_str(), query.size());
388  }
389 
400  static inline bool parse_multipart_form_data(ihash_multimap& dict,
401  const std::string& content_type,
402  const std::string& form_data)
403  {
404  return parse_multipart_form_data(dict, content_type, form_data.c_str(), form_data.size());
405  }
406 
419  boost::tribool finish_header_parsing(http::message& http_msg,
420  boost::system::error_code& ec);
421 
431  static bool parse_forwarded_for(const std::string& header, std::string& public_ip);
432 
434  static inline error_category_t& get_error_category(void) {
435  boost::call_once(parser::create_error_category, m_instance_flag);
436  return *m_error_category_ptr;
437  }
438 
439 
440 protected:
441 
443  virtual void finished_parsing_headers(const boost::system::error_code& ec) {}
444 
457  boost::tribool parse_headers(http::message& http_msg, boost::system::error_code& ec);
458 
464  void update_message_with_header_data(http::message& http_msg) const;
465 
477  boost::tribool parse_chunks(http::message::chunk_cache_t& chunk_buffers,
478  boost::system::error_code& ec);
479 
491  boost::tribool consume_content(http::message& http_msg,
492  boost::system::error_code& ec);
493 
501  std::size_t consume_content_as_next_chunk(http::message::chunk_cache_t& chunk_buffers);
502 
508  static void compute_msg_status(http::message& http_msg, bool msg_parsed_ok);
509 
516  static inline void set_error(boost::system::error_code& ec, error_value_t ev) {
517  ec = boost::system::error_code(static_cast<int>(ev), get_error_category());
518  }
519 
521  static void create_error_category(void);
522 
523 
524  // misc functions used by the parsing functions
525  inline static bool is_char(int c);
526  inline static bool is_control(int c);
527  inline static bool is_special(int c);
528  inline static bool is_digit(int c);
529  inline static bool is_hex_digit(int c);
530  inline static bool is_cookie_attribute(const std::string& name, bool set_cookie_header);
531 
532 
534  static const boost::uint32_t STATUS_MESSAGE_MAX;
535 
537  static const boost::uint32_t METHOD_MAX;
538 
540  static const boost::uint32_t RESOURCE_MAX;
541 
543  static const boost::uint32_t QUERY_STRING_MAX;
544 
546  static const boost::uint32_t HEADER_NAME_MAX;
547 
549  static const boost::uint32_t HEADER_VALUE_MAX;
550 
552  static const boost::uint32_t QUERY_NAME_MAX;
553 
555  static const boost::uint32_t QUERY_VALUE_MAX;
556 
558  static const boost::uint32_t COOKIE_NAME_MAX;
559 
561  static const boost::uint32_t COOKIE_VALUE_MAX;
562 
563 
565  mutable logger m_logger;
566 
568  const bool m_is_request;
569 
571  const char * m_read_ptr;
572 
574  const char * m_read_end_ptr;
575 
576 
577 private:
578 
580  enum message_parse_state_t {
581  PARSE_START, PARSE_HEADERS, PARSE_FOOTERS, PARSE_CONTENT,
582  PARSE_CONTENT_NO_LENGTH, PARSE_CHUNKS, PARSE_END
583  };
584 
587  enum header_parse_state_t {
588  PARSE_METHOD_START, PARSE_METHOD, PARSE_URI_STEM, PARSE_URI_QUERY,
589  PARSE_HTTP_VERSION_H, PARSE_HTTP_VERSION_T_1, PARSE_HTTP_VERSION_T_2,
590  PARSE_HTTP_VERSION_P, PARSE_HTTP_VERSION_SLASH,
591  PARSE_HTTP_VERSION_MAJOR_START, PARSE_HTTP_VERSION_MAJOR,
592  PARSE_HTTP_VERSION_MINOR_START, PARSE_HTTP_VERSION_MINOR,
593  PARSE_STATUS_CODE_START, PARSE_STATUS_CODE, PARSE_STATUS_MESSAGE,
594  PARSE_EXPECTING_NEWLINE, PARSE_EXPECTING_CR,
595  PARSE_HEADER_WHITESPACE, PARSE_HEADER_START, PARSE_HEADER_NAME,
596  PARSE_SPACE_BEFORE_HEADER_VALUE, PARSE_HEADER_VALUE,
597  PARSE_EXPECTING_FINAL_NEWLINE, PARSE_EXPECTING_FINAL_CR
598  };
599 
602  enum chunk_parse_state_t {
603  PARSE_CHUNK_SIZE_START, PARSE_CHUNK_SIZE,
604  PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE,
605  PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE,
606  PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE, PARSE_CHUNK,
607  PARSE_EXPECTING_CR_AFTER_CHUNK, PARSE_EXPECTING_LF_AFTER_CHUNK,
608  PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK,
609  PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK
610  };
611 
612 
614  message_parse_state_t m_message_parse_state;
615 
617  header_parse_state_t m_headers_parse_state;
618 
620  chunk_parse_state_t m_chunked_content_parse_state;
621 
623  payload_handler_t m_payload_handler;
624 
626  boost::uint16_t m_status_code;
627 
629  std::string m_status_message;
630 
632  std::string m_method;
633 
635  std::string m_resource;
636 
638  std::string m_query_string;
639 
641  std::string m_raw_headers;
642 
644  std::string m_header_name;
645 
647  std::string m_header_value;
648 
650  std::string m_chunk_size_str;
651 
653  std::size_t m_size_of_current_chunk;
654 
656  std::size_t m_bytes_read_in_current_chunk;
657 
659  std::size_t m_bytes_content_remaining;
660 
662  std::size_t m_bytes_content_read;
663 
665  std::size_t m_bytes_last_read;
666 
668  std::size_t m_bytes_total_read;
669 
671  std::size_t m_max_content_length;
672 
674  bool m_parse_headers_only;
675 
677  bool m_save_raw_headers;
678 
680  static error_category_t * m_error_category_ptr;
681 
683  static boost::once_flag m_instance_flag;
684 };
685 
686 
687 // inline functions for parser
688 
689 inline bool parser::is_char(int c)
690 {
691  return(c >= 0 && c <= 127);
692 }
693 
694 inline bool parser::is_control(int c)
695 {
696  return( (c >= 0 && c <= 31) || c == 127);
697 }
698 
699 inline bool parser::is_special(int c)
700 {
701  switch (c) {
702  case '(': case ')': case '<': case '>': case '@':
703  case ',': case ';': case ':': case '\\': case '"':
704  case '/': case '[': case ']': case '?': case '=':
705  case '{': case '}': case ' ': case '\t':
706  return true;
707  default:
708  return false;
709  }
710 }
711 
712 inline bool parser::is_digit(int c)
713 {
714  return(c >= '0' && c <= '9');
715 }
716 
717 inline bool parser::is_hex_digit(int c)
718 {
719  return((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
720 }
721 
722 inline bool parser::is_cookie_attribute(const std::string& name, bool set_cookie_header)
723 {
724  return (name.empty() || name[0] == '$' || (set_cookie_header &&
725  (
726  // This is needed because of a very lenient determination in parse_cookie_header() of what
727  // qualifies as a cookie-pair in a Set-Cookie header.
728  // According to RFC 6265, everything after the first semicolon is a cookie attribute, but RFC 2109,
729  // which is obsolete, allowed multiple comma separated cookies.
730  // parse_cookie_header() is very conservatively assuming that any <name>=<value> pair in a
731  // Set-Cookie header is a cookie-pair unless <name> is a known cookie attribute.
732  boost::algorithm::iequals(name, "Comment")
733  || boost::algorithm::iequals(name, "Domain")
734  || boost::algorithm::iequals(name, "Max-Age")
735  || boost::algorithm::iequals(name, "Path")
736  || boost::algorithm::iequals(name, "Secure")
737  || boost::algorithm::iequals(name, "Version")
738  || boost::algorithm::iequals(name, "Expires")
739  || boost::algorithm::iequals(name, "HttpOnly")
740  )
741  ));
742 }
743 
744 } // end namespace http
745 } // end namespace pion
746 
747 #endif
static const boost::uint32_t COOKIE_NAME_MAX
maximum length for the name of a cookie
Definition: parser.hpp:558
static bool parse_url_encoded(ihash_multimap &dict, const std::string &query)
Definition: parser.hpp:384
static void create_error_category(void)
creates the unique parser error_category_t
static const std::size_t DEFAULT_CONTENT_MAX
maximum length for HTTP payload content
Definition: parser.hpp:46
bool is_parsing_response(void) const
returns true if the parser is being used to parse an HTTP response
Definition: parser.hpp:279
static const boost::uint32_t QUERY_NAME_MAX
maximum length for the name of a query string variable
Definition: parser.hpp:552
static const boost::uint32_t HEADER_VALUE_MAX
maximum length for an HTTP header value
Definition: parser.hpp:549
const char * m_read_end_ptr
points to the end of the read_buffer (last byte + 1)
Definition: parser.hpp:574
const std::string & get_raw_headers(void) const
returns the raw HTTP headers saved by the parser
Definition: parser.hpp:267
void set_save_raw_headers(bool b)
sets parameter for saving raw HTTP header content
Definition: parser.hpp:291
static const boost::uint32_t STATUS_MESSAGE_MAX
maximum length for response status message
Definition: parser.hpp:534
static const boost::uint32_t RESOURCE_MAX
maximum length for the resource requested
Definition: parser.hpp:540
logger m_logger
primary logging interface used by this class
Definition: parser.hpp:565
class-specific error category
Definition: parser.hpp:74
std::size_t get_content_bytes_read(void) const
returns the total number of bytes read while parsing the payload content
Definition: parser.hpp:261
void set_max_content_length(std::size_t n)
sets the maximum length for HTTP payload content
Definition: parser.hpp:285
parser(const bool is_request, std::size_t max_content_length=DEFAULT_CONTENT_MAX)
Definition: parser.hpp:129
static void set_error(boost::system::error_code &ec, error_value_t ev)
Definition: parser.hpp:516
void concatenate_chunks(void)
bool eof(void) const
returns true if there are no more bytes available in the read buffer
Definition: parser.hpp:249
error_value_t
class-specific error code values
Definition: parser.hpp:52
static const boost::uint32_t QUERY_STRING_MAX
maximum length for the query string
Definition: parser.hpp:543
void reset_max_content_length(void)
resets the maximum length for HTTP payload content to the default value
Definition: parser.hpp:288
std::vector< char > chunk_cache_t
used to cache chunked data
Definition: message.hpp:64
std::size_t bytes_available(void) const
returns the number of bytes available in the read buffer
Definition: parser.hpp:252
void skip_header_parsing(http::message &http_msg)
Definition: parser.hpp:229
const bool m_is_request
true if the message is an HTTP request; false if it is an HTTP response
Definition: parser.hpp:568
bool get_parse_headers_only(void)
returns true if parsing headers only
Definition: parser.hpp:273
static bool parse_cookie_header(ihash_multimap &dict, const std::string &cookie_header, bool set_cookie_header)
Definition: parser.hpp:369
std::size_t get_max_content_length(void) const
returns the maximum length for HTTP payload content
Definition: parser.hpp:264
virtual void finished_parsing_headers(const boost::system::error_code &ec)
Called after we have finished parsing the HTTP message headers.
Definition: parser.hpp:443
static const boost::uint32_t COOKIE_VALUE_MAX
maximum length for the value of a cookie; also used for path and domain
Definition: parser.hpp:561
std::size_t gcount(void) const
returns the number of bytes read during the last parse operation
Definition: parser.hpp:255
bool get_save_raw_headers(void) const
returns true if the parser is saving raw HTTP header contents
Definition: parser.hpp:270
bool is_parsing_request(void) const
returns true if the parser is being used to parse an HTTP request
Definition: parser.hpp:276
const char * m_read_ptr
points to the next character to be consumed in the read_buffer
Definition: parser.hpp:571
static error_category_t & get_error_category(void)
returns an instance of parser::error_category_t
Definition: parser.hpp:434
std::size_t get_total_bytes_read(void) const
returns the total number of bytes read while parsing the HTTP message
Definition: parser.hpp:258
static bool parse_multipart_form_data(ihash_multimap &dict, const std::string &content_type, const std::string &form_data)
Definition: parser.hpp:400
logger get_logger(void)
returns the logger currently in use
Definition: parser.hpp:297
void load_read_pos(const char *&read_ptr, const char *&read_end_ptr) const
Definition: parser.hpp:195
static const boost::uint32_t HEADER_NAME_MAX
maximum length for an HTTP header name
Definition: parser.hpp:546
void set_payload_handler(payload_handler_t &h)
defines a callback function to be used for consuming payload content
Definition: parser.hpp:282
static const boost::uint32_t QUERY_VALUE_MAX
maximum length for the value of a query string variable
Definition: parser.hpp:555
boost::function2< void, const char *, std::size_t > payload_handler_t
callback type used to consume payload content
Definition: parser.hpp:49
void set_logger(logger log_ptr)
sets the logger to be used
Definition: parser.hpp:294
virtual ~parser()
default destructor
Definition: parser.hpp:141
bool check_premature_eof(http::message &http_msg)
Definition: parser.hpp:208
void parse_headers_only(bool b=true)
Definition: parser.hpp:222
static const boost::uint32_t METHOD_MAX
maximum length for the request method
Definition: parser.hpp:537
void reset(void)
resets the parser to its initial state
Definition: parser.hpp:235
void set_read_buffer(const char *ptr, size_t len)
Definition: parser.hpp:184