WebSocket++  0.8.1
C++ websocket client/server library
parser.hpp
1 /*
2  * Copyright (c) 2014, Peter Thorson. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright
7  * notice, this list of conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright
9  * notice, this list of conditions and the following disclaimer in the
10  * documentation and/or other materials provided with the distribution.
11  * * Neither the name of the WebSocket++ Project nor the
12  * names of its contributors may be used to endorse or promote products
13  * derived from this software without specific prior written permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL PETER THORSON BE LIABLE FOR ANY
19  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  */
27 
28 #ifndef HTTP_PARSER_HPP
29 #define HTTP_PARSER_HPP
30 
31 #include <algorithm>
32 #include <map>
33 #include <string>
34 #include <utility>
35 
36 #include <websocketpp/utilities.hpp>
37 #include <websocketpp/http/constants.hpp>
38 
39 namespace websocketpp {
40 namespace http {
41 namespace parser {
42 
/// Parser state identifiers
/**
 * NOTE(review): these values are not referenced anywhere else in this header;
 * presumably they name the stage of an HTTP start line / header block that a
 * derived parser is currently reading - confirm against the users.
 */
namespace state {
    enum value {
        method,
        resource,
        version,
        headers
    };
}
51 
/// Message body transfer encodings
/**
 * `unknown` is the value a freshly constructed `parser` starts with.
 */
namespace body_encoding {
    enum value {
        unknown,
        plain,
        chunked
    };
}
59 
60 typedef std::map<std::string, std::string, utility::ci_less > header_list;
61 
62 /// Read and return the next token in the stream
63 /**
64  * Read until a non-token character is found and then return the token and
65  * iterator to the next character to read
66  *
67  * @param begin An iterator to the beginning of the sequence
68  * @param end An iterator to the end of the sequence
69  * @return A pair containing the token and an iterator to the next character in
70  * the stream
71  */
72 template <typename InputIterator>
73 std::pair<std::string,InputIterator> extract_token(InputIterator begin,
74  InputIterator end)
75 {
76  InputIterator it = std::find_if(begin,end,&is_not_token_char);
77  return std::make_pair(std::string(begin,it),it);
78 }
79 
/// Read and return the next quoted string in the stream
/**
 * Read a double quoted string starting at `begin`. The surrounding quotes are
 * stripped and backslash escapes (quoted-pairs) are decoded: a backslash
 * followed by any character yields that character. In particular `\"` yields
 * a literal quote and `\\` yields a literal backslash, so a value that ends in
 * an escaped backslash is terminated correctly by the quote that follows it.
 *
 * If the input is empty, does not start with a double quote, or has no
 * closing quote, an empty string is returned together with `begin`. Callers
 * detect failure by comparing the returned iterator with `begin`.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return A pair containing the string read and an iterator to the next
 * character in the stream
 */
template <typename InputIterator>
std::pair<std::string,InputIterator> extract_quoted_string(InputIterator begin,
    InputIterator end)
{
    std::string s;

    // A quoted string must open with a double quote
    if (begin == end || *begin != '"') {
        return std::make_pair(s,begin);
    }

    InputIterator cursor = begin;
    ++cursor;

    while (cursor != end) {
        if (*cursor == '\\') {
            // quoted-pair: the character after the backslash is taken
            // literally, whatever it is
            ++cursor;
            if (cursor == end) {
                // dangling backslash, the string was never closed
                break;
            }
            s.append(1,*cursor);
        } else if (*cursor == '"') {
            // unescaped quote closes the string
            ++cursor;
            return std::make_pair(s,cursor);
        } else {
            s.append(1,*cursor);
        }

        ++cursor;
    }

    // ran out of input before finding the closing quote
    return std::make_pair(std::string(),begin);
}
128 
129 /// Read and discard one unit of linear whitespace
130 /**
131  * Read one unit of linear white space and return the iterator to the character
132  * afterwards. If `begin` is returned, no whitespace was extracted.
133  *
134  * @param begin An iterator to the beginning of the sequence
135  * @param end An iterator to the end of the sequence
136  * @return An iterator to the character after the linear whitespace read
137  */
138 template <typename InputIterator>
139 InputIterator extract_lws(InputIterator begin, InputIterator end) {
140  InputIterator it = begin;
141 
142  // strip leading CRLF
143  if (end-begin > 2 && *begin == '\r' && *(begin+1) == '\n' &&
144  is_whitespace_char(static_cast<unsigned char>(*(begin+2))))
145  {
146  it+=3;
147  }
148 
149  it = std::find_if(it,end,&is_not_whitespace_char);
150  return it;
151 }
152 
/// Read and discard linear whitespace
/**
 * Applies `extract_lws` repeatedly until it either reaches `end` or stops
 * making progress, then returns the position reached. If `begin` is returned,
 * no whitespace was extracted.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return An iterator to the character after the linear whitespace read
 */
template <typename InputIterator>
InputIterator extract_all_lws(InputIterator begin, InputIterator end) {
    InputIterator position = begin;

    for (;;) {
        InputIterator advanced = extract_lws(position,end);

        // Done when the input is exhausted or the last pass consumed nothing
        if (advanced == end || advanced == position) {
            return advanced;
        }

        position = advanced;
    }
}
178 
179 /// Extract HTTP attributes
180 /**
181  * An http attributes list is a semicolon delimited list of key value pairs in
182  * the format: *( ";" attribute "=" value ) where attribute is a token and value
183  * is a token or quoted string.
184  *
185  * Attributes extracted are appended to the supplied attributes list
186  * `attributes`.
187  *
188  * @param [in] begin An iterator to the beginning of the sequence
189  * @param [in] end An iterator to the end of the sequence
190  * @param [out] attributes A reference to the attributes list to append
191  * attribute/value pairs extracted to
192  * @return An iterator to the character after the last atribute read
193  */
194 template <typename InputIterator>
195 InputIterator extract_attributes(InputIterator begin, InputIterator end,
196  attribute_list & attributes)
197 {
198  InputIterator cursor;
199  bool first = true;
200 
201  if (begin == end) {
202  return begin;
203  }
204 
205  cursor = begin;
206  std::pair<std::string,InputIterator> ret;
207 
208  while (cursor != end) {
209  std::string name;
210 
211  cursor = http::parser::extract_all_lws(cursor,end);
212  if (cursor == end) {
213  break;
214  }
215 
216  if (first) {
217  // ignore this check for the very first pass
218  first = false;
219  } else {
220  if (*cursor == ';') {
221  // advance past the ';'
222  ++cursor;
223  } else {
224  // non-semicolon in this position indicates end end of the
225  // attribute list, break and return.
226  break;
227  }
228  }
229 
230  cursor = http::parser::extract_all_lws(cursor,end);
231  ret = http::parser::extract_token(cursor,end);
232 
233  if (ret.first.empty()) {
234  // error: expected a token
235  return begin;
236  } else {
237  name = ret.first;
238  cursor = ret.second;
239  }
240 
241  cursor = http::parser::extract_all_lws(cursor,end);
242  if (cursor == end || *cursor != '=') {
243  // if there is an equals sign, read the attribute value. Otherwise
244  // record a blank value and continue
245  attributes[name].clear();
246  continue;
247  }
248 
249  // advance past the '='
250  ++cursor;
251 
252  cursor = http::parser::extract_all_lws(cursor,end);
253  if (cursor == end) {
254  // error: expected a token or quoted string
255  return begin;
256  }
257 
258  ret = http::parser::extract_quoted_string(cursor,end);
259  if (ret.second != cursor) {
260  attributes[name] = ret.first;
261  cursor = ret.second;
262  continue;
263  }
264 
265  ret = http::parser::extract_token(cursor,end);
266  if (ret.first.empty()) {
267  // error : expected token or quoted string
268  return begin;
269  } else {
270  attributes[name] = ret.first;
271  cursor = ret.second;
272  }
273  }
274 
275  return cursor;
276 }
277 
278 /// Extract HTTP parameters
279 /**
280  * An http parameters list is a comma delimited list of tokens followed by
281  * optional semicolon delimited attributes lists.
282  *
283  * Parameters extracted are appended to the supplied parameters list
284  * `parameters`.
285  *
286  * @param [in] begin An iterator to the beginning of the sequence
287  * @param [in] end An iterator to the end of the sequence
288  * @param [out] parameters A reference to the parameters list to append
289  * paramter values extracted to
290  * @return An iterator to the character after the last parameter read
291  */
292 template <typename InputIterator>
293 InputIterator extract_parameters(InputIterator begin, InputIterator end,
294  parameter_list &parameters)
295 {
296  InputIterator cursor;
297 
298  if (begin == end) {
299  // error: expected non-zero length range
300  return begin;
301  }
302 
303  cursor = begin;
304  std::pair<std::string,InputIterator> ret;
305 
306  /**
307  * LWS
308  * token
309  * LWS
310  * *(";" method-param)
311  * LWS
312  * ,=loop again
313  */
314  while (cursor != end) {
315  std::string parameter_name;
316  attribute_list attributes;
317 
318  // extract any stray whitespace
319  cursor = http::parser::extract_all_lws(cursor,end);
320  if (cursor == end) {break;}
321 
322  ret = http::parser::extract_token(cursor,end);
323 
324  if (ret.first.empty()) {
325  // error: expected a token
326  return begin;
327  } else {
328  parameter_name = ret.first;
329  cursor = ret.second;
330  }
331 
332  // Safe break point, insert parameter with blank attributes and exit
333  cursor = http::parser::extract_all_lws(cursor,end);
334  if (cursor == end) {
335  //parameters[parameter_name] = attributes;
336  parameters.push_back(std::make_pair(parameter_name,attributes));
337  break;
338  }
339 
340  // If there is an attribute list, read it in
341  if (*cursor == ';') {
342  InputIterator acursor;
343 
344  ++cursor;
345  acursor = http::parser::extract_attributes(cursor,end,attributes);
346 
347  if (acursor == cursor) {
348  // attribute extraction ended in syntax error
349  return begin;
350  }
351 
352  cursor = acursor;
353  }
354 
355  // insert parameter into output list
356  //parameters[parameter_name] = attributes;
357  parameters.push_back(std::make_pair(parameter_name,attributes));
358 
359  cursor = http::parser::extract_all_lws(cursor,end);
360  if (cursor == end) {break;}
361 
362  // if next char is ',' then read another parameter, else stop
363  if (*cursor != ',') {
364  break;
365  }
366 
367  // advance past comma
368  ++cursor;
369 
370  if (cursor == end) {
371  // expected more bytes after a comma
372  return begin;
373  }
374  }
375 
376  return cursor;
377 }
378 
379 inline std::string strip_lws(std::string const & input) {
380  std::string::const_iterator begin = extract_all_lws(input.begin(),input.end());
381  if (begin == input.end()) {
382  return std::string();
383  }
384 
385  std::string::const_reverse_iterator rbegin = extract_all_lws(input.rbegin(),input.rend());
386  if (rbegin == input.rend()) {
387  return std::string();
388  }
389 
390  return std::string(begin,rbegin.base());
391 }
392 
393 /// Base HTTP parser
394 /**
395  * Includes methods and data elements common to all types of HTTP messages such
396  * as headers, versions, bodies, etc.
397  */
398 class parser {
399 public:
400  parser()
401  : m_header_bytes(0)
402  , m_body_bytes_needed(0)
403  , m_body_bytes_max(max_body_size)
404  , m_body_encoding(body_encoding::unknown) {}
405 
406  /// Get the HTTP version string
407  /**
408  * @return The version string for this parser
409  */
410  std::string const & get_version() const {
411  return m_version;
412  }
413 
414  /// Set HTTP parser Version
415  /**
416  * Input should be in format: HTTP/x.y where x and y are positive integers.
417  * @todo Does this method need any validation?
418  *
419  * @param [in] version The value to set the HTTP version to.
420  */
421  void set_version(std::string const & version);
422 
423  /// Get the value of an HTTP header
424  /**
425  * @todo Make this method case insensitive.
426  *
427  * @param [in] key The name/key of the header to get.
428  * @return The value associated with the given HTTP header key.
429  */
430  std::string const & get_header(std::string const & key) const;
431 
432  /// Extract an HTTP parameter list from a parser header.
433  /**
434  * If the header requested doesn't exist or exists and is empty the
435  * parameter list is valid (but empty).
436  *
437  * @param [in] key The name/key of the HTTP header to use as input.
438  * @param [out] out The parameter list to store extracted parameters in.
439  * @return Whether or not the input was a valid parameter list.
440  */
441  bool get_header_as_plist(std::string const & key, parameter_list & out)
442  const;
443 
444  /// Return a list of all HTTP headers
445  /**
446  * Return a list of all HTTP headers
447  *
448  * @since 0.8.0
449  *
450  * @return A list of all HTTP headers
451  */
452  header_list const & get_headers() const;
453 
454  /// Append a value to an existing HTTP header
455  /**
456  * This method will set the value of the HTTP header `key` with the
457  * indicated value. If a header with the name `key` already exists, `val`
458  * will be appended to the existing value.
459  *
460  * @todo Make this method case insensitive.
461  * @todo Should there be any restrictions on which keys are allowed?
462  * @todo Exception free varient
463  *
464  * @see replace_header
465  *
466  * @param [in] key The name/key of the header to append to.
467  * @param [in] val The value to append.
468  */
469  void append_header(std::string const & key, std::string const & val);
470 
471  /// Set a value for an HTTP header, replacing an existing value
472  /**
473  * This method will set the value of the HTTP header `key` with the
474  * indicated value. If a header with the name `key` already exists, `val`
475  * will replace the existing value.
476  *
477  * @todo Make this method case insensitive.
478  * @todo Should there be any restrictions on which keys are allowed?
479  * @todo Exception free varient
480  *
481  * @see append_header
482  *
483  * @param [in] key The name/key of the header to append to.
484  * @param [in] val The value to append.
485  */
486  void replace_header(std::string const & key, std::string const & val);
487 
488  /// Remove a header from the parser
489  /**
490  * Removes the header entirely from the parser. This is different than
491  * setting the value of the header to blank.
492  *
493  * @todo Make this method case insensitive.
494  *
495  * @param [in] key The name/key of the header to remove.
496  */
497  void remove_header(std::string const & key);
498 
499  /// Get HTTP body
500  /**
501  * Gets the body of the HTTP object
502  *
503  * @return The body of the HTTP message.
504  */
505  std::string const & get_body() const {
506  return m_body;
507  }
508 
509  /// Set body content
510  /**
511  * Set the body content of the HTTP response to the parameter string. Note
512  * set_body will also set the Content-Length HTTP header to the appropriate
513  * value. If you want the Content-Length header to be something else, do so
514  * via replace_header("Content-Length") after calling set_body()
515  *
516  * @param value String data to include as the body content.
517  */
518  void set_body(std::string const & value);
519 
520  /// Get body size limit
521  /**
522  * Retrieves the maximum number of bytes to parse & buffer before canceling
523  * a request.
524  *
525  * @since 0.5.0
526  *
527  * @return The maximum length of a message body.
528  */
530  return m_body_bytes_max;
531  }
532 
533  /// Set body size limit
534  /**
535  * Set the maximum number of bytes to parse and buffer before canceling a
536  * request.
537  *
538  * @since 0.5.0
539  *
540  * @param value The size to set the max body length to.
541  */
542  void set_max_body_size(size_t value) {
543  m_body_bytes_max = value;
544  }
545 
546  /// Extract an HTTP parameter list from a string.
547  /**
548  * @param [in] in The input string.
549  * @param [out] out The parameter list to store extracted parameters in.
550  * @return Whether or not the input was a valid parameter list.
551  */
552  bool parse_parameter_list(std::string const & in, parameter_list & out)
553  const;
554 protected:
555  /// Process a header line
556  /**
557  * @todo Update this method to be exception free.
558  *
559  * @param [in] begin An iterator to the beginning of the sequence.
560  * @param [in] end An iterator to the end of the sequence.
561  */
562  void process_header(std::string::iterator begin, std::string::iterator end);
563 
564  /// Prepare the parser to begin parsing body data
565  /**
566  * Inspects headers to determine if the message has a body that needs to be
567  * read. If so, sets up the necessary state, otherwise returns false. If
568  * this method returns true and loading the message body is desired call
569  * `process_body` until it returns zero bytes or an error.
570  *
571  * Must not be called until after all headers have been processed.
572  *
573  * @since 0.5.0
574  *
575  * @return True if more bytes are needed to load the body, false otherwise.
576  */
577  bool prepare_body();
578 
579  /// Process body data
580  /**
581  * Parses body data.
582  *
583  * @since 0.5.0
584  *
585  * @param [in] begin An iterator to the beginning of the sequence.
586  * @param [in] end An iterator to the end of the sequence.
587  * @return The number of bytes processed
588  */
589  size_t process_body(char const * buf, size_t len);
590 
591  /// Check if the parser is done parsing the body
592  /**
593  * Behavior before a call to `prepare_body` is undefined.
594  *
595  * @since 0.5.0
596  *
597  * @return True if the message body has been completed loaded.
598  */
599  bool body_ready() const {
600  return (m_body_bytes_needed == 0);
601  }
602 
603  /// Generate and return the HTTP headers as a string
604  /**
605  * Each headers will be followed by the \r\n sequence including the last one.
606  * A second \r\n sequence (blank header) is not appended by this method
607  *
608  * @return The HTTP headers as a string.
609  */
610  std::string raw_headers() const;
611 
612  std::string m_version;
613  header_list m_headers;
614 
615  size_t m_header_bytes;
616 
617  std::string m_body;
618  size_t m_body_bytes_needed;
619  size_t m_body_bytes_max;
620  body_encoding::value m_body_encoding;
621 };
622 
623 } // namespace parser
624 } // namespace http
625 } // namespace websocketpp
626 
627 #include <websocketpp/http/impl/parser.hpp>
628 
629 #endif // HTTP_PARSER_HPP
std::string const & get_header(std::string const &key) const
Get the value of an HTTP header.
Definition: parser.hpp:45
size_t get_max_body_size() const
Get body size limit.
Definition: parser.hpp:529
bool get_header_as_plist(std::string const &key, parameter_list &out) const
Extract an HTTP parameter list from a parser header.
Definition: parser.hpp:55
header_list const & get_headers() const
Return a list of all HTTP headers.
Definition: parser.hpp:179
void set_body(std::string const &value)
Set body content.
Definition: parser.hpp:91
lib::weak_ptr< void > connection_hdl
A handle to uniquely identify a connection.
size_t process_body(char const *buf, size_t len)
Process body data.
Definition: parser.hpp:145
HTTP handling support.
Definition: constants.hpp:39
bool prepare_body()
Prepare the parser to begin parsing body data.
Definition: parser.hpp:119
void process_header(std::string::iterator begin, std::string::iterator end)
Process a header line.
Definition: parser.hpp:161
std::string raw_headers() const
Generate and return the HTTP headers as a string.
Definition: parser.hpp:183
std::string const & get_version() const
Get the HTTP version string.
Definition: parser.hpp:410
void append_header(std::string const &key, std::string const &val)
Append a value to an existing HTTP header.
Definition: parser.hpp:67
void replace_header(std::string const &key, std::string const &val)
Set a value for an HTTP header, replacing an existing value.
Definition: parser.hpp:81
void set_version(std::string const &version)
Set HTTP parser Version.
Definition: parser.hpp:41
bool body_ready() const
Check if the parser is done parsing the body.
Definition: parser.hpp:599
std::string const & get_body() const
Get HTTP body.
Definition: parser.hpp:505
bool parse_parameter_list(std::string const &in, parameter_list &out) const
Extract an HTTP parameter list from a string.
Definition: parser.hpp:107
void remove_header(std::string const &key)
Remove a header from the parser.
Definition: parser.hpp:87
void set_max_body_size(size_t value)
Set body size limit.
Definition: parser.hpp:542