Libevhtp  1.2.13
parser.c
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stddef.h>
3 #include <ctype.h>
4 
5 #include "internal.h"
6 #include "evhtp/parser.h"
7 #include "evhtp/config.h"
8 #include "log.h"
9 
10 #if '\n' != '\x0a' || 'A' != 65
11 #error "You have somehow found a non-ASCII host. We can't build here."
12 #endif
13 
/* Upper bound that htparser_run() checks p->buf_idx against before it
 * reports htparse_error_too_big. */
#define PARSER_STACK_MAX 8192

/* Line terminator bytes. The expansions are fully parenthesized so they
 * behave as a single operand in every context (e.g. `sizeof LF`). */
#define LF               ((unsigned char)10)
#define CR               ((unsigned char)13)
#define CRLF             "\x0d\x0a"
18 
27 };
28 
30  parser_flag_chunked = (1 << 0),
34 };
35 
37  s_start = 0,
80 };
81 
85 
86 
87 struct htparser {
88  htpparse_error error;
89  parser_state state;
90  parser_flags flags;
91  eval_hdr_val heval;
92 
93  htp_type type;
94  htp_scheme scheme;
95  htp_method method;
96 
97  unsigned char multipart;
98  unsigned char major;
99  unsigned char minor;
100  uint64_t content_len; /* this gets decremented as data passes through */
101  uint64_t orig_content_len; /* this contains the original length of the body */
102  uint64_t bytes_read;
104  unsigned int status; /* only for responses */
105  unsigned int status_count; /* only for responses */
106 
108  char * host_offset;
109  char * port_offset;
110  char * path_offset;
111  char * args_offset;
112 
113  void * userdata;
114 
115  size_t buf_idx;
116  /* Must be last since htparser_init memsets up to the offset of this buffer */
118 };
119 
#ifdef EVHTP_DEBUG
/* Return `s`, or a printable marker when `s` is NULL, so the value can be
 * handed to a "%s" conversion safely. */
static inline const char *
log_htparser_str_(const char * s)
{
    return s ? s : "(null)";
}

/**
 * Dump every field of the parser through log_debug().
 *
 * Only compiled when EVHTP_DEBUG is defined; otherwise the name expands to
 * nothing (see the #else branch below).
 *
 * The conversion specifiers are chosen to match the argument types:
 *  - enum fields are cast to int for "%d";
 *  - multipart/major/minor are numeric bytes, printed with "%u";
 *  - the 64-bit counters are cast to unsigned long long for "%llu";
 *  - the *_offset pointers are reset to NULL at the start of a message,
 *    so they are routed through log_htparser_str_() before "%s".
 *
 * @param p parser to dump; must not be NULL.
 */
static void
log_htparser__s_(struct htparser * p)
{
    log_debug(
        "struct htparser {\n"
        " htpparse_error = %d\n"
        " parser_state = %d\n"
        " parser_flags = %d\n"
        " eval_hdr_val = %d\n"
        " htp_type = %d\n"
        " htp_scheme = %d\n"
        " htp_method = %d\n"
        " multipart = %u\n"
        " major = %u\n"
        " minor = %u\n"
        " content_len = %llu\n"
        " orig_clen = %llu\n"
        " bytes_read = %llu\n"
        " total_read = %llu\n"
        " status = %u\n"
        " status_count = %u\n"
        " scheme_offset = %s\n"
        " host_offset = %s\n"
        " port_offset = %s\n"
        " path_offset = %s\n"
        " args_offset = %s\n"
        " userdata = %p\n"
        " buf_idx = %zu\n"
        " buf = %s\n"
        "};",
        (int)p->error,
        (int)p->state,
        (int)p->flags,
        (int)p->heval,
        (int)p->type,
        (int)p->scheme,
        (int)p->method,
        (unsigned int)p->multipart,
        (unsigned int)p->major,
        (unsigned int)p->minor,
        (unsigned long long)p->content_len,
        (unsigned long long)p->orig_content_len,
        (unsigned long long)p->bytes_read,
        (unsigned long long)p->total_bytes_read,
        p->status,
        p->status_count,
        log_htparser_str_(p->scheme_offset),
        log_htparser_str_(p->host_offset),
        log_htparser_str_(p->port_offset),
        log_htparser_str_(p->path_offset),
        log_htparser_str_(p->args_offset),
        p->userdata,
        p->buf_idx,
        p->buf);
} /* log_htparser__s_ */

#else
#define log_htparser__s_(p)
#endif
180 
/*
 * 256-bit bitmap, one bit per byte value: bit (ch & 0x1f) of word (ch >> 5).
 * htparser_run() tests a byte against this table while scanning the URI;
 * a set bit means the byte is simply appended to the buffer, a clear bit
 * sends it to the per-character switch.
 *
 * Clear bits: NUL, LF, CR in word 0 and ' ', '#', '%', '+', '.', '/', '?'
 * in word 1. Every byte from 0x40 upwards is set.
 *
 * The table is never written, hence const.
 */
static const uint32_t usual[] = {
    0xffffdbfe, /* 0x00 - 0x1f: all but NUL, LF, CR */
    0x7fff37d6, /* 0x20 - 0x3f: all but ' ' # % + . / ? */
    0xffffffff, /* 0x40 - 0x5f */
    0xffffffff, /* 0x60 - 0x7f */
    0xffffffff, /* 0x80 - 0x9f */
    0xffffffff, /* 0xa0 - 0xbf */
    0xffffffff, /* 0xc0 - 0xdf */
    0xffffffff  /* 0xe0 - 0xff */
};
191 
/*
 * Hex digit lookup indexed by byte value: 0-15 for '0'-'9', 'A'-'F' and
 * 'a'-'f', -1 for every other byte.
 *
 * All 256 entries are spelled out. With only the lower 128 initialised,
 * the upper half would be implicitly zero, and bytes 0x80-0xff would look
 * like the valid digit 0 instead of being rejected.
 */
static const int8_t unhex[256] = {
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    /* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
202 
/*
 * Printable names for parser errors; the string at index N is returned for
 * the error whose numeric value is N.
 *
 * NOTE(review): the table holds 13 entries (indices 0-12), while the lookup
 * in this file only rejects values above htparse_error_generic + 1. Confirm
 * against the htpparse_error enum that no accepted value indexes past the
 * last entry.
 */
static const char * errstr_map[] = {
    /*  0 */ "htparse_error_none",
    /*  1 */ "htparse_error_too_big",
    /*  2 */ "htparse_error_invalid_method",
    /*  3 */ "htparse_error_invalid_requestline",
    /*  4 */ "htparse_error_invalid_schema",
    /*  5 */ "htparse_error_invalid_protocol",
    /*  6 */ "htparse_error_invalid_version",
    /*  7 */ "htparse_error_invalid_header",
    /*  8 */ "htparse_error_invalid_chunk_size",
    /*  9 */ "htparse_error_invalid_chunk",
    /* 10 */ "htparse_error_invalid_state",
    /* 11 */ "htparse_error_user",
    /* 12 */ "htparse_error_unknown"
};
218 
/*
 * Printable HTTP method names, indexed by htp_method value (see
 * htparser_get_methodstr_m(), which returns NULL for anything at or above
 * htp_method_UNKNOWN).
 *
 * Entry 10 is spelled "PROPPATCH" (two P's) so that it agrees with the
 * token get_method() recognises as htp_method_PROPPATCH.
 */
static const char * method_strmap[] = {
    "GET",
    "HEAD",
    "POST",
    "PUT",
    "DELETE",
    "MKCOL",
    "COPY",
    "MOVE",
    "OPTIONS",
    "PROPFIND",
    "PROPPATCH",
    "LOCK",
    "UNLOCK",
    "TRACE",
    "CONNECT",
    "PATCH",
};
237 
/* Smaller of two values. Each argument may be evaluated twice, so do not
 * pass expressions with side effects. */
#define _MIN_READ(a, b) ((a) < (b) ? (a) : (b))

/*
 * Fixed-length token comparison helpers: _strNcmp(m, c0, ...) is non-zero
 * when the bytes at `m` equal the given character constants.
 *
 * Every argument and every expansion is parenthesized, so the macros can be
 * combined with other operators (for example `!_str4cmp(...)`) without
 * changing meaning.
 *
 * Differences between the two implementations that callers must respect:
 *  - Little-endian variants load whole 32-bit words. `m` must therefore be
 *    readable for 4 bytes (3/4/5-char forms) or 8 bytes (6/7/8/9-char
 *    forms) and suitably aligned for uint32_t access.
 *  - _str3_cmp and _str7_cmp also compare the trailing c3/c7 byte on
 *    little-endian hosts (callers pass '\0', i.e. the buffer terminator);
 *    the big-endian forms ignore it.
 *  - _str3Ocmp does not look at m[1] on big-endian hosts; the caller is
 *    expected to have checked it already.
 */
#ifndef HOST_BIG_ENDIAN
/* Little-endian cmp macros */

/* Pack four characters into the 32-bit value they form in memory on a
 * little-endian host. Shifts are done on uint32_t to stay well-defined. */
#define _str_le32(c0, c1, c2, c3)                \
    (((uint32_t)(unsigned char)(c3) << 24)       \
     | ((uint32_t)(unsigned char)(c2) << 16)     \
     | ((uint32_t)(unsigned char)(c1) << 8)      \
     | (uint32_t)(unsigned char)(c0))

/* Pack two characters into the low 16 bits, little-endian order. */
#define _str_le16(c0, c1) \
    (((uint32_t)(unsigned char)(c1) << 8) | (uint32_t)(unsigned char)(c0))

#define _str3_cmp(m, c0, c1, c2, c3) \
    (*(const uint32_t *)(m) == _str_le32(c0, c1, c2, c3))

#define _str3Ocmp(m, c0, c1, c2, c3) \
    (*(const uint32_t *)(m) == _str_le32(c0, c1, c2, c3))

#define _str4cmp(m, c0, c1, c2, c3) \
    (*(const uint32_t *)(m) == _str_le32(c0, c1, c2, c3))

#define _str5cmp(m, c0, c1, c2, c3, c4)                    \
    (*(const uint32_t *)(m) == _str_le32(c0, c1, c2, c3)   \
     && (m)[4] == (c4))

#define _str6cmp(m, c0, c1, c2, c3, c4, c5)                \
    (*(const uint32_t *)(m) == _str_le32(c0, c1, c2, c3)   \
     && (((const uint32_t *)(m))[1] & 0xffff) == _str_le16(c4, c5))

#define _str7_cmp(m, c0, c1, c2, c3, c4, c5, c6, c7)       \
    (*(const uint32_t *)(m) == _str_le32(c0, c1, c2, c3)   \
     && ((const uint32_t *)(m))[1] == _str_le32(c4, c5, c6, c7))

#define _str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7)        \
    (*(const uint32_t *)(m) == _str_le32(c0, c1, c2, c3)   \
     && ((const uint32_t *)(m))[1] == _str_le32(c4, c5, c6, c7))

#define _str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8)    \
    (*(const uint32_t *)(m) == _str_le32(c0, c1, c2, c3)   \
     && ((const uint32_t *)(m))[1] == _str_le32(c4, c5, c6, c7) \
     && (m)[8] == (c8))
#else
/* Big endian cmp macros */
#define _str3_cmp(m, c0, c1, c2, c3) \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2))

#define _str3Ocmp(m, c0, c1, c2, c3) \
    ((m)[0] == (c0) && (m)[2] == (c2) && (m)[3] == (c3))

#define _str4cmp(m, c0, c1, c2, c3) \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3))

#define _str5cmp(m, c0, c1, c2, c3, c4)                                     \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3)   \
     && (m)[4] == (c4))

#define _str6cmp(m, c0, c1, c2, c3, c4, c5)                                 \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3)   \
     && (m)[4] == (c4) && (m)[5] == (c5))

#define _str7_cmp(m, c0, c1, c2, c3, c4, c5, c6, c7)                        \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3)   \
     && (m)[4] == (c4) && (m)[5] == (c5) && (m)[6] == (c6))

#define _str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7)                         \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3)   \
     && (m)[4] == (c4) && (m)[5] == (c5) && (m)[6] == (c6) && (m)[7] == (c7))

#define _str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8)                     \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3)   \
     && (m)[4] == (c4) && (m)[5] == (c5) && (m)[6] == (c6) && (m)[7] == (c7) \
     && (m)[8] == (c8))

#endif
302 
/*
 * Generate `hook_<name>_run(p, hooks)`: a thin trampoline for a callback
 * that takes only the parser. If `hooks` is NULL or the <name> member is
 * unset the trampoline returns 0; otherwise it returns whatever the
 * callback returns.
 */
#define __HTPARSE_GENHOOK(__n)                                                  \
    static inline int hook_ ## __n ## _run(htparser * p, htparse_hooks * hooks) \
    {                                                                           \
        log_debug("enter");                                                     \
        if (hooks == NULL || (hooks)->__n == NULL)                              \
        {                                                                       \
            return 0;                                                           \
        }                                                                       \
                                                                                \
        return (hooks)->__n(p);                                                 \
    }

/*
 * Generate `hook_<name>_run(p, hooks, s, l)`: same as above for callbacks
 * that additionally receive a data pointer `s` and its length `l`.
 */
#define __HTPARSE_GENDHOOK(__n)                                                 \
    static inline int hook_ ## __n ## _run(htparser * p,                        \
                                           htparse_hooks * hooks,               \
                                           const char * s, size_t l)            \
    {                                                                           \
        log_debug("enter");                                                     \
        if (hooks == NULL || (hooks)->__n == NULL)                              \
        {                                                                       \
            return 0;                                                           \
        }                                                                       \
                                                                                \
        return (hooks)->__n(p, s, l);                                           \
    }
328 
329 __HTPARSE_GENHOOK(on_msg_begin)
330 __HTPARSE_GENHOOK(on_hdrs_begin)
331 __HTPARSE_GENHOOK(on_hdrs_complete)
332 __HTPARSE_GENHOOK(on_new_chunk)
333 __HTPARSE_GENHOOK(on_chunk_complete)
334 __HTPARSE_GENHOOK(on_chunks_complete)
335 __HTPARSE_GENHOOK(on_msg_complete)
336 
339 __HTPARSE_GENDHOOK(host)
340 __HTPARSE_GENDHOOK(port)
341 __HTPARSE_GENDHOOK(path)
344 __HTPARSE_GENDHOOK(hdr_key)
345 __HTPARSE_GENDHOOK(hdr_val)
346 __HTPARSE_GENDHOOK(body)
347 __HTPARSE_GENDHOOK(hostname)
348 
349 
/**
 * Parse an unsigned decimal number from a length-delimited buffer.
 *
 * Trailing blanks (space / tab) are ignored. Anything else that is not an
 * ASCII digit, more than 20 remaining characters, or a value that does not
 * fit in 64 bits is reported as an error. An empty (or all-blank) input
 * yields 0 without an error, as before.
 *
 * @param str buffer holding the digits; does not need to be NUL-terminated.
 * @param n   number of bytes available at `str`.
 * @param err out: set to 0 on success and to 1 on failure. It is always
 *            written, so the caller does not have to pre-initialise it.
 *
 * @return the parsed value, or 0 when `*err` is 1.
 */
static inline uint64_t
str_to_uint64(const char * str, size_t n, int * err)
{
    uint64_t value = 0;

    *err = 0;

    /* Trim whitespace after value. The cast keeps isblank() defined for
     * bytes >= 0x80 on platforms where char is signed. */
    while (n && isblank((unsigned char)str[n - 1]))
    {
        n--;
    }

    if (n > 20)
    {
        /* 18446744073709551615 is 20 bytes */
        *err = 1;
        return 0;
    }

    for (; n--; str++)
    {
        uint64_t digit;

        if (*str < '0' || *str > '9')
        {
            *err = 1;
            return 0;
        }

        digit = (uint64_t)(*str - '0');

        /* Reject before multiplying: value * 10 + digit would exceed
         * UINT64_MAX. Comparing the wrapped result against the previous
         * value is not enough, because the wrapped product can still be
         * larger than the old value. */
        if (value > (UINT64_MAX - digit) / 10)
        {
            *err = 1;
            return 0;
        }

        value = value * 10 + digit;
    }

    return value;
}
391 
/**
 * Parse a non-negative decimal number from a length-delimited buffer.
 *
 * @param str buffer holding the digits; does not need to be NUL-terminated.
 * @param n   number of bytes available at `str`.
 *
 * @return the parsed value, or -1 when the input is empty, contains a
 *         non-digit byte, or does not fit in ssize_t.
 */
static inline ssize_t
_str_to_ssize_t(char * str, size_t n)
{
    /* Largest ssize_t, derived without extra headers. Assumes ssize_t is
     * the signed counterpart of size_t (same width). */
    const ssize_t limit = (ssize_t)(((size_t)-1) >> 1);
    ssize_t       value = 0;

    if (n == 0)
    {
        return -1;
    }

    for (; n--; str++)
    {
        ssize_t digit;

        if (*str < '0' || *str > '9')
        {
            return -1;
        }

        digit = (ssize_t)(*str - '0');

        /* Check before the arithmetic: signed overflow is undefined, so it
         * cannot be detected after the fact. */
        if (value > (limit - digit) / 10)
        {
            return -1;
        }

        value = value * 10 + digit;
    }

    return value;
}
421 
422 htpparse_error
424 {
425  return p->error;
426 }
427 
428 const char *
430 {
431  htpparse_error e = htparser_get_error(p);
432 
433  if (e > (htparse_error_generic + 1))
434  {
435  return "htparse_no_such_error";
436  }
437 
438  return errstr_map[e];
439 }
440 
441 unsigned int
443 {
444  return p->status;
445 }
446 
447 int
449 {
450  if (p->major > 0 && p->minor > 0)
451  {
453  {
454  return 0;
455  } else {
456  return 1;
457  }
458  } else {
460  {
461  return 1;
462  } else {
463  return 0;
464  }
465  }
466 
467  return 0;
468 }
469 
470 htp_scheme
472 {
473  return p->scheme;
474 }
475 
476 htp_method
478 {
479  return p->method;
480 }
481 
482 const char *
483 htparser_get_methodstr_m(htp_method meth)
484 {
485  if (meth >= htp_method_UNKNOWN)
486  {
487  return NULL;
488  }
489 
490  return method_strmap[meth];
491 }
492 
493 const char *
495 {
496  return htparser_get_methodstr_m(p->method);
497 }
498 
499 void
500 htparser_set_major(htparser * p, unsigned char major)
501 {
502  p->major = major;
503 }
504 
505 void
506 htparser_set_minor(htparser * p, unsigned char minor)
507 {
508  p->minor = minor;
509 }
510 
511 unsigned char
513 {
514  return p->major;
515 }
516 
517 unsigned char
519 {
520  return p->minor;
521 }
522 
523 unsigned char
525 {
526  return p->multipart;
527 }
528 
529 void *
531 {
532  return p->userdata;
533 }
534 
535 void
537 {
538  p->userdata = ud;
539 }
540 
541 uint64_t
543 {
544  return p->content_len;
545 }
546 
547 uint64_t
549 {
550  return p->orig_content_len;
551 }
552 
553 uint64_t
555 {
556  return p->bytes_read;
557 }
558 
559 uint64_t
561 {
562  return p->total_bytes_read;
563 }
564 
565 void
567 {
568  /* Do not memset entire string buffer. */
569  memset(p, 0, offsetof(htparser, buf));
570  p->buf[0] = '\0';
571  p->state = s_start;
572  p->error = htparse_error_none;
573  p->method = htp_method_UNKNOWN;
574  p->type = type;
575 }
576 
577 htparser *
579 {
580  return malloc(sizeof(htparser));
581 }
582 
/**
 * Tell whether a byte may appear in a (non-IPv6) host name.
 *
 * Accepted: ASCII letters of either case, decimal digits, '.' and '-'.
 *
 * @param ch byte to classify.
 *
 * @return 1 if accepted, 0 otherwise.
 */
static int
is_host_char(unsigned char ch)
{
    /* Setting bit 0x20 folds 'A'-'Z' onto 'a'-'z'. */
    const char folded = (unsigned char)(ch | 0x20);

    if (folded >= 'a' && folded <= 'z')
    {
        return 1;
    }

    switch (ch) {
        case '.':
        case '-':
            return 1;
        default:
            break;
    }

    return (ch >= '0' && ch <= '9') ? 1 : 0;
}
600 
601 static htp_method
602 get_method(const char * m, const size_t sz)
603 {
604  switch (sz) {
605  case 3:
606  if (_str3_cmp(m, 'G', 'E', 'T', '\0'))
607  {
608  return htp_method_GET;
609  }
610 
611  if (_str3_cmp(m, 'P', 'U', 'T', '\0'))
612  {
613  return htp_method_PUT;
614  }
615 
616  break;
617  case 4:
618  if (m[1] == 'O')
619  {
620  if (_str3Ocmp(m, 'P', 'O', 'S', 'T'))
621  {
622  return htp_method_POST;
623  }
624 
625  if (_str3Ocmp(m, 'C', 'O', 'P', 'Y'))
626  {
627  return htp_method_COPY;
628  }
629 
630  if (_str3Ocmp(m, 'M', 'O', 'V', 'E'))
631  {
632  return htp_method_MOVE;
633  }
634 
635  if (_str3Ocmp(m, 'L', 'O', 'C', 'K'))
636  {
637  return htp_method_LOCK;
638  }
639  } else {
640  if (_str4cmp(m, 'H', 'E', 'A', 'D'))
641  {
642  return htp_method_HEAD;
643  }
644  }
645 
646  break;
647  case 5:
648  if (_str5cmp(m, 'M', 'K', 'C', 'O', 'L'))
649  {
650  return htp_method_MKCOL;
651  }
652 
653  if (_str5cmp(m, 'T', 'R', 'A', 'C', 'E'))
654  {
655  return htp_method_TRACE;
656  }
657 
658  if (_str5cmp(m, 'P', 'A', 'T', 'C', 'H'))
659  {
660  return htp_method_PATCH;
661  }
662 
663  break;
664  case 6:
665  if (_str6cmp(m, 'D', 'E', 'L', 'E', 'T', 'E'))
666  {
667  return htp_method_DELETE;
668  }
669 
670  if (_str6cmp(m, 'U', 'N', 'L', 'O', 'C', 'K'))
671  {
672  return htp_method_UNLOCK;
673  }
674 
675  break;
676  case 7:
677  if (_str7_cmp(m, 'O', 'P', 'T', 'I', 'O', 'N', 'S', '\0'))
678  {
679  return htp_method_OPTIONS;
680  }
681 
682  if (_str7_cmp(m, 'C', 'O', 'N', 'N', 'E', 'C', 'T', '\0'))
683  {
684  return htp_method_CONNECT;
685  }
686 
687  break;
688  case 8:
689  if (_str8cmp(m, 'P', 'R', 'O', 'P', 'F', 'I', 'N', 'D'))
690  {
691  return htp_method_PROPFIND;
692  }
693 
694  break;
695 
696  case 9:
697  if (_str9cmp(m, 'P', 'R', 'O', 'P', 'P', 'A', 'T', 'C', 'H'))
698  {
699  return htp_method_PROPPATCH;
700  }
701 
702  break;
703  } /* switch */
704 
705  return htp_method_UNKNOWN;
706 } /* get_method */
707 
708 size_t
709 htparser_run(htparser * p, htparse_hooks * hooks, const char * data, size_t len)
710 {
711  unsigned char ch;
712  char c;
713  size_t i;
714 
715  log_debug("enter");
716  log_debug("p == %p", p);
717 
718  p->error = htparse_error_none;
719  p->bytes_read = 0;
720 
721  for (i = 0; i < len; i++)
722  {
723  int res;
724  int err;
725 
726  ch = data[i];
727 
728  log_debug("[%p] data[%zu] = %c (%x)", p, i, isprint(ch) ? ch : ' ', ch);
729 
730  if (p->buf_idx >= PARSER_STACK_MAX)
731  {
732  p->error = htparse_error_too_big;
733  return i + 1;
734  }
735 
736  p->total_bytes_read += 1;
737  p->bytes_read += 1;
738 
739  switch (p->state) {
740  case s_start:
741  log_debug("[%p] s_start", p);
742 
743  if (ch == CR || ch == LF)
744  {
745  break;
746  }
747 
748  if ((ch < 'A' || ch > 'Z') && ch != '_')
749  {
750  p->error = htparse_error_inval_reqline;
751 
752  log_debug("s_start invalid fist char '%c'", ch);
753  log_htparser__s_(p);
754 
755  return i + 1;
756  }
757 
758 
759  p->flags = 0;
760  p->error = htparse_error_none;
761  p->method = htp_method_UNKNOWN;
762  p->multipart = 0;
763  p->major = 0;
764  p->minor = 0;
765  p->content_len = 0;
766  p->orig_content_len = 0;
767  p->status = 0;
768  p->status_count = 0;
769  p->scheme_offset = NULL;
770  p->host_offset = NULL;
771  p->port_offset = NULL;
772  p->path_offset = NULL;
773  p->args_offset = NULL;
774 
775 
776  res = hook_on_msg_begin_run(p, hooks);
777 
778  p->buf[p->buf_idx++] = ch;
779  p->buf[p->buf_idx] = '\0';
780 
781  if (evhtp_likely(p->type == htp_type_request))
782  {
783  p->state = s_method;
784  } else if (p->type == htp_type_response && ch == 'H')
785  {
786  p->state = s_http_H;
787  } else {
788  log_debug("not type of request or response?");
789  log_htparser__s_(p);
790 
791  p->error = htparse_error_inval_reqline;
792  return i + 1;
793  }
794 
795  if (res)
796  {
797  p->error = htparse_error_user;
798  return i + 1;
799  }
800 
801  break;
802 
803  case s_method:
804  log_debug("[%p] s_method", p);
805 
806  do {
807  if (ch == ' ')
808  {
809  p->method = get_method(p->buf, p->buf_idx);
810  res = hook_method_run(p, hooks, p->buf, p->buf_idx);
811 
812  p->buf_idx = 0;
814 
815  if (res)
816  {
817  p->error = htparse_error_user;
818  return i + 1;
819  }
820 
821  break;
822  } else {
823  if ((ch < 'A' || ch > 'Z') && ch != '_')
824  {
825  p->error = htparse_error_inval_method;
826  return i + 1;
827  }
828 
829  p->buf[p->buf_idx++] = ch;
830  p->buf[p->buf_idx] = '\0';
831  }
832 
833  ch = data[++i];
834  } while (i < len);
835 
836  break;
837  case s_spaces_before_uri:
838  log_debug("[%p] s_spaces_before_uri", p);
839 
840  /* CONNECT is special - RFC 2817 section 5.2:
841  * The Request-URI portion of the Request-Line is
842  * always an 'authority' as defined by URI Generic
843  * Syntax [2], which is to say the host name and port
844  * number destination of the requested connection
845  * separated by a colon
846  */
847  if (p->method == htp_method_CONNECT)
848  {
849  switch (ch) {
850  case ' ':
851  break;
852  case '[':
853  /* Literal IPv6 address start. */
854  p->buf[p->buf_idx++] = ch;
855  p->buf[p->buf_idx] = '\0';
856  p->host_offset = &p->buf[p->buf_idx];
857 
858  p->state = s_host_ipv6;
859  break;
860  default:
861  if (!is_host_char(ch))
862  {
863  p->error = htparse_error_inval_reqline;
864  log_htparser__s_(p);
865 
866  return i + 1;
867  }
868  p->host_offset = &p->buf[p->buf_idx];
869  p->buf[p->buf_idx++] = ch;
870  p->buf[p->buf_idx] = '\0';
871 
872  p->state = s_host;
873  break;
874  } /* switch */
875 
876  break;
877  }
878 
879  switch (ch) {
880  case ' ':
881  break;
882  case '/':
883  p->path_offset = &p->buf[p->buf_idx];
884 
885  p->buf[p->buf_idx++] = ch;
886  p->buf[p->buf_idx] = '\0';
888  break;
889  default:
890  c = (unsigned char)(ch | 0x20);
891 
892  if (c >= 'a' && c <= 'z')
893  {
894  p->scheme_offset = &p->buf[p->buf_idx];
895  p->buf[p->buf_idx++] = ch;
896  p->buf[p->buf_idx] = '\0';
897  p->state = s_schema;
898  break;
899  }
900 
901  p->error = htparse_error_inval_reqline;
902  log_htparser__s_(p);
903 
904  return i + 1;
905  } /* switch */
906 
907  break;
908  case s_schema:
909  log_debug("[%p] s_schema", p);
910 
911  c = (unsigned char)(ch | 0x20);
912 
913  if (c >= 'a' && c <= 'z')
914  {
915  p->buf[p->buf_idx++] = ch;
916  p->buf[p->buf_idx] = '\0';
917  break;
918  }
919 
920  switch (ch) {
921  case ':':
922  p->scheme = htp_scheme_unknown;
923 
924  switch (p->buf_idx) {
925  case 3:
926  if (_str3_cmp(p->scheme_offset, 'f', 't', 'p', '\0'))
927  {
928  p->scheme = htp_scheme_ftp;
929  break;
930  }
931 
932  if (_str3_cmp(p->scheme_offset, 'n', 'f', 's', '\0'))
933  {
934  p->scheme = htp_scheme_nfs;
935  break;
936  }
937 
938  break;
939  case 4:
940  if (_str4cmp(p->scheme_offset, 'h', 't', 't', 'p'))
941  {
942  p->scheme = htp_scheme_http;
943  break;
944  }
945  break;
946  case 5:
947  if (_str5cmp(p->scheme_offset, 'h', 't', 't', 'p', 's'))
948  {
949  p->scheme = htp_scheme_https;
950  break;
951  }
952  break;
953  } /* switch */
954 
955  res = hook_scheme_run(p, hooks,
956  p->scheme_offset,
957  (&p->buf[p->buf_idx] - p->scheme_offset));
958 
959  p->buf[p->buf_idx++] = ch;
960  p->buf[p->buf_idx] = '\0';
961 
962  p->state = s_schema_slash;
963 
964  if (res)
965  {
966  p->error = htparse_error_user;
967  return i + 1;
968  }
969 
970  break;
971  default:
972  p->error = htparse_error_inval_schema;
973  return i + 1;
974  } /* switch */
975 
976  break;
977  case s_schema_slash:
978  log_debug("[%p] s_schema_slash", p);
979 
980  switch (ch) {
981  case '/':
982  p->buf[p->buf_idx++] = ch;
983  p->buf[p->buf_idx] = '\0';
984 
986  break;
987  default:
988  p->error = htparse_error_inval_schema;
989  return i + 1;
990  }
991  break;
993  log_debug("[%p] s_schema_slash_slash", p);
994 
995  switch (ch) {
996  case '/':
997  p->buf[p->buf_idx++] = ch;
998  p->buf[p->buf_idx] = '\0';
999  p->host_offset = &p->buf[p->buf_idx];
1000 
1001  p->state = s_host;
1002  break;
1003  default:
1004  p->error = htparse_error_inval_schema;
1005  return i + 1;
1006  }
1007  break;
1008  case s_host:
1009  if (ch == '[')
1010  {
1011  /* Literal IPv6 address start. */
1012  p->buf[p->buf_idx++] = ch;
1013  p->buf[p->buf_idx] = '\0';
1014  p->host_offset = &p->buf[p->buf_idx];
1015 
1016  p->state = s_host_ipv6;
1017  break;
1018  }
1019 
1020  if (is_host_char(ch))
1021  {
1022  p->buf[p->buf_idx++] = ch;
1023  p->buf[p->buf_idx] = '\0';
1024  break;
1025  }
1026 
1027  res = hook_host_run(p, hooks,
1028  p->host_offset,
1029  (&p->buf[p->buf_idx] - p->host_offset));
1030 
1031  if (res)
1032  {
1033  p->error = htparse_error_user;
1034  return i + 1;
1035  }
1036 
1037  /* successfully parsed a NON-IPV6 hostname, knowing this, the
1038  * current character in 'ch' is actually the next state, so we
1039  * we fall through to avoid another loop.
1040  */
1041  case s_host_done:
1042  res = 0;
1043 
1044  switch (ch) {
1045  case ':':
1046  p->buf[p->buf_idx++] = ch;
1047  p->buf[p->buf_idx] = '\0';
1048 
1049  p->port_offset = &p->buf[p->buf_idx];
1050  p->state = s_port;
1051  break;
1052  case ' ':
1053  /* this technically should never happen, but we should
1054  * check anyway
1055  */
1056  if (i == 0)
1057  {
1058  p->error = htparse_error_inval_state;
1059  return i + 1;
1060  }
1061 
1062  i--;
1063  ch = '/';
1064  /* to accept requests like <method> <proto>://<host> <ver>
1065  * we fallthrough to the next case.
1066  */
1067  case '/':
1068  p->path_offset = &p->buf[p->buf_idx];
1069 
1070  p->buf[p->buf_idx++] = ch;
1071  p->buf[p->buf_idx] = '\0';
1072 
1074  break;
1075  default:
1076  p->error = htparse_error_inval_schema;
1077  return i + 1;
1078  } /* switch */
1079 
1080  if (res)
1081  {
1082  p->error = htparse_error_user;
1083  return i + 1;
1084  }
1085 
1086  break;
1087  case s_host_ipv6:
1088  c = (unsigned char)(ch | 0x20);
1089 
1090  if ((c >= 'a' && c <= 'f') ||
1091  (ch >= '0' && ch <= '9') || ch == ':' || ch == '.')
1092  {
1093  p->buf[p->buf_idx++] = ch;
1094  p->buf[p->buf_idx] = '\0';
1095  break;
1096  }
1097 
1098  switch (ch) {
1099  case ']':
1100  res = hook_host_run(p, hooks, p->host_offset,
1101  (&p->buf[p->buf_idx] - p->host_offset));
1102  if (res)
1103  {
1104  p->error = htparse_error_user;
1105  return i + 1;
1106  }
1107  p->buf[p->buf_idx++] = ch;
1108  p->buf[p->buf_idx] = '\0';
1109  p->state = s_host_done;
1110  break;
1111  default:
1112  p->error = htparse_error_inval_schema;
1113  return i + 1;
1114  }
1115  break;
1116  case s_port:
1117  if (ch >= '0' && ch <= '9')
1118  {
1119  p->buf[p->buf_idx++] = ch;
1120  p->buf[p->buf_idx] = '\0';
1121  break;
1122  }
1123 
1124  res = hook_port_run(p, hooks, p->port_offset,
1125  (&p->buf[p->buf_idx] - p->port_offset));
1126 
1127  switch (ch) {
1128  case ' ':
1129  /* this technically should never happen, but we should
1130  * check anyway
1131  */
1132  if (i == 0)
1133  {
1134  p->error = htparse_error_inval_state;
1135  return i + 1;
1136  }
1137 
1138  i--;
1139  ch = '/';
1140  /* to accept requests like <method> <proto>://<host> <ver>
1141  * we fallthrough to the next case.
1142  */
1143  case '/':
1144  p->buf[p->buf_idx++] = ch;
1145  p->buf[p->buf_idx] = '\0';
1146  p->path_offset = &p->buf[p->buf_idx - 1];
1147 
1149  break;
1150  default:
1151  p->error = htparse_error_inval_reqline;
1152  log_debug("[s_port] inval_reqline");
1153  log_htparser__s_(p);
1154 
1155  return i + 1;
1156  } /* switch */
1157 
1158  if (res)
1159  {
1160  p->error = htparse_error_user;
1161  return i + 1;
1162  }
1163 
1164  break;
1165  case s_after_slash_in_uri:
1166  log_debug("[%p] s_after_slash_in_uri", p);
1167 
1168  res = 0;
1169 
1170  if (usual[ch >> 5] & (1 << (ch & 0x1f)))
1171  {
1172  if (evhtp_likely((p->buf_idx + 1) < PARSER_STACK_MAX))
1173  {
1174  p->buf[p->buf_idx++] = ch;
1175  p->buf[p->buf_idx] = '\0';
1176  p->state = s_check_uri;
1177  }
1178  break;
1179  }
1180 
1181  switch (ch) {
1182  case ' ':
1183  {
1184  int r1 = hook_path_run(p, hooks, p->path_offset,
1185  (&p->buf[p->buf_idx] - p->path_offset));
1186  int r2 = hook_uri_run(p, hooks, p->buf, p->buf_idx);
1187 
1188  p->state = s_http_09;
1189  p->buf_idx = 0;
1190 
1191  if (r1 || r2)
1192  {
1193  res = 1;
1194  }
1195  }
1196 
1197  break;
1198  case CR:
1199  p->minor = 9;
1200  p->state = s_almost_done;
1201  break;
1202  case LF:
1203  p->minor = 9;
1204  p->state = s_hdrline_start;
1205  break;
1206  case '.':
1207  case '%':
1208  case '/':
1209  case '#':
1210  p->buf[p->buf_idx++] = ch;
1211  p->buf[p->buf_idx] = '\0';
1212  p->state = s_uri;
1213  break;
1214  case '?':
1215  res = hook_path_run(p, hooks, p->path_offset,
1216  (&p->buf[p->buf_idx] - p->path_offset));
1217 
1218  p->buf[p->buf_idx++] = ch;
1219  p->buf[p->buf_idx] = '\0';
1220  p->args_offset = &p->buf[p->buf_idx];
1221  p->state = s_uri;
1222 
1223  break;
1224  default:
1225  p->buf[p->buf_idx++] = ch;
1226  p->buf[p->buf_idx] = '\0';
1227 
1228  p->state = s_check_uri;
1229  break;
1230  } /* switch */
1231 
1232  if (res)
1233  {
1234  p->error = htparse_error_user;
1235  return i + 1;
1236  }
1237 
1238  break;
1239 
1240  case s_check_uri:
1241 
1242  res = 0;
1243 
1244  do {
1245  log_debug("[%p] s_check_uri", p);
1246  if (usual[ch >> 5] & (1 << (ch & 0x1f)))
1247  {
1248  p->buf[p->buf_idx++] = ch;
1249  p->buf[p->buf_idx] = '\0';
1250  } else {
1251  break;
1252  }
1253 
1254  ch = data[++i];
1255  } while (i < len);
1256 
1257  switch (ch) {
1258  case ' ':
1259  {
1260  int r1 = 0;
1261  int r2 = 0;
1262 
1263  if (p->args_offset)
1264  {
1265  r1 = hook_args_run(p, hooks, p->args_offset,
1266  (&p->buf[p->buf_idx] - p->args_offset));
1267  } else {
1268  r1 = hook_path_run(p, hooks, p->path_offset,
1269  (&p->buf[p->buf_idx] - p->path_offset));
1270  }
1271 
1272  r2 = hook_uri_run(p, hooks, p->buf, p->buf_idx);
1273  p->buf_idx = 0;
1274  p->state = s_http_09;
1275 
1276  if (r1 || r2)
1277  {
1278  res = 1;
1279  }
1280  }
1281  break;
1282  case '/':
1283  p->buf[p->buf_idx++] = ch;
1284  p->buf[p->buf_idx] = '\0';
1286  break;
1287  case CR:
1288  p->minor = 9;
1289  p->buf_idx = 0;
1290  p->state = s_almost_done;
1291  break;
1292  case LF:
1293  p->minor = 9;
1294  p->buf_idx = 0;
1295 
1296  p->state = s_hdrline_start;
1297  break;
1298  case '?':
1299  res = hook_path_run(p, hooks,
1300  p->path_offset,
1301  (&p->buf[p->buf_idx] - p->path_offset));
1302 
1303  p->buf[p->buf_idx++] = ch;
1304  p->buf[p->buf_idx] = '\0';
1305 
1306  p->args_offset = &p->buf[p->buf_idx];
1307  p->state = s_uri;
1308  break;
1309  default:
1310  p->buf[p->buf_idx++] = ch;
1311  p->buf[p->buf_idx] = '\0';
1312 
1313  p->state = s_uri;
1314  break;
1315  } /* switch */
1316 
1317  if (res)
1318  {
1319  p->error = htparse_error_user;
1320  return i + 1;
1321  }
1322 
1323  break;
1324 
1325  case s_uri:
1326  log_debug("[%p] s_uri", p);
1327 
1328  res = 0;
1329 
1330  do {
1331  if (usual[ch >> 5] & (1 << (ch & 0x1f)))
1332  {
1333  p->buf[p->buf_idx++] = ch;
1334  p->buf[p->buf_idx] = '\0';
1335  } else {
1336  break;
1337  }
1338 
1339  ch = data[++i];
1340  } while (i < len);
1341 
1342  switch (ch) {
1343  case ' ':
1344  {
1345  int r1 = 0;
1346  int r2 = 0;
1347 
1348  if (p->args_offset)
1349  {
1350  r1 = hook_args_run(p, hooks, p->args_offset,
1351  (&p->buf[p->buf_idx] - p->args_offset));
1352  } else {
1353  r1 = hook_path_run(p, hooks, p->path_offset,
1354  (&p->buf[p->buf_idx] - p->path_offset));
1355  }
1356 
1357  p->buf_idx = 0;
1358  p->state = s_http_09;
1359 
1360  if (r1 || r2)
1361  {
1362  res = 1;
1363  }
1364  }
1365  break;
1366  case CR:
1367  p->minor = 9;
1368  p->buf_idx = 0;
1369  p->state = s_almost_done;
1370  break;
1371  case LF:
1372  p->minor = 9;
1373  p->buf_idx = 0;
1374  p->state = s_hdrline_start;
1375  break;
1376  case '?':
1377  /* RFC 3986 section 3.4:
1378  * The query component is indicated by the
1379  * first question mark ("?") character and
1380  * terminated by a number sign ("#") character
1381  * or by the end of the URI. */
1382  if (!p->args_offset)
1383  {
1384  res = hook_path_run(p, hooks, p->path_offset,
1385  (&p->buf[p->buf_idx] - p->path_offset));
1386 
1387  p->buf[p->buf_idx++] = ch;
1388  p->buf[p->buf_idx] = '\0';
1389  p->args_offset = &p->buf[p->buf_idx];
1390  break;
1391  }
1392  /* Fall through. */
1393  default:
1394  p->buf[p->buf_idx++] = ch;
1395  p->buf[p->buf_idx] = '\0';
1396  break;
1397  } /* switch */
1398 
1399  if (res)
1400  {
1401  p->error = htparse_error_user;
1402  return i + 1;
1403  }
1404 
1405  break;
1406 
1407  case s_http_09:
1408  log_debug("[%p] s_http_09", p);
1409 
1410  switch (ch) {
1411  case ' ':
1412  break;
1413  case CR:
1414  p->minor = 9;
1415  p->buf_idx = 0;
1416  p->state = s_almost_done;
1417  break;
1418  case LF:
1419  p->minor = 9;
1420  p->buf_idx = 0;
1421  p->state = s_hdrline_start;
1422  break;
1423  case 'H':
1424  p->buf_idx = 0;
1425  p->state = s_http_H;
1426  break;
1427  default:
1428  p->error = htparse_error_inval_proto;
1429  return i + 1;
1430  } /* switch */
1431 
1432  break;
1433  case s_http_H:
1434  log_debug("[%p] s_http_H", p);
1435 
1436  switch (ch) {
1437  case 'T':
1438  p->state = s_http_HT;
1439  break;
1440  default:
1441  p->error = htparse_error_inval_proto;
1442  return i + 1;
1443  }
1444  break;
1445  case s_http_HT:
1446  switch (ch) {
1447  case 'T':
1448  p->state = s_http_HTT;
1449  break;
1450  default:
1451  p->error = htparse_error_inval_proto;
1452  return i + 1;
1453  }
1454  break;
1455  case s_http_HTT:
1456  switch (ch) {
1457  case 'P':
1458  p->state = s_http_HTTP;
1459  break;
1460  default:
1461  p->error = htparse_error_inval_proto;
1462  return i + 1;
1463  }
1464  break;
1465  case s_http_HTTP:
1466  switch (ch) {
1467  case '/':
1469  break;
1470  default:
1471  p->error = htparse_error_inval_proto;
1472  return i + 1;
1473  }
1474  break;
1475  case s_first_major_digit:
1476  if (ch < '1' || ch > '9')
1477  {
1478  p->error = htparse_error_inval_ver;
1479  return i + 1;
1480  }
1481 
1482  p->major = ch - '0';
1483  p->state = s_major_digit;
1484  break;
1485  case s_major_digit:
1486  if (ch == '.')
1487  {
1489  break;
1490  }
1491 
1492  if (ch < '0' || ch > '9')
1493  {
1494  p->error = htparse_error_inval_ver;
1495  return i + 1;
1496  }
1497 
1498  p->major = p->major * 10 + ch - '0';
1499  break;
1500  case s_first_minor_digit:
1501  if (ch < '0' || ch > '9')
1502  {
1503  p->error = htparse_error_inval_ver;
1504  return i + 1;
1505  }
1506 
1507  p->minor = ch - '0';
1508  p->state = s_minor_digit;
1509  break;
1510  case s_minor_digit:
1511  switch (ch) {
1512  case ' ':
1513  if (evhtp_likely(p->type == htp_type_request))
1514  {
1516  } else if (p->type == htp_type_response)
1517  {
1518  p->state = s_status;
1519  }
1520 
1521  break;
1522  case CR:
1523  p->state = s_almost_done;
1524  break;
1525  case LF:
1526  /* LF without a CR? error.... */
1527  p->error = htparse_error_inval_reqline;
1528  log_debug("[s_minor_digit] LF without CR!");
1529  log_htparser__s_(p);
1530 
1531  return i + 1;
1532  default:
1533  if (ch < '0' || ch > '9')
1534  {
1535  p->error = htparse_error_inval_ver;
1536  return i + 1;
1537  }
1538 
1539  p->minor = p->minor * 10 + ch - '0';
1540  break;
1541  } /* switch */
1542  break;
1543  case s_status:
1544  /* http response status code */
1545  if (ch == ' ')
1546  {
1547  if (p->status)
1548  {
1549  p->state = s_status_text;
1550  }
1551  break;
1552  }
1553 
1554  if (ch < '0' || ch > '9')
1555  {
1556  p->error = htparse_error_status;
1557  return i + 1;
1558  }
1559 
1560  p->status = p->status * 10 + ch - '0';
1561 
1562  if (++p->status_count == 3)
1563  {
1565  }
1566 
1567  break;
1568  case s_space_after_status:
1569  switch (ch) {
1570  case ' ':
1571  p->state = s_status_text;
1572  break;
1573  case CR:
1574  p->state = s_almost_done;
1575  break;
1576  case LF:
1577  p->state = s_hdrline_start;
1578  break;
1579  default:
1580  p->error = htparse_error_generic;
1581  return i + 1;
1582  }
1583  break;
1584  case s_status_text:
1585  switch (ch) {
1586  case CR:
1587  p->state = s_almost_done;
1588  break;
1589  case LF:
1590  p->state = s_hdrline_start;
1591  break;
1592  default:
1593  break;
1594  }
1595  break;
1596  case s_spaces_after_digit:
1597  switch (ch) {
1598  case ' ':
1599  break;
1600  case CR:
1601  p->state = s_almost_done;
1602  break;
1603  case LF:
1604  p->state = s_hdrline_start;
1605  break;
1606  default:
1607  p->error = htparse_error_inval_ver;
1608  return i + 1;
1609  }
1610  break;
1611 
1612  case s_almost_done:
1613  switch (ch) {
1614  case LF:
1615  if (p->type == htp_type_response && p->status >= 100 && p->status < 200)
1616  {
1617  res = hook_on_hdrs_begin_run(p, hooks);
1618 
1619  if (res)
1620  {
1621  p->error = htparse_error_user;
1622  return i + 1;
1623  }
1624 
1625  p->status = 0;
1626  p->status_count = 0;
1627  p->state = s_start;
1628  break;
1629  }
1630 
1631  p->state = s_done;
1632  res = hook_on_hdrs_begin_run(p, hooks);
1633  if (res)
1634  {
1635  p->error = htparse_error_user;
1636  return i + 1;
1637  }
1638  break;
1639  default:
1640  p->error = htparse_error_inval_reqline;
1641  log_htparser__s_(p);
1642 
1643  return i + 1;
1644  } /* switch */
1645  break;
1646  case s_done:
1647  switch (ch) {
1648  case CR:
1650  break;
1651  case LF:
1652  return i + 1;
1653  default:
1654  goto hdrline_start;
1655  }
1656  break;
1657 hdrline_start:
1658  case s_hdrline_start:
1659  log_debug("[%p] s_hdrline_start", p);
1660 
1661  p->buf_idx = 0;
1662 
1663  switch (ch) {
1664  case CR:
1666  break;
1667  case LF:
1669  break;
1670  default:
1671  p->buf[p->buf_idx++] = ch;
1672  p->buf[p->buf_idx] = '\0';
1673 
1674  p->state = s_hdrline_hdr_key;
1675  break;
1676  }
1677 
1678  break;
1679  case s_hdrline_hdr_key:
1680  log_debug("[%p] s_hdrline_hdr_key", p);
1681 
1682  do {
1683  if (evhtp_unlikely(ch == ':'))
1684  {
1685  res = hook_hdr_key_run(p, hooks, p->buf, p->buf_idx);
1686 
1687  /* figure out whether the value of this header needs to be evaluated by the parser */
1688  p->heval = eval_hdr_val_none;
1689 
1690  switch (p->buf_idx + 1) {
1691  case 5:
1692  if (!strcasecmp(p->buf, "host"))
1693  {
1695  }
1696  break;
1697  case 11:
1698  if (!strcasecmp(p->buf, "connection"))
1699  {
1701  }
1702  break;
1703  case 13:
1704  if (!strcasecmp(p->buf, "content-type"))
1705  {
1707  }
1708  break;
1709  case 15:
1710  if (!strcasecmp(p->buf, "content-length"))
1711  {
1713  }
1714  break;
1715  case 17:
1716  if (!strcasecmp(p->buf, "proxy-connection"))
1717  {
1719  }
1720  break;
1721  case 18:
1722  if (!strcasecmp(p->buf, "transfer-encoding"))
1723  {
1725  }
1726  break;
1727  } /* switch */
1728 
1729  p->buf_idx = 0;
1731 
1732  if (res)
1733  {
1734  p->error = htparse_error_user;
1735  return i + 1;
1736  }
1737 
1738  break;
1739  }
1740 
1741  switch (ch) {
1742  case CR:
1744  break;
1745  case LF:
1747  break;
1748  default:
1749  p->buf[p->buf_idx++] = ch;
1750  p->buf[p->buf_idx] = '\0';
1751  break;
1752  }
1753 
1754  if (p->state != s_hdrline_hdr_key)
1755  {
1756  break;
1757  }
1758 
1759  ch = data[++i];
1760  } while (i < len);
1761 
1762  break;
1763 
1765  log_debug("[%p] s_hdrline_hdr_space_before_val", p);
1766 
1767  switch (ch) {
1768  case ' ':
1769  break;
1770  case CR:
1771  /*
1772  * we have an empty header value here, so we set the buf
1773  * to empty, set the state to hdrline_hdr_val, and
1774  * decrement the start byte counter.
1775  */
1776  p->buf[p->buf_idx++] = ' ';
1777  p->buf[p->buf_idx] = '\0';
1778  p->state = s_hdrline_hdr_val;
1779 
1780  /*
1781  * make sure the next pass comes back to this CR byte,
1782  * so it matches in s_hdrline_hdr_val.
1783  */
1784  i--;
1785  break;
1786  case LF:
1787  /* never got a CR for an empty header, this is an
1788  * invalid state.
1789  */
1790  p->error = htparse_error_inval_hdr;
1791  return i + 1;
1792  default:
1793  p->buf[p->buf_idx++] = ch;
1794  p->buf[p->buf_idx] = '\0';
1795  p->state = s_hdrline_hdr_val;
1796  break;
1797  } /* switch */
1798  break;
1799  case s_hdrline_hdr_val:
1800  err = 0;
1801 
1802  do {
1803  log_debug("[%p] s_hdrline_hdr_val", p);
1804  if (ch == CR)
1805  {
1806  switch (p->heval) {
1807  case eval_hdr_val_none:
1808  break;
1809  case eval_hdr_val_hostname:
1810  if (hook_hostname_run(p, hooks, p->buf, p->buf_idx))
1811  {
1813  p->error = htparse_error_user;
1814  return i + 1;
1815  }
1816 
1817  break;
1819  p->content_len = str_to_uint64(p->buf, p->buf_idx, &err);
1820  p->orig_content_len = p->content_len;
1821 
1822  log_debug("[%p] s_hdrline_hdr_val content-lenth = %zu", p, p->content_len);
1823 
1824  if (err == 1)
1825  {
1826  p->error = htparse_error_too_big;
1827  return i + 1;
1828  }
1829 
1830  break;
1832  switch (p->buf[0]) {
1833  char A_case;
1834  char C_case;
1835  const char * S_buf;
1836 
1837  case 'K':
1838  case 'k':
1839  if (p->buf_idx != 10)
1840  {
1841  break;
1842  }
1843 
1844  A_case = (p->buf[5] == 'A') ? 'A' : 'a';
1845  S_buf = (const char *)(p->buf + 1);
1846 
1847  if (_str9cmp(S_buf,
1848  'e', 'e', 'p', '-', A_case, 'l', 'i', 'v', 'e'))
1849  {
1851  }
1852  break;
1853  case 'c':
1854  case 'C':
1855  if (p->buf_idx != 5)
1856  {
1857  break;
1858  }
1859 
1860  C_case = (p->buf[0] == 'C') ? 'C' : 'c';
1861  S_buf = (const char *)p->buf;
1862 
1863  if (_str5cmp(S_buf, C_case, 'l', 'o', 's', 'e'))
1864  {
1866  }
1867  break;
1868  } /* switch */
1869  break;
1871  if (p->buf_idx != 7)
1872  {
1873  break;
1874  }
1875 
1876  switch (p->buf[0]) {
1877  const char * S_buf;
1878 
1879  case 'c':
1880  case 'C':
1881  if (p->buf_idx != 7)
1882  {
1883  break;
1884  }
1885 
1886  S_buf = (const char *)(p->buf + 1);
1887 
1888  if (_str6cmp(S_buf, 'h', 'u', 'n', 'k', 'e', 'd'))
1889  {
1890  p->flags |= parser_flag_chunked;
1891  }
1892 
1893  break;
1894  }
1895 
1896  break;
1898  if (p->buf_idx != 9)
1899  {
1900  break;
1901  }
1902 
1903  switch (p->buf[0]) {
1904  const char * S_buf;
1905 
1906  case 'm':
1907  case 'M':
1908  S_buf = (const char *)(p->buf + 1);
1909 
1910  if (_str8cmp(S_buf, 'u', 'l', 't', 'i', 'p', 'a', 'r', 't'))
1911  {
1912  p->multipart = 1;
1913  }
1914 
1915  break;
1916  }
1917 
1918  break;
1920  default:
1921  break;
1922  } /* switch */
1923 
1925 
1926  break;
1927  }
1928 
1929  switch (ch) {
1930  case LF:
1931  /* LF before CR? invalid */
1932  p->error = htparse_error_inval_hdr;
1933  return i + 1;
1934  default:
1935  p->buf[p->buf_idx++] = ch;
1936  p->buf[p->buf_idx] = '\0';
1937  break;
1938  } /* switch */
1939 
1940  if (p->state != s_hdrline_hdr_val)
1941  {
1942  break;
1943  }
1944 
1945  ch = data[++i];
1946  } while (i < len);
1947 
1948  break;
1950  log_debug("[%p] s_hdrline_hdr_almost_done", p);
1951 
1952  res = 0;
1953  switch (ch) {
1954  case LF:
1955  if (p->flags & parser_flag_trailing)
1956  {
1957  res = hook_on_msg_complete_run(p, hooks);
1958  p->state = s_start;
1959  break;
1960  }
1961 
1963  break;
1964  default:
1965  p->error = htparse_error_inval_hdr;
1966  return i + 1;
1967  }
1968 
1969  if (res)
1970  {
1971  p->error = htparse_error_user;
1972  return i + 1;
1973  }
1974 
1975  break;
1976  case s_hdrline_hdr_done:
1977  log_debug("[%p] s_hdrline_hdr_done", p);
1978 
1979  switch (ch) {
1980  case CR:
1981  res = hook_hdr_val_run(p, hooks, p->buf, p->buf_idx);
1983 
1984  if (res)
1985  {
1986  p->error = htparse_error_user;
1987  return i + 1;
1988  }
1989 
1990  break;
1991  case LF:
1992  /* got LFLF? is this valid? */
1993  p->error = htparse_error_inval_hdr;
1994 
1995  return i + 1;
1996  case '\t':
1997  /* this is a multiline header value, we must go back to
1998  * reading as a header value */
1999  p->state = s_hdrline_hdr_val;
2000  break;
2001  default:
2002  res = hook_hdr_val_run(p, hooks, p->buf, p->buf_idx);
2003 
2004  p->buf_idx = 0;
2005  p->buf[p->buf_idx++] = ch;
2006  p->buf[p->buf_idx] = '\0';
2007 
2008  p->state = s_hdrline_hdr_key;
2009 
2010  if (res)
2011  {
2012  p->error = htparse_error_user;
2013  return i + 1;
2014  }
2015 
2016  break;
2017  } /* switch */
2018  break;
2019  case s_hdrline_almost_done:
2020  log_debug("[%p] s_hdrline_almost_done", p);
2021 
2022  switch (ch) {
2023  case LF:
2024  res = hook_on_hdrs_complete_run(p, hooks);
2025 
2026  if (res != 0)
2027  {
2028  p->error = htparse_error_user;
2029  return i + 1;
2030  }
2031 
2032  p->buf_idx = 0;
2033 
2034  if (p->flags & parser_flag_trailing)
2035  {
2036  res = hook_on_msg_complete_run(p, hooks);
2037  p->state = s_start;
2038  } else if (p->flags & parser_flag_chunked)
2039  {
2041  } else if (p->content_len > 0)
2042  {
2043  p->state = s_body_read;
2044  } else if (p->content_len == 0)
2045  {
2046  res = hook_on_msg_complete_run(p, hooks);
2047  p->state = s_start;
2048  } else {
2049  p->state = s_hdrline_done;
2050  }
2051 
2052  if (res != 0)
2053  {
2054  p->error = htparse_error_user;
2055  return i + 1;
2056  }
2057  break;
2058 
2059  default:
2060  p->error = htparse_error_inval_hdr;
2061  return i + 1;
2062  } /* switch */
2063 
2064  if (res != 0)
2065  {
2066  p->error = htparse_error_user;
2067  return i + 1;
2068  }
2069 
2070  break;
2071  case s_hdrline_done:
2072  log_debug("[%p] s_hdrline_done", p);
2073 
2074  res = 0;
2075 
2076  if (p->flags & parser_flag_trailing)
2077  {
2078  res = hook_on_msg_complete_run(p, hooks);
2079  p->state = s_start;
2080  } else if (p->flags & parser_flag_chunked)
2081  {
2083  i--;
2084  } else if (p->content_len > 0)
2085  {
2086  p->state = s_body_read;
2087  i--;
2088  } else if (p->content_len == 0)
2089  {
2090  res = hook_on_msg_complete_run(p, hooks);
2091  p->state = s_start;
2092  }
2093 
2094  if (res)
2095  {
2096  p->error = htparse_error_user;
2097  return i + 1;
2098  }
2099 
2100  break;
2101  case s_chunk_size_start:
2102  c = unhex[(unsigned char)ch];
2103 
2104  if (c == -1)
2105  {
2106  p->error = htparse_error_inval_chunk_sz;
2107  return i + 1;
2108  }
2109 
2110  p->content_len = c;
2111  p->state = s_chunk_size;
2112  break;
2113  case s_chunk_size:
2114  if (ch == CR)
2115  {
2117  break;
2118  }
2119 
2120  c = unhex[(unsigned char)ch];
2121 
2122  if (c == -1)
2123  {
2124  p->error = htparse_error_inval_chunk_sz;
2125  return i + 1;
2126  }
2127 
2128  p->content_len *= 16;
2129  p->content_len += c;
2130  break;
2131 
2133  if (ch != LF)
2134  {
2135  p->error = htparse_error_inval_chunk_sz;
2136  return i + 1;
2137  }
2138 
2139  p->orig_content_len = p->content_len;
2140 
2141  if (p->content_len == 0)
2142  {
2143  res = hook_on_chunks_complete_run(p, hooks);
2144 
2146  p->state = s_hdrline_start;
2147  } else {
2148  res = hook_on_new_chunk_run(p, hooks);
2149 
2150  p->state = s_chunk_data;
2151  }
2152 
2153  if (res)
2154  {
2155  p->error = htparse_error_user;
2156  return i + 1;
2157  }
2158 
2159  break;
2160 
2161  case s_chunk_data:
2162  res = 0;
2163  {
2164  const char * pp = &data[i];
2165  const char * pe = (const char *)(data + len);
2166  size_t to_read = _MIN_READ(pe - pp, p->content_len);
2167 
2168  if (to_read > 0)
2169  {
2170  res = hook_body_run(p, hooks, pp, to_read);
2171 
2172  i += to_read - 1;
2173  }
2174 
2175  if (to_read == p->content_len)
2176  {
2178  }
2179 
2180  p->content_len -= to_read;
2181  }
2182 
2183  if (res)
2184  {
2185  p->error = htparse_error_user;
2186  return i + 1;
2187  }
2188 
2189  break;
2190 
2192  if (ch != CR)
2193  {
2194  p->error = htparse_error_inval_chunk;
2195  return i + 1;
2196  }
2197 
2198  p->state = s_chunk_data_done;
2199  break;
2200 
2201  case s_chunk_data_done:
2202  if (ch != LF)
2203  {
2204  p->error = htparse_error_inval_chunk;
2205  return i + 1;
2206  }
2207 
2208  p->orig_content_len = 0;
2210 
2211  if (hook_on_chunk_complete_run(p, hooks))
2212  {
2213  p->error = htparse_error_user;
2214  return i + 1;
2215  }
2216 
2217  break;
2218 
2219  case s_body_read:
2220  res = 0;
2221 
2222  {
2223  const char * pp = &data[i];
2224  const char * pe = (const char *)(data + len);
2225  size_t to_read = _MIN_READ(pe - pp, p->content_len);
2226 
2227  if (to_read > 0)
2228  {
2229  res = hook_body_run(p, hooks, pp, to_read);
2230 
2231  i += to_read - 1;
2232  p->content_len -= to_read;
2233  }
2234 
2235  if (p->content_len == 0)
2236  {
2237  res = hook_on_msg_complete_run(p, hooks);
2238  p->state = s_start;
2239  }
2240 
2241  if (res)
2242  {
2243  p->error = htparse_error_user;
2244  return i + 1;
2245  }
2246  }
2247 
2248  break;
2249 
2250  default:
2251  log_debug("[%p] This is a silly state....", p);
2252  p->error = htparse_error_inval_state;
2253  return i + 1;
2254  } /* switch */
2255 
2256  /* If we successfully completed a request/response, we return
2257  * to the caller and leave it up to the caller to call us again
2258  * if parsing should continue. */
2259  if (p->state == s_start)
2260  {
2261  return i + 1;
2262  }
2263  } /* switch */
2264 
2265  return i;
2266 } /* htparser_run */
htp_type type
Definition: parser.c:93
uint64_t content_len
Definition: parser.c:100
Definition: parser.c:61
unsigned char multipart
Definition: parser.c:97
htp_scheme htparser_get_scheme(htparser *p)
Definition: parser.c:471
#define CR
Definition: parser.c:16
unsigned int status
Definition: parser.c:104
void * args
Definition: thread.c:119
uint64_t orig_content_len
Definition: parser.c:101
static int8_t unhex[256]
Definition: parser.c:192
char * args_offset
Definition: parser.c:111
char buf[PARSER_STACK_MAX]
Definition: parser.c:117
unsigned char major
Definition: parser.c:98
htpparse_error error
Definition: parser.c:88
static const char * errstr_map[]
Definition: parser.c:203
#define __HTPARSE_GENHOOK(__n)
Definition: parser.c:303
static int is_host_char(unsigned char ch)
Definition: parser.c:584
#define _str4cmp(m, c0, c1, c2, c3)
Definition: parser.c:248
static uint32_t usual[]
Definition: parser.c:181
#define _MIN_READ(a, b)
Definition: parser.c:238
htp_method method
Definition: parser.c:95
const char * htparser_get_methodstr_m(htp_method meth)
Definition: parser.c:483
unsigned char htparser_get_multipart(htparser *p)
Definition: parser.c:524
char * port_offset
Definition: parser.c:109
Definition: parser.c:37
void htparser_set_minor(htparser *p, unsigned char minor)
Definition: parser.c:506
#define __HTPARSE_GENDHOOK(__n)
Definition: parser.c:315
#define _str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8)
Definition: parser.c:267
uint64_t htparser_get_total_bytes_read(htparser *p)
Definition: parser.c:560
parser_state state
Definition: parser.c:89
void * userdata
Definition: parser.c:113
htp_method htparser_get_method(htparser *p)
Definition: parser.c:477
uint64_t htparser_get_bytes_read(htparser *p)
Definition: parser.c:554
#define _str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7)
Definition: parser.c:263
#define _str6cmp(m, c0, c1, c2, c3, c4, c5)
Definition: parser.c:255
size_t htparser_run(htparser *p, htparse_hooks *hooks, const char *data, size_t len)
Definition: parser.c:709
Definition: parser.c:43
const char * htparser_get_strerror(htparser *p)
Definition: parser.c:429
#define _str7_cmp(m, c0, c1, c2, c3, c4, c5, c6, c7)
Definition: parser.c:259
#define _str3Ocmp(m, c0, c1, c2, c3)
Definition: parser.c:245
size_t buf_idx
Definition: parser.c:115
#define _str5cmp(m, c0, c1, c2, c3, c4)
Definition: parser.c:251
eval_hdr_val heval
Definition: parser.c:91
parser_state
Definition: parser.c:36
static const char * method_strmap[]
Definition: parser.c:219
uint64_t bytes_read
Definition: parser.c:102
#define PARSER_STACK_MAX
Definition: parser.c:14
#define log_htparser__s_(p)
Definition: parser.c:178
char * host_offset
Definition: parser.c:108
void htparser_init(htparser *p, htp_type type)
Definition: parser.c:566
uint64_t total_bytes_read
Definition: parser.c:103
const char * htparser_get_methodstr(htparser *p)
Definition: parser.c:494
parser_flags flags
Definition: parser.c:90
unsigned char htparser_get_major(htparser *p)
Definition: parser.c:512
char * scheme_offset
Definition: parser.c:107
static htp_method get_method(const char *m, const size_t sz)
Definition: parser.c:602
unsigned int status_count
Definition: parser.c:105
Definition: parser.c:46
void htparser_set_userdata(htparser *p, void *ud)
Definition: parser.c:536
static uint64_t str_to_uint64(char *str, size_t n, int *err)
Definition: parser.c:351
parser_flags
Definition: parser.c:29
void * htparser_get_userdata(htparser *p)
Definition: parser.c:530
static ssize_t _str_to_ssize_t(char *str, size_t n)
Definition: parser.c:393
htp_scheme scheme
Definition: parser.c:94
htpparse_error htparser_get_error(htparser *p)
Definition: parser.c:423
uint64_t htparser_get_content_pending(htparser *p)
Definition: parser.c:542
Definition: parser.c:49
htparser * htparser_new(void)
Definition: parser.c:578
unsigned int htparser_get_status(htparser *p)
Definition: parser.c:442
uint64_t htparser_get_content_length(htparser *p)
Definition: parser.c:548
eval_hdr_val
Definition: parser.c:19
unsigned char minor
Definition: parser.c:99
unsigned char htparser_get_minor(htparser *p)
Definition: parser.c:518
#define log_debug(M,...)
Definition: log.h:17
#define evhtp_unlikely(x)
Definition: internal.h:18
#define _str3_cmp(m, c0, c1, c2, c3)
Definition: parser.c:242
int htparser_should_keep_alive(htparser *p)
Definition: parser.c:448
#define LF
Definition: parser.c:15
void htparser_set_major(htparser *p, unsigned char major)
Definition: parser.c:500
#define evhtp_likely(x)
Definition: internal.h:17
char * path_offset
Definition: parser.c:110