libstdc++
regex.tcc
Go to the documentation of this file.
00001 // class template regex -*- C++ -*-
00002 
00003 // Copyright (C) 2013-2016 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /**
00026  *  @file bits/regex.tcc
00027  *  This is an internal header file, included by other library headers.
00028  *  Do not attempt to use it directly. @headername{regex}
00029  */
00030 
00031 namespace std _GLIBCXX_VISIBILITY(default)
00032 {
00033 namespace __detail
00034 {
00035 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00036 
00037   // Result of merging regex_match and regex_search.
00038   //
00039   // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
00040   // the other one if possible, for test purpose).
00041   //
00042   // That __match_mode is true means regex_match, else regex_search.
00043   template<typename _BiIter, typename _Alloc,
00044            typename _CharT, typename _TraitsT,
00045            _RegexExecutorPolicy __policy,
00046            bool __match_mode>
00047     bool
00048     __regex_algo_impl(_BiIter                              __s,
00049                       _BiIter                              __e,
00050                       match_results<_BiIter, _Alloc>&      __m,
00051                       const basic_regex<_CharT, _TraitsT>& __re,
00052                       regex_constants::match_flag_type     __flags)
00053     {
00054       if (__re._M_automaton == nullptr)
00055         return false;
00056 
00057       typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
00058       __m._M_begin = __s;
00059       __m._M_resize(__re._M_automaton->_M_sub_count());
00060       for (auto& __it : __res)
00061         __it.matched = false;
00062 
00063       bool __ret;
00064       if ((__re.flags() & regex_constants::__polynomial)
00065           || (__policy == _RegexExecutorPolicy::_S_alternate
00066               && !__re._M_automaton->_M_has_backref))
00067         {
00068           _Executor<_BiIter, _Alloc, _TraitsT, false>
00069             __executor(__s, __e, __m, __re, __flags);
00070           if (__match_mode)
00071             __ret = __executor._M_match();
00072           else
00073             __ret = __executor._M_search();
00074         }
00075       else
00076         {
00077           _Executor<_BiIter, _Alloc, _TraitsT, true>
00078             __executor(__s, __e, __m, __re, __flags);
00079           if (__match_mode)
00080             __ret = __executor._M_match();
00081           else
00082             __ret = __executor._M_search();
00083         }
00084       if (__ret)
00085         {
00086           for (auto& __it : __res)
00087             if (!__it.matched)
00088               __it.first = __it.second = __e;
00089           auto& __pre = __m._M_prefix();
00090           auto& __suf = __m._M_suffix();
00091           if (__match_mode)
00092             {
00093               __pre.matched = false;
00094               __pre.first = __s;
00095               __pre.second = __s;
00096               __suf.matched = false;
00097               __suf.first = __e;
00098               __suf.second = __e;
00099             }
00100           else
00101             {
00102               __pre.first = __s;
00103               __pre.second = __res[0].first;
00104               __pre.matched = (__pre.first != __pre.second);
00105               __suf.first = __res[0].second;
00106               __suf.second = __e;
00107               __suf.matched = (__suf.first != __suf.second);
00108             }
00109         }
00110       else
00111         {
00112           __m._M_resize(0);
00113           for (auto& __it : __res)
00114             {
00115               __it.matched = false;
00116               __it.first = __it.second = __e;
00117             }
00118         }
00119       return __ret;
00120     }
00121 
00122 _GLIBCXX_END_NAMESPACE_VERSION
00123 }
00124 
00125 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00126 
00127   template<typename _Ch_type>
00128   template<typename _Fwd_iter>
00129     typename regex_traits<_Ch_type>::string_type
00130     regex_traits<_Ch_type>::
00131     lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
00132     {
00133       typedef std::ctype<char_type> __ctype_type;
00134       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
00135 
00136       static const char* __collatenames[] =
00137         {
00138           "NUL",
00139           "SOH",
00140           "STX",
00141           "ETX",
00142           "EOT",
00143           "ENQ",
00144           "ACK",
00145           "alert",
00146           "backspace",
00147           "tab",
00148           "newline",
00149           "vertical-tab",
00150           "form-feed",
00151           "carriage-return",
00152           "SO",
00153           "SI",
00154           "DLE",
00155           "DC1",
00156           "DC2",
00157           "DC3",
00158           "DC4",
00159           "NAK",
00160           "SYN",
00161           "ETB",
00162           "CAN",
00163           "EM",
00164           "SUB",
00165           "ESC",
00166           "IS4",
00167           "IS3",
00168           "IS2",
00169           "IS1",
00170           "space",
00171           "exclamation-mark",
00172           "quotation-mark",
00173           "number-sign",
00174           "dollar-sign",
00175           "percent-sign",
00176           "ampersand",
00177           "apostrophe",
00178           "left-parenthesis",
00179           "right-parenthesis",
00180           "asterisk",
00181           "plus-sign",
00182           "comma",
00183           "hyphen",
00184           "period",
00185           "slash",
00186           "zero",
00187           "one",
00188           "two",
00189           "three",
00190           "four",
00191           "five",
00192           "six",
00193           "seven",
00194           "eight",
00195           "nine",
00196           "colon",
00197           "semicolon",
00198           "less-than-sign",
00199           "equals-sign",
00200           "greater-than-sign",
00201           "question-mark",
00202           "commercial-at",
00203           "A",
00204           "B",
00205           "C",
00206           "D",
00207           "E",
00208           "F",
00209           "G",
00210           "H",
00211           "I",
00212           "J",
00213           "K",
00214           "L",
00215           "M",
00216           "N",
00217           "O",
00218           "P",
00219           "Q",
00220           "R",
00221           "S",
00222           "T",
00223           "U",
00224           "V",
00225           "W",
00226           "X",
00227           "Y",
00228           "Z",
00229           "left-square-bracket",
00230           "backslash",
00231           "right-square-bracket",
00232           "circumflex",
00233           "underscore",
00234           "grave-accent",
00235           "a",
00236           "b",
00237           "c",
00238           "d",
00239           "e",
00240           "f",
00241           "g",
00242           "h",
00243           "i",
00244           "j",
00245           "k",
00246           "l",
00247           "m",
00248           "n",
00249           "o",
00250           "p",
00251           "q",
00252           "r",
00253           "s",
00254           "t",
00255           "u",
00256           "v",
00257           "w",
00258           "x",
00259           "y",
00260           "z",
00261           "left-curly-bracket",
00262           "vertical-line",
00263           "right-curly-bracket",
00264           "tilde",
00265           "DEL",
00266         };
00267 
00268       string __s;
00269       for (; __first != __last; ++__first)
00270         __s += __fctyp.narrow(*__first, 0);
00271 
00272       for (const auto& __it : __collatenames)
00273         if (__s == __it)
00274           return string_type(1, __fctyp.widen(
00275             static_cast<char>(&__it - __collatenames)));
00276 
00277       // TODO Add digraph support:
00278       // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
00279 
00280       return string_type();
00281     }
00282 
00283   template<typename _Ch_type>
00284   template<typename _Fwd_iter>
00285     typename regex_traits<_Ch_type>::char_class_type
00286     regex_traits<_Ch_type>::
00287     lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
00288     {
00289       typedef std::ctype<char_type> __ctype_type;
00290       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
00291 
00292       // Mappings from class name to class mask.
00293       static const pair<const char*, char_class_type> __classnames[] =
00294       {
00295         {"d", ctype_base::digit},
00296         {"w", {ctype_base::alnum, _RegexMask::_S_under}},
00297         {"s", ctype_base::space},
00298         {"alnum", ctype_base::alnum},
00299         {"alpha", ctype_base::alpha},
00300         {"blank", ctype_base::blank},
00301         {"cntrl", ctype_base::cntrl},
00302         {"digit", ctype_base::digit},
00303         {"graph", ctype_base::graph},
00304         {"lower", ctype_base::lower},
00305         {"print", ctype_base::print},
00306         {"punct", ctype_base::punct},
00307         {"space", ctype_base::space},
00308         {"upper", ctype_base::upper},
00309         {"xdigit", ctype_base::xdigit},
00310       };
00311 
00312       string __s;
00313       for (; __first != __last; ++__first)
00314         __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
00315 
00316       for (const auto& __it : __classnames)
00317         if (__s == __it.first)
00318           {
00319             if (__icase
00320                 && ((__it.second
00321                      & (ctype_base::lower | ctype_base::upper)) != 0))
00322               return ctype_base::alpha;
00323             return __it.second;
00324           }
00325       return 0;
00326     }
00327 
00328   template<typename _Ch_type>
00329     bool
00330     regex_traits<_Ch_type>::
00331     isctype(_Ch_type __c, char_class_type __f) const
00332     {
00333       typedef std::ctype<char_type> __ctype_type;
00334       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
00335 
00336       return __fctyp.is(__f._M_base, __c)
00337         // [[:w:]]
00338         || ((__f._M_extended & _RegexMask::_S_under)
00339             && __c == __fctyp.widen('_'));
00340     }
00341 
00342   template<typename _Ch_type>
00343     int
00344     regex_traits<_Ch_type>::
00345     value(_Ch_type __ch, int __radix) const
00346     {
00347       std::basic_istringstream<char_type> __is(string_type(1, __ch));
00348       long __v;
00349       if (__radix == 8)
00350         __is >> std::oct;
00351       else if (__radix == 16)
00352         __is >> std::hex;
00353       __is >> __v;
00354       return __is.fail() ? -1 : __v;
00355     }
00356 
00357   template<typename _Bi_iter, typename _Alloc>
00358   template<typename _Out_iter>
00359     _Out_iter match_results<_Bi_iter, _Alloc>::
00360     format(_Out_iter __out,
00361            const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
00362            const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
00363            match_flag_type __flags) const
00364     {
00365       __glibcxx_assert( ready() );
00366       regex_traits<char_type> __traits;
00367       typedef std::ctype<char_type> __ctype_type;
00368       const __ctype_type&
00369         __fctyp(use_facet<__ctype_type>(__traits.getloc()));
00370 
00371       auto __output = [&](size_t __idx)
00372         {
00373           auto& __sub = (*this)[__idx];
00374           if (__sub.matched)
00375             __out = std::copy(__sub.first, __sub.second, __out);
00376         };
00377 
00378       if (__flags & regex_constants::format_sed)
00379         {
00380           for (; __fmt_first != __fmt_last;)
00381             if (*__fmt_first == '&')
00382               {
00383                 __output(0);
00384                 ++__fmt_first;
00385               }
00386             else if (*__fmt_first == '\\')
00387               {
00388                 if (++__fmt_first != __fmt_last
00389                     && __fctyp.is(__ctype_type::digit, *__fmt_first))
00390                   __output(__traits.value(*__fmt_first++, 10));
00391                 else
00392                   *__out++ = '\\';
00393               }
00394             else
00395               *__out++ = *__fmt_first++;
00396         }
00397       else
00398         {
00399           while (1)
00400             {
00401               auto __next = std::find(__fmt_first, __fmt_last, '$');
00402               if (__next == __fmt_last)
00403                 break;
00404 
00405               __out = std::copy(__fmt_first, __next, __out);
00406 
00407               auto __eat = [&](char __ch) -> bool
00408                 {
00409                   if (*__next == __ch)
00410                     {
00411                       ++__next;
00412                       return true;
00413                     }
00414                   return false;
00415                 };
00416 
00417               if (++__next == __fmt_last)
00418                 *__out++ = '$';
00419               else if (__eat('$'))
00420                 *__out++ = '$';
00421               else if (__eat('&'))
00422                 __output(0);
00423               else if (__eat('`'))
00424                 {
00425                   auto& __sub = _M_prefix();
00426                   if (__sub.matched)
00427                     __out = std::copy(__sub.first, __sub.second, __out);
00428                 }
00429               else if (__eat('\''))
00430                 {
00431                   auto& __sub = _M_suffix();
00432                   if (__sub.matched)
00433                     __out = std::copy(__sub.first, __sub.second, __out);
00434                 }
00435               else if (__fctyp.is(__ctype_type::digit, *__next))
00436                 {
00437                   long __num = __traits.value(*__next, 10);
00438                   if (++__next != __fmt_last
00439                       && __fctyp.is(__ctype_type::digit, *__next))
00440                     {
00441                       __num *= 10;
00442                       __num += __traits.value(*__next++, 10);
00443                     }
00444                   if (0 <= __num && __num < this->size())
00445                     __output(__num);
00446                 }
00447               else
00448                 *__out++ = '$';
00449               __fmt_first = __next;
00450             }
00451           __out = std::copy(__fmt_first, __fmt_last, __out);
00452         }
00453       return __out;
00454     }
00455 
00456   template<typename _Out_iter, typename _Bi_iter,
00457            typename _Rx_traits, typename _Ch_type>
00458     _Out_iter
00459     regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
00460                   const basic_regex<_Ch_type, _Rx_traits>& __e,
00461                   const _Ch_type* __fmt,
00462                   regex_constants::match_flag_type __flags)
00463     {
00464       typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
00465       _IterT __i(__first, __last, __e, __flags);
00466       _IterT __end;
00467       if (__i == __end)
00468         {
00469           if (!(__flags & regex_constants::format_no_copy))
00470             __out = std::copy(__first, __last, __out);
00471         }
00472       else
00473         {
00474           sub_match<_Bi_iter> __last;
00475           auto __len = char_traits<_Ch_type>::length(__fmt);
00476           for (; __i != __end; ++__i)
00477             {
00478               if (!(__flags & regex_constants::format_no_copy))
00479                 __out = std::copy(__i->prefix().first, __i->prefix().second,
00480                                   __out);
00481               __out = __i->format(__out, __fmt, __fmt + __len, __flags);
00482               __last = __i->suffix();
00483               if (__flags & regex_constants::format_first_only)
00484                 break;
00485             }
00486           if (!(__flags & regex_constants::format_no_copy))
00487             __out = std::copy(__last.first, __last.second, __out);
00488         }
00489       return __out;
00490     }
00491 
00492   template<typename _Bi_iter,
00493            typename _Ch_type,
00494            typename _Rx_traits>
00495     bool
00496     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00497     operator==(const regex_iterator& __rhs) const
00498     {
00499       return (_M_match.empty() && __rhs._M_match.empty())
00500         || (_M_begin == __rhs._M_begin
00501             && _M_end == __rhs._M_end
00502             && _M_pregex == __rhs._M_pregex
00503             && _M_flags == __rhs._M_flags
00504             && _M_match[0] == __rhs._M_match[0]);
00505     }
00506 
00507   template<typename _Bi_iter,
00508            typename _Ch_type,
00509            typename _Rx_traits>
00510     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
00511     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00512     operator++()
00513     {
00514       // In all cases in which the call to regex_search returns true,
00515       // match.prefix().first shall be equal to the previous value of
00516       // match[0].second, and for each index i in the half-open range
00517       // [0, match.size()) for which match[i].matched is true,
00518       // match[i].position() shall return distance(begin, match[i].first).
00519       // [28.12.1.4.5]
00520       if (_M_match[0].matched)
00521         {
00522           auto __start = _M_match[0].second;
00523           auto __prefix_first = _M_match[0].second;
00524           if (_M_match[0].first == _M_match[0].second)
00525             {
00526               if (__start == _M_end)
00527                 {
00528                   _M_match = value_type();
00529                   return *this;
00530                 }
00531               else
00532                 {
00533                   if (regex_search(__start, _M_end, _M_match, *_M_pregex,
00534                                    _M_flags
00535                                    | regex_constants::match_not_null
00536                                    | regex_constants::match_continuous))
00537                     {
00538                       __glibcxx_assert(_M_match[0].matched);
00539                       auto& __prefix = _M_match._M_prefix();
00540                       __prefix.first = __prefix_first;
00541                       __prefix.matched = __prefix.first != __prefix.second;
00542                       // [28.12.1.4.5]
00543                       _M_match._M_begin = _M_begin;
00544                       return *this;
00545                     }
00546                   else
00547                     ++__start;
00548                 }
00549             }
00550           _M_flags |= regex_constants::match_prev_avail;
00551           if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
00552             {
00553               __glibcxx_assert(_M_match[0].matched);
00554               auto& __prefix = _M_match._M_prefix();
00555               __prefix.first = __prefix_first;
00556               __prefix.matched = __prefix.first != __prefix.second;
00557               // [28.12.1.4.5]
00558               _M_match._M_begin = _M_begin;
00559             }
00560           else
00561             _M_match = value_type();
00562         }
00563       return *this;
00564     }
00565 
00566   template<typename _Bi_iter,
00567            typename _Ch_type,
00568            typename _Rx_traits>
00569     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
00570     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00571     operator=(const regex_token_iterator& __rhs)
00572     {
00573       _M_position = __rhs._M_position;
00574       _M_subs = __rhs._M_subs;
00575       _M_n = __rhs._M_n;
00576       _M_suffix = __rhs._M_suffix;
00577       _M_has_m1 = __rhs._M_has_m1;
00578       _M_normalize_result();
00579       return *this;
00580     }
00581 
00582   template<typename _Bi_iter,
00583            typename _Ch_type,
00584            typename _Rx_traits>
00585     bool
00586     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00587     operator==(const regex_token_iterator& __rhs) const
00588     {
00589       if (_M_end_of_seq() && __rhs._M_end_of_seq())
00590         return true;
00591       if (_M_suffix.matched && __rhs._M_suffix.matched
00592           && _M_suffix == __rhs._M_suffix)
00593         return true;
00594       if (_M_end_of_seq() || _M_suffix.matched
00595           || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
00596         return false;
00597       return _M_position == __rhs._M_position
00598         && _M_n == __rhs._M_n
00599         && _M_subs == __rhs._M_subs;
00600     }
00601 
00602   template<typename _Bi_iter,
00603            typename _Ch_type,
00604            typename _Rx_traits>
00605     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
00606     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00607     operator++()
00608     {
00609       _Position __prev = _M_position;
00610       if (_M_suffix.matched)
00611         *this = regex_token_iterator();
00612       else if (_M_n + 1 < _M_subs.size())
00613         {
00614           _M_n++;
00615           _M_result = &_M_current_match();
00616         }
00617       else
00618         {
00619           _M_n = 0;
00620           ++_M_position;
00621           if (_M_position != _Position())
00622             _M_result = &_M_current_match();
00623           else if (_M_has_m1 && __prev->suffix().length() != 0)
00624             {
00625               _M_suffix.matched = true;
00626               _M_suffix.first = __prev->suffix().first;
00627               _M_suffix.second = __prev->suffix().second;
00628               _M_result = &_M_suffix;
00629             }
00630           else
00631             *this = regex_token_iterator();
00632         }
00633       return *this;
00634     }
00635 
00636   template<typename _Bi_iter,
00637            typename _Ch_type,
00638            typename _Rx_traits>
00639     void
00640     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00641     _M_init(_Bi_iter __a, _Bi_iter __b)
00642     {
00643       _M_has_m1 = false;
00644       for (auto __it : _M_subs)
00645         if (__it == -1)
00646           {
00647             _M_has_m1 = true;
00648             break;
00649           }
00650       if (_M_position != _Position())
00651         _M_result = &_M_current_match();
00652       else if (_M_has_m1)
00653         {
00654           _M_suffix.matched = true;
00655           _M_suffix.first = __a;
00656           _M_suffix.second = __b;
00657           _M_result = &_M_suffix;
00658         }
00659       else
00660         _M_result = nullptr;
00661     }
00662 
00663 _GLIBCXX_END_NAMESPACE_VERSION
00664 } // namespace
00665