|  | Home | Libraries | People | FAQ | More | 
        The template class regex_token_iterator is an iterator
        adapter; that is to say it represents a new view of an existing iterator
        sequence, by enumerating all the occurrences of a regular expression within
        that sequence, and presenting one or more character sequences for each match
        found. Each position enumerated by the iterator is a sub_match object that represents
        what matched a particular sub-expression within the regular expression. When
        class regex_token_iterator is used to
        enumerate a single sub-expression with index -1, then the iterator performs
        field splitting: that is to say it enumerates one character sequence for
        each section of the character container sequence that does not match the
        regular expression specified.
      
template <class BidirectionalIterator, class charT = iterator_traits<BidirectionalIterator>::value_type, class traits = regex_traits<charT> > class regex_token_iterator { public: typedef basic_regex<charT, traits> regex_type; typedef sub_match<BidirectionalIterator> value_type; typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type; typedef const value_type* pointer; typedef const value_type& reference; typedef std::forward_iterator_tag iterator_category; regex_token_iterator(); regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, int submatch = 0, match_flag_type m = match_default); regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, const std::vector<int>& submatches, match_flag_type m = match_default); template <std::size_t N> regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, const int (&submatches)[N], match_flag_type m = match_default); regex_token_iterator(const regex_token_iterator&); regex_token_iterator& operator=(const regex_token_iterator&); bool operator==(const regex_token_iterator&)const; bool operator!=(const regex_token_iterator&)const; const value_type& operator*()const; const value_type* operator->()const; regex_token_iterator& operator++(); regex_token_iterator operator++(int); }; typedef regex_token_iterator<const char*> cregex_token_iterator; typedef regex_token_iterator<std::string::const_iterator> sregex_token_iterator; #ifndef BOOST_NO_WREGEX typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator; typedef regex_token_iterator<<std::wstring::const_iterator> wsregex_token_iterator; #endif template <class charT, class traits> regex_token_iterator<const charT*, charT, traits> make_regex_token_iterator( const charT* p, const basic_regex<charT, traits>& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default); template <class charT, class traits, 
class ST, class SA> regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> make_regex_token_iterator( const std::basic_string<charT, ST, SA>& p, const basic_regex<charT, traits>& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default); template <class charT, class traits, std::size_t N> regex_token_iterator<const charT*, charT, traits> make_regex_token_iterator( const charT* p, const basic_regex<charT, traits>& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default); template <class charT, class traits, class ST, class SA, std::size_t N> regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> make_regex_token_iterator( const std::basic_string<charT, ST, SA>& p, const basic_regex<charT, traits>& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default); template <class charT, class traits> regex_token_iterator<const charT*, charT, traits> make_regex_token_iterator( const charT* p, const basic_regex<charT, traits>& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = regex_constants::match_default); template <class charT, class traits, class ST, class SA> regex_token_iterator< typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> make_regex_token_iterator( const std::basic_string<charT, ST, SA>& p, const basic_regex<charT, traits>& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = regex_constants::match_default);
regex_token_iterator();
Effects: constructs an end of sequence iterator.
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, int submatch = 0, match_flag_type m = match_default);
        Preconditions: !re.empty(). Object re shall exist
        for the lifetime of the iterator constructed from it.
      
        Effects: constructs a regex_token_iterator that will enumerate
        one string for each regular expression match of the expression re
        found within the sequence [a,b), using match flags m
        (see match_flag_type).
        The string enumerated is the sub-expression submatch
        for each match found; if submatch is -1, then enumerates
        all the text sequences that did not match the expression re
        (that is to say, it performs field splitting).
      
        Throws: std::runtime_error
        if the complexity of matching the expression against an N character string
        begins to exceed O(N²), or if the program runs out of stack space while matching
        the expression (if Boost.Regex is configured in recursive mode), or if the
        matcher exhausts its permitted memory allocation (if Boost.Regex is configured
        in non-recursive mode).
      
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, const std::vector<int>& submatches, match_flag_type m = match_default);
        Preconditions: submatches.size()
        && !re.empty(). Object re shall exist
        for the lifetime of the iterator constructed from it.
      
        Effects: constructs a regex_token_iterator that will enumerate
        submatches.size()
        strings for each regular expression match of the expression re
        found within the sequence [a,b), using match flags m
        (see match_flag_type).
        For each match found one string will be enumerated for each sub-expression
        index contained within submatches vector; if submatches[0]
        is -1, then the first string enumerated for each match will be all of the
        text from end of the last match to the start of the current match, in addition
        there will be one extra string enumerated when no more matches can be found:
        from the end of the last match found, to the end of the underlying sequence.
      
        Throws: std::runtime_error
        if the complexity of matching the expression against an N character string
        begins to exceed O(N²), or if the program runs out of stack space while matching
        the expression (if Boost.Regex is configured in recursive mode), or if the
        matcher exhausts its permitted memory allocation (if Boost.Regex is configured
        in non-recursive mode).
      
template <std::size_t R> regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, const int (&submatches)[R], match_flag_type m = match_default);
        Preconditions: !re.empty(). Object re shall exist
        for the lifetime of the iterator constructed from it.
      
        Effects: constructs a regex_token_iterator that will enumerate
        R strings for each regular expression match of the expression
        re found within the sequence [a,b), using match flags
        m (see match_flag_type). For each match
        found one string will be enumerated for each sub-expression index contained
        within the submatches array; if submatches[0]
        is -1, then the first string enumerated for each match will be all of the
        text from end of the last match to the start of the current match, in addition
        there will be one extra string enumerated when no more matches can be found:
        from the end of the last match found, to the end of the underlying sequence.
      
        Throws: std::runtime_error
        if the complexity of matching the expression against an N character string
        begins to exceed O(N²), or if the program runs out of stack space while matching
        the expression (if Boost.Regex is configured in recursive mode), or if the
        matcher exhausts its permitted memory allocation (if Boost.Regex is configured
        in non-recursive mode).
      
regex_token_iterator(const regex_token_iterator& that);
        Effects: constructs a copy of that.
      
        Postconditions: *this == that.
      
regex_token_iterator& operator=(const regex_token_iterator& that);
        Effects: sets *this to be equal to that.
      
        Postconditions: *this == that.
      
bool operator==(const regex_token_iterator& that)const;
        Effects: returns true if *this is the same position as that.
      
bool operator!=(const regex_token_iterator& that)const;
        Effects: returns !(*this == that).
      
const value_type& operator*()const;
Effects: returns the current character sequence being enumerated.
const value_type* operator->()const;
        Effects: returns &(*this).
      
regex_token_iterator& operator++();
Effects: Moves on to the next character sequence to be enumerated.
        Throws: std::runtime_error
        if the complexity of matching the expression against an N character string
        begins to exceed O(N²), or if the program runs out of stack space while matching
        the expression (if Boost.Regex is configured in recursive mode), or if the
        matcher exhausts its permitted memory allocation (if Boost.Regex is configured
        in non-recursive mode).
      
        Returns: *this.
      
regex_token_iterator operator++(int);
        Effects: constructs a copy result of *this, then
        calls ++(*this).
      
Returns: result.
// Single sub-expression index, null-terminated string.
template <class charT, class traits>
regex_token_iterator<const charT*, charT, traits>
   make_regex_token_iterator(
         const charT* p,
         const basic_regex<charT, traits>& e,
         int submatch = 0,
         regex_constants::match_flag_type m = regex_constants::match_default);

// Single sub-expression index, std::basic_string.
template <class charT, class traits, class ST, class SA>
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
   make_regex_token_iterator(
         const std::basic_string<charT, ST, SA>& p,
         const basic_regex<charT, traits>& e,
         int submatch = 0,
         regex_constants::match_flag_type m = regex_constants::match_default);

// Array of sub-expression indexes, null-terminated string.
template <class charT, class traits, std::size_t N>
regex_token_iterator<const charT*, charT, traits>
   make_regex_token_iterator(
         const charT* p,
         const basic_regex<charT, traits>& e,
         const int (&submatch)[N],
         regex_constants::match_flag_type m = regex_constants::match_default);

// Array of sub-expression indexes, std::basic_string.
template <class charT, class traits, class ST, class SA, std::size_t N>
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
   make_regex_token_iterator(
         const std::basic_string<charT, ST, SA>& p,
         const basic_regex<charT, traits>& e,
         const int (&submatch)[N],
         regex_constants::match_flag_type m = regex_constants::match_default);

// Vector of sub-expression indexes, null-terminated string.
template <class charT, class traits>
regex_token_iterator<const charT*, charT, traits>
   make_regex_token_iterator(
         const charT* p,
         const basic_regex<charT, traits>& e,
         const std::vector<int>& submatch,
         regex_constants::match_flag_type m = regex_constants::match_default);

// Vector of sub-expression indexes, std::basic_string.
template <class charT, class traits, class ST, class SA>
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
   make_regex_token_iterator(
         const std::basic_string<charT, ST, SA>& p,
         const basic_regex<charT, traits>& e,
         const std::vector<int>& submatch,
         regex_constants::match_flag_type m = regex_constants::match_default);
        Effects: returns a regex_token_iterator that enumerates
        one sub_match
        for each value in submatch for each occurrence of regular
        expression e in string p, matched
        using match_flag_type
        m.
      
The following example takes a string and splits it into a series of tokens:
#include <iostream> #include <boost/regex.hpp> using namespace std; int main(int argc) { string s; do{ if(argc == 1) { cout << "Enter text to split (or \"quit\" to exit): "; getline(cin, s); if(s == "quit") break; } else s = "This is a string of tokens"; boost::regex re("\\s+"); boost::sregex_token_iterator i(s.begin(), s.end(), re, -1); boost::sregex_token_iterator j; unsigned count = 0; while(i != j) { cout << *i++ << endl; count++; } cout << "There were " << count << " tokens found." << endl; }while(argc == 1); return 0; }
The following example takes an HTML file and outputs a list of all the linked files:
#include <fstream> #include <iostream> #include <iterator> #include <boost/regex.hpp> boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"", boost::regex::normal | boost::regbase::icase); void load_file(std::string& s, std::istream& is) { s.erase(); // // attempt to grow string buffer to match file size, // this doesn't always work... s.reserve(is.rdbuf()->in_avail()); char c; while(is.get(c)) { // use logarithmic growth strategy, in case // in_avail (above) returned zero: if(s.capacity() == s.size()) s.reserve(s.capacity() * 3); s.append(1, c); } } int main(int argc, char** argv) { std::string s; int i; for(i = 1; i < argc; ++i) { std::cout << "Findings URL's in " << argv[i] << ":" << std::endl; s.erase(); std::ifstream is(argv[i]); load_file(s, is); boost::sregex_token_iterator i(s.begin(), s.end(), e, 1); boost::sregex_token_iterator j; while(i != j) { std::cout << *i++ << std::endl; } } // // alternative method: // test the array-literal constructor, and split out the whole // match as well as $1.... // for(i = 1; i < argc; ++i) { std::cout << "Findings URL's in " << argv[i] << ":" << std::endl; s.erase(); std::ifstream is(argv[i]); load_file(s, is); const int subs[] = {1, 0,}; boost::sregex_token_iterator i(s.begin(), s.end(), e, subs); boost::sregex_token_iterator j; while(i != j) { std::cout << *i++ << std::endl; } } return 0; }