• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *
3  * Copyright (c) 2004
4  * John Maddock
5  *
6  * Use, modification and distribution are subject to the
7  * Boost Software License, Version 1.0. (See accompanying file
8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9  *
10  */
11 
 /*
  *   LOCATION:    see http://www.boost.org for most recent version.
  *   FILE         icu_example.cpp
  *   VERSION      see <boost/version.hpp>
  *   DESCRIPTION: examples of using Boost.Regex with ICU Unicode string types.
  */
18 
19 #include <boost/regex/config.hpp>
20 
21 #ifdef BOOST_HAS_ICU
22 
23 #include <boost/regex/icu.hpp>
24 #include <iostream>
25 #include <assert.h>
26 
27 //
28 // Find out if *password* meets our password requirements,
29 // as defined by the regular expression *requirements*.
30 //
is_valid_password(const U_NAMESPACE_QUALIFIER UnicodeString & password,const U_NAMESPACE_QUALIFIER UnicodeString & requirements)31 bool is_valid_password(const U_NAMESPACE_QUALIFIER UnicodeString& password, const U_NAMESPACE_QUALIFIER UnicodeString& requirements)
32 {
33    return boost::u32regex_match(password, boost::make_u32regex(requirements));
34 }
35 
36 //
37 // Extract filename part of a path from a UTF-8 encoded std::string and return the result
38 // as another std::string:
39 //
get_filename(const std::string & path)40 std::string get_filename(const std::string& path)
41 {
42    boost::u32regex r = boost::make_u32regex("(?:\\A|.*\\\\)([^\\\\]+)");
43    boost::smatch what;
44    if(boost::u32regex_match(path, what, r))
45    {
46       // extract $1 as a std::string:
47       return what.str(1);
48    }
49    else
50    {
51       throw std::runtime_error("Invalid pathname");
52    }
53 }
54 
//
// Search the UTF-16 encoded *text* for a block of Greek text and return
// that block as a UnicodeString.
// Throws std::runtime_error("No Greek found!") when the search fails.
//
U_NAMESPACE_QUALIFIER UnicodeString extract_greek(const U_NAMESPACE_QUALIFIER UnicodeString& text)
{
   // A block starts with a character in the range U+0370..U+03FF and carries
   // on over further characters from that range, or over anything that is not
   // a letter.  This is imperfect, but the best we can do for now: searching
   // for specific scripts is actually pretty hard to do right.
   const boost::u32regex greek_block = boost::make_u32regex(L"[\\x{370}-\\x{3FF}](?:[^[:L*:]]|[\\x{370}-\\x{3FF}])*");
   boost::u16match found;
   if(!boost::u32regex_search(text, found, greek_block))
   {
      throw std::runtime_error("No Greek found!");
   }
   // build the result from $0: where it starts, and how long it is:
   return U_NAMESPACE_QUALIFIER UnicodeString(found[0].first, found.length(0));
}
72 
enumerate_currencies(const std::string & text)73 void enumerate_currencies(const std::string& text)
74 {
75    // enumerate and print all the currency symbols, along
76    // with any associated numeric values:
77    const char* re =
78       "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
79       "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
80       "(?(1)"
81          "|(?(2)"
82             "[[:Cf:][:Cc:][:Z*:]]*"
83          ")"
84          "[[:Sc:]]"
85       ")";
86    boost::u32regex r = boost::make_u32regex(re);
87    boost::u32regex_iterator<std::string::const_iterator> i(boost::make_u32regex_iterator(text, r)), j;
88    while(i != j)
89    {
90       std::cout << (*i)[0] << std::endl;
91       ++i;
92    }
93 }
94 
enumerate_currencies2(const std::string & text)95 void enumerate_currencies2(const std::string& text)
96 {
97    // enumerate and print all the currency symbols, along
98    // with any associated numeric values:
99    const char* re =
100       "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
101       "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
102       "(?(1)"
103          "|(?(2)"
104             "[[:Cf:][:Cc:][:Z*:]]*"
105          ")"
106          "[[:Sc:]]"
107       ")";
108    boost::u32regex r = boost::make_u32regex(re);
109    boost::u32regex_token_iterator<std::string::const_iterator>
110       i(boost::make_u32regex_token_iterator(text, r, 1)), j;
111    while(i != j)
112    {
113       std::cout << *i << std::endl;
114       ++i;
115    }
116 }
117 
118 
119 //
120 // Take a credit card number as a string of digits,
121 // and reformat it as a human readable string with "-"
122 // separating each group of four digit;,
123 // note that we're mixing a UTF-32 regex, with a UTF-16
124 // string and a UTF-8 format specifier, and it still all
125 // just works:
126 //
127 const boost::u32regex e = boost::make_u32regex("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
128 const char* human_format = "$1-$2-$3-$4";
129 
human_readable_card_number(const U_NAMESPACE_QUALIFIER UnicodeString & s)130 U_NAMESPACE_QUALIFIER UnicodeString human_readable_card_number(const U_NAMESPACE_QUALIFIER UnicodeString& s)
131 {
132    return boost::u32regex_replace(s, e, human_format);
133 }
134 
135 
main()136 int main()
137 {
138    // password checks using u32regex_match:
139    U_NAMESPACE_QUALIFIER UnicodeString pwd = "abcDEF---";
140    U_NAMESPACE_QUALIFIER UnicodeString pwd_check = "(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}";
141    bool b = is_valid_password(pwd, pwd_check);
142    assert(b);
143    pwd = "abcD-";
144    b = is_valid_password(pwd, pwd_check);
145    assert(!b);
146    // filename extraction with u32regex_match:
147    std::string file = "abc.hpp";
148    file = get_filename(file);
149    assert(file == "abc.hpp");
150    file = "c:\\a\\b\\c\\d.h";
151    file = get_filename(file);
152    assert(file == "d.h");
153 
154    // Greek text extraction with u32regex_search:
155    const UChar t[] = {
156       'S', 'o', 'm', 'e', ' ', 'w', 'h', 'e', 'r', 'e', ' ', 'i', 'n', 0x0391, 0x039D, 0x0395, 0x0398, 0x0391, 0
157    };
158    const UChar g[] = {
159       0x0391, 0x039D, 0x0395, 0x0398, 0x0391, 0
160    };
161    U_NAMESPACE_QUALIFIER UnicodeString text = t;
162    U_NAMESPACE_QUALIFIER UnicodeString greek = extract_greek(text);
163    assert(greek == g);
164 
165    // extract currency symbols with associated value, use iterator interface:
166    std::string text2 = " $100.23 or \xC2\xA3""198.12 "; // \xC2\xA3 is the pound sign encoded in UTF-8
167    enumerate_currencies(text2);
168    enumerate_currencies2(text2);
169 
170    U_NAMESPACE_QUALIFIER UnicodeString credit_card_number = "1234567887654321";
171    credit_card_number = human_readable_card_number(credit_card_number);
172    assert(credit_card_number == "1234-5678-8765-4321");
173    return 0;
174 }
175 
176 #else
177 
178 #include <iostream>
179 
//
// Fallback entry point, compiled when BOOST_HAS_ICU is not defined:
// just report that the example cannot be run.
//
int main()
{
   static const char note[] = "<NOTE>ICU support not enabled, feature unavailable</NOTE>";
   std::cout << note;
   return 0;
}
185 
186 
187 #endif
188 
189