1 /*************************************************************************
2 * © 2016 and later: Unicode, Inc. and others.
3 * License & terms of use: http://www.unicode.org/copyright.html
4 *
5 *************************************************************************
6 *************************************************************************
7 * COPYRIGHT:
8 * Copyright (C) 2002-2006 IBM, Inc. All Rights Reserved.
9 *
10 *************************************************************************/
11
/**
 * This program demos collation-based string search
 */
15
/**
 * Usage text printed when -help / -? is given or the command line is invalid.
 * Every option line is terminated by '\n' so that each option is printed on
 * its own row.
 */
const char gHelpString[] =
    "usage: strsrch [options*] -source source_string -pattern pattern_string\n"
    "-help Display this message.\n"
    "-locale name ICU locale to use. Default is en_US\n"
    // The value is used directly as a rule string, not as a file name.
    "-rules rule Collation rules string (overrides locale)\n"
    "-french French accent ordering\n"
    "-norm Normalizing mode on\n"
    "-shifted Shifted mode\n"
    "-lower Lower case first\n"
    "-upper Upper case first\n"
    "-case Enable separate case level\n"
    "-level n Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical\n"
    "-source string Source string\n"
    "-pattern string Pattern string to look for in source\n"
    "-overlap Enable searching to be done on overlapping patterns\n"
    "-canonical Enable searching to be done matching canonical equivalent patterns\n"
    "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n"
    "The format \\uXXXX is supported for the rules and comparison strings\n"
    ;
35
36 #include <stdio.h>
37 #include <string.h>
38 #include <stdlib.h>
39
40 #include <unicode/utypes.h>
41 #include <unicode/ucol.h>
42 #include <unicode/usearch.h>
43 #include <unicode/ustring.h>
44
45 /**
46 * Command line option variables
47 * These global variables are set according to the options specified
48 * on the command line by the user.
49 */
50 char * opt_locale = "en_US";
51 char * opt_rules = 0;
52 UBool opt_help = false;
53 UBool opt_norm = false;
54 UBool opt_french = false;
55 UBool opt_shifted = false;
56 UBool opt_lower = false;
57 UBool opt_upper = false;
58 UBool opt_case = false;
59 UBool opt_overlap = false;
60 UBool opt_canonical = false;
61 int opt_level = 0;
62 char * opt_source = "International Components for Unicode";
63 char * opt_pattern = "Unicode";
64 UCollator * collator = 0;
65 UStringSearch * search = 0;
66 UChar rules[100];
67 UChar source[100];
68 UChar pattern[100];
69
/**
 * Description of a single command line option: its spelling, how its value
 * is obtained, and where the result is stored.
 */
struct OptSpec {
    /** How an option obtains its value. */
    enum Kind {
        FLAG,    // takes no value; its presence alone is recorded
        NUM,     // followed by an integer value
        STRING   // followed by a string value
    };

    const char *name;   // option text as typed by the user, e.g. "-level"
    Kind        type;   // selects how pVar is interpreted
    void       *pVar;   // address of the variable that receives the value
};
78
79 OptSpec opts[] = {
80 {"-locale", OptSpec::STRING, &opt_locale},
81 {"-rules", OptSpec::STRING, &opt_rules},
82 {"-source", OptSpec::STRING, &opt_source},
83 {"-pattern", OptSpec::STRING, &opt_pattern},
84 {"-norm", OptSpec::FLAG, &opt_norm},
85 {"-french", OptSpec::FLAG, &opt_french},
86 {"-shifted", OptSpec::FLAG, &opt_shifted},
87 {"-lower", OptSpec::FLAG, &opt_lower},
88 {"-upper", OptSpec::FLAG, &opt_upper},
89 {"-case", OptSpec::FLAG, &opt_case},
90 {"-level", OptSpec::NUM, &opt_level},
91 {"-overlap", OptSpec::FLAG, &opt_overlap},
92 {"-canonical", OptSpec::FLAG, &opt_canonical},
93 {"-help", OptSpec::FLAG, &opt_help},
94 {"-?", OptSpec::FLAG, &opt_help},
95 {0, OptSpec::FLAG, 0}
96 };
97
98 /**
99 * processOptions() Function to read the command line options.
100 */
processOptions(int argc,const char ** argv,OptSpec opts[])101 UBool processOptions(int argc, const char **argv, OptSpec opts[])
102 {
103 for (int argNum = 1; argNum < argc; argNum ++) {
104 const char *pArgName = argv[argNum];
105 OptSpec *pOpt;
106 for (pOpt = opts; pOpt->name != 0; pOpt ++) {
107 if (strcmp(pOpt->name, pArgName) == 0) {
108 switch (pOpt->type) {
109 case OptSpec::FLAG:
110 *(UBool *)(pOpt->pVar) = true;
111 break;
112 case OptSpec::STRING:
113 argNum ++;
114 if (argNum >= argc) {
115 fprintf(stderr, "value expected for \"%s\" option.\n",
116 pOpt->name);
117 return false;
118 }
119 *(const char **)(pOpt->pVar) = argv[argNum];
120 break;
121 case OptSpec::NUM:
122 argNum ++;
123 if (argNum >= argc) {
124 fprintf(stderr, "value expected for \"%s\" option.\n",
125 pOpt->name);
126 return false;
127 }
128 char *endp;
129 int i = strtol(argv[argNum], &endp, 0);
130 if (endp == argv[argNum]) {
131 fprintf(stderr,
132 "integer value expected for \"%s\" option.\n",
133 pOpt->name);
134 return false;
135 }
136 *(int *)(pOpt->pVar) = i;
137 }
138 break;
139 }
140 }
141 if (pOpt->name == 0)
142 {
143 fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
144 return false;
145 }
146 }
147 return true;
148 }
149
150 /**
151 * Creates a collator
152 */
processCollator()153 UBool processCollator()
154 {
155 // Set up an ICU collator
156 UErrorCode status = U_ZERO_ERROR;
157
158 if (opt_rules != 0) {
159 u_unescape(opt_rules, rules, 100);
160 collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY,
161 NULL, &status);
162 }
163 else {
164 collator = ucol_open(opt_locale, &status);
165 }
166 if (U_FAILURE(status)) {
167 fprintf(stderr, "Collator creation failed.: %d\n", status);
168 return false;
169 }
170 if (status == U_USING_DEFAULT_WARNING) {
171 fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n",
172 opt_locale);
173 }
174 if (status == U_USING_FALLBACK_WARNING) {
175 fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n",
176 opt_locale);
177 }
178 if (opt_norm) {
179 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
180 }
181 if (opt_french) {
182 ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
183 }
184 if (opt_lower) {
185 ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST,
186 &status);
187 }
188 if (opt_upper) {
189 ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST,
190 &status);
191 }
192 if (opt_case) {
193 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status);
194 }
195 if (opt_shifted) {
196 ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
197 &status);
198 }
199 if (opt_level != 0) {
200 switch (opt_level) {
201 case 1:
202 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
203 break;
204 case 2:
205 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY,
206 &status);
207 break;
208 case 3:
209 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status);
210 break;
211 case 4:
212 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY,
213 &status);
214 break;
215 case 5:
216 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL,
217 &status);
218 break;
219 default:
220 fprintf(stderr, "-level param must be between 1 and 5\n");
221 return false;
222 }
223 }
224 if (U_FAILURE(status)) {
225 fprintf(stderr, "Collator attribute setting failed.: %d\n", status);
226 return false;
227 }
228 return true;
229 }
230
231 /**
232 * Creates a string search
233 */
processStringSearch()234 UBool processStringSearch()
235 {
236 u_unescape(opt_source, source, 100);
237 u_unescape(opt_pattern, pattern, 100);
238 UErrorCode status = U_ZERO_ERROR;
239 search = usearch_openFromCollator(pattern, -1, source, -1, collator, NULL,
240 &status);
241 if (U_FAILURE(status)) {
242 return false;
243 }
244 if (static_cast<bool>(opt_overlap)) {
245 usearch_setAttribute(search, USEARCH_OVERLAP, USEARCH_ON, &status);
246 }
247 if (static_cast<bool>(opt_canonical)) {
248 usearch_setAttribute(search, USEARCH_CANONICAL_MATCH, USEARCH_ON,
249 &status);
250 }
251 if (U_FAILURE(status)) {
252 fprintf(stderr, "Error setting search attributes\n");
253 return false;
254 }
255 return true;
256 }
257
findPattern()258 UBool findPattern()
259 {
260 UErrorCode status = U_ZERO_ERROR;
261 int32_t offset = usearch_next(search, &status);
262 if (offset == USEARCH_DONE) {
263 fprintf(stdout, "Pattern not found in source\n");
264 }
265 while (offset != USEARCH_DONE) {
266 fprintf(stdout, "Pattern found at offset %d size %d\n", offset,
267 usearch_getMatchedLength(search));
268 offset = usearch_next(search, &status);
269 }
270 if (U_FAILURE(status)) {
271 fprintf(stderr, "Error in searching for pattern %d\n", status);
272 return false;
273 }
274 fprintf(stdout, "End of search\n");
275 return true;
276 }
277
278 /**
279 * Main -- process command line, read in and pre-process the test file,
280 * call other functions to do the actual tests.
281 */
main(int argc,const char ** argv)282 int main(int argc, const char** argv)
283 {
284 if (!static_cast<bool>(processOptions(argc, argv, opts)) || static_cast<bool>(opt_help)) {
285 printf(gHelpString);
286 return -1;
287 }
288
289 if (!static_cast<bool>(processCollator())) {
290 fprintf(stderr, "Error creating collator\n");
291 return -1;
292 }
293
294 if (!static_cast<bool>(processStringSearch())) {
295 fprintf(stderr, "Error creating string search\n");
296 return -1;
297 }
298
299 fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern,
300 opt_source);
301
302 findPattern();
303 ucol_close(collator);
304 usearch_close(search);
305 return 0;
306 }
307