• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 *   Copyright (C) 2001-2007 IBM and others. All rights reserved.
4 **********************************************************************
5 *   Date        Name        Description
6 *  08/13/2001   synwee      Creation.
7 **********************************************************************
8 */
9 #ifndef USRCHIMP_H
10 #define USRCHIMP_H
11 
12 #include "unicode/utypes.h"
13 
14 #if !UCONFIG_NO_COLLATION
15 
16 #include "unicode/ucol.h"
17 #include "unicode/ucoleitr.h"
18 #include "unicode/ubrk.h"
19 
/* Element count of the fixed-size buffers embedded in the search structures:
 * UPattern.CEBuffer and the UStringSearch canonical prefix/suffix accent
 * buffers. */
#define INITIAL_ARRAY_SIZE_       256
/* Element count of the UPattern.shift and UPattern.backShift tables. */
#define MAX_TABLE_SIZE_           257
22 
23 struct USearch {
24     // required since collation element iterator does not have a getText API
25     const UChar              *text;
26           int32_t             textLength; // exact length
27           UBool               isOverlap;
28           UBool               isCanonicalMatch;
29           UBreakIterator	 *internalBreakIter;  //internal character breakiterator
30           UBreakIterator     *breakIter;
31     // value USEARCH_DONE is the default value
32     // if we are not at the start of the text or the end of the text,
33     // depending on the iteration direction and matchedIndex is USEARCH_DONE
34     // it means that we can find any more matches in that particular direction
35           int32_t         matchedIndex;
36           int32_t             matchedLength;
37           UBool               isForwardSearching;
38           UBool               reset;
39 };
40 
41 struct UPattern {
42     const UChar              *text;
43           int32_t             textLength; // exact length
44           // length required for backwards ce comparison
45           int32_t             CELength;
46           int32_t            *CE;
47           int32_t             CEBuffer[INITIAL_ARRAY_SIZE_];
48           UBool               hasPrefixAccents;
49           UBool               hasSuffixAccents;
50           int16_t             defaultShiftSize;
51           int16_t             shift[MAX_TABLE_SIZE_];
52           int16_t             backShift[MAX_TABLE_SIZE_];
53 };
54 
55 struct UStringSearch {
56     struct USearch            *search;
57     struct UPattern            pattern;
58     const  UCollator          *collator;
59     // positions within the collation element iterator is used to determine
60     // if we are at the start of the text.
61            UCollationElements *textIter;
62     // utility collation element, used throughout program for temporary
63     // iteration.
64            UCollationElements *utilIter;
65            UBool               ownCollator;
66            UCollationStrength  strength;
67            uint32_t            ceMask;
68            uint32_t            variableTop;
69            UBool               toShift;
70            UChar               canonicalPrefixAccents[INITIAL_ARRAY_SIZE_];
71            UChar               canonicalSuffixAccents[INITIAL_ARRAY_SIZE_];
72 };
73 
74 /**
75 * Exact matches without checking for the ends for extra accents.
76 * The match after the position within the collation element iterator is to be
77 * found.
78 * After a match is found the offset in the collation element iterator will be
79 * shifted to the start of the match.
80 * Implementation note:
81 * For tertiary we can't use the collator->tertiaryMask, that is a
82 * preprocessed mask that takes into account case options. since we are only
83 * concerned with exact matches, we don't need that.
84 * Alternate handling - since only the 16 most significant digits is only used,
85 * we can safely do a compare without masking if the ce is a variable, we mask
86 * and get only the primary values no shifting to quartenary is required since
87 * all primary values less than variabletop will need to be masked off anyway.
88 * If the end character is composite and the pattern ce does not match the text
89 * ce, we skip it until we find a match in the end composite character or when
90 * it has passed the character. This is so that we can match pattern "a" with
91 * the text "\u00e6"
92 * @param strsrch string search data
93 * @param status error status if any
94 * @return TRUE if an exact match is found, FALSE otherwise
95 */
96 U_CFUNC
97 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status);
98 
99 /**
100 * Canonical matches.
101 * According to the definition, matches found here will include the whole span
102 * of beginning and ending accents if it overlaps that region.
103 * @param strsrch string search data
104 * @param status error status if any
105 * @return TRUE if a canonical match is found, FALSE otherwise
106 */
107 U_CFUNC
108 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status);
109 
110 /**
111 * Gets the previous match.
112 * Comments follows from handleNextExact
113 * @param strsrch string search data
114 * @param status error status if any
115 * @return True if a exact math is found, FALSE otherwise.
116 */
117 U_CFUNC
118 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status);
119 
120 /**
121 * Canonical matches.
122 * According to the definition, matches found here will include the whole span
123 * of beginning and ending accents if it overlaps that region.
124 * @param strsrch string search data
125 * @param status error status if any
126 * @return TRUE if a canonical match is found, FALSE otherwise
127 */
128 U_CFUNC
129 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
130                                       UErrorCode    *status);
131 
132 #endif /* #if !UCONFIG_NO_COLLATION */
133 
134 #endif
135