1 /* 2 ********************************************************************** 3 * Copyright (C) 2001-2011 IBM and others. All rights reserved. 4 ********************************************************************** 5 * Date Name Description 6 * 08/13/2001 synwee Creation. 7 ********************************************************************** 8 */ 9 #ifndef USRCHIMP_H 10 #define USRCHIMP_H 11 12 #include "unicode/utypes.h" 13 14 #if !UCONFIG_NO_COLLATION 15 16 #include "unicode/normalizer2.h" 17 #include "unicode/ucol.h" 18 #include "unicode/ucoleitr.h" 19 #include "unicode/ubrk.h" 20 21 #define INITIAL_ARRAY_SIZE_ 256 22 #define MAX_TABLE_SIZE_ 257 23 24 struct USearch { 25 // required since collation element iterator does not have a getText API 26 const UChar *text; 27 int32_t textLength; // exact length 28 UBool isOverlap; 29 UBool isCanonicalMatch; 30 int16_t elementComparisonType; 31 UBreakIterator *internalBreakIter; //internal character breakiterator 32 UBreakIterator *breakIter; 33 // value USEARCH_DONE is the default value 34 // if we are not at the start of the text or the end of the text, 35 // depending on the iteration direction and matchedIndex is USEARCH_DONE 36 // it means that we can't find any more matches in that particular direction 37 int32_t matchedIndex; 38 int32_t matchedLength; 39 UBool isForwardSearching; 40 UBool reset; 41 }; 42 43 struct UPattern { 44 const UChar *text; 45 int32_t textLength; // exact length 46 // length required for backwards ce comparison 47 int32_t CELength; 48 int32_t *CE; 49 int32_t CEBuffer[INITIAL_ARRAY_SIZE_]; 50 int32_t PCELength; 51 int64_t *PCE; 52 int64_t PCEBuffer[INITIAL_ARRAY_SIZE_]; 53 UBool hasPrefixAccents; 54 UBool hasSuffixAccents; 55 int16_t defaultShiftSize; 56 int16_t shift[MAX_TABLE_SIZE_]; 57 int16_t backShift[MAX_TABLE_SIZE_]; 58 }; 59 60 struct UStringSearch { 61 struct USearch *search; 62 struct UPattern pattern; 63 const UCollator *collator; 64 const icu::Normalizer2 *nfd; 65 // positions within the collation element iterator is used to determine 66 // if we are at the start of the text. 67 UCollationElements *textIter; 68 // utility collation element, used throughout program for temporary 69 // iteration. 70 UCollationElements *utilIter; 71 UBool ownCollator; 72 UCollationStrength strength; 73 uint32_t ceMask; 74 uint32_t variableTop; 75 UBool toShift; 76 UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_]; 77 UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_]; 78 }; 79 80 /** 81 * Exact matches without checking for the ends for extra accents. 82 * The match after the position within the collation element iterator is to be 83 * found. 84 * After a match is found the offset in the collation element iterator will be 85 * shifted to the start of the match. 86 * Implementation note: 87 * For tertiary we can't use the collator->tertiaryMask, that is a 88 * preprocessed mask that takes into account case options. since we are only 89 * concerned with exact matches, we don't need that. 90 * Alternate handling - since only the 16 most significant digits is only used, 91 * we can safely do a compare without masking if the ce is a variable, we mask 92 * and get only the primary values no shifting to quartenary is required since 93 * all primary values less than variabletop will need to be masked off anyway. 94 * If the end character is composite and the pattern ce does not match the text 95 * ce, we skip it until we find a match in the end composite character or when 96 * it has passed the character. This is so that we can match pattern "a" with 97 * the text "\u00e6" 98 * @param strsrch string search data 99 * @param status error status if any 100 * @return TRUE if an exact match is found, FALSE otherwise 101 */ 102 U_CFUNC 103 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status); 104 105 /** 106 * Canonical matches. 107 * According to the definition, matches found here will include the whole span 108 * of beginning and ending accents if it overlaps that region. 109 * @param strsrch string search data 110 * @param status error status if any 111 * @return TRUE if a canonical match is found, FALSE otherwise 112 */ 113 U_CFUNC 114 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status); 115 116 /** 117 * Gets the previous match. 118 * Comments follows from handleNextExact 119 * @param strsrch string search data 120 * @param status error status if any 121 * @return True if a exact math is found, FALSE otherwise. 122 */ 123 U_CFUNC 124 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status); 125 126 /** 127 * Canonical matches. 128 * According to the definition, matches found here will include the whole span 129 * of beginning and ending accents if it overlaps that region. 130 * @param strsrch string search data 131 * @param status error status if any 132 * @return TRUE if a canonical match is found, FALSE otherwise 133 */ 134 U_CFUNC 135 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, 136 UErrorCode *status); 137 138 #endif /* #if !UCONFIG_NO_COLLATION */ 139 140 #endif 141