1 /* 2 ********************************************************************** 3 * Copyright (C) 2001-2007 IBM and others. All rights reserved. 4 ********************************************************************** 5 * Date Name Description 6 * 08/13/2001 synwee Creation. 7 ********************************************************************** 8 */ 9 #ifndef USRCHIMP_H 10 #define USRCHIMP_H 11 12 #include "unicode/utypes.h" 13 14 #if !UCONFIG_NO_COLLATION 15 16 #include "unicode/ucol.h" 17 #include "unicode/ucoleitr.h" 18 #include "unicode/ubrk.h" 19 20 #define INITIAL_ARRAY_SIZE_ 256 21 #define MAX_TABLE_SIZE_ 257 22 23 struct USearch { 24 // required since collation element iterator does not have a getText API 25 const UChar *text; 26 int32_t textLength; // exact length 27 UBool isOverlap; 28 UBool isCanonicalMatch; 29 UBreakIterator *internalBreakIter; //internal character breakiterator 30 UBreakIterator *breakIter; 31 // value USEARCH_DONE is the default value 32 // if we are not at the start of the text or the end of the text, 33 // depending on the iteration direction and matchedIndex is USEARCH_DONE 34 // it means that we can find any more matches in that particular direction 35 int32_t matchedIndex; 36 int32_t matchedLength; 37 UBool isForwardSearching; 38 UBool reset; 39 }; 40 41 struct UPattern { 42 const UChar *text; 43 int32_t textLength; // exact length 44 // length required for backwards ce comparison 45 int32_t CELength; 46 int32_t *CE; 47 int32_t CEBuffer[INITIAL_ARRAY_SIZE_]; 48 UBool hasPrefixAccents; 49 UBool hasSuffixAccents; 50 int16_t defaultShiftSize; 51 int16_t shift[MAX_TABLE_SIZE_]; 52 int16_t backShift[MAX_TABLE_SIZE_]; 53 }; 54 55 struct UStringSearch { 56 struct USearch *search; 57 struct UPattern pattern; 58 const UCollator *collator; 59 // positions within the collation element iterator is used to determine 60 // if we are at the start of the text. 61 UCollationElements *textIter; 62 // utility collation element, used throughout program for temporary 63 // iteration. 64 UCollationElements *utilIter; 65 UBool ownCollator; 66 UCollationStrength strength; 67 uint32_t ceMask; 68 uint32_t variableTop; 69 UBool toShift; 70 UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_]; 71 UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_]; 72 }; 73 74 /** 75 * Exact matches without checking for the ends for extra accents. 76 * The match after the position within the collation element iterator is to be 77 * found. 78 * After a match is found the offset in the collation element iterator will be 79 * shifted to the start of the match. 80 * Implementation note: 81 * For tertiary we can't use the collator->tertiaryMask, that is a 82 * preprocessed mask that takes into account case options. since we are only 83 * concerned with exact matches, we don't need that. 84 * Alternate handling - since only the 16 most significant digits is only used, 85 * we can safely do a compare without masking if the ce is a variable, we mask 86 * and get only the primary values no shifting to quartenary is required since 87 * all primary values less than variabletop will need to be masked off anyway. 88 * If the end character is composite and the pattern ce does not match the text 89 * ce, we skip it until we find a match in the end composite character or when 90 * it has passed the character. This is so that we can match pattern "a" with 91 * the text "\u00e6" 92 * @param strsrch string search data 93 * @param status error status if any 94 * @return TRUE if an exact match is found, FALSE otherwise 95 */ 96 U_CFUNC 97 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status); 98 99 /** 100 * Canonical matches. 101 * According to the definition, matches found here will include the whole span 102 * of beginning and ending accents if it overlaps that region. 103 * @param strsrch string search data 104 * @param status error status if any 105 * @return TRUE if a canonical match is found, FALSE otherwise 106 */ 107 U_CFUNC 108 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status); 109 110 /** 111 * Gets the previous match. 112 * Comments follows from handleNextExact 113 * @param strsrch string search data 114 * @param status error status if any 115 * @return True if a exact math is found, FALSE otherwise. 116 */ 117 U_CFUNC 118 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status); 119 120 /** 121 * Canonical matches. 122 * According to the definition, matches found here will include the whole span 123 * of beginning and ending accents if it overlaps that region. 124 * @param strsrch string search data 125 * @param status error status if any 126 * @return TRUE if a canonical match is found, FALSE otherwise 127 */ 128 U_CFUNC 129 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, 130 UErrorCode *status); 131 132 #endif /* #if !UCONFIG_NO_COLLATION */ 133 134 #endif 135