1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 1999-2015, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * file name: ustr_imp.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2001jan30 14 * created by: Markus W. Scherer 15 */ 16 17 #ifndef __USTR_IMP_H__ 18 #define __USTR_IMP_H__ 19 20 #include "unicode/utypes.h" 21 #include "unicode/uiter.h" 22 #include "ucase.h" 23 24 /** Simple declaration to avoid including unicode/ubrk.h. */ 25 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR 26 # define UBRK_TYPEDEF_UBREAK_ITERATOR 27 typedef struct UBreakIterator UBreakIterator; 28 #endif 29 30 #ifndef U_COMPARE_IGNORE_CASE 31 /* see also unorm.h */ 32 /** 33 * Option bit for unorm_compare: 34 * Perform case-insensitive comparison. 35 */ 36 #define U_COMPARE_IGNORE_CASE 0x10000 37 #endif 38 39 /** 40 * Internal option for unorm_cmpEquivFold() for strncmp style. 41 * If set, checks for both string length and terminating NUL. 42 */ 43 #define _STRNCMP_STYLE 0x1000 44 45 /** 46 * Compare two strings in code point order or code unit order. 47 * Works in strcmp style (both lengths -1), 48 * strncmp style (lengths equal and >=0, flag TRUE), 49 * and memcmp/UnicodeString style (at least one length >=0). 50 */ 51 U_CFUNC int32_t U_EXPORT2 52 uprv_strCompare(const UChar *s1, int32_t length1, 53 const UChar *s2, int32_t length2, 54 UBool strncmpStyle, UBool codePointOrder); 55 56 /** 57 * Internal API, used by u_strcasecmp() etc. 58 * Compare strings case-insensitively, 59 * in code point order or code unit order. 60 */ 61 U_CFUNC int32_t 62 u_strcmpFold(const UChar *s1, int32_t length1, 63 const UChar *s2, int32_t length2, 64 uint32_t options, 65 UErrorCode *pErrorCode); 66 67 /** 68 * Interanl API, used for detecting length of 69 * shared prefix case-insensitively. 70 * @param s1 input string 1 71 * @param length1 length of string 1, or -1 (NULL terminated) 72 * @param s2 input string 2 73 * @param length2 length of string 2, or -1 (NULL terminated) 74 * @param options compare options 75 * @param matchLen1 (output) length of partial prefix match in s1 76 * @param matchLen2 (output) length of partial prefix match in s2 77 * @param pErrorCode receives error status 78 */ 79 U_CAPI void 80 u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1, 81 const UChar *s2, int32_t length2, 82 uint32_t options, 83 int32_t *matchLen1, int32_t *matchLen2, 84 UErrorCode *pErrorCode); 85 86 /** 87 * Are the Unicode properties loaded? 88 * This must be used before internal functions are called that do 89 * not perform this check. 90 * Generate a debug assertion failure if data is not loaded. 91 */ 92 U_CFUNC UBool 93 uprv_haveProperties(UErrorCode *pErrorCode); 94 95 /** 96 * Load the Unicode property data. 97 * Intended primarily for use from u_init(). 98 * Has no effect if property data is already loaded. 99 * NOT thread safe. 100 */ 101 /*U_CFUNC int8_t 102 uprv_loadPropsData(UErrorCode *errorCode);*/ 103 104 /* 105 * Internal string casing functions implementing 106 * ustring.h/ustrcase.c and UnicodeString case mapping functions. 107 */ 108 109 struct UCaseMap { 110 const UCaseProps *csp; 111 #if !UCONFIG_NO_BREAK_ITERATION 112 UBreakIterator *iter; /* We adopt the iterator, so we own it. */ 113 #endif 114 char locale[32]; 115 int32_t locCache; 116 uint32_t options; 117 }; 118 119 #ifndef __UCASEMAP_H__ 120 typedef struct UCaseMap UCaseMap; 121 #endif 122 123 #if UCONFIG_NO_BREAK_ITERATION 124 # define UCASEMAP_INITIALIZER { NULL, { 0 }, 0, 0 } 125 #else 126 # define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 } 127 #endif 128 129 U_CFUNC void 130 ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale); 131 132 #ifndef U_STRING_CASE_MAPPER_DEFINED 133 #define U_STRING_CASE_MAPPER_DEFINED 134 135 /** 136 * String case mapping function type, used by ustrcase_map(). 137 * All error checking must be done. 138 * The UCaseMap must be fully initialized, with locale and/or iter set as needed. 139 * src and dest must not overlap. 140 */ 141 typedef int32_t U_CALLCONV 142 UStringCaseMapper(const UCaseMap *csm, 143 UChar *dest, int32_t destCapacity, 144 const UChar *src, int32_t srcLength, 145 UErrorCode *pErrorCode); 146 147 #endif 148 149 /** Implements UStringCaseMapper. */ 150 U_CFUNC int32_t U_CALLCONV 151 ustrcase_internalToLower(const UCaseMap *csm, 152 UChar *dest, int32_t destCapacity, 153 const UChar *src, int32_t srcLength, 154 UErrorCode *pErrorCode); 155 156 /** Implements UStringCaseMapper. */ 157 U_CFUNC int32_t U_CALLCONV 158 ustrcase_internalToUpper(const UCaseMap *csm, 159 UChar *dest, int32_t destCapacity, 160 const UChar *src, int32_t srcLength, 161 UErrorCode *pErrorCode); 162 163 #if !UCONFIG_NO_BREAK_ITERATION 164 165 /** Implements UStringCaseMapper. */ 166 U_CFUNC int32_t U_CALLCONV 167 ustrcase_internalToTitle(const UCaseMap *csm, 168 UChar *dest, int32_t destCapacity, 169 const UChar *src, int32_t srcLength, 170 UErrorCode *pErrorCode); 171 172 #endif 173 174 /** Implements UStringCaseMapper. */ 175 U_CFUNC int32_t U_CALLCONV 176 ustrcase_internalFold(const UCaseMap *csm, 177 UChar *dest, int32_t destCapacity, 178 const UChar *src, int32_t srcLength, 179 UErrorCode *pErrorCode); 180 181 /** 182 * Implements argument checking and buffer handling 183 * for string case mapping as a common function. 184 */ 185 U_CFUNC int32_t 186 ustrcase_map(const UCaseMap *csm, 187 UChar *dest, int32_t destCapacity, 188 const UChar *src, int32_t srcLength, 189 UStringCaseMapper *stringCaseMapper, 190 UErrorCode *pErrorCode); 191 192 /** 193 * UTF-8 string case mapping function type, used by ucasemap_mapUTF8(). 194 * UTF-8 version of UStringCaseMapper. 195 * All error checking must be done. 196 * The UCaseMap must be fully initialized, with locale and/or iter set as needed. 197 * src and dest must not overlap. 198 */ 199 typedef int32_t U_CALLCONV 200 UTF8CaseMapper(const UCaseMap *csm, 201 uint8_t *dest, int32_t destCapacity, 202 const uint8_t *src, int32_t srcLength, 203 UErrorCode *pErrorCode); 204 205 /** Implements UTF8CaseMapper. */ 206 U_CFUNC int32_t U_CALLCONV 207 ucasemap_internalUTF8ToTitle(const UCaseMap *csm, 208 uint8_t *dest, int32_t destCapacity, 209 const uint8_t *src, int32_t srcLength, 210 UErrorCode *pErrorCode); 211 212 /** 213 * Implements argument checking and buffer handling 214 * for UTF-8 string case mapping as a common function. 215 */ 216 U_CFUNC int32_t 217 ucasemap_mapUTF8(const UCaseMap *csm, 218 uint8_t *dest, int32_t destCapacity, 219 const uint8_t *src, int32_t srcLength, 220 UTF8CaseMapper *stringCaseMapper, 221 UErrorCode *pErrorCode); 222 223 #ifdef __cplusplus 224 225 U_NAMESPACE_BEGIN 226 namespace GreekUpper { 227 228 // Data bits. 229 static const uint32_t UPPER_MASK = 0x3ff; 230 static const uint32_t HAS_VOWEL = 0x1000; 231 static const uint32_t HAS_YPOGEGRAMMENI = 0x2000; 232 static const uint32_t HAS_ACCENT = 0x4000; 233 static const uint32_t HAS_DIALYTIKA = 0x8000; 234 // Further bits during data building and processing, not stored in the data map. 235 static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000; 236 static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000; 237 238 static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT; 239 static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA = 240 HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA; 241 static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA; 242 243 // State bits. 244 static const uint32_t AFTER_CASED = 1; 245 static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2; 246 247 uint32_t getLetterData(UChar32 c); 248 249 /** 250 * Returns a non-zero value for each of the Greek combining diacritics 251 * listed in The Unicode Standard, version 8, chapter 7.2 Greek, 252 * plus some perispomeni look-alikes. 253 */ 254 uint32_t getDiacriticData(UChar32 c); 255 256 } // namespace GreekUpper 257 U_NAMESPACE_END 258 259 #endif // __cplusplus 260 261 U_CAPI int32_t U_EXPORT2 262 ustr_hashUCharsN(const UChar *str, int32_t length); 263 264 U_CAPI int32_t U_EXPORT2 265 ustr_hashCharsN(const char *str, int32_t length); 266 267 U_CAPI int32_t U_EXPORT2 268 ustr_hashICharsN(const char *str, int32_t length); 269 270 /** 271 * NUL-terminate a UChar * string if possible. 272 * If length < destCapacity then NUL-terminate. 273 * If length == destCapacity then do not terminate but set U_STRING_NOT_TERMINATED_WARNING. 274 * If length > destCapacity then do not terminate but set U_BUFFER_OVERFLOW_ERROR. 275 * 276 * @param dest Destination buffer, can be NULL if destCapacity==0. 277 * @param destCapacity Number of UChars available at dest. 278 * @param length Number of UChars that were (to be) written to dest. 279 * @param pErrorCode ICU error code. 280 * @return length 281 */ 282 U_CAPI int32_t U_EXPORT2 283 u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode); 284 285 /** 286 * NUL-terminate a char * string if possible. 287 * Same as u_terminateUChars() but for a different string type. 288 */ 289 U_CAPI int32_t U_EXPORT2 290 u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode); 291 292 /** 293 * NUL-terminate a UChar32 * string if possible. 294 * Same as u_terminateUChars() but for a different string type. 295 */ 296 U_CAPI int32_t U_EXPORT2 297 u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode); 298 299 /** 300 * NUL-terminate a wchar_t * string if possible. 301 * Same as u_terminateUChars() but for a different string type. 302 */ 303 U_CAPI int32_t U_EXPORT2 304 u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode); 305 306 #endif 307