/* * Copyright (C) 2006 George Staikos * Copyright (C) 2006 Alexey Proskuryakov * Copyright (C) 2007 Apple Computer, Inc. All rights reserved. * Copyright (C) 2007-2009 Torch Mobile, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public License * along with this library; see the file COPYING.LIB. If not, write to * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, * Boston, MA 02110-1301, USA. * */ #ifndef UNICODE_WINCE_H #define UNICODE_WINCE_H #include "ce_unicode.h" #define TO_MASK(x) (1 << (x)) // some defines from ICU needed one or two places #define U16_IS_LEAD(c) (((c) & 0xfffffc00) == 0xd800) #define U16_IS_TRAIL(c) (((c) & 0xfffffc00) == 0xdc00) #define U16_SURROGATE_OFFSET ((0xd800 << 10UL) + 0xdc00 - 0x10000) #define U16_GET_SUPPLEMENTARY(lead, trail) \ (((UChar32)(lead) << 10UL) + (UChar32)(trail) - U16_SURROGATE_OFFSET) #define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xd7c0) #define U16_TRAIL(supplementary) (UChar)(((supplementary) & 0x3ff) | 0xdc00) #define U_IS_SURROGATE(c) (((c) & 0xfffff800) == 0xd800) #define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) #define U16_IS_SURROGATE_LEAD(c) (((c) & 0x400) == 0) #define U16_NEXT(s, i, length, c) { \ (c)=(s)[(i)++]; \ if (U16_IS_LEAD(c)) { \ uint16_t __c2; \ if ((i) < (length) && U16_IS_TRAIL(__c2 = (s)[(i)])) { \ ++(i); \ (c) = U16_GET_SUPPLEMENTARY((c), __c2); \ } \ } \ } #define U16_PREV(s, start, i, c) { \ (c)=(s)[--(i)]; \ if (U16_IS_TRAIL(c)) { \ uint16_t __c2; \ if ((i) > (start) && U16_IS_LEAD(__c2 = (s)[(i) - 1])) { \ --(i); \ (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \ } \ } \ } #define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) namespace WTF { namespace Unicode { enum Direction { LeftToRight = UnicodeCE::U_LEFT_TO_RIGHT, RightToLeft = UnicodeCE::U_RIGHT_TO_LEFT, EuropeanNumber = UnicodeCE::U_EUROPEAN_NUMBER, EuropeanNumberSeparator = UnicodeCE::U_EUROPEAN_NUMBER_SEPARATOR, EuropeanNumberTerminator = UnicodeCE::U_EUROPEAN_NUMBER_TERMINATOR, ArabicNumber = UnicodeCE::U_ARABIC_NUMBER, CommonNumberSeparator = UnicodeCE::U_COMMON_NUMBER_SEPARATOR, BlockSeparator = UnicodeCE::U_BLOCK_SEPARATOR, SegmentSeparator = UnicodeCE::U_SEGMENT_SEPARATOR, WhiteSpaceNeutral = UnicodeCE::U_WHITE_SPACE_NEUTRAL, OtherNeutral = UnicodeCE::U_OTHER_NEUTRAL, LeftToRightEmbedding = UnicodeCE::U_LEFT_TO_RIGHT_EMBEDDING, LeftToRightOverride = UnicodeCE::U_LEFT_TO_RIGHT_OVERRIDE, RightToLeftArabic = UnicodeCE::U_RIGHT_TO_LEFT_ARABIC, RightToLeftEmbedding = UnicodeCE::U_RIGHT_TO_LEFT_EMBEDDING, RightToLeftOverride = UnicodeCE::U_RIGHT_TO_LEFT_OVERRIDE, PopDirectionalFormat = UnicodeCE::U_POP_DIRECTIONAL_FORMAT, NonSpacingMark = UnicodeCE::U_DIR_NON_SPACING_MARK, BoundaryNeutral = UnicodeCE::U_BOUNDARY_NEUTRAL }; enum DecompositionType { DecompositionNone = UnicodeCE::U_DT_NONE, DecompositionCanonical = UnicodeCE::U_DT_CANONICAL, DecompositionCompat = UnicodeCE::U_DT_COMPAT, DecompositionCircle = UnicodeCE::U_DT_CIRCLE, DecompositionFinal = UnicodeCE::U_DT_FINAL, DecompositionFont = UnicodeCE::U_DT_FONT, DecompositionFraction = UnicodeCE::U_DT_FRACTION, DecompositionInitial = UnicodeCE::U_DT_INITIAL, DecompositionIsolated = UnicodeCE::U_DT_ISOLATED, DecompositionMedial = UnicodeCE::U_DT_MEDIAL, DecompositionNarrow = UnicodeCE::U_DT_NARROW, DecompositionNoBreak = UnicodeCE::U_DT_NOBREAK, DecompositionSmall = UnicodeCE::U_DT_SMALL, DecompositionSquare = UnicodeCE::U_DT_SQUARE, DecompositionSub = UnicodeCE::U_DT_SUB, DecompositionSuper = UnicodeCE::U_DT_SUPER, DecompositionVertical = UnicodeCE::U_DT_VERTICAL, DecompositionWide = UnicodeCE::U_DT_WIDE, }; enum CharCategory { NoCategory = 0, Other_NotAssigned = TO_MASK(UnicodeCE::U_GENERAL_OTHER_TYPES), Letter_Uppercase = TO_MASK(UnicodeCE::U_UPPERCASE_LETTER), Letter_Lowercase = TO_MASK(UnicodeCE::U_LOWERCASE_LETTER), Letter_Titlecase = TO_MASK(UnicodeCE::U_TITLECASE_LETTER), Letter_Modifier = TO_MASK(UnicodeCE::U_MODIFIER_LETTER), Letter_Other = TO_MASK(UnicodeCE::U_OTHER_LETTER), Mark_NonSpacing = TO_MASK(UnicodeCE::U_NON_SPACING_MARK), Mark_Enclosing = TO_MASK(UnicodeCE::U_ENCLOSING_MARK), Mark_SpacingCombining = TO_MASK(UnicodeCE::U_COMBINING_SPACING_MARK), Number_DecimalDigit = TO_MASK(UnicodeCE::U_DECIMAL_DIGIT_NUMBER), Number_Letter = TO_MASK(UnicodeCE::U_LETTER_NUMBER), Number_Other = TO_MASK(UnicodeCE::U_OTHER_NUMBER), Separator_Space = TO_MASK(UnicodeCE::U_SPACE_SEPARATOR), Separator_Line = TO_MASK(UnicodeCE::U_LINE_SEPARATOR), Separator_Paragraph = TO_MASK(UnicodeCE::U_PARAGRAPH_SEPARATOR), Other_Control = TO_MASK(UnicodeCE::U_CONTROL_CHAR), Other_Format = TO_MASK(UnicodeCE::U_FORMAT_CHAR), Other_PrivateUse = TO_MASK(UnicodeCE::U_PRIVATE_USE_CHAR), Other_Surrogate = TO_MASK(UnicodeCE::U_SURROGATE), Punctuation_Dash = TO_MASK(UnicodeCE::U_DASH_PUNCTUATION), Punctuation_Open = TO_MASK(UnicodeCE::U_START_PUNCTUATION), Punctuation_Close = TO_MASK(UnicodeCE::U_END_PUNCTUATION), Punctuation_Connector = TO_MASK(UnicodeCE::U_CONNECTOR_PUNCTUATION), Punctuation_Other = TO_MASK(UnicodeCE::U_OTHER_PUNCTUATION), Symbol_Math = TO_MASK(UnicodeCE::U_MATH_SYMBOL), Symbol_Currency = TO_MASK(UnicodeCE::U_CURRENCY_SYMBOL), Symbol_Modifier = TO_MASK(UnicodeCE::U_MODIFIER_SYMBOL), Symbol_Other = TO_MASK(UnicodeCE::U_OTHER_SYMBOL), Punctuation_InitialQuote = TO_MASK(UnicodeCE::U_INITIAL_PUNCTUATION), Punctuation_FinalQuote = TO_MASK(UnicodeCE::U_FINAL_PUNCTUATION) }; CharCategory category(unsigned int); bool isSpace(wchar_t); bool isLetter(wchar_t); bool isPrintableChar(wchar_t); bool isUpper(wchar_t); bool isLower(wchar_t); bool isPunct(wchar_t); bool isDigit(wchar_t); inline bool isSeparatorSpace(wchar_t c) { return category(c) == Separator_Space; } inline bool isHighSurrogate(wchar_t c) { return (c & 0xfc00) == 0xd800; } inline bool isLowSurrogate(wchar_t c) { return (c & 0xfc00) == 0xdc00; } wchar_t toLower(wchar_t); wchar_t toUpper(wchar_t); wchar_t foldCase(wchar_t); wchar_t toTitleCase(wchar_t); int toLower(wchar_t* result, int resultLength, const wchar_t* source, int sourceLength, bool* isError); int toUpper(wchar_t* result, int resultLength, const wchar_t* source, int sourceLength, bool* isError); int foldCase(UChar* result, int resultLength, const wchar_t* source, int sourceLength, bool* isError); int digitValue(wchar_t); wchar_t mirroredChar(UChar32); unsigned char combiningClass(UChar32); DecompositionType decompositionType(UChar32); Direction direction(UChar32); inline bool isArabicChar(UChar32) { return false; // FIXME: implement! } inline bool hasLineBreakingPropertyComplexContext(UChar32) { return false; // FIXME: implement! } inline int umemcasecmp(const wchar_t* a, const wchar_t* b, int len) { for (int i = 0; i < len; ++i) { wchar_t c1 = foldCase(a[i]); wchar_t c2 = foldCase(b[i]); if (c1 != c2) return c1 - c2; } return 0; } inline UChar32 surrogateToUcs4(wchar_t high, wchar_t low) { return (UChar32(high) << 10) + low - 0x35fdc00; } } // namespace Unicode } // namespace WTF #endif // vim: ts=2 sw=2 et