• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 * Copyright (C) 2012-2014, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 * collationbasedatabuilder.h
7 *
8 * created on: 2012aug11
9 * created by: Markus W. Scherer
10 */
11 
12 #ifndef __COLLATIONBASEDATABUILDER_H__
13 #define __COLLATIONBASEDATABUILDER_H__
14 
15 #include "unicode/utypes.h"
16 
17 #if !UCONFIG_NO_COLLATION
18 
19 #include "unicode/uniset.h"
20 #include "unicode/unistr.h"
21 #include "collation.h"
22 #include "collationdata.h"
23 #include "collationdatabuilder.h"
24 #include "normalizer2impl.h"
25 #include "utrie2.h"
26 #include "uvectr32.h"
27 #include "uvectr64.h"
28 #include "uvector.h"
29 
30 U_NAMESPACE_BEGIN
31 
32 /**
33  * Low-level base CollationData builder.
34  */
35 class U_I18N_API CollationBaseDataBuilder : public CollationDataBuilder {
36 public:
37     CollationBaseDataBuilder(UErrorCode &errorCode);
38 
39     virtual ~CollationBaseDataBuilder();
40 
41     void init(UErrorCode &errorCode);
42 
43     /**
44      * Sets the Han ranges as ranges of offset CE32s.
45      * Note: Unihan extension A sorts after the other BMP ranges.
46      * See http://www.unicode.org/reports/tr10/#Implicit_Weights
47      *
48      * @param ranges array of ranges of [:Unified_Ideograph:] in collation order,
49      *               as (start, end) code point pairs
50      * @param length number of code points (not pairs)
51      * @param errorCode in/out error code
52      */
53     void initHanRanges(const UChar32 ranges[], int32_t length, UErrorCode &errorCode);
54 
setNumericPrimary(uint32_t np)55     void setNumericPrimary(uint32_t np) { numericPrimary = np; }
56 
57     virtual UBool isCompressibleLeadByte(uint32_t b) const;
58 
59     void setCompressibleLeadByte(uint32_t b);
60 
61     static int32_t diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible);
62     static int32_t diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible);
63 
64     virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode);
65 
66     void addRootElements(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode);
67     void addRootElement(int64_t ce, UErrorCode &errorCode);
68 
69     void addReorderingGroup(uint32_t firstByte, uint32_t lastByte,
70                             const UnicodeString &groupScripts,
71                             UErrorCode &errorCode);
72 
73     virtual void build(CollationData &data, UErrorCode &errorCode);
74 
75     void buildRootElementsTable(UVector32 &table, UErrorCode &errorCode);
76 
77 private:
78     int32_t writeRootElementsRange(
79             uint32_t prevPrimary, uint32_t p, int32_t i,
80             UVector32 &table, UErrorCode &errorCode);
81 
82     // Flags for which primary-weight lead bytes are compressible.
83     UBool compressibleBytes[256];
84     uint32_t numericPrimary;
85     uint32_t firstHanPrimary;
86     uint32_t lastHanPrimary;
87     int32_t hanStep;
88     UVector64 rootElements;
89     UnicodeString scripts;
90 };
91 
92 U_NAMESPACE_END
93 
94 #endif  // !UCONFIG_NO_COLLATION
95 #endif  // __COLLATIONBASEDATABUILDER_H__
96