• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2008-2010, International Business Machines
5 *   Corporation, Google and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 */
9 /*
10  * Author : eldawy@google.com (Mohamed Eldawy)
11  * ucnvsel.h
12  *
13  * Purpose: To generate a list of encodings capable of handling
14  * a given Unicode text
15  *
16  * Started 09-April-2008
17  */
18 
19 #ifndef __ICU_UCNV_SEL_H__
20 #define __ICU_UCNV_SEL_H__
21 
22 #include "unicode/uset.h"
23 #include "unicode/utypes.h"
24 #include "unicode/utf16.h"
25 #include "unicode/uenum.h"
26 #include "unicode/ucnv.h"
27 #include "unicode/localpointer.h"
28 
29 /**
30  * \file
31  *
32  * A converter selector is built with a set of encoding/charset names
33  * and given an input string returns the set of names of the
34  * corresponding converters which can convert the string.
35  *
36  * A converter selector can be serialized into a buffer and reopened
37  * from the serialized form.
38  */
39 
40 /**
41  * @{
42  * The selector data structure (opaque: only forward-declared in this header).
43  */
44 struct UConverterSelector;
45 typedef struct UConverterSelector UConverterSelector;
46 /** @} */
47 
48 /**
49  * Open a selector.
50  * If converterListSize is 0, build for all available converters.
51  * If excludedCodePoints is NULL, don't exclude any code points.
52  *
53  * @param converterList an array of pointers to the names of the encodings to consider.
54  *                      Can be NULL if converterListSize==0.
55  *                      The list and the names will be cloned, and the caller
56  *                      retains ownership of the original.
57  * @param converterListSize number of encodings in the above list.
58  *                          If 0, builds a selector for all available converters.
59  * @param excludedCodePoints a set of code points to be excluded from consideration.
60  *                           That is, excluded code points in a string do not change
61  *                           the selection result. (They might be handled by a callback.)
62  *                           Use NULL to exclude nothing.
63  * @param whichSet which converter set to use; this determines whether
64  *                 to consider only roundtrip mappings or also fallbacks.
65  * @param status an in/out ICU UErrorCode
66  * @return the new selector; close it with ucnvsel_close()
67  *
68  * @stable ICU 4.2
69  */
70 U_STABLE UConverterSelector* U_EXPORT2
71 ucnvsel_open(const char* const*  converterList, int32_t converterListSize,
72              const USet* excludedCodePoints,
73              const UConverterUnicodeSet whichSet, UErrorCode* status);
74 
75 /**
76  * Closes a selector.
77  * If any Enumerations were returned by ucnvsel_select*, they become invalid.
78  * They can be closed before or after calling ucnvsel_close,
79  * but should never be used after the selector is closed.
80  *
81  * @see ucnvsel_selectForString
82  * @see ucnvsel_selectForUTF8
83  *
84  * @param sel selector to close
85  *
86  * @stable ICU 4.2
87  */
88 U_STABLE void U_EXPORT2
89 ucnvsel_close(UConverterSelector *sel);
90 
91 #if U_SHOW_CPLUSPLUS_API
92 
93 U_NAMESPACE_BEGIN
94 
95 /**
96  * \class LocalUConverterSelectorPointer
97  * "Smart pointer" class that closes a UConverterSelector via ucnvsel_close().
98  * For most methods see the LocalPointerBase base class.
99  *
100  * @see LocalPointerBase
101  * @see LocalPointer
102  * @stable ICU 4.4
103  */
104 U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
105 
106 U_NAMESPACE_END
107 
108 #endif  /* U_SHOW_CPLUSPLUS_API */
109 
110 /**
111  * Open a selector from its serialized form (as written by ucnvsel_serialize()).
112  * The buffer must remain valid and unchanged for the lifetime of the selector.
113  * This is much faster than creating a selector from scratch.
114  * Using a serialized form from a different machine (endianness/charset) is supported.
115  *
116  * @param buffer pointer to the serialized form of a converter selector;
117  *               must be 32-bit-aligned
118  * @param length the capacity of this buffer (can be equal to or larger than
119  *               the actual data length)
120  * @param status an in/out ICU UErrorCode
121  * @return the new selector; close it with ucnvsel_close()
122  *
123  * @stable ICU 4.2
124  */
125 U_STABLE UConverterSelector* U_EXPORT2
126 ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
127 
128 /**
129  * Serialize a selector into a linear buffer.
130  * The serialized form is portable to different machines.
131  *
132  * @param sel selector to serialize
133  * @param buffer pointer to 32-bit-aligned memory to be filled with the
134  *               serialized form of this converter selector
135  * @param bufferCapacity the capacity of this buffer
136  * @param status an in/out ICU UErrorCode
137  * @return the buffer capacity required to hold the serialized data (even if the call
138  *         fails with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
139  *
140  * @stable ICU 4.2
141  */
142 U_STABLE int32_t U_EXPORT2
143 ucnvsel_serialize(const UConverterSelector* sel,
144                   void* buffer, int32_t bufferCapacity, UErrorCode* status);
145 
146 /**
147  * Select converters that can map all characters in a UTF-16 string,
148  * ignoring the excluded code points.
149  *
150  * @param sel a selector
151  * @param s UTF-16 string
152  * @param length length of the string, or -1 if NUL-terminated
153  * @param status an in/out ICU UErrorCode
154  * @return an enumeration containing encoding names, which are the same and in the
155  *         same order as supplied when building the selector.
156  *         The enumeration must not be used after the selector is closed.
157  *
158  * @stable ICU 4.2
159  */
160 U_STABLE UEnumeration * U_EXPORT2
161 ucnvsel_selectForString(const UConverterSelector* sel,
162                         const UChar *s, int32_t length, UErrorCode *status);
163 
164 /**
165  * Select converters that can map all characters in a UTF-8 string,
166  * ignoring the excluded code points.
167  *
168  * @param sel a selector
169  * @param s UTF-8 string
170  * @param length length of the string, or -1 if NUL-terminated
171  * @param status an in/out ICU UErrorCode
172  * @return an enumeration containing encoding names, which are the same and in the
173  *         same order as supplied when building the selector.
174  *         The enumeration must not be used after the selector is closed.
175  *
176  * @stable ICU 4.2
177  */
178 U_STABLE UEnumeration * U_EXPORT2
179 ucnvsel_selectForUTF8(const UConverterSelector* sel,
180                       const char *s, int32_t length, UErrorCode *status);
181 
182 #endif  /* __ICU_UCNV_SEL_H__ */
183