1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ********************************************************************************
5 * Copyright (C) 2005-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 ********************************************************************************
8 */
9
10 #include "unicode/utypes.h"
11
12 #if !UCONFIG_NO_CONVERSION
13 #include "unicode/ucsdet.h"
14 #include "csdetect.h"
15 #include "csmatch.h"
16 #include "csrsbcs.h"
17 #include "csrmbcs.h"
18 #include "csrutf8.h"
19 #include "csrucode.h"
20 #include "csr2022.h"
21
22 #include "cmemory.h"
23
24 U_NAMESPACE_USE
25
// Allocates raw storage for `count` objects of `type` via uprv_malloc and
// casts the result to `type *`. No constructors run, and the size
// multiplication is not checked for overflow.
#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
// Releases storage through uprv_free; no destructors run.
#define DELETE_ARRAY(array) uprv_free((void *) (array))
28
29 U_CDECL_BEGIN
30
31 U_CAPI UCharsetDetector * U_EXPORT2
ucsdet_open(UErrorCode * status)32 ucsdet_open(UErrorCode *status)
33 {
34 if(U_FAILURE(*status)) {
35 return 0;
36 }
37
38 CharsetDetector* csd = new CharsetDetector(*status);
39
40 if (U_FAILURE(*status)) {
41 delete csd;
42 csd = NULL;
43 }
44
45 return (UCharsetDetector *) csd;
46 }
47
48 U_CAPI void U_EXPORT2
ucsdet_close(UCharsetDetector * ucsd)49 ucsdet_close(UCharsetDetector *ucsd)
50 {
51 CharsetDetector *csd = (CharsetDetector *) ucsd;
52 delete csd;
53 }
54
55 U_CAPI void U_EXPORT2
ucsdet_setText(UCharsetDetector * ucsd,const char * textIn,int32_t len,UErrorCode * status)56 ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status)
57 {
58 if(U_FAILURE(*status)) {
59 return;
60 }
61
62 ((CharsetDetector *) ucsd)->setText(textIn, len);
63 }
64
65 U_CAPI const char * U_EXPORT2
ucsdet_getName(const UCharsetMatch * ucsm,UErrorCode * status)66 ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status)
67 {
68 if(U_FAILURE(*status)) {
69 return NULL;
70 }
71
72 return ((CharsetMatch *) ucsm)->getName();
73 }
74
75 U_CAPI int32_t U_EXPORT2
ucsdet_getConfidence(const UCharsetMatch * ucsm,UErrorCode * status)76 ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status)
77 {
78 if(U_FAILURE(*status)) {
79 return 0;
80 }
81
82 return ((CharsetMatch *) ucsm)->getConfidence();
83 }
84
85 U_CAPI const char * U_EXPORT2
ucsdet_getLanguage(const UCharsetMatch * ucsm,UErrorCode * status)86 ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status)
87 {
88 if(U_FAILURE(*status)) {
89 return NULL;
90 }
91
92 return ((CharsetMatch *) ucsm)->getLanguage();
93 }
94
95 U_CAPI const UCharsetMatch * U_EXPORT2
ucsdet_detect(UCharsetDetector * ucsd,UErrorCode * status)96 ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status)
97 {
98 if(U_FAILURE(*status)) {
99 return NULL;
100 }
101
102 return (const UCharsetMatch *) ((CharsetDetector *) ucsd)->detect(*status);
103 }
104
105 U_CAPI void U_EXPORT2
ucsdet_setDeclaredEncoding(UCharsetDetector * ucsd,const char * encoding,int32_t length,UErrorCode * status)106 ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status)
107 {
108 if(U_FAILURE(*status)) {
109 return;
110 }
111
112 ((CharsetDetector *) ucsd)->setDeclaredEncoding(encoding,length);
113 }
114
115 U_CAPI const UCharsetMatch**
ucsdet_detectAll(UCharsetDetector * ucsd,int32_t * maxMatchesFound,UErrorCode * status)116 ucsdet_detectAll(UCharsetDetector *ucsd,
117 int32_t *maxMatchesFound, UErrorCode *status)
118 {
119 if(U_FAILURE(*status)) {
120 return NULL;
121 }
122
123 CharsetDetector *csd = (CharsetDetector *) ucsd;
124
125 return (const UCharsetMatch**)csd->detectAll(*maxMatchesFound,*status);
126 }
127
128 // U_CAPI const char * U_EXPORT2
129 // ucsdet_getDetectableCharsetName(const UCharsetDetector *csd, int32_t index, UErrorCode *status)
130 // {
131 // if(U_FAILURE(*status)) {
132 // return 0;
133 // }
134 // return csd->getCharsetName(index,*status);
135 // }
136
137 // U_CAPI int32_t U_EXPORT2
138 // ucsdet_getDetectableCharsetsCount(const UCharsetDetector *csd, UErrorCode *status)
139 // {
140 // if(U_FAILURE(*status)) {
141 // return -1;
142 // }
143 // return UCharsetDetector::getDetectableCount();
144 // }
145
146 U_CAPI UBool U_EXPORT2
ucsdet_isInputFilterEnabled(const UCharsetDetector * ucsd)147 ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd)
148 {
149 // todo: could use an error return...
150 if (ucsd == NULL) {
151 return FALSE;
152 }
153
154 return ((CharsetDetector *) ucsd)->getStripTagsFlag();
155 }
156
157 U_CAPI UBool U_EXPORT2
ucsdet_enableInputFilter(UCharsetDetector * ucsd,UBool filter)158 ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter)
159 {
160 // todo: could use an error return...
161 if (ucsd == NULL) {
162 return FALSE;
163 }
164
165 CharsetDetector *csd = (CharsetDetector *) ucsd;
166 UBool prev = csd->getStripTagsFlag();
167
168 csd->setStripTagsFlag(filter);
169
170 return prev;
171 }
172
173 U_CAPI int32_t U_EXPORT2
ucsdet_getUChars(const UCharsetMatch * ucsm,UChar * buf,int32_t cap,UErrorCode * status)174 ucsdet_getUChars(const UCharsetMatch *ucsm,
175 UChar *buf, int32_t cap, UErrorCode *status)
176 {
177 if(U_FAILURE(*status)) {
178 return 0;
179 }
180
181 return ((CharsetMatch *) ucsm)->getUChars(buf, cap, status);
182 }
183
184 U_CAPI void U_EXPORT2
ucsdet_setDetectableCharset(UCharsetDetector * ucsd,const char * encoding,UBool enabled,UErrorCode * status)185 ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status)
186 {
187 ((CharsetDetector *)ucsd)->setDetectableCharset(encoding, enabled, *status);
188 }
189
190 U_CAPI UEnumeration * U_EXPORT2
ucsdet_getAllDetectableCharsets(const UCharsetDetector *,UErrorCode * status)191 ucsdet_getAllDetectableCharsets(const UCharsetDetector * /*ucsd*/, UErrorCode *status)
192 {
193 return CharsetDetector::getAllDetectableCharsets(*status);
194 }
195
196 U_DRAFT UEnumeration * U_EXPORT2
ucsdet_getDetectableCharsets(const UCharsetDetector * ucsd,UErrorCode * status)197 ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status)
198 {
199 return ((CharsetDetector *)ucsd)->getDetectableCharsets(*status);
200 }
201
202 U_CDECL_END
203
204
205 #endif
206