1 /*
2 **********************************************************************
3 * Copyright (C) 2000-2007, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: ucnvisci.c
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2001JUN26
12 * created by: Ram Viswanadha
13 *
14 * Date Name Description
15 * 24/7/2001 Ram Added support for EXT character handling
16 */
17
18 #include "unicode/utypes.h"
19
20 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
21
22 #include "cmemory.h"
23 #include "ucnv_bld.h"
24 #include "unicode/ucnv.h"
25 #include "ucnv_cnv.h"
26 #include "unicode/ucnv_cb.h"
27 #include "unicode/uset.h"
28 #include "cstring.h"
29
/* The ISCII version (i.e. the default script) is carried in the low nibble of the converter options. */
#define UCNV_OPTIONS_VERSION_MASK 0xf

/* Unicode code points that receive special treatment in the converter */
#define NUKTA               0x093c
#define HALANT              0x094d
#define ZWNJ                0x200c /* Zero Width Non Joiner */
#define ZWJ                 0x200d /* Zero Width Joiner */
#define DANDA               0x0964
#define DOUBLE_DANDA        0x0965
#define VOCALLIC_RR         0x0931
#define DEV_ABBR_SIGN       0x0970
#define DEV_ANUDATTA        0x0952
#define LF                  0x0A

/* Sentinel values */
#define INVALID_CHAR        0xffff
#define NO_CHAR_MARKER      0xFFFE /* "no previous character" value for contextCharToUnicode */

/* ISCII byte values */
#define ATR                 0xEF /* Attribute code */
#define EXT                 0xF0 /* Extension code */
#define ISCII_NUKTA         0xE9
#define ISCII_HALANT        0xE8
#define ISCII_DANDA         0xEA
#define ISCII_INV           0xD9
#define ISCII_VOWEL_SIGN_E  0xE0
#define EXT_RANGE_BEGIN     0xA1
#define EXT_RANGE_END       0xEE
/* fromUnicode writes every code unit <= ASCII_END to the output unchanged */
#define ASCII_END           0xA0

/* Bounds of the Unicode range that holds the Indic script blocks */
#define INDIC_BLOCK_BEGIN   0x0900
#define INDIC_BLOCK_END     0x0D7F
#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)

/*
 * Delta of the Telugu script block (DELTA and TELUGU are UniLang enumerators).
 * The expansion is parenthesized so that the macro remains a single operand
 * when it is used next to operators such as '/', '%' or a unary minus.
 */
#define TELUGU_DELTA        (DELTA * TELUGU)
57
58
/*
 * Index of each supported script. _ISCIIOpen multiplies the index by DELTA
 * to obtain the converter's delta for that script, and the same index selects
 * the row in lookupInitialData. DELTA itself is not a script: it is the
 * multiplier (0x80) used for that computation.
 */
typedef enum {
    DEVANAGARI = 0,
    BENGALI    = 1,
    GURMUKHI   = 2,
    GUJARATI   = 3,
    ORIYA      = 4,
    TAMIL      = 5,
    TELUGU     = 6,
    KANNADA    = 7,
    MALAYALAM  = 8,
    DELTA      = 0x80
} UniLang;
71
72
/**
 * Script/code-page selectors of ISCII. A code page switch is requested by
 * the byte pair <ATR> + <one of the values below>; fromUnicode also emits
 * ATR followed by the current script's selector after every line feed.
 */
typedef enum {
    /* default and Roman */
    DEF = 0x40,
    RMN = 0x41,

    /* selectors that lookupInitialData pairs with a UniLang script
     * (ASM has no row of its own there) */
    DEV = 0x42,
    BNG = 0x43,
    TML = 0x44,
    TLG = 0x45,
    ASM = 0x46,
    ORI = 0x47,
    KND = 0x48,
    MLM = 0x49,
    GJR = 0x4A,
    PNJ = 0x4B,

    /* second group of selectors, starting at 0x71 */
    ARB = 0x71,
    PES = 0x72,
    URD = 0x73,
    SND = 0x74,
    KSM = 0x75,
    PST = 0x76
} ISCIILang;
97
/*
 * One bit per script (or script pair) as used in validityTable.
 * lookupInitialData assigns KND_MASK to both Telugu and Kannada and
 * BNG_MASK to Bengali. ZERO means "valid in no script".
 */
typedef enum {
    ZERO     = 0,
    TML_MASK = 1 << 0,
    MLM_MASK = 1 << 1,
    KND_MASK = 1 << 2,
    BNG_MASK = 1 << 3,
    ORI_MASK = 1 << 4,
    GJR_MASK = 1 << 5,
    PNJ_MASK = 1 << 6,
    DEV_MASK = 1 << 7
} MaskEnum;
109
110 #define ISCII_CNV_PREFIX "ISCII,version="
111
112 typedef struct{
113 UChar contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */
114 UChar contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */
115 uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */
116 uint16_t currentDeltaFromUnicode;/* current delta in Indic block */
117 uint16_t currentDeltaToUnicode; /* current delta in Indic block */
118 MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */
119 MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */
120 MaskEnum defMaskToUnicode; /* mask for default state in toUnicode */
121 UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */
122 UBool resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered*/
123 char name[sizeof(ISCII_CNV_PREFIX) + 1];
124 }UConverterDataISCII;
125
126 typedef struct LookupDataStruct
127 {
128 UniLang uniLang;
129 MaskEnum maskEnum;
130 ISCIILang isciiLang;
131 } LookupDataStruct;
132
133 static const LookupDataStruct lookupInitialData[]={
134 { DEVANAGARI, DEV_MASK, DEV },
135 { BENGALI, BNG_MASK, BNG },
136 { GURMUKHI, PNJ_MASK, PNJ },
137 { GUJARATI, GJR_MASK, GJR },
138 { ORIYA, ORI_MASK, ORI },
139 { TAMIL, TML_MASK, TML },
140 { TELUGU, KND_MASK, TLG },
141 { KANNADA, KND_MASK, KND },
142 { MALAYALAM, MLM_MASK, MLM }
143 };
144
145 static void
_ISCIIOpen(UConverter * cnv,const char * name,const char * locale,uint32_t options,UErrorCode * errorCode)146 _ISCIIOpen(UConverter *cnv, const char *name,const char *locale,uint32_t options, UErrorCode *errorCode){
147 cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISCII));
148
149 if(cnv->extraInfo != NULL) {
150 int32_t len=0;
151 UConverterDataISCII *converterData=(UConverterDataISCII *) cnv->extraInfo;
152 converterData->contextCharToUnicode=NO_CHAR_MARKER;
153 cnv->toUnicodeStatus = missingCharMarker;
154 converterData->contextCharFromUnicode=0x0000;
155 converterData->resetToDefaultToUnicode=FALSE;
156 /* check if the version requested is supported */
157 if((options & UCNV_OPTIONS_VERSION_MASK) < 9){
158 /* initialize state variables */
159 converterData->currentDeltaFromUnicode=converterData->currentDeltaToUnicode=
160 converterData->defDeltaToUnicode=
161 (uint16_t)(lookupInitialData[options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);
162
163 converterData->currentMaskFromUnicode = converterData->currentMaskToUnicode =
164 converterData->defMaskToUnicode=lookupInitialData[options & UCNV_OPTIONS_VERSION_MASK].maskEnum;
165
166 converterData->isFirstBuffer=TRUE;
167 (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX);
168 len = (int32_t)uprv_strlen(converterData->name);
169 converterData->name[len]= (char)((options & UCNV_OPTIONS_VERSION_MASK) + '0');
170 converterData->name[len+1]=0;
171 }else{
172 uprv_free(cnv->extraInfo);
173 cnv->extraInfo = NULL;
174 *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
175 }
176
177 }else{
178 *errorCode =U_MEMORY_ALLOCATION_ERROR;
179 }
180 }
181 static void
_ISCIIClose(UConverter * cnv)182 _ISCIIClose(UConverter *cnv){
183 if(cnv->extraInfo!=NULL) {
184 if(!cnv->isExtraLocal) {
185 uprv_free(cnv->extraInfo);
186 }
187 cnv->extraInfo=NULL;
188 }
189 }
190
191 static const char*
_ISCIIgetName(const UConverter * cnv)192 _ISCIIgetName(const UConverter* cnv){
193 if(cnv->extraInfo){
194 UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
195 return myData->name;
196 }
197 return NULL;
198 }
199
200 static void
_ISCIIReset(UConverter * cnv,UConverterResetChoice choice)201 _ISCIIReset(UConverter *cnv, UConverterResetChoice choice){
202 UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);
203 if(choice<=UCNV_RESET_TO_UNICODE) {
204 cnv->toUnicodeStatus = missingCharMarker;
205 cnv->mode=0;
206 data->currentDeltaToUnicode=data->defDeltaToUnicode;
207 data->currentMaskToUnicode = data->defMaskToUnicode;
208 data->contextCharToUnicode=NO_CHAR_MARKER;
209 }
210 if(choice!=UCNV_RESET_TO_UNICODE) {
211 cnv->fromUChar32=0x0000;
212 data->contextCharFromUnicode=0x00;
213 data->currentMaskFromUnicode=data->defMaskToUnicode;
214 data->currentDeltaFromUnicode=data->defDeltaToUnicode;
215 data->isFirstBuffer=TRUE;
216 data->resetToDefaultToUnicode=FALSE;
217 }
218 }
219
220 /**
221 * The values in validity table are indexed by the lower bits of Unicode
222 * range 0x0900 - 0x09ff. The values have a structure like:
223 * ---------------------------------------------------------------
224 * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML |
225 * | | | | | ASM | KND | | |
226 * ---------------------------------------------------------------
227 * If a code point is valid in a particular script
228 * then that bit is turned on
229 *
230 * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for
231 * to represent these languages
232 *
233 * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case
234 * and combine and use 1 bit to represent these languages.
235 *
236 * TODO: It is probably easier to understand and maintain to change this
237 * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
238 */
239
240 static const uint8_t validityTable[128] = {
241 /* This state table is tool generated please do not edit unless you know exactly what you are doing */
242 /* Note: This table was edited to mirror the Windows XP implementation */
243 /*ISCII:Valid:Unicode */
244 /*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
245 /*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
246 /*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
247 /*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
248 /*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
249 /*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
250 /*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
251 /*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
252 /*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
253 /*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
254 /*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
255 /*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
256 /*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
257 /*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
258 /*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
259 /*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
260 /*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
261 /*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
262 /*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
263 /*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
264 /*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
265 /*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
266 /*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
267 /*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
268 /*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
269 /*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
270 /*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
271 /*0xb9 : 0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
272 /*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
273 /*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
274 /*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
275 /*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
276 /*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
277 /*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
278 /*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
279 /*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
280 /*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
281 /*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
282 /*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
283 /*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
284 /*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
285 /*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK ,
286 /*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
287 /*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
288 /*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
289 /*0xcb : 0xfe: 0x92d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
290 /*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
291 /*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
292 /*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
293 /*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
294 /*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
295 /*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
296 /*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,
297 /*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
298 /*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
299 /*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
300 /*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
301 /*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
302 /*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
303 /*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
304 /*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
305 /*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
306 /*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
307 /*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
308 /*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
309 /*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
310 /*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
311 /*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
312 /*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO ,
313 /*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
314 /*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
315 /*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
316 /*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
317 /*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
318 /*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
319 /*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
320 /*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
321 /*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
322 /*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
323 /*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
324 /*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
325 /*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
326 /*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
327 /*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
328 /*0x00 : 0x00: 0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
329 /*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO ,
330 /*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO ,
331 /*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO ,
332 /*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
333 /*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
334 /*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
335 /*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
336 /*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
337 /*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
338 /*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
339 /*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
340 /*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
341 /*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
342 /*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
343 /*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
344 /*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
345 /*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
346 /*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
347 /*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
348 /*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
349 /*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
350 /*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
351 /*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
352 /*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
353 /*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
354 /*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
355 /*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
356 /*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
357
358 /*
359 * The length of the array is 128 to provide values for 0x900..0x97f.
360 * The last 15 entries for 0x971..0x97f of the validity table are all zero
361 * because no Indic script uses such Unicode code points.
362 */
363 /*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO
364 };
365
/*
 * Unicode -> ISCII mapping for one script block, indexed by the offset of the
 * code point within the block (0x0900..0x097f shown in the row labels).
 *
 * A value <= 0xFF is a single ISCII byte. A larger value is a two-byte
 * sequence with the lead byte in the upper 8 bits (for example 0xA4E0 is
 * 0xA4 followed by ISCII_VOWEL_SIGN_E, and 0xA1E9 -- the OM symbol -- is 0xA1
 * followed by ISCII_NUKTA). 0xFFFF, the value of INVALID_CHAR, appears to
 * mark offsets without an ISCII mapping.
 */
static const uint16_t fromUnicodeTable[128]={
    /* 0x0900 */ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0xa4e0, 0x00a4, 0x00a5, 0x00a6,
    /* 0x0908 */ 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0xA6E9, 0x00ae, 0x00ab, 0x00ac,
    /* 0x0910 */ 0x00ad, 0x00b2, 0x00af, 0x00b0, 0x00b1, 0x00b3, 0x00b4, 0x00b5,
    /* 0x0918 */ 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd,
    /* 0x0920 */ 0x00be, 0x00bf, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5,
    /* 0x0928 */ 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd,
    /* 0x0930 */ 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6,
    /* 0x0938 */ 0x00d7, 0x00d8, 0xFFFF, 0xFFFF, 0x00e9, 0xEAE9, 0x00da, 0x00db,
    /* 0x0940 */ 0x00dc, 0x00dd, 0x00de, 0x00df, 0xDFE9, 0x00e3, 0x00e0, 0x00e1,
    /* 0x0948 */ 0x00e2, 0x00e7, 0x00e4, 0x00e5, 0x00e6, 0x00e8, 0x00ec, 0x00ed,
    /* 0x0950 */ 0xA1E9, 0xFFFF, 0xF0B8, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0x0958 */ 0xb3e9, 0xb4e9, 0xb5e9, 0xbae9, 0xbfe9, 0xC0E9, 0xc9e9, 0x00ce,
    /* 0x0960 */ 0xAAe9, 0xA7E9, 0xDBE9, 0xDCE9, 0x00ea, 0xeaea, 0x00f1, 0x00f2,
    /* 0x0968 */ 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa,
    /* 0x0970 */ 0xF0BF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0x0978 */ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
/*
 * ISCII -> Unicode mapping, indexed by the ISCII byte (row labels give the
 * byte of the first entry in each row).
 *
 * Bytes 0x00..0xa0 map to the code point of the same value. Bytes from 0xa1
 * up map into the Devanagari block (0x09xx), except 0xd9 which maps to
 * U+200D. 0xFFFF, the value of INVALID_CHAR, fills the slots that have no
 * direct mapping; these include the ATR (0xef) and EXT (0xf0) bytes.
 */
static const uint16_t toUnicodeTable[256]={
    /* 0x00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
    /* 0x08 */ 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    /* 0x10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
    /* 0x18 */ 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
    /* 0x20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    /* 0x28 */ 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    /* 0x30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    /* 0x38 */ 0x0038, 0x0039, 0x003A, 0x003B, 0x003c, 0x003d, 0x003e, 0x003f,
    /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    /* 0x48 */ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
    /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    /* 0x58 */ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
    /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    /* 0x68 */ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    /* 0x78 */ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    /* 0xa0 */ 0x00A0, 0x0901, 0x0902, 0x0903, 0x0905, 0x0906, 0x0907, 0x0908,
    /* 0xa8 */ 0x0909, 0x090a, 0x090b, 0x090e, 0x090f, 0x0910, 0x090d, 0x0912,
    /* 0xb0 */ 0x0913, 0x0914, 0x0911, 0x0915, 0x0916, 0x0917, 0x0918, 0x0919,
    /* 0xb8 */ 0x091a, 0x091b, 0x091c, 0x091d, 0x091e, 0x091f, 0x0920, 0x0921,
    /* 0xc0 */ 0x0922, 0x0923, 0x0924, 0x0925, 0x0926, 0x0927, 0x0928, 0x0929,
    /* 0xc8 */ 0x092a, 0x092b, 0x092c, 0x092d, 0x092e, 0x092f, 0x095f, 0x0930,
    /* 0xd0 */ 0x0931, 0x0932, 0x0933, 0x0934, 0x0935, 0x0936, 0x0937, 0x0938,
    /* 0xd8 */ 0x0939, 0x200D, 0x093e, 0x093f, 0x0940, 0x0941, 0x0942, 0x0943,
    /* 0xe0 */ 0x0946, 0x0947, 0x0948, 0x0945, 0x094a, 0x094b, 0x094c, 0x0949,
    /* 0xe8 */ 0x094d, 0x093c, 0x0964, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0xf0 */ 0xFFFF, 0x0966, 0x0967, 0x0968, 0x0969, 0x096a, 0x096b, 0x096c,
    /* 0xf8 */ 0x096d, 0x096e, 0x096f, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
754
/*
 * { ISCII byte, Unicode code point } pairs for bytes that combine with a
 * following ISCII_VOWEL_SIGN_E (compare fromUnicodeTable, where 0x0904 maps
 * to 0xA4E0). Row 0 is a header: its first element is the number of rows in
 * the array, the header included.
 */
static const uint16_t vowelSignESpecialCases[][2]={
    { 2 /* number of rows */, 0x0000 },
    { 0x00A4, 0x0904 }
};
759
/*
 * { ISCII byte, Unicode code point } pairs for bytes that combine with a
 * following ISCII_NUKTA (compare fromUnicodeTable, where e.g. 0x090c maps to
 * 0xA6E9). Row 0 is a header: its first element is the number of rows in the
 * array, the header included.
 */
static const uint16_t nuktaSpecialCases[][2]={
    { 16 /* number of rows */, 0x0000 },
    { 0x00A6, 0x090c },
    { 0x00EA, 0x093D },
    { 0x00DF, 0x0944 },
    { 0x00A1, 0x0950 },
    { 0x00B3, 0x0958 },
    { 0x00B4, 0x0959 },
    { 0x00B5, 0x095a },
    { 0x00BA, 0x095b },
    { 0x00BF, 0x095c },
    { 0x00C0, 0x095d },
    { 0x00C9, 0x095e },
    { 0x00AA, 0x0960 },
    { 0x00A7, 0x0961 },
    { 0x00DB, 0x0962 },
    { 0x00DC, 0x0963 }
};
778
/*
 * Writes one ISCII unit (targetByteUnit) to the output.
 *
 * A unit <= 0xFF is one byte; a larger unit is two bytes, lead byte (bits
 * 8..15) first. Every byte stored in the output advances `target` and, when
 * `offsets` is non-NULL, records the index of the code unit just consumed
 * (source - args->source - 1). Bytes that do not fit are appended to the
 * converter's charErrorBuffer and *err is set to U_BUFFER_OVERFLOW_ERROR.
 *
 * offsets, source and target must be modifiable lvalues; several parameters
 * are evaluated more than once, so arguments must be free of side effects.
 * The expansion is a braced block, not a do/while(0) statement.
 */
#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){       \
    if((target) >= (targetLimit)){                                                               \
        /* no room at all: the whole unit goes to the error buffer */                            \
        if((targetByteUnit) & 0xFF00){                                                           \
            (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =     \
                (uint8_t)((targetByteUnit) >> 8);                                                \
        }                                                                                        \
        (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =         \
            (uint8_t)(targetByteUnit);                                                           \
        *(err) = U_BUFFER_OVERFLOW_ERROR;                                                        \
    }else if((targetByteUnit) > 0xFF){                                                           \
        /* two bytes: the lead byte is known to fit */                                           \
        *(target)++ = (uint8_t)((targetByteUnit) >> 8);                                          \
        if(offsets){                                                                             \
            *((offsets)++) = (int32_t)((source) - (args)->source - 1);                           \
        }                                                                                        \
        if((target) < (targetLimit)){                                                            \
            *(target)++ = (uint8_t)(targetByteUnit);                                             \
            if(offsets){                                                                         \
                *((offsets)++) = (int32_t)((source) - (args)->source - 1);                       \
            }                                                                                    \
        }else{                                                                                   \
            /* the trail byte has to wait in the error buffer */                                 \
            (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =     \
                (uint8_t)(targetByteUnit);                                                       \
            *(err) = U_BUFFER_OVERFLOW_ERROR;                                                    \
        }                                                                                        \
    }else{                                                                                       \
        /* single byte */                                                                        \
        *(target)++ = (uint8_t)(targetByteUnit);                                                 \
        if(offsets){                                                                             \
            *((offsets)++) = (int32_t)((source) - (args)->source - 1);                           \
        }                                                                                        \
    }                                                                                            \
}
813
814 /* Rules:
815 * Explicit Halant :
816 * <HALANT> + <ZWNJ>
817 * Soft Halant :
818 * <HALANT> + <ZWJ>
819 */
820
821 static void
UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,UErrorCode * err)822 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
823 UErrorCode * err){
824 const UChar *source = args->source;
825 const UChar *sourceLimit = args->sourceLimit;
826 unsigned char *target = (unsigned char *) args->target;
827 unsigned char *targetLimit = (unsigned char *) args->targetLimit;
828 int32_t* offsets = args->offsets;
829 uint32_t targetByteUnit = 0x0000;
830 UChar32 sourceChar = 0x0000;
831 UConverterDataISCII *converterData;
832 uint16_t newDelta=0;
833 uint16_t range = 0;
834 UBool deltaChanged = FALSE;
835
836 if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
837 *err = U_ILLEGAL_ARGUMENT_ERROR;
838 return;
839 }
840 /* initialize data */
841 converterData=(UConverterDataISCII*)args->converter->extraInfo;
842 newDelta=converterData->currentDeltaFromUnicode;
843 range = (uint16_t)(newDelta/DELTA);
844
845 if((sourceChar = args->converter->fromUChar32)!=0) {
846 goto getTrail;
847 }
848
849 /*writing the char to the output stream */
850 while(source < sourceLimit){
851
852 targetByteUnit = missingCharMarker;
853
854 sourceChar = *source++;
855
856 /*check if input is in ASCII and C0 control codes range*/
857 if (sourceChar <= ASCII_END) {
858 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
859 if(U_FAILURE(*err)){
860 break;
861 }
862 if(sourceChar == LF){
863 targetByteUnit = ATR<<8;
864 targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
865 args->converter->fromUnicodeStatus=sourceChar;
866 /* now append ATR and language code */
867 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
868 if(U_FAILURE(*err)){
869 break;
870 }
871 }
872 continue;
873 }
874 switch(sourceChar){
875 case ZWNJ:
876 /* contextChar has HALANT */
877 if(converterData->contextCharFromUnicode){
878 converterData->contextCharFromUnicode = 0x00;
879 targetByteUnit = ISCII_HALANT;
880 }else{
881 /* consume ZWNJ and continue */
882 converterData->contextCharFromUnicode = 0x00;
883 continue;
884 }
885 break;
886 case ZWJ:
887 /* contextChar has HALANT */
888 if(converterData->contextCharFromUnicode){
889 targetByteUnit = ISCII_NUKTA;
890 }else{
891 targetByteUnit =ISCII_INV;
892 }
893 converterData->contextCharFromUnicode = 0x00;
894 break;
895 default:
896 /* is the sourceChar in the INDIC_RANGE? */
897 if((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE){
898 /* Danda and Double Danda are valid in Northern scripts.. since Unicode
899 * does not include these codepoints in all Northern scrips we need to
900 * filter them out
901 */
902 if(sourceChar!= DANDA && sourceChar != DOUBLE_DANDA){
903 /* find out to which block the souceChar belongs*/
904 range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
905 newDelta =(uint16_t)(range*DELTA);
906
907 /* Now are we in the same block as the previous? */
908 if(newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer){
909 converterData->currentDeltaFromUnicode = newDelta;
910 converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
911 deltaChanged =TRUE;
912 converterData->isFirstBuffer=FALSE;
913 }
914 /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
915 /* now subtract the new delta from sourceChar*/
916 sourceChar -= converterData->currentDeltaFromUnicode ;
917 }
918
919 /* get the target byte unit */
920 targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
921
922 /* is the code point valid in current script? */
923 if((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0){
924 /* Vocallic RR is assigne in ISCII Telugu and Unicode */
925 if(converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) && sourceChar!=VOCALLIC_RR){
926 targetByteUnit=missingCharMarker;
927 }
928 }
929
930 if(deltaChanged){
931 /* we are in a script block which is different than
932 * previous sourceChar's script block write ATR and language codes
933 */
934 uint16_t temp=0;
935 temp =(uint16_t)(ATR<<8);
936 temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
937 /* reset */
938 deltaChanged=FALSE;
939 /* now append ATR and language code */
940 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
941 if(U_FAILURE(*err)){
942 break;
943 }
944 }
945 }
946 /* reset context char */
947 converterData->contextCharFromUnicode = 0x00;
948 break;
949 }
950
951
952 if(targetByteUnit != missingCharMarker){
953 if(targetByteUnit==ISCII_HALANT){
954 converterData->contextCharFromUnicode = (UChar)targetByteUnit;
955 }
956 /* write targetByteUnit to target*/
957 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
958 if(U_FAILURE(*err)){
959 break;
960 }
961 }
962 else{
963 /* oops.. the code point is unassigned */
964 /*check if the char is a First surrogate*/
965 if(UTF_IS_SURROGATE(sourceChar)) {
966 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
967 getTrail:
968 /*look ahead to find the trail surrogate*/
969 if(source < sourceLimit) {
970 /* test the following code unit */
971 UChar trail= (*source);
972 if(UTF_IS_SECOND_SURROGATE(trail)) {
973 source++;
974 sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
975 *err =U_INVALID_CHAR_FOUND;
976 /* convert this surrogate code point */
977 /* exit this condition tree */
978 } else {
979 /* this is an unmatched lead code unit (1st surrogate) */
980 /* callback(illegal) */
981 *err=U_ILLEGAL_CHAR_FOUND;
982 }
983 } else {
984 /* no more input */
985 *err = U_ZERO_ERROR;
986 }
987 } else {
988 /* this is an unmatched trail code unit (2nd surrogate) */
989 /* callback(illegal) */
990 *err=U_ILLEGAL_CHAR_FOUND;
991 }
992 } else {
993 /* callback(unassigned) for a BMP code point */
994 *err = U_INVALID_CHAR_FOUND;
995 }
996
997 args->converter->fromUChar32=sourceChar;
998 break;
999 }
1000 }/* end while(mySourceIndex<mySourceLength) */
1001
1002 /*save the state and return */
1003 args->source = source;
1004 args->target = (char*)target;
1005 }
1006
1007 static const int32_t lookupTable[][2]={
1008 { ZERO, ZERO }, /*DEFALT*/
1009 { ZERO, ZERO }, /*ROMAN*/
1010 { DEVANAGARI, DEV_MASK },
1011 { BENGALI, BNG_MASK },
1012 { TAMIL, TML_MASK },
1013 { TELUGU, KND_MASK },
1014 { BENGALI, BNG_MASK },
1015 { ORIYA, ORI_MASK },
1016 { KANNADA, KND_MASK },
1017 { MALAYALAM, MLM_MASK },
1018 { GUJARATI, GJR_MASK },
1019 { GURMUKHI, PNJ_MASK }
1020
1021 };
1022
/*
 * Writes one UTF-16 code unit to the toUnicode output.
 *
 * targetUniChar is first shifted by delta into the current script block,
 * unless it is <= ASCII_END or one of ZWJ, ZWNJ, DANDA, DOUBLE_DANDA. The
 * shift is applied to targetUniChar itself, so it must be a modifiable
 * lvalue. If target has reached args->targetLimit the unit is appended to
 * args->converter->UCharErrorBuffer and *err is set to
 * U_BUFFER_OVERFLOW_ERROR; otherwise it is stored through target, and
 * offset is recorded when offsets is non-NULL. target and offsets are
 * advanced in place. The source parameter is not used.
 *
 * Every argument is parenthesized and the body is wrapped in
 * do { } while(0) so the macro behaves as a single statement.
 */
#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err) do { \
    /* add offset to current Indic Block */                                                  \
    if((targetUniChar) > ASCII_END &&                                                        \
       (targetUniChar) != ZWJ &&                                                             \
       (targetUniChar) != ZWNJ &&                                                            \
       (targetUniChar) != DANDA &&                                                           \
       (targetUniChar) != DOUBLE_DANDA){                                                     \
                                                                                             \
        (targetUniChar) += (uint16_t)(delta);                                                \
    }                                                                                        \
    /* now write the targetUniChar */                                                        \
    if((target) < (args)->targetLimit){                                                      \
        *(target)++ = (UChar)(targetUniChar);                                                \
        if(offsets){                                                                         \
            *(offsets)++ = (int32_t)(offset);                                                \
        }                                                                                    \
    }else{                                                                                   \
        (args)->converter->UCharErrorBuffer[(args)->converter->UCharErrorBufferLength++] =   \
            (UChar)(targetUniChar);                                                          \
        *(err) = U_BUFFER_OVERFLOW_ERROR;                                                    \
    }                                                                                        \
} while(0)
1045
/*
 * Looks up the Unicode mapping of one ISCII byte.
 *
 * targetUniChar receives toUnicodeTable[sourceChar]. For bytes above
 * ASCII_END the result is then checked against validityTable for the
 * script selected by data->currentMaskToUnicode. A code point that fails
 * the check is replaced by missingCharMarker, with one exception:
 * VOCALLIC_RR while the current script is Telugu.
 *
 * The validity index is masked to 0x7F (the width of one script block) so
 * that a table result such as 0xFFFF cannot index past the first 0x80
 * entries of validityTable.
 */
#define GET_MAPPING(sourceChar,targetUniChar,data) do {                                     \
    (targetUniChar) = toUnicodeTable[(sourceChar)];                                         \
    /* is the code point valid in current script? */                                        \
    if((sourceChar) > ASCII_END &&                                                          \
       (validityTable[((targetUniChar) & 0x7F)] & (data)->currentMaskToUnicode)==0){        \
        /* Vocallic RR is assigned in ISCII Telugu and Unicode */                           \
        if((data)->currentDeltaToUnicode != (TELUGU_DELTA) ||                               \
           (targetUniChar) != VOCALLIC_RR){                                                 \
            (targetUniChar) = missingCharMarker;                                            \
        }                                                                                   \
    }                                                                                       \
} while(0)
1058
/***********
 *  Rules for ISCII to Unicode converter
 *  ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
 *  which has both precomposed and decomposed forms of characters,
 *  pre-context and post-context need to be considered.
 *
 *  Post context
 *  i)  ATR : Attribute code is used to declare the font and script switching.
 *      Currently we only switch scripts; font codes are consumed without generating an error
 *  ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
 *      obsolete characters
 *  Pre context
 *  i)  Halant: if preceded by a halant then it is an explicit halant
 *  ii) Nukta :
 *       a) if preceded by a halant then it is a soft halant
 *       b) if preceded by specific consonants and the ligatures have pre-composed
 *          characters in Unicode then convert to pre-composed characters
 *  iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
 *
 */
1079
1080 static void
UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs * args,UErrorCode * err)1081 UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
1082 UErrorCode* err){
1083 const char *source = ( char *) args->source;
1084 UChar *target = args->target;
1085 const char *sourceLimit = args->sourceLimit;
1086 const UChar* targetLimit = args->targetLimit;
1087 uint32_t targetUniChar = 0x0000;
1088 uint8_t sourceChar = 0x0000;
1089 UConverterDataISCII* data;
1090 UChar32* toUnicodeStatus=NULL;
1091 UChar* contextCharToUnicode = NULL;
1092 UBool found;
1093 int i;
1094
1095 if ((args->converter == NULL) || (target < args->target) || (source < args->source)){
1096 *err = U_ILLEGAL_ARGUMENT_ERROR;
1097 return;
1098 }
1099
1100 data = (UConverterDataISCII*)(args->converter->extraInfo);
1101 contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
1102 toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
1103
1104 while(source<sourceLimit){
1105
1106 targetUniChar = missingCharMarker;
1107
1108 if(target < targetLimit){
1109 sourceChar = (unsigned char)*(source)++;
1110
1111 /* look at the post-context preform special processing */
1112 if(*contextCharToUnicode==ATR){
1113
1114 /* If we have ATR in *contextCharToUnicode then we need to change our
1115 * state to the Indic Script specified by sourceChar
1116 */
1117
1118 /* check if the sourceChar is supported script range*/
1119 if((uint8_t)(PNJ-sourceChar)<=PNJ-DEV){
1120 data->currentDeltaToUnicode =
1121 (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
1122 data->currentMaskToUnicode =
1123 (MaskEnum)lookupTable[sourceChar & 0x0F][1] ;
1124 }
1125 else if(sourceChar==DEF){
1126 /* switch back to default */
1127 data->currentDeltaToUnicode = data->defDeltaToUnicode;
1128 data->currentMaskToUnicode = data->defMaskToUnicode;
1129 }else{
1130 if((sourceChar >= 0x21 && sourceChar <= 0x3F)){
1131 /* these are display codes consume and continue */
1132 }else{
1133 *err =U_ILLEGAL_CHAR_FOUND;
1134 /* reset */
1135 *contextCharToUnicode=NO_CHAR_MARKER;
1136 goto CALLBACK;
1137 }
1138 }
1139
1140 /* reset */
1141 *contextCharToUnicode=NO_CHAR_MARKER;
1142
1143 continue;
1144
1145 }else if(*contextCharToUnicode==EXT){
1146 /* check if sourceChar is in 0xA1-0xEE range */
1147 if((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)){
1148 /* We currently support only Anudatta and Devanagari abbreviation sign */
1149 if(sourceChar==0xBF || sourceChar == 0xB8){
1150 targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
1151
1152 /* find out if the mapping is valid in this state */
1153 if(validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode){
1154
1155 *contextCharToUnicode= NO_CHAR_MARKER;
1156
1157 /* write to target */
1158 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),
1159 targetUniChar,data->currentDeltaToUnicode,err);
1160
1161 continue;
1162 }
1163 }
1164 /* byte unit is unassigned */
1165 targetUniChar = missingCharMarker;
1166 *err= U_INVALID_CHAR_FOUND;
1167 }else{
1168 /* only 0xA1 - 0xEE are legal after EXT char */
1169 *contextCharToUnicode= NO_CHAR_MARKER;
1170 *err = U_ILLEGAL_CHAR_FOUND;
1171 }
1172 goto CALLBACK;
1173 }else if(*contextCharToUnicode==ISCII_INV){
1174 if(sourceChar==ISCII_HALANT){
1175 targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
1176 }else{
1177 targetUniChar = ZWJ;
1178 }
1179
1180 /* write to target */
1181 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),
1182 targetUniChar,data->currentDeltaToUnicode,err);
1183 /* reset */
1184 *contextCharToUnicode=NO_CHAR_MARKER;
1185 }
1186
1187 /* look at the pre-context and perform special processing */
1188 switch(sourceChar){
1189 case ISCII_INV:
1190 case EXT: /*falls through*/
1191 case ATR:
1192 *contextCharToUnicode = (UChar)sourceChar;
1193
1194 if(*toUnicodeStatus != missingCharMarker){
1195
1196 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),
1197 *toUnicodeStatus,data->currentDeltaToUnicode,err);
1198 *toUnicodeStatus = missingCharMarker;
1199 }
1200 continue;
1201 case ISCII_DANDA:
1202 /* handle double danda*/
1203 if(*contextCharToUnicode== ISCII_DANDA){
1204 targetUniChar = DOUBLE_DANDA;
1205 /* clear the context */
1206 *contextCharToUnicode = NO_CHAR_MARKER;
1207 *toUnicodeStatus = missingCharMarker;
1208 }else{
1209 GET_MAPPING(sourceChar,targetUniChar,data);
1210 *contextCharToUnicode = sourceChar;
1211 }
1212 break;
1213 case ISCII_HALANT:
1214 /* handle explicit halant */
1215 if(*contextCharToUnicode == ISCII_HALANT){
1216 targetUniChar = ZWNJ;
1217 /* clear the context */
1218 *contextCharToUnicode = NO_CHAR_MARKER;
1219 }else{
1220 GET_MAPPING(sourceChar,targetUniChar,data);
1221 *contextCharToUnicode = sourceChar;
1222 }
1223 break;
1224 case 0x0A:
1225 /* fall through */
1226 case 0x0D:
1227 data->resetToDefaultToUnicode = TRUE;
1228 GET_MAPPING(sourceChar,targetUniChar,data);
1229 *contextCharToUnicode = sourceChar;
1230 break;
1231
1232 case ISCII_VOWEL_SIGN_E:
1233 i=1;
1234 found=FALSE;
1235 for( ;i<vowelSignESpecialCases[0][0];i++){
1236 if(vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode){
1237 targetUniChar=vowelSignESpecialCases[i][1];
1238 found=TRUE;
1239 break;
1240 }
1241 }
1242 if(found) {
1243 /* find out if the mapping is valid in this state */
1244 if(validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode){
1245 /*targetUniChar += data->currentDeltaToUnicode ;*/
1246 *contextCharToUnicode= NO_CHAR_MARKER;
1247 *toUnicodeStatus = missingCharMarker;
1248 break;
1249 }
1250 }
1251 GET_MAPPING(sourceChar,targetUniChar,data);
1252 *contextCharToUnicode = sourceChar;
1253 break;
1254
1255 case ISCII_NUKTA:
1256 /* handle soft halant */
1257 if(*contextCharToUnicode == ISCII_HALANT){
1258 targetUniChar = ZWJ;
1259 /* clear the context */
1260 *contextCharToUnicode = NO_CHAR_MARKER;
1261 break;
1262 }else{
1263 /* try to handle <CHAR> + ISCII_NUKTA special mappings */
1264 i=1;
1265 found =FALSE;
1266 for( ;i<nuktaSpecialCases[0][0];i++){
1267 if(nuktaSpecialCases[i][0]==(uint8_t)*contextCharToUnicode){
1268 targetUniChar=nuktaSpecialCases[i][1];
1269 found =TRUE;
1270 break;
1271 }
1272 }
1273 if(found){
1274 /* find out if the mapping is valid in this state */
1275 if(validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode){
1276 /*targetUniChar += data->currentDeltaToUnicode ;*/
1277 *contextCharToUnicode= NO_CHAR_MARKER;
1278 *toUnicodeStatus = missingCharMarker;
1279 break;
1280 }
1281 /* else fall through to default */
1282 }
1283 /* else fall through to default */
1284 }
1285 default:
1286 GET_MAPPING(sourceChar,targetUniChar,data);
1287 *contextCharToUnicode = sourceChar;
1288 break;
1289 }
1290
1291
1292 if(*toUnicodeStatus != missingCharMarker){
1293 /* write the previously mapped codepoint */
1294 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),
1295 *toUnicodeStatus,data->currentDeltaToUnicode,err);
1296 *toUnicodeStatus = missingCharMarker;
1297 }
1298
1299
1300 if(targetUniChar != missingCharMarker ){
1301 /* now save the targetUniChar for delayed write */
1302 *toUnicodeStatus = (UChar) targetUniChar;
1303 if(data->resetToDefaultToUnicode==TRUE){
1304 data->currentDeltaToUnicode = data->defDeltaToUnicode;
1305 data->currentMaskToUnicode = data->defMaskToUnicode;
1306 data->resetToDefaultToUnicode=FALSE;
1307 }
1308 }else{
1309
1310 /* we reach here only if targetUniChar == missingCharMarker
1311 * so assign codes to reason and err
1312 */
1313 *err = U_INVALID_CHAR_FOUND;
1314 CALLBACK:
1315 args->converter->toUBytes[0] = (uint8_t) sourceChar;
1316 args->converter->toULength = 1;
1317 break;
1318 }
1319
1320 }
1321 else{
1322 *err =U_BUFFER_OVERFLOW_ERROR;
1323 break;
1324 }
1325 }
1326
1327 if(U_SUCCESS(*err) && args->flush && source == sourceLimit) {
1328 /* end of the input stream */
1329 UConverter *cnv = args->converter;
1330
1331 if(*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV){
1332 /* set toUBytes[] */
1333 cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
1334 cnv->toULength = 1;
1335
1336 /* avoid looping on truncated sequences */
1337 *contextCharToUnicode = NO_CHAR_MARKER;
1338 }else{
1339 cnv->toULength = 0;
1340 }
1341
1342 if(*toUnicodeStatus != missingCharMarker) {
1343 /* output a remaining target character */
1344 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),
1345 *toUnicodeStatus,data->currentDeltaToUnicode,err);
1346 *toUnicodeStatus = missingCharMarker;
1347 }
1348 }
1349
1350 args->target = target;
1351 args->source = source;
1352 }
1353
1354 /* structure for SafeClone calculations */
1355 struct cloneISCIIStruct
1356 {
1357 UConverter cnv;
1358 UConverterDataISCII mydata;
1359 };
1360
1361
1362 static UConverter *
_ISCII_SafeClone(const UConverter * cnv,void * stackBuffer,int32_t * pBufferSize,UErrorCode * status)1363 _ISCII_SafeClone(const UConverter *cnv,
1364 void *stackBuffer,
1365 int32_t *pBufferSize,
1366 UErrorCode *status)
1367 {
1368 struct cloneISCIIStruct * localClone;
1369 int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);
1370
1371 if (U_FAILURE(*status)){
1372 return 0;
1373 }
1374
1375 if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
1376 *pBufferSize = bufferSizeNeeded;
1377 return 0;
1378 }
1379
1380 localClone = (struct cloneISCIIStruct *)stackBuffer;
1381 /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
1382
1383 uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
1384 localClone->cnv.extraInfo = &localClone->mydata;
1385 localClone->cnv.isExtraLocal = TRUE;
1386
1387 return &localClone->cnv;
1388 }
1389
1390 static void
_ISCIIGetUnicodeSet(const UConverter * cnv,const USetAdder * sa,UConverterUnicodeSet which,UErrorCode * pErrorCode)1391 _ISCIIGetUnicodeSet(const UConverter *cnv,
1392 const USetAdder *sa,
1393 UConverterUnicodeSet which,
1394 UErrorCode *pErrorCode)
1395 {
1396 int32_t idx, script;
1397 uint8_t mask;
1398
1399 /* Since all ISCII versions allow switching to other ISCII
1400 scripts, we add all roundtrippable characters to this set. */
1401 sa->addRange(sa->set, 0, ASCII_END);
1402 for (script = DEVANAGARI; script <= MALAYALAM; script++) {
1403 mask = (uint8_t)(lookupInitialData[script].maskEnum);
1404 for (idx = 0; idx < DELTA; idx++) {
1405 if (validityTable[idx] & mask) {
1406 sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
1407 }
1408 }
1409 }
1410 sa->add(sa->set, DANDA);
1411 sa->add(sa->set, DOUBLE_DANDA);
1412 sa->add(sa->set, ZWNJ);
1413 sa->add(sa->set, ZWJ);
1414 }
1415
1416 static const UConverterImpl _ISCIIImpl={
1417
1418 UCNV_ISCII,
1419
1420 NULL,
1421 NULL,
1422
1423 _ISCIIOpen,
1424 _ISCIIClose,
1425 _ISCIIReset,
1426
1427 UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
1428 UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
1429 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
1430 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
1431 NULL,
1432
1433 NULL,
1434 _ISCIIgetName,
1435 NULL,
1436 _ISCII_SafeClone,
1437 _ISCIIGetUnicodeSet
1438 };
1439
1440 static const UConverterStaticData _ISCIIStaticData={
1441 sizeof(UConverterStaticData),
1442 "ISCII",
1443 0,
1444 UCNV_IBM,
1445 UCNV_ISCII,
1446 1,
1447 4,
1448 { 0x1a, 0, 0, 0 },
1449 0x1,
1450 FALSE,
1451 FALSE,
1452 0x0,
1453 0x0,
1454 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
1455
1456 };
1457
1458 const UConverterSharedData _ISCIIData={
1459 sizeof(UConverterSharedData),
1460 ~((uint32_t) 0),
1461 NULL,
1462 NULL,
1463 &_ISCIIStaticData,
1464 FALSE,
1465 &_ISCIIImpl,
1466 0
1467 };
1468
1469 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
1470