• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 *   Copyright (C) 2000-2007, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   file name:  ucnvisci.c
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2001JUN26
12 *   created by: Ram Viswanadha
13 *
14 *   Date        Name        Description
15 *   24/7/2001   Ram         Added support for EXT character handling
16 */
17 
18 #include "unicode/utypes.h"
19 
20 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
21 
22 #include "cmemory.h"
23 #include "ucnv_bld.h"
24 #include "unicode/ucnv.h"
25 #include "ucnv_cnv.h"
26 #include "unicode/ucnv_cb.h"
27 #include "unicode/uset.h"
28 #include "cstring.h"
29 
/* Low bits of the converter open options that carry the requested ISCII version. */
#define UCNV_OPTIONS_VERSION_MASK 0xf

/* Unicode code points that get special handling. */
#define NUKTA               0x093c
#define HALANT              0x094d
#define ZWNJ                0x200c /* Zero Width Non Joiner */
#define ZWJ                 0x200d /* Zero width Joiner */
#define INVALID_CHAR        0xffff

/* ISCII byte values. */
#define ATR                 0xEF   /* Attribute code */
#define EXT                 0xF0   /* Extension code */
#define DANDA               0x0964
#define DOUBLE_DANDA        0x0965
#define ISCII_NUKTA         0xE9
#define ISCII_HALANT        0xE8
#define ISCII_DANDA         0xEA
#define ISCII_INV           0xD9
#define ISCII_VOWEL_SIGN_E  0xE0

/* Bounds of the Unicode Indic range covered by this converter. */
#define INDIC_BLOCK_BEGIN   0x0900
#define INDIC_BLOCK_END     0x0D7F
#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
#define VOCALLIC_RR         0x0931
#define LF                  0x0A
#define ASCII_END           0xA0
#define NO_CHAR_MARKER      0xFFFE
/*
 * Offset of the Telugu block from the start of the Indic range.
 * Parenthesized so that the product stays a single operand when the macro
 * is used inside a larger expression (division, subtraction, casts, ...).
 */
#define TELUGU_DELTA        (DELTA * TELUGU)
#define DEV_ABBR_SIGN       0x0970
#define DEV_ANUDATTA        0x0952
#define EXT_RANGE_BEGIN     0xA1
#define EXT_RANGE_END       0xEE
57 
58 
/*
 * Index of each supported script. Multiplying an index by DELTA gives the
 * offset of that script's block from INDIC_BLOCK_BEGIN (see _ISCIIOpen).
 */
typedef enum {
    DEVANAGARI = 0,
    BENGALI    = 1,
    GURMUKHI   = 2,
    GUJARATI   = 3,
    ORIYA      = 4,
    TAMIL      = 5,
    TELUGU     = 6,
    KANNADA    = 7,
    MALAYALAM  = 8,
    /* Not a script: the distance between consecutive script blocks. */
    DELTA      = 0x80
} UniLang;
71 
72 
/**
 * Code-page selectors: the byte that follows <ATR> in the ISCII stream
 * and requests a switch to the corresponding code page.
 *
 * The script names given below are the ones lookupInitialData pairs
 * with each selector; the remaining selectors are left unannotated.
 */
typedef enum {
    DEF = 0x40,
    RMN = 0x41,
    DEV = 0x42,     /* Devanagari */
    BNG = 0x43,     /* Bengali */
    TML = 0x44,     /* Tamil */
    TLG = 0x45,     /* Telugu */
    ASM = 0x46,
    ORI = 0x47,     /* Oriya */
    KND = 0x48,     /* Kannada */
    MLM = 0x49,     /* Malayalam */
    GJR = 0x4A,     /* Gujarati */
    PNJ = 0x4B,     /* Gurmukhi */

    ARB = 0x71,
    PES = 0x72,
    URD = 0x73,
    SND = 0x74,
    KSM = 0x75,
    PST = 0x76
} ISCIILang;
97 
/*
 * One bit per script (or script pair) in a validityTable entry.
 * ZERO is the empty mask and doubles as a column placeholder in the table.
 */
typedef enum {
    DEV_MASK = 1 << 7,
    PNJ_MASK = 1 << 6,
    GJR_MASK = 1 << 5,
    ORI_MASK = 1 << 4,
    BNG_MASK = 1 << 3,
    KND_MASK = 1 << 2,
    MLM_MASK = 1 << 1,
    TML_MASK = 1 << 0,
    ZERO     = 0
} MaskEnum;
109 
110 #define ISCII_CNV_PREFIX "ISCII,version="
111 
112 typedef struct{
113     UChar contextCharToUnicode;      /* previous Unicode codepoint for contextual analysis */
114     UChar contextCharFromUnicode;    /* previous Unicode codepoint for contextual analysis */
115     uint16_t defDeltaToUnicode;      /* delta for switching to default state when DEF is encountered  */
116     uint16_t currentDeltaFromUnicode;/* current delta in Indic block */
117     uint16_t currentDeltaToUnicode;  /* current delta in Indic block */
118     MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */
119     MaskEnum currentMaskToUnicode;   /* mask for current state in toUnicode */
120     MaskEnum defMaskToUnicode;       /* mask for default state in toUnicode */
121     UBool isFirstBuffer;             /* boolean for fromUnicode to see if we need to announce the first script */
122     UBool resetToDefaultToUnicode;   /* boolean for reseting to default delta and mask when a newline is encountered*/
123     char name[sizeof(ISCII_CNV_PREFIX) + 1];
124 }UConverterDataISCII;
125 
126 typedef struct LookupDataStruct
127 {
128     UniLang uniLang;
129     MaskEnum maskEnum;
130     ISCIILang isciiLang;
131 } LookupDataStruct;
132 
133 static const LookupDataStruct lookupInitialData[]={
134     { DEVANAGARI, DEV_MASK,  DEV },
135     { BENGALI,    BNG_MASK,  BNG },
136     { GURMUKHI,   PNJ_MASK,  PNJ },
137     { GUJARATI,   GJR_MASK,  GJR },
138     { ORIYA,      ORI_MASK,  ORI },
139     { TAMIL,      TML_MASK,  TML },
140     { TELUGU,     KND_MASK,  TLG },
141     { KANNADA,    KND_MASK,  KND },
142     { MALAYALAM,  MLM_MASK,  MLM }
143 };
144 
145 static void
_ISCIIOpen(UConverter * cnv,const char * name,const char * locale,uint32_t options,UErrorCode * errorCode)146 _ISCIIOpen(UConverter *cnv, const char *name,const char *locale,uint32_t options, UErrorCode *errorCode){
147     cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISCII));
148 
149     if(cnv->extraInfo != NULL) {
150         int32_t len=0;
151         UConverterDataISCII *converterData=(UConverterDataISCII *) cnv->extraInfo;
152         converterData->contextCharToUnicode=NO_CHAR_MARKER;
153         cnv->toUnicodeStatus = missingCharMarker;
154         converterData->contextCharFromUnicode=0x0000;
155         converterData->resetToDefaultToUnicode=FALSE;
156         /* check if the version requested is supported */
157         if((options & UCNV_OPTIONS_VERSION_MASK) < 9){
158             /* initialize state variables */
159             converterData->currentDeltaFromUnicode=converterData->currentDeltaToUnicode=
160             converterData->defDeltaToUnicode=
161                     (uint16_t)(lookupInitialData[options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);
162 
163             converterData->currentMaskFromUnicode = converterData->currentMaskToUnicode =
164             converterData->defMaskToUnicode=lookupInitialData[options & UCNV_OPTIONS_VERSION_MASK].maskEnum;
165 
166             converterData->isFirstBuffer=TRUE;
167             (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX);
168             len = (int32_t)uprv_strlen(converterData->name);
169             converterData->name[len]= (char)((options & UCNV_OPTIONS_VERSION_MASK) + '0');
170             converterData->name[len+1]=0;
171         }else{
172             uprv_free(cnv->extraInfo);
173             cnv->extraInfo = NULL;
174             *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
175         }
176 
177     }else{
178         *errorCode =U_MEMORY_ALLOCATION_ERROR;
179     }
180 }
181 static void
_ISCIIClose(UConverter * cnv)182 _ISCIIClose(UConverter *cnv){
183     if(cnv->extraInfo!=NULL) {
184         if(!cnv->isExtraLocal) {
185             uprv_free(cnv->extraInfo);
186         }
187         cnv->extraInfo=NULL;
188     }
189 }
190 
191 static const char*
_ISCIIgetName(const UConverter * cnv)192 _ISCIIgetName(const UConverter* cnv){
193     if(cnv->extraInfo){
194         UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
195         return myData->name;
196     }
197     return NULL;
198 }
199 
200 static void
_ISCIIReset(UConverter * cnv,UConverterResetChoice choice)201 _ISCIIReset(UConverter *cnv, UConverterResetChoice choice){
202     UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);
203     if(choice<=UCNV_RESET_TO_UNICODE) {
204         cnv->toUnicodeStatus = missingCharMarker;
205         cnv->mode=0;
206         data->currentDeltaToUnicode=data->defDeltaToUnicode;
207         data->currentMaskToUnicode = data->defMaskToUnicode;
208         data->contextCharToUnicode=NO_CHAR_MARKER;
209     }
210     if(choice!=UCNV_RESET_TO_UNICODE) {
211         cnv->fromUChar32=0x0000;
212         data->contextCharFromUnicode=0x00;
213         data->currentMaskFromUnicode=data->defMaskToUnicode;
214         data->currentDeltaFromUnicode=data->defDeltaToUnicode;
215         data->isFirstBuffer=TRUE;
216         data->resetToDefaultToUnicode=FALSE;
217     }
218 }
219 
/**
 * The values in the validity table are indexed by the lower 7 bits of a
 * code point, i.e. by its offset within a 128-code-point script block such
 * as 0x0900 - 0x097f. Each value is a bit set with this structure:
 *       ---------------------------------------------------------------
 *      | DEV   | PNJ   | GJR   | ORI   | BNG   | TLG   | MLM   | TML   |
 *      |       |       |       |       | ASM   | KND   |       |       |
 *       ---------------------------------------------------------------
 * If a code point is valid in a particular script
 * then that bit is turned on.
 *
 * Unicode does not distinguish between Bengali and Assamese, so one bit
 * represents both languages.
 *
 * Telugu and Kannada have the same code points except for Vocallic_RR, which
 * is special-cased, so one bit (KND_MASK) represents both languages.
 *
 * Each row is written as a sum of eight mask terms, one column per bit, with
 * ZERO standing in for a cleared bit.
 *
 * NOTE: the middle ("Valid") column of the per-row comments is not reliable;
 * in several rows it no longer matches the masks that follow it (for example
 * the row for 0x901 is annotated 0xb8 while its masks add up to 0xf8).
 * The mask expressions are authoritative.
 *
 * TODO: It is probably easier to understand and maintain to change this
 * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
 */

static const uint8_t validityTable[128] = {
/* This state table is tool generated please do not edit unless you know exactly what you are doing */
/* Note: This table was edited to mirror the Windows XP implementation */
/*ISCII:Valid:Unicode */
/*0xa0 : 0x00: 0x900  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xa1 : 0xb8: 0x901  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0xa2 : 0xfe: 0x902  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa3 : 0xbf: 0x903  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x00: 0x904  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xa4 : 0xff: 0x905  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa5 : 0xff: 0x906  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa6 : 0xff: 0x907  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa7 : 0xff: 0x908  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa8 : 0xff: 0x909  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa9 : 0xff: 0x90a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xaa : 0xfe: 0x90b  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x90c  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xae : 0x80: 0x90d  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xab : 0x87: 0x90e  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xac : 0xff: 0x90f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xad : 0xff: 0x910  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb2 : 0x80: 0x911  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xaf : 0x87: 0x912  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb0 : 0xff: 0x913  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb1 : 0xff: 0x914  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb3 : 0xff: 0x915  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb4 : 0xfe: 0x916  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb5 : 0xfe: 0x917  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb6 : 0xfe: 0x918  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb7 : 0xff: 0x919  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb8 : 0xff: 0x91a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb9 : 0xfe: 0x91b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xba : 0xff: 0x91c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbb : 0xfe: 0x91d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xbc : 0xff: 0x91e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbd : 0xff: 0x91f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbe : 0xfe: 0x920  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xbf : 0xfe: 0x921  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc0 : 0xfe: 0x922  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc1 : 0xff: 0x923  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc2 : 0xff: 0x924  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc3 : 0xfe: 0x925  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc4 : 0xfe: 0x926  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc5 : 0xfe: 0x927  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc6 : 0xff: 0x928  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc7 : 0x81: 0x929  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + TML_MASK ,
/*0xc8 : 0xff: 0x92a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc9 : 0xfe: 0x92b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xca : 0xfe: 0x92c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xcb : 0xfe: 0x92d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xcc : 0xfe: 0x92e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xcd : 0xff: 0x92f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xcf : 0xff: 0x930  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd0 : 0x87: 0x931  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd1 : 0xff: 0x932  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd2 : 0xb7: 0x933  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd3 : 0x83: 0x934  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
/*0xd4 : 0xff: 0x935  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd5 : 0xfe: 0x936  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xd6 : 0xbf: 0x937  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd7 : 0xff: 0x938  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd8 : 0xff: 0x939  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x00: 0x93A  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x93B  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe9 : 0xda: 0x93c  */ DEV_MASK + PNJ_MASK + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x93d  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xda : 0xff: 0x93e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdb : 0xff: 0x93f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdc : 0xff: 0x940  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdd : 0xff: 0x941  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xde : 0xff: 0x942  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdf : 0xbe: 0x943  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x944  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + BNG_MASK + KND_MASK + ZERO     + ZERO     ,
/*0xe3 : 0x80: 0x945  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe0 : 0x87: 0x946  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe1 : 0xff: 0x947  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe2 : 0xff: 0x948  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe7 : 0x80: 0x949  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe4 : 0x87: 0x94a  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe5 : 0xff: 0x94b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe6 : 0xff: 0x94c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe8 : 0xff: 0x94d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xec : 0x00: 0x94e  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xed : 0x00: 0x94f  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x950  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x951  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x952  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x953  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x954  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x955  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x956  */ ZERO     + ZERO     + ZERO     + ORI_MASK + ZERO     + KND_MASK + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x957  */ ZERO     + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x958  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x959  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95a  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95b  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95c  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95d  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95e  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xce : 0x98: 0x95f  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x960  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x961  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x962  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x963  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0xea : 0xf8: 0x964  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xf1 : 0xff: 0x966  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf2 : 0xff: 0x967  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf3 : 0xff: 0x968  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf4 : 0xff: 0x969  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf5 : 0xff: 0x96a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf6 : 0xff: 0x96b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf7 : 0xff: 0x96c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf8 : 0xff: 0x96d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf9 : 0xff: 0x96e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xfa : 0xff: 0x96f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x80: 0x970  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,

/*
 * The length of the array is 128 to provide values for 0x900..0x97f.
 * The last 15 entries for 0x971..0x97f of the validity table are all zero
 * because no Indic script uses such Unicode code points.
 * Only the first of them is written out below; the remaining elements have
 * no initializer and are therefore zero-initialized by the compiler.
 */
/*0x00 : 0x00: 0x9yz  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO
};
365 
/*
 * Unicode -> ISCII mapping, indexed by the offset of a code point from
 * 0x0900 (0x00..0x7f); eight entries per row, the row comment gives the
 * code point of the first entry.
 *
 * A value below 0x100 is a single ISCII byte. 0xFFFF (INVALID_CHAR) marks a
 * code point without a mapping. The remaining values appear to pack a
 * two-byte ISCII sequence, lead byte in the high byte: a base byte followed
 * by ISCII_NUKTA (0xE9) or ISCII_VOWEL_SIGN_E (0xE0), a doubled ISCII_DANDA
 * (0xEAEA), or EXT (0xF0) followed by an extension byte.
 */
static const uint16_t fromUnicodeTable[128]={
    /* 0x0900 */ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0xa4e0, 0x00a4, 0x00a5, 0x00a6,
    /* 0x0908 */ 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0xa6e9, 0x00ae, 0x00ab, 0x00ac,
    /* 0x0910 */ 0x00ad, 0x00b2, 0x00af, 0x00b0, 0x00b1, 0x00b3, 0x00b4, 0x00b5,
    /* 0x0918 */ 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd,
    /* 0x0920 */ 0x00be, 0x00bf, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5,
    /* 0x0928 */ 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd,
    /* 0x0930 */ 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6,
    /* 0x0938 */ 0x00d7, 0x00d8, 0xffff, 0xffff, 0x00e9, 0xeae9, 0x00da, 0x00db,
    /* 0x0940 */ 0x00dc, 0x00dd, 0x00de, 0x00df, 0xdfe9, 0x00e3, 0x00e0, 0x00e1,
    /* 0x0948 */ 0x00e2, 0x00e7, 0x00e4, 0x00e5, 0x00e6, 0x00e8, 0x00ec, 0x00ed,
    /* 0x0950 */ 0xa1e9, 0xffff, 0xf0b8, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, /* 0x0950: OM Symbol */
    /* 0x0958 */ 0xb3e9, 0xb4e9, 0xb5e9, 0xbae9, 0xbfe9, 0xc0e9, 0xc9e9, 0x00ce,
    /* 0x0960 */ 0xaae9, 0xa7e9, 0xdbe9, 0xdce9, 0x00ea, 0xeaea, 0x00f1, 0x00f2,
    /* 0x0968 */ 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa,
    /* 0x0970 */ 0xf0bf, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
    /* 0x0978 */ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff
};
/*
 * ISCII -> Unicode mapping, indexed by the ISCII byte (0x00..0xff); eight
 * entries per row, the row comment gives the byte value of the first entry.
 *
 * Bytes 0x00..0xa0 map to the code point with the same value. From 0xa1 on
 * the entries are code points in the 0x0900 block, except 0xd9 which maps
 * to U+200D. 0xFFFF marks a byte without a direct mapping; that includes
 * ATR (0xEF) and EXT (0xF0).
 */
static const uint16_t toUnicodeTable[256]={
    /* 0x00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
    /* 0x08 */ 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    /* 0x10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
    /* 0x18 */ 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
    /* 0x20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    /* 0x28 */ 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    /* 0x30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    /* 0x38 */ 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
    /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    /* 0x48 */ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
    /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    /* 0x58 */ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
    /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    /* 0x68 */ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    /* 0x78 */ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    /* 0xa0 */ 0x00a0, 0x0901, 0x0902, 0x0903, 0x0905, 0x0906, 0x0907, 0x0908,
    /* 0xa8 */ 0x0909, 0x090a, 0x090b, 0x090e, 0x090f, 0x0910, 0x090d, 0x0912,
    /* 0xb0 */ 0x0913, 0x0914, 0x0911, 0x0915, 0x0916, 0x0917, 0x0918, 0x0919,
    /* 0xb8 */ 0x091a, 0x091b, 0x091c, 0x091d, 0x091e, 0x091f, 0x0920, 0x0921,
    /* 0xc0 */ 0x0922, 0x0923, 0x0924, 0x0925, 0x0926, 0x0927, 0x0928, 0x0929,
    /* 0xc8 */ 0x092a, 0x092b, 0x092c, 0x092d, 0x092e, 0x092f, 0x095f, 0x0930,
    /* 0xd0 */ 0x0931, 0x0932, 0x0933, 0x0934, 0x0935, 0x0936, 0x0937, 0x0938,
    /* 0xd8 */ 0x0939, 0x200d, 0x093e, 0x093f, 0x0940, 0x0941, 0x0942, 0x0943,
    /* 0xe0 */ 0x0946, 0x0947, 0x0948, 0x0945, 0x094a, 0x094b, 0x094c, 0x0949,
    /* 0xe8 */ 0x094d, 0x093c, 0x0964, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
    /* 0xf0 */ 0xffff, 0x0966, 0x0967, 0x0968, 0x0969, 0x096a, 0x096b, 0x096c,
    /* 0xf8 */ 0x096d, 0x096e, 0x096f, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff
};
754 
/*
 * Two-byte ISCII sequences <byte> + ISCII_VOWEL_SIGN_E that the to-Unicode
 * converter replaces with a single code point.
 *
 * Row 0 is a header: [0][0] holds the number of rows (header included) and
 * bounds the linear search, which starts at row 1.
 * Every following row is { preceding ISCII byte, resulting Unicode value }.
 */
static const uint16_t vowelSignESpecialCases[][2]={
    { 2 /* row count, header included */ , 0x0000 },
    { 0x00A4 , 0x0904 }
};
759 
/*
 * Two-byte ISCII sequences <byte> + ISCII_NUKTA that the to-Unicode converter
 * replaces with a single code point instead of emitting base + nukta.
 *
 * Row 0 is a header: [0][0] holds the number of rows (header included) and
 * bounds the linear search, which starts at row 1.
 * Every following row is { preceding ISCII byte, resulting Unicode value }.
 */
static const uint16_t nuktaSpecialCases[][2]={
    { 16 /* row count, header included */ , 0x0000 },
    { 0x00A6 , 0x090C },
    { 0x00EA , 0x093D },
    { 0x00DF , 0x0944 },
    { 0x00A1 , 0x0950 },
    { 0x00B3 , 0x0958 },
    { 0x00B4 , 0x0959 },
    { 0x00B5 , 0x095A },
    { 0x00BA , 0x095B },
    { 0x00BF , 0x095C },
    { 0x00C0 , 0x095D },
    { 0x00C9 , 0x095E },
    { 0x00AA , 0x0960 },
    { 0x00A7 , 0x0961 },
    { 0x00DB , 0x0962 },
    { 0x00DC , 0x0963 }
};
778 
/*
 * WRITE_TO_TARGET_FROM_U: append one ISCII unit (one or two bytes) to the output.
 *
 *   args           - conversion arguments; (args)->source is the start of the
 *                    current source buffer and (args)->converter owns the
 *                    charErrorBuffer used for overflow
 *   offsets        - lvalue: offsets cursor, or NULL when offsets are not wanted;
 *                    advanced once per byte written to target
 *   source         - current source position, already advanced past the code
 *                    unit being converted (hence the "- 1" in the offset)
 *   target         - lvalue: output cursor, advanced per byte written
 *   targetLimit    - end of the output buffer
 *   targetByteUnit - value to write; <= 0xFF is written as one byte, otherwise
 *                    as high byte followed by low byte
 *   err            - set to U_BUFFER_OVERFLOW_ERROR when at least one byte had
 *                    to be stored in charErrorBuffer instead of target
 *
 * targetByteUnit is evaluated exactly once into a local, so expression
 * arguments such as (a | b) are handled like a function argument.
 * The macro stays a plain brace block (not do/while(0)) so that existing
 * call sites keep compiling unchanged.
 */
#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){       \
    const uint32_t fromUByteUnit_ = (uint32_t)(targetByteUnit);                                 \
    const int32_t fromUOffset_ = (int32_t)((source) - (args)->source-1);                        \
      /* write the targetUniChar  to target */                                                  \
    if((target) < (targetLimit)){                                                               \
        if(fromUByteUnit_ <= 0xFF){                                                             \
            *(target)++ = (uint8_t)(fromUByteUnit_);                                            \
            if(offsets){                                                                        \
                *(offsets)++ = fromUOffset_;                                                    \
            }                                                                                   \
        }else{                                                                                  \
            *(target)++ = (uint8_t)(fromUByteUnit_>>8);                                         \
            if(offsets){                                                                        \
                *(offsets)++ = fromUOffset_;                                                    \
            }                                                                                   \
            if((target) < (targetLimit)){                                                       \
                *(target)++ = (uint8_t)(fromUByteUnit_);                                        \
                if(offsets){                                                                    \
                    *(offsets)++ = fromUOffset_;                                                \
                }                                                                               \
            }else{                                                                              \
                /* no room for the low byte: park it in the overflow buffer */                  \
                (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =\
                            (uint8_t) (fromUByteUnit_);                                         \
                *(err) = U_BUFFER_OVERFLOW_ERROR;                                               \
            }                                                                                   \
        }                                                                                       \
    }else{                                                                                      \
        /* target already full: the whole unit goes to the overflow buffer */                   \
        if(fromUByteUnit_ & 0xFF00){                                                            \
            (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =    \
                        (uint8_t) (fromUByteUnit_ >>8);                                         \
        }                                                                                       \
        (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =        \
                        (uint8_t) (fromUByteUnit_);                                             \
        *(err) = U_BUFFER_OVERFLOW_ERROR;                                                       \
    }                                                                                           \
}
813 
/* Rules (Unicode input to the from-Unicode converter):
 *    Explicit Halant :
 *                      <HALANT> + <ZWNJ>   is written as ISCII HALANT + HALANT
 *    Soft Halant :
 *                      <HALANT> + <ZWJ>    is written as ISCII HALANT + NUKTA
 */
820 
821 static void
UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,UErrorCode * err)822 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
823                                                       UErrorCode * err){
824     const UChar *source = args->source;
825     const UChar *sourceLimit = args->sourceLimit;
826     unsigned char *target = (unsigned char *) args->target;
827     unsigned char *targetLimit = (unsigned char *) args->targetLimit;
828     int32_t* offsets = args->offsets;
829     uint32_t targetByteUnit = 0x0000;
830     UChar32 sourceChar = 0x0000;
831     UConverterDataISCII *converterData;
832     uint16_t newDelta=0;
833     uint16_t range = 0;
834     UBool deltaChanged = FALSE;
835 
836     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
837         *err = U_ILLEGAL_ARGUMENT_ERROR;
838         return;
839     }
840     /* initialize data */
841     converterData=(UConverterDataISCII*)args->converter->extraInfo;
842     newDelta=converterData->currentDeltaFromUnicode;
843     range = (uint16_t)(newDelta/DELTA);
844 
845     if((sourceChar = args->converter->fromUChar32)!=0) {
846         goto getTrail;
847     }
848 
849     /*writing the char to the output stream */
850     while(source < sourceLimit){
851 
852         targetByteUnit = missingCharMarker;
853 
854         sourceChar = *source++;
855 
856         /*check if input is in ASCII and C0 control codes range*/
857         if (sourceChar <= ASCII_END) {
858             WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
859             if(U_FAILURE(*err)){
860                 break;
861             }
862             if(sourceChar == LF){
863                 targetByteUnit = ATR<<8;
864                 targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
865                 args->converter->fromUnicodeStatus=sourceChar;
866                 /* now append ATR and language code */
867                 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
868                 if(U_FAILURE(*err)){
869                     break;
870                 }
871             }
872             continue;
873         }
874         switch(sourceChar){
875         case ZWNJ:
876             /* contextChar has HALANT */
877             if(converterData->contextCharFromUnicode){
878                 converterData->contextCharFromUnicode = 0x00;
879                 targetByteUnit = ISCII_HALANT;
880             }else{
881                 /* consume ZWNJ and continue */
882                 converterData->contextCharFromUnicode = 0x00;
883                 continue;
884             }
885             break;
886         case ZWJ:
887             /* contextChar has HALANT */
888             if(converterData->contextCharFromUnicode){
889                 targetByteUnit = ISCII_NUKTA;
890             }else{
891                 targetByteUnit =ISCII_INV;
892             }
893             converterData->contextCharFromUnicode = 0x00;
894             break;
895        default:
896             /* is the sourceChar in the INDIC_RANGE? */
897             if((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE){
898                 /* Danda and Double Danda are valid in Northern scripts.. since Unicode
899                  * does not include these codepoints in all Northern scrips we need to
900                  * filter them out
901                  */
902                 if(sourceChar!= DANDA && sourceChar != DOUBLE_DANDA){
903                     /* find out to which block the souceChar belongs*/
904                     range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
905                     newDelta =(uint16_t)(range*DELTA);
906 
907                     /* Now are we in the same block as the previous? */
908                     if(newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer){
909                         converterData->currentDeltaFromUnicode = newDelta;
910                         converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
911                         deltaChanged =TRUE;
912                         converterData->isFirstBuffer=FALSE;
913                     }
914                     /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
915                     /* now subtract the new delta from sourceChar*/
916                     sourceChar -= converterData->currentDeltaFromUnicode ;
917                 }
918 
919                 /* get the target byte unit */
920                 targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
921 
922                 /* is the code point valid in current script? */
923                 if((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0){
924                     /* Vocallic RR is assigne in ISCII Telugu and Unicode */
925                     if(converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) && sourceChar!=VOCALLIC_RR){
926                         targetByteUnit=missingCharMarker;
927                     }
928                 }
929 
930                 if(deltaChanged){
931                     /* we are in a script block which is different than
932                      * previous sourceChar's script block write ATR and language codes
933                      */
934                     uint16_t temp=0;
935                     temp =(uint16_t)(ATR<<8);
936                     temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
937                     /* reset */
938                     deltaChanged=FALSE;
939                     /* now append ATR and language code */
940                     WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
941                     if(U_FAILURE(*err)){
942                         break;
943                     }
944                 }
945             }
946             /* reset context char */
947             converterData->contextCharFromUnicode = 0x00;
948             break;
949         }
950 
951 
952         if(targetByteUnit != missingCharMarker){
953             if(targetByteUnit==ISCII_HALANT){
954                 converterData->contextCharFromUnicode = (UChar)targetByteUnit;
955             }
956              /* write targetByteUnit to target*/
957              WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
958              if(U_FAILURE(*err)){
959                   break;
960              }
961         }
962         else{
963             /* oops.. the code point is unassigned */
964             /*check if the char is a First surrogate*/
965             if(UTF_IS_SURROGATE(sourceChar)) {
966                 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
967 getTrail:
968                     /*look ahead to find the trail surrogate*/
969                     if(source <  sourceLimit) {
970                         /* test the following code unit */
971                         UChar trail= (*source);
972                         if(UTF_IS_SECOND_SURROGATE(trail)) {
973                             source++;
974                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
975                             *err =U_INVALID_CHAR_FOUND;
976                             /* convert this surrogate code point */
977                             /* exit this condition tree */
978                         } else {
979                             /* this is an unmatched lead code unit (1st surrogate) */
980                             /* callback(illegal) */
981                             *err=U_ILLEGAL_CHAR_FOUND;
982                         }
983                     } else {
984                         /* no more input */
985                         *err = U_ZERO_ERROR;
986                     }
987                 } else {
988                     /* this is an unmatched trail code unit (2nd surrogate) */
989                     /* callback(illegal) */
990                     *err=U_ILLEGAL_CHAR_FOUND;
991                 }
992             } else {
993                 /* callback(unassigned) for a BMP code point */
994                 *err = U_INVALID_CHAR_FOUND;
995             }
996 
997             args->converter->fromUChar32=sourceChar;
998             break;
999         }
1000     }/* end while(mySourceIndex<mySourceLength) */
1001 
1002     /*save the state and return */
1003     args->source = source;
1004     args->target = (char*)target;
1005 }
1006 
1007 static const int32_t lookupTable[][2]={
1008     { ZERO,       ZERO     },     /*DEFALT*/
1009     { ZERO,       ZERO     },     /*ROMAN*/
1010     { DEVANAGARI, DEV_MASK },
1011     { BENGALI,    BNG_MASK },
1012     { TAMIL,      TML_MASK },
1013     { TELUGU,     KND_MASK },
1014     { BENGALI,    BNG_MASK },
1015     { ORIYA,      ORI_MASK },
1016     { KANNADA,    KND_MASK },
1017     { MALAYALAM,  MLM_MASK },
1018     { GUJARATI,   GJR_MASK },
1019     { GURMUKHI,   PNJ_MASK }
1020 
1021 };
1022 
/*
 * WRITE_TO_TARGET_TO_U: emit one UTF-16 code unit.
 *
 *   targetUniChar - lvalue holding the Devanagari-layout value; it is MODIFIED:
 *                   delta is added unless the value is <= ASCII_END, ZWJ,
 *                   ZWNJ, DANDA or DOUBLE_DANDA
 *   target        - lvalue output cursor; advanced when there is room
 *   offsets       - lvalue offsets cursor or NULL; receives `offset` when the
 *                   unit is written to target
 *   delta         - offset of the current Indic script block
 *   err           - set to U_BUFFER_OVERFLOW_ERROR when the unit had to be
 *                   stored in the converter's UCharErrorBuffer instead
 *   source        - accepted for symmetry with the call sites; not used
 */
#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\
    /* script-independent values are passed through, the rest is shifted */              \
    if(!((targetUniChar) <= ASCII_END ||                                                 \
           (targetUniChar) == ZWJ ||                                                     \
           (targetUniChar) == ZWNJ ||                                                    \
           (targetUniChar) == DANDA ||                                                   \
           (targetUniChar) == DOUBLE_DANDA)){                                            \
                                                                                         \
           (targetUniChar) += (uint16_t)(delta);                                         \
    }                                                                                    \
    if((target) >= (args)->targetLimit){                                                 \
        /* target exhausted: keep the unit in the overflow buffer */                     \
        (args)->converter->UCharErrorBuffer[(args)->converter->UCharErrorBufferLength++] =\
            (UChar)(targetUniChar);                                                      \
        *(err) = U_BUFFER_OVERFLOW_ERROR;                                                \
    }else{                                                                               \
        *(target)++ = (UChar)(targetUniChar);                                            \
        if(offsets){                                                                     \
            *(offsets)++ = (int32_t)(offset);                                            \
        }                                                                                \
    }                                                                                    \
}
1045 
/*
 * GET_MAPPING: look up the Unicode value for one ISCII byte.
 *
 *   sourceChar    - ISCII byte (index into toUnicodeTable)
 *   targetUniChar - lvalue that receives the mapping, or missingCharMarker
 *                   when the mapped value is not valid for the current script
 *   data          - converter state; currentMaskToUnicode selects the script
 *                   bit in validityTable, currentDeltaToUnicode the script block
 *
 * Bytes <= ASCII_END are never filtered. For all other bytes the value is
 * rejected when validityTable has no bit for the current script, with one
 * exemption: Vocallic RR while the current script is Telugu.
 *
 * NOTE(review): validityTable is indexed with the low byte of the mapped
 * value; confirm the table is large enough for every low byte that
 * toUnicodeTable can produce (e.g. 0xFF from 0xFFFF entries).
 */
#define GET_MAPPING(sourceChar,targetUniChar,data){                                      \
    (targetUniChar) = toUnicodeTable[(sourceChar)] ;                                     \
    /* is the code point valid in current script? */                                     \
    if((sourceChar) > ASCII_END &&                                                       \
            (validityTable[(uint8_t)(targetUniChar)] & (data)->currentMaskToUnicode)==0){\
        /* Vocallic RR is assigned in ISCII Telugu and Unicode */                        \
        if((data)->currentDeltaToUnicode!=(TELUGU_DELTA) ||                              \
                    (targetUniChar)!=VOCALLIC_RR){                                       \
            (targetUniChar)=missingCharMarker;                                           \
        }                                                                                \
    }                                                                                    \
}
1058 
/***********
 *  Rules for ISCII to Unicode converter
 *  ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
 *  which has both precomposed and decomposed forms of characters,
 *  pre-context and post-context need to be considered.
 *
 *  Post context
 *  i)  ATR : Attribute code is used to declare the font and script switching.
 *      Currently we only switch scripts; font codes are consumed without generating an error
 *  ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
 *      obsolete characters
 *  Pre context
 *  i)  Halant: if preceded by a halant then it is an explicit halant
 *  ii) Nukta :
 *       a) if preceded by a halant then it is a soft halant
 *       b) if preceded by specific consonants and the ligatures have pre-composed
 *          characters in Unicode then convert to pre-composed characters
 *  iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
 *
 */
1079 
1080 static void
UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs * args,UErrorCode * err)1081 UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
1082                                                             UErrorCode* err){
1083     const char *source = ( char *) args->source;
1084     UChar *target = args->target;
1085     const char *sourceLimit = args->sourceLimit;
1086     const UChar* targetLimit = args->targetLimit;
1087     uint32_t targetUniChar = 0x0000;
1088     uint8_t sourceChar = 0x0000;
1089     UConverterDataISCII* data;
1090     UChar32* toUnicodeStatus=NULL;
1091     UChar* contextCharToUnicode = NULL;
1092     UBool found;
1093     int i;
1094 
1095     if ((args->converter == NULL) || (target < args->target) || (source < args->source)){
1096         *err = U_ILLEGAL_ARGUMENT_ERROR;
1097         return;
1098     }
1099 
1100     data = (UConverterDataISCII*)(args->converter->extraInfo);
1101     contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
1102     toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
1103 
1104     while(source<sourceLimit){
1105 
1106         targetUniChar = missingCharMarker;
1107 
1108         if(target < targetLimit){
1109             sourceChar = (unsigned char)*(source)++;
1110 
1111             /* look at the post-context preform special processing */
1112             if(*contextCharToUnicode==ATR){
1113 
1114                 /* If we have ATR in *contextCharToUnicode then we need to change our
1115                  * state to the Indic Script specified by sourceChar
1116                  */
1117 
1118                 /* check if the sourceChar is supported script range*/
1119                 if((uint8_t)(PNJ-sourceChar)<=PNJ-DEV){
1120                     data->currentDeltaToUnicode =
1121                         (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
1122                     data->currentMaskToUnicode =
1123                         (MaskEnum)lookupTable[sourceChar & 0x0F][1] ;
1124                 }
1125                 else if(sourceChar==DEF){
1126                     /* switch back to default */
1127                     data->currentDeltaToUnicode = data->defDeltaToUnicode;
1128                     data->currentMaskToUnicode = data->defMaskToUnicode;
1129                 }else{
1130                     if((sourceChar >= 0x21 && sourceChar <= 0x3F)){
1131                         /* these are display codes consume and continue */
1132                     }else{
1133                         *err =U_ILLEGAL_CHAR_FOUND;
1134                         /* reset */
1135                         *contextCharToUnicode=NO_CHAR_MARKER;
1136                         goto CALLBACK;
1137                     }
1138                 }
1139 
1140                 /* reset */
1141                 *contextCharToUnicode=NO_CHAR_MARKER;
1142 
1143                 continue;
1144 
1145             }else if(*contextCharToUnicode==EXT){
1146                 /* check if sourceChar is in 0xA1-0xEE range */
1147                 if((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)){
1148                     /* We currently support only Anudatta and Devanagari abbreviation sign */
1149                     if(sourceChar==0xBF || sourceChar == 0xB8){
1150                         targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
1151 
1152                         /* find out if the mapping is valid in this state */
1153                         if(validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode){
1154 
1155                             *contextCharToUnicode= NO_CHAR_MARKER;
1156 
1157                             /* write to target */
1158                             WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),
1159                                                  targetUniChar,data->currentDeltaToUnicode,err);
1160 
1161                             continue;
1162                         }
1163                     }
1164                     /* byte unit is unassigned */
1165                     targetUniChar = missingCharMarker;
1166                     *err= U_INVALID_CHAR_FOUND;
1167                 }else{
1168                     /* only 0xA1 - 0xEE are legal after EXT char */
1169                     *contextCharToUnicode= NO_CHAR_MARKER;
1170                     *err = U_ILLEGAL_CHAR_FOUND;
1171                 }
1172                 goto CALLBACK;
1173             }else if(*contextCharToUnicode==ISCII_INV){
1174                 if(sourceChar==ISCII_HALANT){
1175                     targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
1176                 }else{
1177                     targetUniChar = ZWJ;
1178                 }
1179 
1180                 /* write to target */
1181                 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),
1182                                                  targetUniChar,data->currentDeltaToUnicode,err);
1183                 /* reset */
1184                 *contextCharToUnicode=NO_CHAR_MARKER;
1185             }
1186 
1187             /* look at the pre-context and perform special processing */
1188             switch(sourceChar){
1189             case ISCII_INV:
1190             case EXT: /*falls through*/
1191             case ATR:
1192                 *contextCharToUnicode = (UChar)sourceChar;
1193 
1194                 if(*toUnicodeStatus != missingCharMarker){
1195 
1196                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),
1197                                     *toUnicodeStatus,data->currentDeltaToUnicode,err);
1198                     *toUnicodeStatus = missingCharMarker;
1199                 }
1200                 continue;
1201             case ISCII_DANDA:
1202                 /* handle double danda*/
1203                 if(*contextCharToUnicode== ISCII_DANDA){
1204                     targetUniChar = DOUBLE_DANDA;
1205                     /* clear the context */
1206                     *contextCharToUnicode = NO_CHAR_MARKER;
1207                     *toUnicodeStatus = missingCharMarker;
1208                 }else{
1209                     GET_MAPPING(sourceChar,targetUniChar,data);
1210                     *contextCharToUnicode = sourceChar;
1211                 }
1212                 break;
1213             case ISCII_HALANT:
1214                 /* handle explicit halant */
1215                 if(*contextCharToUnicode == ISCII_HALANT){
1216                     targetUniChar = ZWNJ;
1217                     /* clear the context */
1218                     *contextCharToUnicode = NO_CHAR_MARKER;
1219                 }else{
1220                     GET_MAPPING(sourceChar,targetUniChar,data);
1221                     *contextCharToUnicode = sourceChar;
1222                 }
1223                 break;
1224             case 0x0A:
1225                 /* fall through */
1226             case 0x0D:
1227                 data->resetToDefaultToUnicode = TRUE;
1228                 GET_MAPPING(sourceChar,targetUniChar,data);
1229                 *contextCharToUnicode = sourceChar;
1230                 break;
1231 
1232          	case ISCII_VOWEL_SIGN_E:
1233          		i=1;
1234          		found=FALSE;
1235          		for( ;i<vowelSignESpecialCases[0][0];i++){
1236          			if(vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode){
1237          				targetUniChar=vowelSignESpecialCases[i][1];
1238          				found=TRUE;
1239          				break;
1240          			}
1241          		}
1242          		if(found) {
1243                     /* find out if the mapping is valid in this state */
1244                     if(validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode){
1245                         /*targetUniChar += data->currentDeltaToUnicode ;*/
1246                         *contextCharToUnicode= NO_CHAR_MARKER;
1247                         *toUnicodeStatus = missingCharMarker;
1248                         break;
1249                     }
1250          		}
1251          		GET_MAPPING(sourceChar,targetUniChar,data);
1252                 *contextCharToUnicode = sourceChar;
1253                 break;
1254 
1255             case ISCII_NUKTA:
1256                 /* handle soft halant */
1257                 if(*contextCharToUnicode == ISCII_HALANT){
1258                     targetUniChar = ZWJ;
1259                     /* clear the context */
1260                     *contextCharToUnicode = NO_CHAR_MARKER;
1261                     break;
1262                 }else{
1263                     /* try to handle <CHAR> + ISCII_NUKTA special mappings */
1264                     i=1;
1265                     found =FALSE;
1266                     for( ;i<nuktaSpecialCases[0][0];i++){
1267                         if(nuktaSpecialCases[i][0]==(uint8_t)*contextCharToUnicode){
1268                             targetUniChar=nuktaSpecialCases[i][1];
1269                             found =TRUE;
1270                             break;
1271                         }
1272                     }
1273                     if(found){
1274                         /* find out if the mapping is valid in this state */
1275                         if(validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode){
1276                             /*targetUniChar += data->currentDeltaToUnicode ;*/
1277                             *contextCharToUnicode= NO_CHAR_MARKER;
1278                             *toUnicodeStatus = missingCharMarker;
1279                             break;
1280                         }
1281                         /* else fall through to default */
1282                     }
1283                     /* else fall through to default */
1284                 }
1285             default:
1286                 GET_MAPPING(sourceChar,targetUniChar,data);
1287                 *contextCharToUnicode = sourceChar;
1288                 break;
1289             }
1290 
1291 
1292             if(*toUnicodeStatus != missingCharMarker){
1293                 /* write the previously mapped codepoint */
1294                 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),
1295                                 *toUnicodeStatus,data->currentDeltaToUnicode,err);
1296                 *toUnicodeStatus = missingCharMarker;
1297             }
1298 
1299 
1300             if(targetUniChar != missingCharMarker ){
1301                 /* now save the targetUniChar for delayed write */
1302                 *toUnicodeStatus = (UChar) targetUniChar;
1303                 if(data->resetToDefaultToUnicode==TRUE){
1304                     data->currentDeltaToUnicode = data->defDeltaToUnicode;
1305                     data->currentMaskToUnicode = data->defMaskToUnicode;
1306                     data->resetToDefaultToUnicode=FALSE;
1307                 }
1308             }else{
1309 
1310                 /* we reach here only if targetUniChar == missingCharMarker
1311                  * so assign codes to reason and err
1312                  */
1313                 *err = U_INVALID_CHAR_FOUND;
1314 CALLBACK:
1315                 args->converter->toUBytes[0] = (uint8_t) sourceChar;
1316                 args->converter->toULength = 1;
1317                 break;
1318             }
1319 
1320         }
1321         else{
1322             *err =U_BUFFER_OVERFLOW_ERROR;
1323             break;
1324         }
1325     }
1326 
1327     if(U_SUCCESS(*err) && args->flush && source == sourceLimit) {
1328         /* end of the input stream */
1329         UConverter *cnv = args->converter;
1330 
1331         if(*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV){
1332             /* set toUBytes[] */
1333             cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
1334             cnv->toULength = 1;
1335 
1336             /* avoid looping on truncated sequences */
1337             *contextCharToUnicode = NO_CHAR_MARKER;
1338         }else{
1339             cnv->toULength = 0;
1340         }
1341 
1342         if(*toUnicodeStatus != missingCharMarker) {
1343             /* output a remaining target character */
1344             WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),
1345                             *toUnicodeStatus,data->currentDeltaToUnicode,err);
1346             *toUnicodeStatus = missingCharMarker;
1347         }
1348     }
1349 
1350     args->target = target;
1351     args->source = source;
1352 }
1353 
1354 /* structure for SafeClone calculations */
1355 struct cloneISCIIStruct
1356 {
1357     UConverter cnv;
1358     UConverterDataISCII mydata;
1359 };
1360 
1361 
1362 static UConverter *
_ISCII_SafeClone(const UConverter * cnv,void * stackBuffer,int32_t * pBufferSize,UErrorCode * status)1363 _ISCII_SafeClone(const UConverter *cnv,
1364               void *stackBuffer,
1365               int32_t *pBufferSize,
1366               UErrorCode *status)
1367 {
1368     struct cloneISCIIStruct * localClone;
1369     int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);
1370 
1371     if (U_FAILURE(*status)){
1372         return 0;
1373     }
1374 
1375     if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
1376         *pBufferSize = bufferSizeNeeded;
1377         return 0;
1378     }
1379 
1380     localClone = (struct cloneISCIIStruct *)stackBuffer;
1381     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
1382 
1383     uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
1384     localClone->cnv.extraInfo = &localClone->mydata;
1385     localClone->cnv.isExtraLocal = TRUE;
1386 
1387     return &localClone->cnv;
1388 }
1389 
1390 static void
_ISCIIGetUnicodeSet(const UConverter * cnv,const USetAdder * sa,UConverterUnicodeSet which,UErrorCode * pErrorCode)1391 _ISCIIGetUnicodeSet(const UConverter *cnv,
1392                     const USetAdder *sa,
1393                     UConverterUnicodeSet which,
1394                     UErrorCode *pErrorCode)
1395 {
1396     int32_t idx, script;
1397     uint8_t mask;
1398 
1399     /* Since all ISCII versions allow switching to other ISCII
1400     scripts, we add all roundtrippable characters to this set. */
1401     sa->addRange(sa->set, 0, ASCII_END);
1402     for (script = DEVANAGARI; script <= MALAYALAM; script++) {
1403         mask = (uint8_t)(lookupInitialData[script].maskEnum);
1404         for (idx = 0; idx < DELTA; idx++) {
1405             if (validityTable[idx] & mask) {
1406                 sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
1407             }
1408         }
1409     }
1410     sa->add(sa->set, DANDA);
1411     sa->add(sa->set, DOUBLE_DANDA);
1412     sa->add(sa->set, ZWNJ);
1413     sa->add(sa->set, ZWJ);
1414 }
1415 
1416 static const UConverterImpl _ISCIIImpl={
1417 
1418     UCNV_ISCII,
1419 
1420     NULL,
1421     NULL,
1422 
1423     _ISCIIOpen,
1424     _ISCIIClose,
1425     _ISCIIReset,
1426 
1427     UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
1428     UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
1429     UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
1430     UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
1431     NULL,
1432 
1433     NULL,
1434     _ISCIIgetName,
1435     NULL,
1436     _ISCII_SafeClone,
1437     _ISCIIGetUnicodeSet
1438 };
1439 
1440 static const UConverterStaticData _ISCIIStaticData={
1441     sizeof(UConverterStaticData),
1442         "ISCII",
1443          0,
1444          UCNV_IBM,
1445          UCNV_ISCII,
1446          1,
1447          4,
1448         { 0x1a, 0, 0, 0 },
1449         0x1,
1450         FALSE,
1451         FALSE,
1452         0x0,
1453         0x0,
1454         { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
1455 
1456 };
1457 
1458 const UConverterSharedData _ISCIIData={
1459     sizeof(UConverterSharedData),
1460         ~((uint32_t) 0),
1461         NULL,
1462         NULL,
1463         &_ISCIIStaticData,
1464         FALSE,
1465         &_ISCIIImpl,
1466         0
1467 };
1468 
#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION */
1470