• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /** \file
2  * Implementation of the ANTLR3 string and string factory classes
3  */
4 
5 // [The "BSD licence"]
6 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
7 // http://www.temporal-wave.com
8 // http://www.linkedin.com/in/jimidle
9 //
10 // All rights reserved.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions
14 // are met:
15 // 1. Redistributions of source code must retain the above copyright
16 //    notice, this list of conditions and the following disclaimer.
17 // 2. Redistributions in binary form must reproduce the above copyright
18 //    notice, this list of conditions and the following disclaimer in the
19 //    documentation and/or other materials provided with the distribution.
20 // 3. The name of the author may not be used to endorse or promote products
21 //    derived from this software without specific prior written permission.
22 //
23 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 
34 #include    <antlr3string.h>
35 
36 /* Factory API
37  */
38 static    pANTLR3_STRING    newRaw8	(pANTLR3_STRING_FACTORY factory);
39 static    pANTLR3_STRING    newRawUTF16	(pANTLR3_STRING_FACTORY factory);
40 static    pANTLR3_STRING    newSize8	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
41 static    pANTLR3_STRING    newSizeUTF16	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
42 static    pANTLR3_STRING    newPtr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
43 static    pANTLR3_STRING    newPtrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
44 static    pANTLR3_STRING    newPtrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
45 static    pANTLR3_STRING    newStr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
46 static    pANTLR3_STRING    newStrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
47 static    pANTLR3_STRING    newStrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
48 static    void		    destroy	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
49 static    pANTLR3_STRING    printable8	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
50 static    pANTLR3_STRING    printableUTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
51 static    void		    closeFactory(pANTLR3_STRING_FACTORY factory);
52 
53 /* String API
54  */
55 static    pANTLR3_UINT8	    set8	(pANTLR3_STRING string, const char * chars);
56 static    pANTLR3_UINT8	    setUTF16_8	(pANTLR3_STRING string, const char * chars);
57 static    pANTLR3_UINT8	    setUTF16_UTF16	(pANTLR3_STRING string, const char * chars);
58 static    pANTLR3_UINT8	    append8	(pANTLR3_STRING string, const char * newbit);
59 static    pANTLR3_UINT8	    appendUTF16_8	(pANTLR3_STRING string, const char * newbit);
60 static    pANTLR3_UINT8	    appendUTF16_UTF16	(pANTLR3_STRING string, const char * newbit);
61 static	  pANTLR3_UINT8	    insert8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
62 static	  pANTLR3_UINT8	    insertUTF16_8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
63 static	  pANTLR3_UINT8	    insertUTF16_UTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
64 
65 static    pANTLR3_UINT8	    setS	(pANTLR3_STRING string, pANTLR3_STRING chars);
66 static    pANTLR3_UINT8	    appendS	(pANTLR3_STRING string, pANTLR3_STRING newbit);
67 static	  pANTLR3_UINT8	    insertS	(pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit);
68 
69 static    pANTLR3_UINT8	    addc8	(pANTLR3_STRING string, ANTLR3_UINT32 c);
70 static    pANTLR3_UINT8	    addcUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 c);
71 static    pANTLR3_UINT8	    addi8	(pANTLR3_STRING string, ANTLR3_INT32 i);
72 static    pANTLR3_UINT8	    addiUTF16	(pANTLR3_STRING string, ANTLR3_INT32 i);
73 static	  pANTLR3_UINT8	    inserti8	(pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
74 static	  pANTLR3_UINT8	    insertiUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
75 
76 static    ANTLR3_UINT32     compare8	(pANTLR3_STRING string, const char * compStr);
77 static    ANTLR3_UINT32     compareUTF16_8	(pANTLR3_STRING string, const char * compStr);
78 static    ANTLR3_UINT32     compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr);
79 static    ANTLR3_UINT32     compareS	(pANTLR3_STRING string, pANTLR3_STRING compStr);
80 static    ANTLR3_UCHAR      charAt8	(pANTLR3_STRING string, ANTLR3_UINT32 offset);
81 static    ANTLR3_UCHAR      charAtUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 offset);
82 static    pANTLR3_STRING    subString8	(pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
83 static    pANTLR3_STRING    subStringUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
84 static	  ANTLR3_INT32	    toInt32_8	(pANTLR3_STRING string);
85 static	  ANTLR3_INT32	    toInt32_UTF16  (pANTLR3_STRING string);
86 static	  pANTLR3_STRING    to8_8		(pANTLR3_STRING string);
87 static	  pANTLR3_STRING    to8_UTF16		(pANTLR3_STRING string);
88 static	pANTLR3_STRING		toUTF8_8	(pANTLR3_STRING string);
89 static	pANTLR3_STRING		toUTF8_UTF16	(pANTLR3_STRING string);
90 
91 /* Local helpers
92  */
93 static	void			stringInit8	(pANTLR3_STRING string);
94 static	void			stringInitUTF16	(pANTLR3_STRING string);
95 static	void	ANTLR3_CDECL	stringFree	(pANTLR3_STRING string);
96 
97 ANTLR3_API pANTLR3_STRING_FACTORY
antlr3StringFactoryNew(ANTLR3_UINT32 encoding)98 antlr3StringFactoryNew(ANTLR3_UINT32 encoding)
99 {
100 	pANTLR3_STRING_FACTORY  factory;
101 
102 	/* Allocate memory
103 	*/
104 	factory	= (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY));
105 
106 	if	(factory == NULL)
107 	{
108 		return	NULL;
109 	}
110 
111 	/* Now we make a new list to track the strings.
112 	*/
113 	factory->strings	= antlr3VectorNew(0);
114 	factory->index	= 0;
115 
116 	if	(factory->strings == NULL)
117 	{
118 		ANTLR3_FREE(factory);
119 		return	NULL;
120 	}
121 
122     // Install the API
123     //
124     // TODO: These encodings need equivalent functions to
125     // UTF16 and 8Bit if I am going to support those encodings in the STRING stuff.
126 	// The STRING stuff was intended as a quick and dirty hack for people that did not
127 	// want to worry about memory and performance very much, but nobody ever reads the
128 	// notes or comments or uses the email list search. I want to discourage using these
129 	// interfaces as it is much more efficient to use the pointers within the tokens
130 	// directly, so I am not implementing the string stuff for the newer encodings.
131     // We install the standard 8 and 16 bit functions for the UTF 8 and 16 but they
132 	// will not be useful beyond returning the text.
133 	//
134     switch(encoding)
135     {
136 		case    ANTLR3_ENC_UTF32:
137 			break;
138 
139 		case    ANTLR3_ENC_UTF32BE:
140 			break;
141 
142 		case    ANTLR3_ENC_UTF32LE:
143 			break;
144 
145 		case    ANTLR3_ENC_UTF16BE:
146 		case    ANTLR3_ENC_UTF16LE:
147 		case    ANTLR3_ENC_UTF16:
148 
149 			factory->newRaw	    =  newRawUTF16;
150 			factory->newSize	=  newSizeUTF16;
151 			factory->newPtr	    =  newPtrUTF16_UTF16;
152 			factory->newPtr8	=  newPtrUTF16_8;
153 			factory->newStr	    =  newStrUTF16_UTF16;
154 			factory->newStr8	=  newStrUTF16_8;
155 			factory->printable	=  printableUTF16;
156 			factory->destroy	=  destroy;
157 			factory->close	    =  closeFactory;
158 			break;
159 
160 		case    ANTLR3_ENC_UTF8:
161 		case    ANTLR3_ENC_EBCDIC:
162 		case    ANTLR3_ENC_8BIT:
163 		default:
164 
165 			factory->newRaw	    =  newRaw8;
166 			factory->newSize	=  newSize8;
167 			factory->newPtr	    =  newPtr8;
168 			factory->newPtr8	=  newPtr8;
169 			factory->newStr	    =  newStr8;
170 			factory->newStr8	=  newStr8;
171 			factory->printable	=  printable8;
172 			factory->destroy	=  destroy;
173 			factory->close	    =  closeFactory;
174 			break;
175     }
176 	return  factory;
177 }
178 
179 
180 /**
181  *
182  * \param factory
183  * \return
184  */
185 static    pANTLR3_STRING
newRaw8(pANTLR3_STRING_FACTORY factory)186 newRaw8	(pANTLR3_STRING_FACTORY factory)
187 {
188     pANTLR3_STRING  string;
189 
190     string  = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
191 
192     if	(string == NULL)
193     {
194 		return	NULL;
195     }
196 
197     /* Structure is allocated, now fill in the API etc.
198      */
199     stringInit8(string);
200     string->factory = factory;
201 
202     /* Add the string into the allocated list
203      */
204     factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
205     string->index   = factory->index++;
206 
207     return string;
208 }
209 /**
210  *
211  * \param factory
212  * \return
213  */
214 static    pANTLR3_STRING
newRawUTF16(pANTLR3_STRING_FACTORY factory)215 newRawUTF16	(pANTLR3_STRING_FACTORY factory)
216 {
217     pANTLR3_STRING  string;
218 
219     string  = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
220 
221     if	(string == NULL)
222     {
223 		return	NULL;
224     }
225 
226     /* Structure is allocated, now fill in the API etc.
227      */
228     stringInitUTF16(string);
229     string->factory = factory;
230 
231     /* Add the string into the allocated list
232      */
233     factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
234     string->index   = factory->index++;
235 
236     return string;
237 }
238 static
stringFree(pANTLR3_STRING string)239 void	ANTLR3_CDECL stringFree  (pANTLR3_STRING string)
240 {
241     /* First free the string itself if there was anything in it
242      */
243     if	(string->chars)
244     {
245 	ANTLR3_FREE(string->chars);
246     }
247 
248     /* Now free the space for this string
249      */
250     ANTLR3_FREE(string);
251 
252     return;
253 }
254 /**
255  *
256  * \param string
257  * \return
258  */
259 static	void
stringInit8(pANTLR3_STRING string)260 stringInit8  (pANTLR3_STRING string)
261 {
262     string->len			= 0;
263     string->size		= 0;
264     string->chars		= NULL;
265     string->encoding	= ANTLR3_ENC_8BIT ;
266 
267     /* API for 8 bit strings*/
268 
269     string->set		= set8;
270     string->set8	= set8;
271     string->append	= append8;
272     string->append8	= append8;
273     string->insert	= insert8;
274     string->insert8	= insert8;
275     string->addi	= addi8;
276     string->inserti	= inserti8;
277     string->addc	= addc8;
278     string->charAt	= charAt8;
279     string->compare	= compare8;
280     string->compare8	= compare8;
281     string->subString	= subString8;
282     string->toInt32	= toInt32_8;
283     string->to8		= to8_8;
284     string->toUTF8	= toUTF8_8;
285     string->compareS	= compareS;
286     string->setS	= setS;
287     string->appendS	= appendS;
288     string->insertS	= insertS;
289 
290 }
291 /**
292  *
293  * \param string
294  * \return
295  */
296 static	void
stringInitUTF16(pANTLR3_STRING string)297 stringInitUTF16  (pANTLR3_STRING string)
298 {
299     string->len		= 0;
300     string->size	= 0;
301     string->chars	= NULL;
302     string->encoding	= ANTLR3_ENC_8BIT;
303 
304     /* API for UTF16 strings */
305 
306     string->set		= setUTF16_UTF16;
307     string->set8	= setUTF16_8;
308     string->append	= appendUTF16_UTF16;
309     string->append8	= appendUTF16_8;
310     string->insert	= insertUTF16_UTF16;
311     string->insert8	= insertUTF16_8;
312     string->addi	= addiUTF16;
313     string->inserti	= insertiUTF16;
314     string->addc	= addcUTF16;
315     string->charAt	= charAtUTF16;
316     string->compare	= compareUTF16_UTF16;
317     string->compare8	= compareUTF16_8;
318     string->subString	= subStringUTF16;
319     string->toInt32	= toInt32_UTF16;
320     string->to8		= to8_UTF16;
321     string->toUTF8	= toUTF8_UTF16;
322 
323     string->compareS	= compareS;
324     string->setS	= setS;
325     string->appendS	= appendS;
326     string->insertS	= insertS;
327 }
328 /**
329  *
330  * \param string
331  * \return
332  * TODO: Implement UTF-8
333  */
334 static	void
stringInitUTF8(pANTLR3_STRING string)335 stringInitUTF8  (pANTLR3_STRING string)
336 {
337     string->len	    = 0;
338     string->size    = 0;
339     string->chars   = NULL;
340 
341     /* API */
342 
343 }
344 
345 // Convert an 8 bit string into a UTF8 representation, which is in fact just the string itself
346 // a memcpy as we make no assumptions about the 8 bit encoding.
347 //
348 static	pANTLR3_STRING
toUTF8_8(pANTLR3_STRING string)349 toUTF8_8	(pANTLR3_STRING string)
350 {
351 	return string->factory->newPtr(string->factory, (pANTLR3_UINT8)(string->chars), string->len);
352 }
353 
354 // Convert a UTF16 string into a UTF8 representation using the Unicode.org
355 // supplied C algorithms, which are now contained within the ANTLR3 C runtime
356 // as permitted by the Unicode license (within the source code antlr3convertutf.c/.h
357 // UCS2 has the same encoding as UTF16 so we can use UTF16 converter.
358 //
359 static	pANTLR3_STRING
toUTF8_UTF16(pANTLR3_STRING string)360 toUTF8_UTF16	(pANTLR3_STRING string)
361 {
362 
363     UTF8	      * outputEnd;
364     UTF16	      * inputEnd;
365     pANTLR3_STRING	utf8String;
366 
367     ConversionResult	cResult;
368 
369     // Allocate the output buffer, which needs to accommodate potentially
370     // 3X (in bytes) the input size (in chars).
371     //
372     utf8String	= string->factory->newStr8(string->factory, (pANTLR3_UINT8)"");
373 
374     if	(utf8String != NULL)
375     {
376         // Free existing allocation
377         //
378         ANTLR3_FREE(utf8String->chars);
379 
380         // Reallocate according to maximum expected size
381         //
382         utf8String->size	= string->len *3;
383         utf8String->chars	= (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1);
384 
385         if	(utf8String->chars != NULL)
386         {
387             inputEnd  = (UTF16 *)	(string->chars);
388             outputEnd = (UTF8 *)	(utf8String->chars);
389 
390             // Call the Unicode converter
391             //
392             cResult =  ConvertUTF16toUTF8
393                 (
394                 (const UTF16**)&inputEnd,
395                 ((const UTF16 *)(string->chars)) + string->len,
396                 &outputEnd,
397                 outputEnd + utf8String->size - 1,
398                 lenientConversion
399                 );
400 
401             // We don't really care if things failed or not here, we just converted
402             // everything that was vaguely possible and stopped when it wasn't. It is
403             // up to the grammar programmer to verify that the input is sensible.
404             //
405             utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars);
406 
407             *(outputEnd+1) = '\0';		// Always null terminate
408         }
409     }
410     return utf8String;
411 }
412 
413 /**
414  * Creates a new string with enough capacity for size 8 bit characters plus a terminator.
415  *
416  * \param[in] factory - Pointer to the string factory that owns strings
417  * \param[in] size - In characters
418  * \return pointer to the new string.
419  */
420 static    pANTLR3_STRING
newSize8(pANTLR3_STRING_FACTORY factory,ANTLR3_UINT32 size)421 newSize8	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
422 {
423     pANTLR3_STRING  string;
424 
425     string  = factory->newRaw(factory);
426 
427     if	(string == NULL)
428     {
429         return	string;
430     }
431 
432     /* Always add one more byte for a terminator ;-)
433     */
434     string->chars	= (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1)));
435 	if (string->chars != NULL)
436     {
437 		*(string->chars)	= '\0';
438 		string->size	= size + 1;
439 	}
440 
441     return string;
442 }
443 /**
444  * Creates a new string with enough capacity for size UTF16 characters plus a terminator.
445  *
446  * \param[in] factory - Pointer to the string factory that owns strings
447  * \param[in] size - In characters (count double for surrogate pairs!!!)
448  * \return pointer to the new string.
449  */
450 static    pANTLR3_STRING
newSizeUTF16(pANTLR3_STRING_FACTORY factory,ANTLR3_UINT32 size)451 newSizeUTF16	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
452 {
453     pANTLR3_STRING  string;
454 
455     string  = factory->newRaw(factory);
456 
457     if	(string == NULL)
458     {
459         return	string;
460     }
461 
462     /* Always add one more byte for a terminator ;-)
463     */
464     string->chars	= (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1)));
465     if (string->chars != NULL)
466 	{
467 		*(string->chars)	= '\0';
468 		string->size	= size+1;	/* Size is always in characters, as is len */
469 	}
470 
471     return string;
472 }
473 
474 /** Creates a new 8 bit string initialized with the 8 bit characters at the
475  *  supplied ptr, of pre-determined size.
476  * \param[in] factory - Pointer to the string factory that owns the strings
477  * \param[in] ptr - Pointer to 8 bit encoded characters
478  * \return pointer to the new string
479  */
480 static    pANTLR3_STRING
newPtr8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)481 newPtr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
482 {
483 	pANTLR3_STRING  string;
484 
485 	string  = factory->newSize(factory, size);
486 
487 	if	(string == NULL)
488 	{
489 		return	NULL;
490 	}
491 
492 	if	(size <= 0)
493 	{
494 		return	string;
495 	}
496 
497 	if	(ptr != NULL)
498 	{
499 		ANTLR3_MEMMOVE(string->chars, (const void *)ptr, size);
500 		*(string->chars + size) = '\0';	    /* Terminate, these strings are usually used for Token streams and printing etc.	*/
501 		string->len = size;
502 	}
503 
504 	return  string;
505 }
506 
507 /** Creates a new UTF16 string initialized with the 8 bit characters at the
508  *  supplied 8 bit character ptr, of pre-determined size.
509  * \param[in] factory - Pointer to the string factory that owns the strings
510  * \param[in] ptr - Pointer to 8 bit encoded characters
511  * \return pointer to the new string
512  */
513 static    pANTLR3_STRING
newPtrUTF16_8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)514 newPtrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
515 {
516 	pANTLR3_STRING  string;
517 
518 	/* newSize accepts size in characters, not bytes
519 	*/
520 	string  = factory->newSize(factory, size);
521 
522 	if	(string == NULL)
523 	{
524 		return	NULL;
525 	}
526 
527 	if	(size <= 0)
528 	{
529 		return	string;
530 	}
531 
532 	if	(ptr != NULL)
533 	{
534 		pANTLR3_UINT16	out;
535 		ANTLR3_INT32    inSize;
536 
537 		out = (pANTLR3_UINT16)(string->chars);
538 		inSize	= size;
539 
540 		while	(inSize-- > 0)
541 		{
542 			*out++ = (ANTLR3_UINT16)(*ptr++);
543 		}
544 
545 		/* Terminate, these strings are usually used for Token streams and printing etc.
546 		*/
547 		*(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
548 
549 		string->len = size;
550 	}
551 
552 	return  string;
553 }
554 
555 /** Creates a new UTF16 string initialized with the UTF16 characters at the
556  *  supplied ptr, of pre-determined size.
557  * \param[in] factory - Pointer to the string factory that owns the strings
558  * \param[in] ptr - Pointer to UTF16 encoded characters
559  * \return pointer to the new string
560  */
561 static    pANTLR3_STRING
newPtrUTF16_UTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)562 newPtrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
563 {
564 	pANTLR3_STRING  string;
565 
566 	string  = factory->newSize(factory, size);
567 
568 	if	(string == NULL)
569 	{
570 		return	NULL;
571 	}
572 
573 	if	(size <= 0)
574 	{
575 		return	string;
576 	}
577 
578 	if	(ptr != NULL)
579 	{
580 		ANTLR3_MEMMOVE(string->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16)));
581 
582 		/* Terminate, these strings are usually used for Token streams and printing etc.
583 		*/
584 		*(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
585 		string->len = size;
586 	}
587 
588 	return  string;
589 }
590 
591 /** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer.
592  * \param[in] factory - Pointer to the string factory that owns strings.
593  * \param[in] ptr - Pointer to the 8 bit encoded string
594  * \return Pointer to the newly initialized string
595  */
596 static    pANTLR3_STRING
newStr8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)597 newStr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
598 {
599     return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
600 }
601 
602 /** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer.
603  * \param[in] factory - Pointer to the string factory that owns strings.
604  * \param[in] ptr - Pointer to the 8 bit encoded string
605  * \return POinter to the newly initialized string
606  */
607 static    pANTLR3_STRING
newStrUTF16_8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)608 newStrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
609 {
610     return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
611 }
612 
613 /** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer.
614  * \param[in] factory - Pointer to the string factory that owns strings.
615  * \param[in] ptr - Pointer to the UTF16 encoded string
616  * \return Pointer to the newly initialized string
617  */
618 static    pANTLR3_STRING
newStrUTF16_UTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)619 newStrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
620 {
621     pANTLR3_UINT16  in;
622     ANTLR3_UINT32   count;
623 
624     /** First, determine the length of the input string
625      */
626     in	    = (pANTLR3_UINT16)ptr;
627     count   = 0;
628 
629     while   (*in++ != '\0')
630     {
631 		count++;
632     }
633     return factory->newPtr(factory, ptr, count);
634 }
635 
636 static    void
destroy(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING string)637 destroy	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string)
638 {
639     // Record which string we are deleting
640     //
641     ANTLR3_UINT32 strIndex = string->index;
642 
643     // Ensure that the string was not factory made, or we would try
644     // to delete memory that wasn't allocated outside the factory
645     // block.
646     // Remove the specific indexed string from the vector
647     //
648     factory->strings->del(factory->strings, strIndex);
649 
650     // One less string in the vector, so decrement the factory index
651     // so that the next string allocated is indexed correctly with
652     // respect to the vector.
653     //
654     factory->index--;
655 
656     // Now we have to reindex the strings in the vector that followed
657     // the one we just deleted. We only do this if the one we just deleted
658     // was not the last one.
659     //
660     if  (strIndex< factory->index)
661     {
662         // We must reindex the strings after the one we just deleted.
663         // The one that follows the one we just deleted is also out
664         // of whack, so we start there.
665         //
666         ANTLR3_UINT32 i;
667 
668         for (i = strIndex; i < factory->index; i++)
669         {
670             // Renumber the entry
671             //
672             ((pANTLR3_STRING)(factory->strings->elements[i].element))->index = i;
673         }
674     }
675 
676     // The string has been destroyed and the elements of the factory are reindexed.
677     //
678 
679 }
680 
681 static    pANTLR3_STRING
printable8(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING instr)682 printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
683 {
684     pANTLR3_STRING  string;
685 
686     /* We don't need to be too efficient here, this is mostly for error messages and so on.
687      */
688     pANTLR3_UINT8   scannedText;
689     ANTLR3_UINT32   i;
690 
691     /* Assume we need as much as twice as much space to parse out the control characters
692      */
693     string  = factory->newSize(factory, instr->len *2 + 1);
694 
695     /* Scan through and replace unprintable (in terms of this routine)
696      * characters
697      */
698     scannedText = string->chars;
699 
700     for	(i = 0; i < instr->len; i++)
701     {
702 		if (*(instr->chars + i) == '\n')
703 		{
704 			*scannedText++ = '\\';
705 			*scannedText++ = 'n';
706 		}
707 		else if (*(instr->chars + i) == '\r')
708 		{
709 			*scannedText++ = '\\';
710 			*scannedText++ = 'r';
711 		}
712 		else if	(!isprint(*(instr->chars +i)))
713 		{
714 			*scannedText++ = '?';
715 		}
716 		else
717 		{
718 			*scannedText++ = *(instr->chars + i);
719 		}
720     }
721     *scannedText  = '\0';
722 
723     string->len	= (ANTLR3_UINT32)(scannedText - string->chars);
724 
725     return  string;
726 }
727 
728 static    pANTLR3_STRING
printableUTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING instr)729 printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
730 {
731     pANTLR3_STRING  string;
732 
733     /* We don't need to be too efficient here, this is mostly for error messages and so on.
734      */
735     pANTLR3_UINT16  scannedText;
736     pANTLR3_UINT16  inText;
737     ANTLR3_UINT32   i;
738     ANTLR3_UINT32   outLen;
739 
740     /* Assume we need as much as twice as much space to parse out the control characters
741      */
742     string  = factory->newSize(factory, instr->len *2 + 1);
743 
744     /* Scan through and replace unprintable (in terms of this routine)
745      * characters
746      */
747     scannedText = (pANTLR3_UINT16)(string->chars);
748     inText	= (pANTLR3_UINT16)(instr->chars);
749     outLen	= 0;
750 
751     for	(i = 0; i < instr->len; i++)
752     {
753 		if (*(inText + i) == '\n')
754 		{
755 			*scannedText++   = '\\';
756 			*scannedText++   = 'n';
757 			outLen	    += 2;
758 		}
759 		else if (*(inText + i) == '\r')
760 		{
761 			*scannedText++   = '\\';
762 			*scannedText++   = 'r';
763 			outLen	    += 2;
764 		}
765 		else if	(!isprint(*(inText +i)))
766 		{
767 			*scannedText++ = '?';
768 			outLen++;
769 		}
770 		else
771 		{
772 			*scannedText++ = *(inText + i);
773 			outLen++;
774 		}
775     }
776     *scannedText  = '\0';
777 
778     string->len	= outLen;
779 
780     return  string;
781 }
782 
783 /** Fascist Capitalist Pig function created
784  *  to oppress the workers comrade.
785  */
786 static    void
closeFactory(pANTLR3_STRING_FACTORY factory)787 closeFactory	(pANTLR3_STRING_FACTORY factory)
788 {
789     /* Delete the vector we were tracking the strings with, this will
790      * causes all the allocated strings to be deallocated too
791      */
792     factory->strings->free(factory->strings);
793 
794     /* Delete the space for the factory itself
795      */
796     ANTLR3_FREE((void *)factory);
797 }
798 
799 static    pANTLR3_UINT8
append8(pANTLR3_STRING string,const char * newbit)800 append8	(pANTLR3_STRING string, const char * newbit)
801 {
802     ANTLR3_UINT32 len;
803 
804     len	= (ANTLR3_UINT32)strlen(newbit);
805 
806     if	(string->size < (string->len + len + 1))
807     {
808 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
809 		if (newAlloc == NULL)
810 		{
811 			return NULL;
812 		}
813 		string->chars	= newAlloc;
814 		string->size	= string->len + len + 1;
815     }
816 
817     /* Note we copy one more byte than the strlen in order to get the trailing
818      */
819     ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1));
820     string->len	+= len;
821 
822     return string->chars;
823 }
824 
825 static    pANTLR3_UINT8
appendUTF16_8(pANTLR3_STRING string,const char * newbit)826 appendUTF16_8	(pANTLR3_STRING string, const char * newbit)
827 {
828     ANTLR3_UINT32   len;
829     pANTLR3_UINT16  apPoint;
830     ANTLR3_UINT32   count;
831 
832     len	= (ANTLR3_UINT32)strlen(newbit);
833 
834     if	(string->size < (string->len + len + 1))
835     {
836 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(string->len + len + 1))));
837 		if (newAlloc == NULL)
838 		{
839 			return NULL;
840 		}
841 		string->chars	= newAlloc;
842 		string->size	= string->len + len + 1;
843     }
844 
845     apPoint = ((pANTLR3_UINT16)string->chars) + string->len;
846     string->len	+= len;
847 
848     for	(count = 0; count < len; count++)
849     {
850 		*apPoint++   = *(newbit + count);
851     }
852     *apPoint = '\0';
853 
854     return string->chars;
855 }
856 
857 static    pANTLR3_UINT8
appendUTF16_UTF16(pANTLR3_STRING string,const char * newbit)858 appendUTF16_UTF16	(pANTLR3_STRING string, const char * newbit)
859 {
860     ANTLR3_UINT32 len;
861     pANTLR3_UINT16  in;
862 
863     /** First, determine the length of the input string
864      */
865     in	    = (pANTLR3_UINT16)newbit;
866     len   = 0;
867 
868     while   (*in++ != '\0')
869     {
870 		len++;
871     }
872 
873     if	(string->size < (string->len + len + 1))
874     {
875 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) *(string->len + len + 1) ));
876 		if (newAlloc == NULL)
877 		{
878 			return NULL;
879 		}
880 		string->chars	= newAlloc;
881 		string->size	= string->len + len + 1;
882     }
883 
884     /* Note we copy one more byte than the strlen in order to get the trailing delimiter
885      */
886     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1)));
887     string->len	+= len;
888 
889     return string->chars;
890 }
891 
892 static    pANTLR3_UINT8
set8(pANTLR3_STRING string,const char * chars)893 set8	(pANTLR3_STRING string, const char * chars)
894 {
895     ANTLR3_UINT32	len;
896 
897     len = (ANTLR3_UINT32)strlen(chars);
898     if	(string->size < len + 1)
899     {
900 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1));
901 		if (newAlloc == NULL)
902 		{
903 			return NULL;
904 		}
905 		string->chars	= newAlloc;
906 		string->size	= len + 1;
907     }
908 
909     /* Note we copy one more byte than the strlen in order to get the trailing '\0'
910      */
911     ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1));
912     string->len	    = len;
913 
914     return  string->chars;
915 
916 }
917 
918 static    pANTLR3_UINT8
setUTF16_8(pANTLR3_STRING string,const char * chars)919 setUTF16_8	(pANTLR3_STRING string, const char * chars)
920 {
921     ANTLR3_UINT32	len;
922     ANTLR3_UINT32	count;
923     pANTLR3_UINT16	apPoint;
924 
925     len = (ANTLR3_UINT32)strlen(chars);
926     if	(string->size < len + 1)
927 	{
928 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
929 		if (newAlloc == NULL)
930 		{
931 			return NULL;
932 		}
933 		string->chars	= newAlloc;
934 		string->size	= len + 1;
935     }
936     apPoint = ((pANTLR3_UINT16)string->chars);
937     string->len	= len;
938 
939     for	(count = 0; count < string->len; count++)
940     {
941 		*apPoint++   = *(chars + count);
942     }
943     *apPoint = '\0';
944 
945     return  string->chars;
946 }
947 
948 static    pANTLR3_UINT8
setUTF16_UTF16(pANTLR3_STRING string,const char * chars)949 setUTF16_UTF16    (pANTLR3_STRING string, const char * chars)
950 {
951     ANTLR3_UINT32   len;
952     pANTLR3_UINT16  in;
953 
954     /** First, determine the length of the input string
955      */
956     in	    = (pANTLR3_UINT16)chars;
957     len   = 0;
958 
959     while   (*in++ != '\0')
960     {
961 		len++;
962     }
963 
964     if	(string->size < len + 1)
965     {
966 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
967 		if (newAlloc == NULL)
968 		{
969 			return NULL;
970 		}
971 		string->chars	= newAlloc;
972 		string->size	= len + 1;
973     }
974 
975     /* Note we copy one more byte than the strlen in order to get the trailing '\0'
976      */
977     ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16)));
978     string->len	    = len;
979 
980     return  string->chars;
981 
982 }
983 
984 static    pANTLR3_UINT8
addc8(pANTLR3_STRING string,ANTLR3_UINT32 c)985 addc8	(pANTLR3_STRING string, ANTLR3_UINT32 c)
986 {
987     if	(string->size < string->len + 2)
988     {
989 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2));
990 		if (newAlloc == NULL)
991 		{
992 			return NULL;
993 		}
994 		string->chars	= newAlloc;
995 		string->size	= string->len + 2;
996     }
997     *(string->chars + string->len)	= (ANTLR3_UINT8)c;
998     *(string->chars + string->len + 1)	= '\0';
999     string->len++;
1000 
1001     return  string->chars;
1002 }
1003 
1004 static    pANTLR3_UINT8
addcUTF16(pANTLR3_STRING string,ANTLR3_UINT32 c)1005 addcUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 c)
1006 {
1007     pANTLR3_UINT16  ptr;
1008 
1009     if	(string->size < string->len + 2)
1010     {
1011 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2)));
1012 		if (newAlloc == NULL)
1013 		{
1014 			return NULL;
1015 		}
1016 		string->chars	= newAlloc;
1017 		string->size	= string->len + 2;
1018     }
1019     ptr	= (pANTLR3_UINT16)(string->chars);
1020 
1021     *(ptr + string->len)	= (ANTLR3_UINT16)c;
1022     *(ptr + string->len + 1)	= '\0';
1023     string->len++;
1024 
1025     return  string->chars;
1026 }
1027 
1028 static    pANTLR3_UINT8
addi8(pANTLR3_STRING string,ANTLR3_INT32 i)1029 addi8	(pANTLR3_STRING string, ANTLR3_INT32 i)
1030 {
1031     ANTLR3_UINT8	    newbit[32];
1032 
1033     sprintf((char *)newbit, "%d", i);
1034 
1035     return  string->append8(string, (const char *)newbit);
1036 }
1037 static    pANTLR3_UINT8
addiUTF16(pANTLR3_STRING string,ANTLR3_INT32 i)1038 addiUTF16	(pANTLR3_STRING string, ANTLR3_INT32 i)
1039 {
1040     ANTLR3_UINT8	    newbit[32];
1041 
1042     sprintf((char *)newbit, "%d", i);
1043 
1044     return  string->append8(string, (const char *)newbit);
1045 }
1046 
1047 static	  pANTLR3_UINT8
inserti8(pANTLR3_STRING string,ANTLR3_UINT32 point,ANTLR3_INT32 i)1048 inserti8    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
1049 {
1050     ANTLR3_UINT8	    newbit[32];
1051 
1052     sprintf((char *)newbit, "%d", i);
1053     return  string->insert8(string, point, (const char *)newbit);
1054 }
1055 static	  pANTLR3_UINT8
insertiUTF16(pANTLR3_STRING string,ANTLR3_UINT32 point,ANTLR3_INT32 i)1056 insertiUTF16    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
1057 {
1058     ANTLR3_UINT8	    newbit[32];
1059 
1060     sprintf((char *)newbit, "%d", i);
1061     return  string->insert8(string, point, (const char *)newbit);
1062 }
1063 
1064 static	pANTLR3_UINT8
insert8(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1065 insert8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1066 {
1067     ANTLR3_UINT32	len;
1068 
1069     if	(point >= string->len)
1070     {
1071 		return	string->append(string, newbit);
1072     }
1073 
1074     len	= (ANTLR3_UINT32)strlen(newbit);
1075 
1076     if	(len == 0)
1077     {
1078 		return	string->chars;
1079     }
1080 
1081     if	(string->size < (string->len + len + 1))
1082     {
1083 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
1084 		if (newAlloc == NULL)
1085 		{
1086 			return NULL;
1087 		}
1088 		string->chars	= newAlloc;
1089 		string->size	= string->len + len + 1;
1090     }
1091 
1092     /* Move the characters we are inserting before, including the delimiter
1093      */
1094     ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1));
1095 
1096     /* Note we copy the exact number of bytes
1097      */
1098     ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len));
1099 
1100     string->len += len;
1101 
1102     return  string->chars;
1103 }
1104 
1105 static	pANTLR3_UINT8
insertUTF16_8(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1106 insertUTF16_8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1107 {
1108     ANTLR3_UINT32	len;
1109     ANTLR3_UINT32	count;
1110     pANTLR3_UINT16	inPoint;
1111 
1112     if	(point >= string->len)
1113     {
1114 		return	string->append8(string, newbit);
1115     }
1116 
1117     len	= (ANTLR3_UINT32)strlen(newbit);
1118 
1119     if	(len == 0)
1120     {
1121 		return	string->chars;
1122     }
1123 
1124     if	(string->size < (string->len + len + 1))
1125     {
1126 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
1127 		if (newAlloc == NULL)
1128 		{
1129 			return NULL;
1130 		}
1131 		string->chars	= newAlloc;
1132 		string->size	= string->len + len + 1;
1133     }
1134 
1135     /* Move the characters we are inserting before, including the delimiter
1136      */
1137     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
1138 
1139     string->len += len;
1140 
1141     inPoint = ((pANTLR3_UINT16)(string->chars))+point;
1142     for	(count = 0; count<len; count++)
1143     {
1144 		*(inPoint + count) = (ANTLR3_UINT16)(*(newbit+count));
1145     }
1146 
1147     return  string->chars;
1148 }
1149 
1150 static	pANTLR3_UINT8
insertUTF16_UTF16(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1151 insertUTF16_UTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1152 {
1153     ANTLR3_UINT32	len;
1154     pANTLR3_UINT16	in;
1155 
1156     if	(point >= string->len)
1157     {
1158 		return	string->append(string, newbit);
1159     }
1160 
1161     /** First, determine the length of the input string
1162      */
1163     in	    = (pANTLR3_UINT16)newbit;
1164     len	    = 0;
1165 
1166     while   (*in++ != '\0')
1167     {
1168 		len++;
1169     }
1170 
1171     if	(len == 0)
1172     {
1173 		return	string->chars;
1174     }
1175 
1176     if	(string->size < (string->len + len + 1))
1177     {
1178 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
1179 		if (newAlloc == NULL)
1180 		{
1181 			return NULL;
1182 		}
1183 		string->chars	= newAlloc;
1184 		string->size	= string->len + len + 1;
1185     }
1186 
1187     /* Move the characters we are inserting before, including the delimiter
1188      */
1189     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
1190 
1191 
1192     /* Note we copy the exact number of characters
1193      */
1194     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len)));
1195 
1196     string->len += len;
1197 
1198     return  string->chars;
1199 }
1200 
setS(pANTLR3_STRING string,pANTLR3_STRING chars)1201 static    pANTLR3_UINT8	    setS	(pANTLR3_STRING string, pANTLR3_STRING chars)
1202 {
1203     return  string->set(string, (const char *)(chars->chars));
1204 }
1205 
appendS(pANTLR3_STRING string,pANTLR3_STRING newbit)1206 static    pANTLR3_UINT8	    appendS	(pANTLR3_STRING string, pANTLR3_STRING newbit)
1207 {
1208     /* We may be passed an empty string, in which case we just return the current pointer
1209      */
1210     if	(newbit == NULL || newbit->len == 0 || newbit->size == 0 || newbit->chars == NULL)
1211     {
1212 		return	string->chars;
1213     }
1214     else
1215     {
1216 		return  string->append(string, (const char *)(newbit->chars));
1217     }
1218 }
1219 
insertS(pANTLR3_STRING string,ANTLR3_UINT32 point,pANTLR3_STRING newbit)1220 static	  pANTLR3_UINT8	    insertS	(pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit)
1221 {
1222     return  string->insert(string, point, (const char *)(newbit->chars));
1223 }
1224 
1225 /* Function that compares the text of a string to the supplied
1226  * 8 bit character string and returns a result a la strcmp()
1227  */
1228 static ANTLR3_UINT32
compare8(pANTLR3_STRING string,const char * compStr)1229 compare8	(pANTLR3_STRING string, const char * compStr)
1230 {
1231     return  strcmp((const char *)(string->chars), compStr);
1232 }
1233 
1234 /* Function that compares the text of a string with the supplied character string
1235  * (which is assumed to be in the same encoding as the string itself) and returns a result
1236  * a la strcmp()
1237  */
1238 static ANTLR3_UINT32
compareUTF16_8(pANTLR3_STRING string,const char * compStr)1239 compareUTF16_8	(pANTLR3_STRING string, const char * compStr)
1240 {
1241     pANTLR3_UINT16  ourString;
1242     ANTLR3_UINT32   charDiff;
1243 
1244     ourString	= (pANTLR3_UINT16)(string->chars);
1245 
1246     while   (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*compStr) != '\0'))
1247     {
1248 		charDiff = *ourString - *compStr;
1249 		if  (charDiff != 0)
1250 		{
1251 			return charDiff;
1252 		}
1253 		ourString++;
1254 		compStr++;
1255     }
1256 
1257     /* At this point, one of the strings was terminated
1258      */
1259     return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
1260 
1261 }
1262 
1263 /* Function that compares the text of a string with the supplied character string
1264  * (which is assumed to be in the same encoding as the string itself) and returns a result
1265  * a la strcmp()
1266  */
1267 static ANTLR3_UINT32
compareUTF16_UTF16(pANTLR3_STRING string,const char * compStr8)1268 compareUTF16_UTF16	(pANTLR3_STRING string, const char * compStr8)
1269 {
1270     pANTLR3_UINT16  ourString;
1271     pANTLR3_UINT16  compStr;
1272     ANTLR3_UINT32   charDiff;
1273 
1274     ourString	= (pANTLR3_UINT16)(string->chars);
1275     compStr	= (pANTLR3_UINT16)(compStr8);
1276 
1277     while   (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)compStr)) != '\0'))
1278     {
1279 		charDiff = *ourString - *compStr;
1280 		if  (charDiff != 0)
1281 		{
1282 			return charDiff;
1283 		}
1284 		ourString++;
1285 		compStr++;
1286     }
1287 
1288     /* At this point, one of the strings was terminated
1289      */
1290     return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
1291 }
1292 
1293 /* Function that compares the text of a string with the supplied string
1294  * (which is assumed to be in the same encoding as the string itself) and returns a result
1295  * a la strcmp()
1296  */
1297 static ANTLR3_UINT32
compareS(pANTLR3_STRING string,pANTLR3_STRING compStr)1298 compareS    (pANTLR3_STRING string, pANTLR3_STRING compStr)
1299 {
1300     return  string->compare(string, (const char *)compStr->chars);
1301 }
1302 
1303 
1304 /* Function that returns the character indexed at the supplied
1305  * offset as a 32 bit character.
1306  */
1307 static ANTLR3_UCHAR
charAt8(pANTLR3_STRING string,ANTLR3_UINT32 offset)1308 charAt8	    (pANTLR3_STRING string, ANTLR3_UINT32 offset)
1309 {
1310     if	(offset > string->len)
1311     {
1312 		return (ANTLR3_UCHAR)'\0';
1313     }
1314     else
1315     {
1316 		return  (ANTLR3_UCHAR)(*(string->chars + offset));
1317     }
1318 }
1319 
1320 /* Function that returns the character indexed at the supplied
1321  * offset as a 32 bit character.
1322  */
1323 static ANTLR3_UCHAR
charAtUTF16(pANTLR3_STRING string,ANTLR3_UINT32 offset)1324 charAtUTF16    (pANTLR3_STRING string, ANTLR3_UINT32 offset)
1325 {
1326     if	(offset > string->len)
1327     {
1328 		return (ANTLR3_UCHAR)'\0';
1329     }
1330     else
1331     {
1332 		return  (ANTLR3_UCHAR)(*((pANTLR3_UINT16)(string->chars) + offset));
1333     }
1334 }
1335 
1336 /* Function that returns a substring of the supplied string a la .subString(s,e)
1337  * in java runtimes.
1338  */
1339 static pANTLR3_STRING
subString8(pANTLR3_STRING string,ANTLR3_UINT32 startIndex,ANTLR3_UINT32 endIndex)1340 subString8   (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
1341 {
1342     pANTLR3_STRING newStr;
1343 
1344     if	(endIndex > string->len)
1345     {
1346 		endIndex = string->len + 1;
1347     }
1348     newStr  = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex);
1349 
1350     return newStr;
1351 }
1352 
1353 /* Returns a substring of the supplied string a la .subString(s,e)
1354  * in java runtimes.
1355  */
1356 static pANTLR3_STRING
subStringUTF16(pANTLR3_STRING string,ANTLR3_UINT32 startIndex,ANTLR3_UINT32 endIndex)1357 subStringUTF16  (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
1358 {
1359     pANTLR3_STRING newStr;
1360 
1361     if	(endIndex > string->len)
1362     {
1363 		endIndex = string->len + 1;
1364     }
1365     newStr  = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex);
1366 
1367     return newStr;
1368 }
1369 
1370 /* Function that can convert the characters in the string to an integer
1371  */
1372 static ANTLR3_INT32
toInt32_8(struct ANTLR3_STRING_struct * string)1373 toInt32_8	    (struct ANTLR3_STRING_struct * string)
1374 {
1375     return  atoi((const char *)(string->chars));
1376 }
1377 
1378 /* Function that can convert the characters in the string to an integer
1379  */
1380 static ANTLR3_INT32
toInt32_UTF16(struct ANTLR3_STRING_struct * string)1381 toInt32_UTF16       (struct ANTLR3_STRING_struct * string)
1382 {
1383     pANTLR3_UINT16  input;
1384     ANTLR3_INT32   value;
1385     ANTLR3_BOOLEAN  negate;
1386 
1387     value   = 0;
1388     input   = (pANTLR3_UINT16)(string->chars);
1389     negate  = ANTLR3_FALSE;
1390 
1391     if	(*input == (ANTLR3_UCHAR)'-')
1392     {
1393 		negate = ANTLR3_TRUE;
1394 		input++;
1395     }
1396     else if (*input == (ANTLR3_UCHAR)'+')
1397     {
1398 		input++;
1399     }
1400 
1401     while   (*input != '\0' && isdigit(*input))
1402     {
1403 		value	 = value * 10;
1404 		value	+= ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0');
1405 		input++;
1406     }
1407 
1408     return negate ? -value : value;
1409 }
1410 
1411 /* Function that returns a pointer to an 8 bit version of the string,
1412  * which in this case is just the string as this is
1413  * 8 bit encodiing anyway.
1414  */
to8_8(pANTLR3_STRING string)1415 static	  pANTLR3_STRING	    to8_8	(pANTLR3_STRING string)
1416 {
1417     return  string;
1418 }
1419 
1420 /* Function that returns an 8 bit version of the string,
1421  * which in this case is returning all the UTF16 characters
1422  * narrowed back into 8 bits, with characters that are too large
1423  * replaced with '_'
1424  */
to8_UTF16(pANTLR3_STRING string)1425 static	  pANTLR3_STRING    to8_UTF16	(pANTLR3_STRING string)
1426 {
1427 	pANTLR3_STRING  newStr;
1428 	ANTLR3_UINT32   i;
1429 
1430 	/* Create a new 8 bit string
1431 	*/
1432 	newStr  = newRaw8(string->factory);
1433 
1434 	if	(newStr == NULL)
1435 	{
1436 		return	NULL;
1437 	}
1438 
1439 	/* Always add one more byte for a terminator
1440 	*/
1441 	newStr->chars   = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1));
1442 	if (newStr->chars != NULL)
1443 	{
1444 		newStr->size    = string->len + 1;
1445 		newStr->len	    = string->len;
1446 
1447 		/* Now copy each UTF16 charActer , making it an 8 bit character of
1448 		* some sort.
1449 		*/
1450 		for	(i=0; i<string->len; i++)
1451 		{
1452 			ANTLR3_UCHAR	c;
1453 
1454 			c = *(((pANTLR3_UINT16)(string->chars)) + i);
1455 
1456 			*(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c);
1457 		}
1458 
1459 		/* Terminate
1460 		*/
1461 		*(newStr->chars + newStr->len) = '\0';
1462 	}
1463 
1464 	return newStr;
1465 }
1466