1 /** \file
2 * Implementation of the ANTLR3 string and string factory classes
3 */
4
5 // [The "BSD licence"]
6 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
7 // http://www.temporal-wave.com
8 // http://www.linkedin.com/in/jimidle
9 //
10 // All rights reserved.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions
14 // are met:
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 // 3. The name of the author may not be used to endorse or promote products
21 // derived from this software without specific prior written permission.
22 //
23 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
34 #include <antlr3string.h>
35
36 /* Factory API
37 */
38 static pANTLR3_STRING newRaw8 (pANTLR3_STRING_FACTORY factory);
39 static pANTLR3_STRING newRawUTF16 (pANTLR3_STRING_FACTORY factory);
40 static pANTLR3_STRING newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
41 static pANTLR3_STRING newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
42 static pANTLR3_STRING newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
43 static pANTLR3_STRING newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
44 static pANTLR3_STRING newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
45 static pANTLR3_STRING newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
46 static pANTLR3_STRING newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
47 static pANTLR3_STRING newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
48 static void destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
49 static pANTLR3_STRING printable8 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
50 static pANTLR3_STRING printableUTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
51 static void closeFactory(pANTLR3_STRING_FACTORY factory);
52
53 /* String API
54 */
55 static pANTLR3_UINT8 set8 (pANTLR3_STRING string, const char * chars);
56 static pANTLR3_UINT8 setUTF16_8 (pANTLR3_STRING string, const char * chars);
57 static pANTLR3_UINT8 setUTF16_UTF16 (pANTLR3_STRING string, const char * chars);
58 static pANTLR3_UINT8 append8 (pANTLR3_STRING string, const char * newbit);
59 static pANTLR3_UINT8 appendUTF16_8 (pANTLR3_STRING string, const char * newbit);
60 static pANTLR3_UINT8 appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit);
61 static pANTLR3_UINT8 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
62 static pANTLR3_UINT8 insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
63 static pANTLR3_UINT8 insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
64
65 static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars);
66 static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit);
67 static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit);
68
69 static pANTLR3_UINT8 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c);
70 static pANTLR3_UINT8 addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c);
71 static pANTLR3_UINT8 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i);
72 static pANTLR3_UINT8 addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i);
73 static pANTLR3_UINT8 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
74 static pANTLR3_UINT8 insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
75
76 static ANTLR3_UINT32 compare8 (pANTLR3_STRING string, const char * compStr);
77 static ANTLR3_UINT32 compareUTF16_8 (pANTLR3_STRING string, const char * compStr);
78 static ANTLR3_UINT32 compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr);
79 static ANTLR3_UINT32 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr);
80 static ANTLR3_UCHAR charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
81 static ANTLR3_UCHAR charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
82 static pANTLR3_STRING subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
83 static pANTLR3_STRING subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
84 static ANTLR3_INT32 toInt32_8 (pANTLR3_STRING string);
85 static ANTLR3_INT32 toInt32_UTF16 (pANTLR3_STRING string);
86 static pANTLR3_STRING to8_8 (pANTLR3_STRING string);
87 static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string);
88 static pANTLR3_STRING toUTF8_8 (pANTLR3_STRING string);
89 static pANTLR3_STRING toUTF8_UTF16 (pANTLR3_STRING string);
90
91 /* Local helpers
92 */
93 static void stringInit8 (pANTLR3_STRING string);
94 static void stringInitUTF16 (pANTLR3_STRING string);
95 static void ANTLR3_CDECL stringFree (pANTLR3_STRING string);
96
97 ANTLR3_API pANTLR3_STRING_FACTORY
antlr3StringFactoryNew(ANTLR3_UINT32 encoding)98 antlr3StringFactoryNew(ANTLR3_UINT32 encoding)
99 {
100 pANTLR3_STRING_FACTORY factory;
101
102 /* Allocate memory
103 */
104 factory = (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY));
105
106 if (factory == NULL)
107 {
108 return NULL;
109 }
110
111 /* Now we make a new list to track the strings.
112 */
113 factory->strings = antlr3VectorNew(0);
114 factory->index = 0;
115
116 if (factory->strings == NULL)
117 {
118 ANTLR3_FREE(factory);
119 return NULL;
120 }
121
122 // Install the API
123 //
124 // TODO: These encodings need equivalent functions to
125 // UTF16 and 8Bit if I am going to support those encodings in the STRING stuff.
126 // The STRING stuff was intended as a quick and dirty hack for people that did not
127 // want to worry about memory and performance very much, but nobody ever reads the
128 // notes or comments or uses the email list search. I want to discourage using these
129 // interfaces as it is much more efficient to use the pointers within the tokens
130 // directly, so I am not implementing the string stuff for the newer encodings.
131 // We install the standard 8 and 16 bit functions for the UTF 8 and 16 but they
132 // will not be useful beyond returning the text.
133 //
134 switch(encoding)
135 {
136 case ANTLR3_ENC_UTF32:
137 break;
138
139 case ANTLR3_ENC_UTF32BE:
140 break;
141
142 case ANTLR3_ENC_UTF32LE:
143 break;
144
145 case ANTLR3_ENC_UTF16BE:
146 case ANTLR3_ENC_UTF16LE:
147 case ANTLR3_ENC_UTF16:
148
149 factory->newRaw = newRawUTF16;
150 factory->newSize = newSizeUTF16;
151 factory->newPtr = newPtrUTF16_UTF16;
152 factory->newPtr8 = newPtrUTF16_8;
153 factory->newStr = newStrUTF16_UTF16;
154 factory->newStr8 = newStrUTF16_8;
155 factory->printable = printableUTF16;
156 factory->destroy = destroy;
157 factory->close = closeFactory;
158 break;
159
160 case ANTLR3_ENC_UTF8:
161 case ANTLR3_ENC_EBCDIC:
162 case ANTLR3_ENC_8BIT:
163 default:
164
165 factory->newRaw = newRaw8;
166 factory->newSize = newSize8;
167 factory->newPtr = newPtr8;
168 factory->newPtr8 = newPtr8;
169 factory->newStr = newStr8;
170 factory->newStr8 = newStr8;
171 factory->printable = printable8;
172 factory->destroy = destroy;
173 factory->close = closeFactory;
174 break;
175 }
176 return factory;
177 }
178
179
180 /**
181 *
182 * \param factory
183 * \return
184 */
185 static pANTLR3_STRING
newRaw8(pANTLR3_STRING_FACTORY factory)186 newRaw8 (pANTLR3_STRING_FACTORY factory)
187 {
188 pANTLR3_STRING string;
189
190 string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
191
192 if (string == NULL)
193 {
194 return NULL;
195 }
196
197 /* Structure is allocated, now fill in the API etc.
198 */
199 stringInit8(string);
200 string->factory = factory;
201
202 /* Add the string into the allocated list
203 */
204 factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
205 string->index = factory->index++;
206
207 return string;
208 }
209 /**
210 *
211 * \param factory
212 * \return
213 */
214 static pANTLR3_STRING
newRawUTF16(pANTLR3_STRING_FACTORY factory)215 newRawUTF16 (pANTLR3_STRING_FACTORY factory)
216 {
217 pANTLR3_STRING string;
218
219 string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
220
221 if (string == NULL)
222 {
223 return NULL;
224 }
225
226 /* Structure is allocated, now fill in the API etc.
227 */
228 stringInitUTF16(string);
229 string->factory = factory;
230
231 /* Add the string into the allocated list
232 */
233 factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
234 string->index = factory->index++;
235
236 return string;
237 }
238 static
stringFree(pANTLR3_STRING string)239 void ANTLR3_CDECL stringFree (pANTLR3_STRING string)
240 {
241 /* First free the string itself if there was anything in it
242 */
243 if (string->chars)
244 {
245 ANTLR3_FREE(string->chars);
246 }
247
248 /* Now free the space for this string
249 */
250 ANTLR3_FREE(string);
251
252 return;
253 }
254 /**
255 *
256 * \param string
257 * \return
258 */
259 static void
stringInit8(pANTLR3_STRING string)260 stringInit8 (pANTLR3_STRING string)
261 {
262 string->len = 0;
263 string->size = 0;
264 string->chars = NULL;
265 string->encoding = ANTLR3_ENC_8BIT ;
266
267 /* API for 8 bit strings*/
268
269 string->set = set8;
270 string->set8 = set8;
271 string->append = append8;
272 string->append8 = append8;
273 string->insert = insert8;
274 string->insert8 = insert8;
275 string->addi = addi8;
276 string->inserti = inserti8;
277 string->addc = addc8;
278 string->charAt = charAt8;
279 string->compare = compare8;
280 string->compare8 = compare8;
281 string->subString = subString8;
282 string->toInt32 = toInt32_8;
283 string->to8 = to8_8;
284 string->toUTF8 = toUTF8_8;
285 string->compareS = compareS;
286 string->setS = setS;
287 string->appendS = appendS;
288 string->insertS = insertS;
289
290 }
291 /**
292 *
293 * \param string
294 * \return
295 */
296 static void
stringInitUTF16(pANTLR3_STRING string)297 stringInitUTF16 (pANTLR3_STRING string)
298 {
299 string->len = 0;
300 string->size = 0;
301 string->chars = NULL;
302 string->encoding = ANTLR3_ENC_8BIT;
303
304 /* API for UTF16 strings */
305
306 string->set = setUTF16_UTF16;
307 string->set8 = setUTF16_8;
308 string->append = appendUTF16_UTF16;
309 string->append8 = appendUTF16_8;
310 string->insert = insertUTF16_UTF16;
311 string->insert8 = insertUTF16_8;
312 string->addi = addiUTF16;
313 string->inserti = insertiUTF16;
314 string->addc = addcUTF16;
315 string->charAt = charAtUTF16;
316 string->compare = compareUTF16_UTF16;
317 string->compare8 = compareUTF16_8;
318 string->subString = subStringUTF16;
319 string->toInt32 = toInt32_UTF16;
320 string->to8 = to8_UTF16;
321 string->toUTF8 = toUTF8_UTF16;
322
323 string->compareS = compareS;
324 string->setS = setS;
325 string->appendS = appendS;
326 string->insertS = insertS;
327 }
328 /**
329 *
330 * \param string
331 * \return
332 * TODO: Implement UTF-8
333 */
334 static void
stringInitUTF8(pANTLR3_STRING string)335 stringInitUTF8 (pANTLR3_STRING string)
336 {
337 string->len = 0;
338 string->size = 0;
339 string->chars = NULL;
340
341 /* API */
342
343 }
344
345 // Convert an 8 bit string into a UTF8 representation, which is in fact just the string itself
346 // a memcpy as we make no assumptions about the 8 bit encoding.
347 //
348 static pANTLR3_STRING
toUTF8_8(pANTLR3_STRING string)349 toUTF8_8 (pANTLR3_STRING string)
350 {
351 return string->factory->newPtr(string->factory, (pANTLR3_UINT8)(string->chars), string->len);
352 }
353
354 // Convert a UTF16 string into a UTF8 representation using the Unicode.org
355 // supplied C algorithms, which are now contained within the ANTLR3 C runtime
356 // as permitted by the Unicode license (within the source code antlr3convertutf.c/.h
357 // UCS2 has the same encoding as UTF16 so we can use UTF16 converter.
358 //
359 static pANTLR3_STRING
toUTF8_UTF16(pANTLR3_STRING string)360 toUTF8_UTF16 (pANTLR3_STRING string)
361 {
362
363 UTF8 * outputEnd;
364 UTF16 * inputEnd;
365 pANTLR3_STRING utf8String;
366
367 ConversionResult cResult;
368
369 // Allocate the output buffer, which needs to accommodate potentially
370 // 3X (in bytes) the input size (in chars).
371 //
372 utf8String = string->factory->newStr8(string->factory, (pANTLR3_UINT8)"");
373
374 if (utf8String != NULL)
375 {
376 // Free existing allocation
377 //
378 ANTLR3_FREE(utf8String->chars);
379
380 // Reallocate according to maximum expected size
381 //
382 utf8String->size = string->len *3;
383 utf8String->chars = (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1);
384
385 if (utf8String->chars != NULL)
386 {
387 inputEnd = (UTF16 *) (string->chars);
388 outputEnd = (UTF8 *) (utf8String->chars);
389
390 // Call the Unicode converter
391 //
392 cResult = ConvertUTF16toUTF8
393 (
394 (const UTF16**)&inputEnd,
395 ((const UTF16 *)(string->chars)) + string->len,
396 &outputEnd,
397 outputEnd + utf8String->size - 1,
398 lenientConversion
399 );
400
401 // We don't really care if things failed or not here, we just converted
402 // everything that was vaguely possible and stopped when it wasn't. It is
403 // up to the grammar programmer to verify that the input is sensible.
404 //
405 utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars);
406
407 *(outputEnd+1) = '\0'; // Always null terminate
408 }
409 }
410 return utf8String;
411 }
412
413 /**
414 * Creates a new string with enough capacity for size 8 bit characters plus a terminator.
415 *
416 * \param[in] factory - Pointer to the string factory that owns strings
417 * \param[in] size - In characters
418 * \return pointer to the new string.
419 */
420 static pANTLR3_STRING
newSize8(pANTLR3_STRING_FACTORY factory,ANTLR3_UINT32 size)421 newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
422 {
423 pANTLR3_STRING string;
424
425 string = factory->newRaw(factory);
426
427 if (string == NULL)
428 {
429 return string;
430 }
431
432 /* Always add one more byte for a terminator ;-)
433 */
434 string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1)));
435 *(string->chars) = '\0';
436 string->size = size + 1;
437
438
439 return string;
440 }
441 /**
442 * Creates a new string with enough capacity for size UTF16 characters plus a terminator.
443 *
444 * \param[in] factory - Pointer to the string factory that owns strings
445 * \param[in] size - In characters (count double for surrogate pairs!!!)
446 * \return pointer to the new string.
447 */
448 static pANTLR3_STRING
newSizeUTF16(pANTLR3_STRING_FACTORY factory,ANTLR3_UINT32 size)449 newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
450 {
451 pANTLR3_STRING string;
452
453 string = factory->newRaw(factory);
454
455 if (string == NULL)
456 {
457 return string;
458 }
459
460 /* Always add one more byte for a terminator ;-)
461 */
462 string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1)));
463 *(string->chars) = '\0';
464 string->size = size+1; /* Size is always in characters, as is len */
465
466 return string;
467 }
468
469 /** Creates a new 8 bit string initialized with the 8 bit characters at the
470 * supplied ptr, of pre-determined size.
471 * \param[in] factory - Pointer to the string factory that owns the strings
472 * \param[in] ptr - Pointer to 8 bit encoded characters
473 * \return pointer to the new string
474 */
475 static pANTLR3_STRING
newPtr8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)476 newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
477 {
478 pANTLR3_STRING string;
479
480 string = factory->newSize(factory, size);
481
482 if (string == NULL)
483 {
484 return NULL;
485 }
486
487 if (size <= 0)
488 {
489 return string;
490 }
491
492 if (ptr != NULL)
493 {
494 ANTLR3_MEMMOVE(string->chars, (const void *)ptr, size);
495 *(string->chars + size) = '\0'; /* Terminate, these strings are usually used for Token streams and printing etc. */
496 string->len = size;
497 }
498
499 return string;
500 }
501
502 /** Creates a new UTF16 string initialized with the 8 bit characters at the
503 * supplied 8 bit character ptr, of pre-determined size.
504 * \param[in] factory - Pointer to the string factory that owns the strings
505 * \param[in] ptr - Pointer to 8 bit encoded characters
506 * \return pointer to the new string
507 */
508 static pANTLR3_STRING
newPtrUTF16_8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)509 newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
510 {
511 pANTLR3_STRING string;
512
513 /* newSize accepts size in characters, not bytes
514 */
515 string = factory->newSize(factory, size);
516
517 if (string == NULL)
518 {
519 return NULL;
520 }
521
522 if (size <= 0)
523 {
524 return string;
525 }
526
527 if (ptr != NULL)
528 {
529 pANTLR3_UINT16 out;
530 ANTLR3_INT32 inSize;
531
532 out = (pANTLR3_UINT16)(string->chars);
533 inSize = size;
534
535 while (inSize-- > 0)
536 {
537 *out++ = (ANTLR3_UINT16)(*ptr++);
538 }
539
540 /* Terminate, these strings are usually used for Token streams and printing etc.
541 */
542 *(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
543
544 string->len = size;
545 }
546
547 return string;
548 }
549
550 /** Creates a new UTF16 string initialized with the UTF16 characters at the
551 * supplied ptr, of pre-determined size.
552 * \param[in] factory - Pointer to the string factory that owns the strings
553 * \param[in] ptr - Pointer to UTF16 encoded characters
554 * \return pointer to the new string
555 */
556 static pANTLR3_STRING
newPtrUTF16_UTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)557 newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
558 {
559 pANTLR3_STRING string;
560
561 string = factory->newSize(factory, size);
562
563 if (string == NULL)
564 {
565 return NULL;
566 }
567
568 if (size <= 0)
569 {
570 return string;
571 }
572
573 if (ptr != NULL)
574 {
575 ANTLR3_MEMMOVE(string->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16)));
576
577 /* Terminate, these strings are usually used for Token streams and printing etc.
578 */
579 *(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
580 string->len = size;
581 }
582
583 return string;
584 }
585
586 /** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer.
587 * \param[in] factory - Pointer to the string factory that owns strings.
588 * \param[in] ptr - Pointer to the 8 bit encoded string
589 * \return Pointer to the newly initialized string
590 */
591 static pANTLR3_STRING
newStr8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)592 newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
593 {
594 return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
595 }
596
597 /** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer.
598 * \param[in] factory - Pointer to the string factory that owns strings.
599 * \param[in] ptr - Pointer to the 8 bit encoded string
600 * \return POinter to the newly initialized string
601 */
602 static pANTLR3_STRING
newStrUTF16_8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)603 newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
604 {
605 return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
606 }
607
608 /** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer.
609 * \param[in] factory - Pointer to the string factory that owns strings.
610 * \param[in] ptr - Pointer to the UTF16 encoded string
611 * \return Pointer to the newly initialized string
612 */
613 static pANTLR3_STRING
newStrUTF16_UTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)614 newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
615 {
616 pANTLR3_UINT16 in;
617 ANTLR3_UINT32 count;
618
619 /** First, determine the length of the input string
620 */
621 in = (pANTLR3_UINT16)ptr;
622 count = 0;
623
624 while (*in++ != '\0')
625 {
626 count++;
627 }
628 return factory->newPtr(factory, ptr, count);
629 }
630
631 static void
destroy(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING string)632 destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string)
633 {
634 // Record which string we are deleting
635 //
636 ANTLR3_UINT32 strIndex = string->index;
637
638 // Ensure that the string was not factory made, or we would try
639 // to delete memory that wasn't allocated outside the factory
640 // block.
641 // Remove the specific indexed string from the vector
642 //
643 factory->strings->del(factory->strings, strIndex);
644
645 // One less string in the vector, so decrement the factory index
646 // so that the next string allocated is indexed correctly with
647 // respect to the vector.
648 //
649 factory->index--;
650
651 // Now we have to reindex the strings in the vector that followed
652 // the one we just deleted. We only do this if the one we just deleted
653 // was not the last one.
654 //
655 if (strIndex< factory->index)
656 {
657 // We must reindex the strings after the one we just deleted.
658 // The one that follows the one we just deleted is also out
659 // of whack, so we start there.
660 //
661 ANTLR3_UINT32 i;
662
663 for (i = strIndex; i < factory->index; i++)
664 {
665 // Renumber the entry
666 //
667 ((pANTLR3_STRING)(factory->strings->elements[i].element))->index = i;
668 }
669 }
670
671 // The string has been destroyed and the elements of the factory are reindexed.
672 //
673
674 }
675
676 static pANTLR3_STRING
printable8(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING instr)677 printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
678 {
679 pANTLR3_STRING string;
680
681 /* We don't need to be too efficient here, this is mostly for error messages and so on.
682 */
683 pANTLR3_UINT8 scannedText;
684 ANTLR3_UINT32 i;
685
686 /* Assume we need as much as twice as much space to parse out the control characters
687 */
688 string = factory->newSize(factory, instr->len *2 + 1);
689
690 /* Scan through and replace unprintable (in terms of this routine)
691 * characters
692 */
693 scannedText = string->chars;
694
695 for (i = 0; i < instr->len; i++)
696 {
697 if (*(instr->chars + i) == '\n')
698 {
699 *scannedText++ = '\\';
700 *scannedText++ = 'n';
701 }
702 else if (*(instr->chars + i) == '\r')
703 {
704 *scannedText++ = '\\';
705 *scannedText++ = 'r';
706 }
707 else if (!isprint(*(instr->chars +i)))
708 {
709 *scannedText++ = '?';
710 }
711 else
712 {
713 *scannedText++ = *(instr->chars + i);
714 }
715 }
716 *scannedText = '\0';
717
718 string->len = (ANTLR3_UINT32)(scannedText - string->chars);
719
720 return string;
721 }
722
723 static pANTLR3_STRING
printableUTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING instr)724 printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
725 {
726 pANTLR3_STRING string;
727
728 /* We don't need to be too efficient here, this is mostly for error messages and so on.
729 */
730 pANTLR3_UINT16 scannedText;
731 pANTLR3_UINT16 inText;
732 ANTLR3_UINT32 i;
733 ANTLR3_UINT32 outLen;
734
735 /* Assume we need as much as twice as much space to parse out the control characters
736 */
737 string = factory->newSize(factory, instr->len *2 + 1);
738
739 /* Scan through and replace unprintable (in terms of this routine)
740 * characters
741 */
742 scannedText = (pANTLR3_UINT16)(string->chars);
743 inText = (pANTLR3_UINT16)(instr->chars);
744 outLen = 0;
745
746 for (i = 0; i < instr->len; i++)
747 {
748 if (*(inText + i) == '\n')
749 {
750 *scannedText++ = '\\';
751 *scannedText++ = 'n';
752 outLen += 2;
753 }
754 else if (*(inText + i) == '\r')
755 {
756 *scannedText++ = '\\';
757 *scannedText++ = 'r';
758 outLen += 2;
759 }
760 else if (!isprint(*(inText +i)))
761 {
762 *scannedText++ = '?';
763 outLen++;
764 }
765 else
766 {
767 *scannedText++ = *(inText + i);
768 outLen++;
769 }
770 }
771 *scannedText = '\0';
772
773 string->len = outLen;
774
775 return string;
776 }
777
778 /** Fascist Capitalist Pig function created
779 * to oppress the workers comrade.
780 */
781 static void
closeFactory(pANTLR3_STRING_FACTORY factory)782 closeFactory (pANTLR3_STRING_FACTORY factory)
783 {
784 /* Delete the vector we were tracking the strings with, this will
785 * causes all the allocated strings to be deallocated too
786 */
787 factory->strings->free(factory->strings);
788
789 /* Delete the space for the factory itself
790 */
791 ANTLR3_FREE((void *)factory);
792 }
793
794 static pANTLR3_UINT8
append8(pANTLR3_STRING string,const char * newbit)795 append8 (pANTLR3_STRING string, const char * newbit)
796 {
797 ANTLR3_UINT32 len;
798
799 len = (ANTLR3_UINT32)strlen(newbit);
800
801 if (string->size < (string->len + len + 1))
802 {
803 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
804 string->size = string->len + len + 1;
805 }
806
807 /* Note we copy one more byte than the strlen in order to get the trailing
808 */
809 ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1));
810 string->len += len;
811
812 return string->chars;
813 }
814
815 static pANTLR3_UINT8
appendUTF16_8(pANTLR3_STRING string,const char * newbit)816 appendUTF16_8 (pANTLR3_STRING string, const char * newbit)
817 {
818 ANTLR3_UINT32 len;
819 pANTLR3_UINT16 apPoint;
820 ANTLR3_UINT32 count;
821
822 len = (ANTLR3_UINT32)strlen(newbit);
823
824 if (string->size < (string->len + len + 1))
825 {
826 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(string->len + len + 1))));
827 string->size = string->len + len + 1;
828 }
829
830 apPoint = ((pANTLR3_UINT16)string->chars) + string->len;
831 string->len += len;
832
833 for (count = 0; count < len; count++)
834 {
835 *apPoint++ = *(newbit + count);
836 }
837 *apPoint = '\0';
838
839 return string->chars;
840 }
841
842 static pANTLR3_UINT8
appendUTF16_UTF16(pANTLR3_STRING string,const char * newbit)843 appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit)
844 {
845 ANTLR3_UINT32 len;
846 pANTLR3_UINT16 in;
847
848 /** First, determine the length of the input string
849 */
850 in = (pANTLR3_UINT16)newbit;
851 len = 0;
852
853 while (*in++ != '\0')
854 {
855 len++;
856 }
857
858 if (string->size < (string->len + len + 1))
859 {
860 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) *(string->len + len + 1) ));
861 string->size = string->len + len + 1;
862 }
863
864 /* Note we copy one more byte than the strlen in order to get the trailing delimiter
865 */
866 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1)));
867 string->len += len;
868
869 return string->chars;
870 }
871
872 static pANTLR3_UINT8
set8(pANTLR3_STRING string,const char * chars)873 set8 (pANTLR3_STRING string, const char * chars)
874 {
875 ANTLR3_UINT32 len;
876
877 len = (ANTLR3_UINT32)strlen(chars);
878 if (string->size < len + 1)
879 {
880 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1));
881 string->size = len + 1;
882 }
883
884 /* Note we copy one more byte than the strlen in order to get the trailing '\0'
885 */
886 ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1));
887 string->len = len;
888
889 return string->chars;
890
891 }
892
893 static pANTLR3_UINT8
setUTF16_8(pANTLR3_STRING string,const char * chars)894 setUTF16_8 (pANTLR3_STRING string, const char * chars)
895 {
896 ANTLR3_UINT32 len;
897 ANTLR3_UINT32 count;
898 pANTLR3_UINT16 apPoint;
899
900 len = (ANTLR3_UINT32)strlen(chars);
901 if (string->size < len + 1)
902 {
903 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
904 string->size = len + 1;
905 }
906 apPoint = ((pANTLR3_UINT16)string->chars);
907 string->len = len;
908
909 for (count = 0; count < string->len; count++)
910 {
911 *apPoint++ = *(chars + count);
912 }
913 *apPoint = '\0';
914
915 return string->chars;
916 }
917
918 static pANTLR3_UINT8
setUTF16_UTF16(pANTLR3_STRING string,const char * chars)919 setUTF16_UTF16 (pANTLR3_STRING string, const char * chars)
920 {
921 ANTLR3_UINT32 len;
922 pANTLR3_UINT16 in;
923
924 /** First, determine the length of the input string
925 */
926 in = (pANTLR3_UINT16)chars;
927 len = 0;
928
929 while (*in++ != '\0')
930 {
931 len++;
932 }
933
934 if (string->size < len + 1)
935 {
936 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
937 string->size = len + 1;
938 }
939
940 /* Note we copy one more byte than the strlen in order to get the trailing '\0'
941 */
942 ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16)));
943 string->len = len;
944
945 return string->chars;
946
947 }
948
949 static pANTLR3_UINT8
addc8(pANTLR3_STRING string,ANTLR3_UINT32 c)950 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c)
951 {
952 if (string->size < string->len + 2)
953 {
954 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2));
955 string->size = string->len + 2;
956 }
957 *(string->chars + string->len) = (ANTLR3_UINT8)c;
958 *(string->chars + string->len + 1) = '\0';
959 string->len++;
960
961 return string->chars;
962 }
963
964 static pANTLR3_UINT8
addcUTF16(pANTLR3_STRING string,ANTLR3_UINT32 c)965 addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c)
966 {
967 pANTLR3_UINT16 ptr;
968
969 if (string->size < string->len + 2)
970 {
971 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2)));
972 string->size = string->len + 2;
973 }
974 ptr = (pANTLR3_UINT16)(string->chars);
975
976 *(ptr + string->len) = (ANTLR3_UINT16)c;
977 *(ptr + string->len + 1) = '\0';
978 string->len++;
979
980 return string->chars;
981 }
982
983 static pANTLR3_UINT8
addi8(pANTLR3_STRING string,ANTLR3_INT32 i)984 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i)
985 {
986 ANTLR3_UINT8 newbit[32];
987
988 sprintf((char *)newbit, "%d", i);
989
990 return string->append8(string, (const char *)newbit);
991 }
992 static pANTLR3_UINT8
addiUTF16(pANTLR3_STRING string,ANTLR3_INT32 i)993 addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i)
994 {
995 ANTLR3_UINT8 newbit[32];
996
997 sprintf((char *)newbit, "%d", i);
998
999 return string->append8(string, (const char *)newbit);
1000 }
1001
1002 static pANTLR3_UINT8
inserti8(pANTLR3_STRING string,ANTLR3_UINT32 point,ANTLR3_INT32 i)1003 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
1004 {
1005 ANTLR3_UINT8 newbit[32];
1006
1007 sprintf((char *)newbit, "%d", i);
1008 return string->insert8(string, point, (const char *)newbit);
1009 }
1010 static pANTLR3_UINT8
insertiUTF16(pANTLR3_STRING string,ANTLR3_UINT32 point,ANTLR3_INT32 i)1011 insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
1012 {
1013 ANTLR3_UINT8 newbit[32];
1014
1015 sprintf((char *)newbit, "%d", i);
1016 return string->insert8(string, point, (const char *)newbit);
1017 }
1018
1019 static pANTLR3_UINT8
insert8(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1020 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1021 {
1022 ANTLR3_UINT32 len;
1023
1024 if (point >= string->len)
1025 {
1026 return string->append(string, newbit);
1027 }
1028
1029 len = (ANTLR3_UINT32)strlen(newbit);
1030
1031 if (len == 0)
1032 {
1033 return string->chars;
1034 }
1035
1036 if (string->size < (string->len + len + 1))
1037 {
1038 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
1039 string->size = string->len + len + 1;
1040 }
1041
1042 /* Move the characters we are inserting before, including the delimiter
1043 */
1044 ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1));
1045
1046 /* Note we copy the exact number of bytes
1047 */
1048 ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len));
1049
1050 string->len += len;
1051
1052 return string->chars;
1053 }
1054
1055 static pANTLR3_UINT8
insertUTF16_8(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1056 insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1057 {
1058 ANTLR3_UINT32 len;
1059 ANTLR3_UINT32 count;
1060 pANTLR3_UINT16 inPoint;
1061
1062 if (point >= string->len)
1063 {
1064 return string->append8(string, newbit);
1065 }
1066
1067 len = (ANTLR3_UINT32)strlen(newbit);
1068
1069 if (len == 0)
1070 {
1071 return string->chars;
1072 }
1073
1074 if (string->size < (string->len + len + 1))
1075 {
1076 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
1077 string->size = string->len + len + 1;
1078 }
1079
1080 /* Move the characters we are inserting before, including the delimiter
1081 */
1082 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
1083
1084 string->len += len;
1085
1086 inPoint = ((pANTLR3_UINT16)(string->chars))+point;
1087 for (count = 0; count<len; count++)
1088 {
1089 *(inPoint + count) = (ANTLR3_UINT16)(*(newbit+count));
1090 }
1091
1092 return string->chars;
1093 }
1094
1095 static pANTLR3_UINT8
insertUTF16_UTF16(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1096 insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1097 {
1098 ANTLR3_UINT32 len;
1099 pANTLR3_UINT16 in;
1100
1101 if (point >= string->len)
1102 {
1103 return string->append(string, newbit);
1104 }
1105
1106 /** First, determine the length of the input string
1107 */
1108 in = (pANTLR3_UINT16)newbit;
1109 len = 0;
1110
1111 while (*in++ != '\0')
1112 {
1113 len++;
1114 }
1115
1116 if (len == 0)
1117 {
1118 return string->chars;
1119 }
1120
1121 if (string->size < (string->len + len + 1))
1122 {
1123 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
1124 string->size = string->len + len + 1;
1125 }
1126
1127 /* Move the characters we are inserting before, including the delimiter
1128 */
1129 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
1130
1131
1132 /* Note we copy the exact number of characters
1133 */
1134 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len)));
1135
1136 string->len += len;
1137
1138 return string->chars;
1139 }
1140
setS(pANTLR3_STRING string,pANTLR3_STRING chars)1141 static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars)
1142 {
1143 return string->set(string, (const char *)(chars->chars));
1144 }
1145
appendS(pANTLR3_STRING string,pANTLR3_STRING newbit)1146 static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit)
1147 {
1148 /* We may be passed an empty string, in which case we just return the current pointer
1149 */
1150 if (newbit == NULL || newbit->len == 0 || newbit->size == 0 || newbit->chars == NULL)
1151 {
1152 return string->chars;
1153 }
1154 else
1155 {
1156 return string->append(string, (const char *)(newbit->chars));
1157 }
1158 }
1159
insertS(pANTLR3_STRING string,ANTLR3_UINT32 point,pANTLR3_STRING newbit)1160 static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit)
1161 {
1162 return string->insert(string, point, (const char *)(newbit->chars));
1163 }
1164
1165 /* Function that compares the text of a string to the supplied
1166 * 8 bit character string and returns a result a la strcmp()
1167 */
1168 static ANTLR3_UINT32
compare8(pANTLR3_STRING string,const char * compStr)1169 compare8 (pANTLR3_STRING string, const char * compStr)
1170 {
1171 return strcmp((const char *)(string->chars), compStr);
1172 }
1173
1174 /* Function that compares the text of a string with the supplied character string
1175 * (which is assumed to be in the same encoding as the string itself) and returns a result
1176 * a la strcmp()
1177 */
1178 static ANTLR3_UINT32
compareUTF16_8(pANTLR3_STRING string,const char * compStr)1179 compareUTF16_8 (pANTLR3_STRING string, const char * compStr)
1180 {
1181 pANTLR3_UINT16 ourString;
1182 ANTLR3_UINT32 charDiff;
1183
1184 ourString = (pANTLR3_UINT16)(string->chars);
1185
1186 while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*compStr) != '\0'))
1187 {
1188 charDiff = *ourString - *compStr;
1189 if (charDiff != 0)
1190 {
1191 return charDiff;
1192 }
1193 ourString++;
1194 compStr++;
1195 }
1196
1197 /* At this point, one of the strings was terminated
1198 */
1199 return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
1200
1201 }
1202
1203 /* Function that compares the text of a string with the supplied character string
1204 * (which is assumed to be in the same encoding as the string itself) and returns a result
1205 * a la strcmp()
1206 */
1207 static ANTLR3_UINT32
compareUTF16_UTF16(pANTLR3_STRING string,const char * compStr8)1208 compareUTF16_UTF16 (pANTLR3_STRING string, const char * compStr8)
1209 {
1210 pANTLR3_UINT16 ourString;
1211 pANTLR3_UINT16 compStr;
1212 ANTLR3_UINT32 charDiff;
1213
1214 ourString = (pANTLR3_UINT16)(string->chars);
1215 compStr = (pANTLR3_UINT16)(compStr8);
1216
1217 while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)compStr)) != '\0'))
1218 {
1219 charDiff = *ourString - *compStr;
1220 if (charDiff != 0)
1221 {
1222 return charDiff;
1223 }
1224 ourString++;
1225 compStr++;
1226 }
1227
1228 /* At this point, one of the strings was terminated
1229 */
1230 return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
1231 }
1232
1233 /* Function that compares the text of a string with the supplied string
1234 * (which is assumed to be in the same encoding as the string itself) and returns a result
1235 * a la strcmp()
1236 */
1237 static ANTLR3_UINT32
compareS(pANTLR3_STRING string,pANTLR3_STRING compStr)1238 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr)
1239 {
1240 return string->compare(string, (const char *)compStr->chars);
1241 }
1242
1243
1244 /* Function that returns the character indexed at the supplied
1245 * offset as a 32 bit character.
1246 */
1247 static ANTLR3_UCHAR
charAt8(pANTLR3_STRING string,ANTLR3_UINT32 offset)1248 charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset)
1249 {
1250 if (offset > string->len)
1251 {
1252 return (ANTLR3_UCHAR)'\0';
1253 }
1254 else
1255 {
1256 return (ANTLR3_UCHAR)(*(string->chars + offset));
1257 }
1258 }
1259
1260 /* Function that returns the character indexed at the supplied
1261 * offset as a 32 bit character.
1262 */
1263 static ANTLR3_UCHAR
charAtUTF16(pANTLR3_STRING string,ANTLR3_UINT32 offset)1264 charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset)
1265 {
1266 if (offset > string->len)
1267 {
1268 return (ANTLR3_UCHAR)'\0';
1269 }
1270 else
1271 {
1272 return (ANTLR3_UCHAR)(*((pANTLR3_UINT16)(string->chars) + offset));
1273 }
1274 }
1275
1276 /* Function that returns a substring of the supplied string a la .subString(s,e)
1277 * in java runtimes.
1278 */
1279 static pANTLR3_STRING
subString8(pANTLR3_STRING string,ANTLR3_UINT32 startIndex,ANTLR3_UINT32 endIndex)1280 subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
1281 {
1282 pANTLR3_STRING newStr;
1283
1284 if (endIndex > string->len)
1285 {
1286 endIndex = string->len + 1;
1287 }
1288 newStr = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex);
1289
1290 return newStr;
1291 }
1292
1293 /* Returns a substring of the supplied string a la .subString(s,e)
1294 * in java runtimes.
1295 */
1296 static pANTLR3_STRING
subStringUTF16(pANTLR3_STRING string,ANTLR3_UINT32 startIndex,ANTLR3_UINT32 endIndex)1297 subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
1298 {
1299 pANTLR3_STRING newStr;
1300
1301 if (endIndex > string->len)
1302 {
1303 endIndex = string->len + 1;
1304 }
1305 newStr = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex);
1306
1307 return newStr;
1308 }
1309
1310 /* Function that can convert the characters in the string to an integer
1311 */
1312 static ANTLR3_INT32
toInt32_8(struct ANTLR3_STRING_struct * string)1313 toInt32_8 (struct ANTLR3_STRING_struct * string)
1314 {
1315 return atoi((const char *)(string->chars));
1316 }
1317
1318 /* Function that can convert the characters in the string to an integer
1319 */
1320 static ANTLR3_INT32
toInt32_UTF16(struct ANTLR3_STRING_struct * string)1321 toInt32_UTF16 (struct ANTLR3_STRING_struct * string)
1322 {
1323 pANTLR3_UINT16 input;
1324 ANTLR3_INT32 value;
1325 ANTLR3_BOOLEAN negate;
1326
1327 value = 0;
1328 input = (pANTLR3_UINT16)(string->chars);
1329 negate = ANTLR3_FALSE;
1330
1331 if (*input == (ANTLR3_UCHAR)'-')
1332 {
1333 negate = ANTLR3_TRUE;
1334 input++;
1335 }
1336 else if (*input == (ANTLR3_UCHAR)'+')
1337 {
1338 input++;
1339 }
1340
1341 while (*input != '\0' && isdigit(*input))
1342 {
1343 value = value * 10;
1344 value += ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0');
1345 input++;
1346 }
1347
1348 return negate ? -value : value;
1349 }
1350
1351 /* Function that returns a pointer to an 8 bit version of the string,
1352 * which in this case is just the string as this is
1353 * 8 bit encodiing anyway.
1354 */
to8_8(pANTLR3_STRING string)1355 static pANTLR3_STRING to8_8 (pANTLR3_STRING string)
1356 {
1357 return string;
1358 }
1359
1360 /* Function that returns an 8 bit version of the string,
1361 * which in this case is returning all the UTF16 characters
1362 * narrowed back into 8 bits, with characters that are too large
1363 * replaced with '_'
1364 */
to8_UTF16(pANTLR3_STRING string)1365 static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string)
1366 {
1367 pANTLR3_STRING newStr;
1368 ANTLR3_UINT32 i;
1369
1370 /* Create a new 8 bit string
1371 */
1372 newStr = newRaw8(string->factory);
1373
1374 if (newStr == NULL)
1375 {
1376 return NULL;
1377 }
1378
1379 /* Always add one more byte for a terminator
1380 */
1381 newStr->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1));
1382 newStr->size = string->len + 1;
1383 newStr->len = string->len;
1384
1385 /* Now copy each UTF16 charActer , making it an 8 bit character of
1386 * some sort.
1387 */
1388 for (i=0; i<string->len; i++)
1389 {
1390 ANTLR3_UCHAR c;
1391
1392 c = *(((pANTLR3_UINT16)(string->chars)) + i);
1393
1394 *(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c);
1395 }
1396
1397 /* Terminate
1398 */
1399 *(newStr->chars + newStr->len) = '\0';
1400
1401 return newStr;
1402 }
1403